Compare commits
233 Commits
temp-node-
...
temp-compo
Author | SHA1 | Date | |
---|---|---|---|
4a22faf15a | |||
12a5a60557 | |||
a808c5ae65 | |||
c0995c27a8 | |||
23791db145 | |||
4ee97f129a | |||
9708b1f317 | |||
e9daca77d6 | |||
0cd3d46246 | |||
ba4e5399fc | |||
d04d27b406 | |||
919e513fa8 | |||
cc6a3509a0 | |||
1de922f88c | |||
1b79b4dd30 | |||
c7b237e7d1 | |||
9f9e2dd25d | |||
0d4c53ecfe | |||
0f58cc1594 | |||
719c319055 | |||
c0a5b13b5e | |||
13a28d9e6f | |||
123255be6b | |||
7fc11744e6 | |||
87a36cba1a | |||
00bd631c7c | |||
eadbacdbb0 | |||
70cc80ea1c | |||
439c9b0b84 | |||
877ba6b251 | |||
306e9bff46 | |||
04857cc8ef | |||
0fd0b0643a | |||
8a6f224e26 | |||
bbfa6a92cf | |||
b7dc0346aa | |||
0194e54fd3 | |||
75fbf6f17e | |||
536109b4ec | |||
2012d541ae | |||
![]() |
12c66854bd | ||
c6275da852 | |||
e41dddd29a | |||
cda20a7af8 | |||
539575b585 | |||
335f40ebfa | |||
03f8c1abd0 | |||
18c6314e26 | |||
3c4537cd38 | |||
ac9ec52e9e | |||
4ab8212e1a | |||
9ed19db539 | |||
b5ea3d2c09 | |||
![]() |
ca0450feef | ||
85267ec1ae | |||
68dc970219 | |||
82f0e4948c | |||
ce66075d00 | |||
c73a550e90 | |||
876b250499 | |||
ba95cf6234 | |||
8e2a21e862 | |||
9161993e02 | |||
0e590f9078 | |||
df8f507f41 | |||
bf35dba7fb | |||
b534806ecb | |||
0d68d7baa3 | |||
8113b8391a | |||
76de3ac4ce | |||
11d31addf8 | |||
c148eba16f | |||
b93e947306 | |||
dcac86f4f1 | |||
fd592538d9 | |||
26dac33ce1 | |||
6d2b486e43 | |||
7b5835c793 | |||
432d5bc692 | |||
16e7a7b5b1 | |||
88c02bf826 | |||
0a1a173e57 | |||
6eefcd7d78 | |||
11be9edae2 | |||
c11585a82f | |||
6e268a749f | |||
758f3f7456 | |||
dbe3981b0a | |||
9a0850c8c2 | |||
b1e6e63c22 | |||
55b8fc718a | |||
71cf9f4b3f | |||
b6ad0735a6 | |||
7df6f66ea2 | |||
9824df49c0 | |||
7a66a9f22e | |||
1d49293b80 | |||
08511b1c3d | |||
300403a38b | |||
caac532565 | |||
![]() |
1b22650fbf | ||
2b66b372bc | |||
2dace5f3ef | |||
92c449776d | |||
18959c502d | |||
c38d2513c5 | |||
d1ade756a9 | |||
655ce5dc3e | |||
c8d59b60b5 | |||
3391a2ef1d | |||
ffa20de050 | |||
65b5023df4 | |||
076d797bda | |||
301ee97b93 | |||
f2c896a9ad | |||
2b6f2072f1 | |||
4fd7ce321d | |||
0bc4056455 | |||
4a3464050c | |||
b536605e78 | |||
4882208633 | |||
cc636db8f2 | |||
f806bd8261 | |||
fc4886a314 | |||
326bd76d3b | |||
a80a2f07b7 | |||
76238af213 | |||
8ca7250982 | |||
dc4c2815a6 | |||
![]() |
e62ce9e08e | ||
23d9953c80 | |||
ce6a24976a | |||
f2b86471ea | |||
37003cbbc1 | |||
87a3cb3bff | |||
8c55333a8e | |||
fc6228bd85 | |||
93e92ac126 | |||
357acd1d50 | |||
e43fcc014a | |||
3c3669894f | |||
6e48a51af7 | |||
606271966e | |||
e0e7a5522f | |||
e7274dedc4 | |||
cc8fa3ee90 | |||
dfdc9c6219 | |||
b6195f6664 | |||
272a38e0c2 | |||
c9af025936 | |||
1e5cfebf66 | |||
57272d598d | |||
ee79bde54d | |||
c4dca65228 | |||
3b1a243039 | |||
2305f270c5 | |||
adc084a3e9 | |||
ae86584404 | |||
98fe05fb5b | |||
1833ebea31 | |||
bf354cde96 | |||
64d07ffcc3 | |||
17e2d54a3d | |||
a812fe8ceb | |||
2f52f5683c | |||
06c3bac23b | |||
fb34cdc7da | |||
0998856c92 | |||
8fc97a871f | |||
5d5a753d96 | |||
e1e75bd62c | |||
a8d6a86981 | |||
c206fa9627 | |||
f2da98d816 | |||
d3afe0c126 | |||
e863e05697 | |||
b57b4dfab1 | |||
f49dff97d4 | |||
74f45ed9c5 | |||
![]() |
c5c94e3eae | ||
34cf33eb12 | |||
![]() |
54927caf4f | ||
12e8c78353 | |||
1476d35870 | |||
1fb364491b | |||
eacdc0ab4a | |||
dc30a9087c | |||
9e456ca695 | |||
aae96176e8 | |||
e1952c541a | |||
![]() |
fb820496f5 | ||
f497e471f8 | |||
3be4cb5b27 | |||
56ce51d1f7 | |||
1c7ce7e0b4 | |||
271210126e | |||
f9acf21063 | |||
bdc66c9569 | |||
eb3a8fb4e8 | |||
798e593002 | |||
21c29480c3 | |||
928d644895 | |||
af0b7925db | |||
7843cd63d8 | |||
ae4b45145c | |||
2e6c6426d3 | |||
bdb7d262aa | |||
b559fb178e | |||
4485dc483c | |||
![]() |
3a59ddb292 | ||
66fe1c79f3 | |||
e2df5c8a56 | |||
f3274bfa70 | |||
4569d9c0c3 | |||
![]() |
33dc584b37 | ||
![]() |
213554f24a | ||
8d60ac2bb0 | |||
![]() |
1a134c4c30 | ||
a754e35198 | |||
ac582056e2 | |||
![]() |
be70827e6f | ||
827e30bd15 | |||
6cff1d6480 | |||
dd3391dd99 | |||
4389067929 | |||
4ee2d9df42 | |||
42ce88f15c | |||
628fab696c | |||
5d42ea0369 | |||
d754d85845 | |||
1a72744ddc | |||
779ea49af7 | |||
07c5d02a11 |
@@ -180,6 +180,7 @@ ForEachMacros:
|
||||
- CTX_DATA_BEGIN_WITH_ID
|
||||
- DEG_OBJECT_ITER_BEGIN
|
||||
- DEG_OBJECT_ITER_FOR_RENDER_ENGINE_BEGIN
|
||||
- DRW_ENABLED_ENGINE_ITER
|
||||
- DRIVER_TARGETS_LOOPER_BEGIN
|
||||
- DRIVER_TARGETS_USED_LOOPER_BEGIN
|
||||
- FOREACH_BASE_IN_EDIT_MODE_BEGIN
|
||||
|
@@ -406,6 +406,7 @@ mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
|
||||
set(CYCLES_TEST_DEVICES CPU CACHE STRING "Run regression tests on the specified device types (CPU CUDA OPTIX)" )
|
||||
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
|
||||
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
|
||||
option(WITH_CYCLES_HIP_BINARIES "Build Cycles HIP binaries" OFF)
|
||||
unset(PLATFORM_DEFAULT)
|
||||
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
|
||||
option(WITH_CYCLES_DEBUG_NAN "Build Cycles with additional asserts for detecting NaNs and invalid values" OFF)
|
||||
|
81
build_files/cmake/Modules/FindHIP.cmake
Normal file
81
build_files/cmake/Modules/FindHIP.cmake
Normal file
@@ -0,0 +1,81 @@
|
||||
# - Find HIP compiler
|
||||
#
|
||||
# This module defines
|
||||
# HIP_HIPCC_EXECUTABLE, the full path to the hipcc executable
|
||||
# HIP_VERSION, the HIP compiler version
|
||||
#
|
||||
# HIP_FOUND, if the HIP toolkit is found.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2021 Blender Foundation.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD 3-Clause License,
|
||||
# see accompanying file BSD-3-Clause-license.txt for details.
|
||||
#=============================================================================
|
||||
|
||||
# If HIP_ROOT_DIR was defined in the environment, use it.
|
||||
if(NOT HIP_ROOT_DIR AND NOT $ENV{HIP_ROOT_DIR} STREQUAL "")
|
||||
set(HIP_ROOT_DIR $ENV{HIP_ROOT_DIR})
|
||||
endif()
|
||||
|
||||
set(_hip_SEARCH_DIRS
|
||||
${HIP_ROOT_DIR}
|
||||
)
|
||||
|
||||
find_program(HIP_HIPCC_EXECUTABLE
|
||||
NAMES
|
||||
hipcc
|
||||
HINTS
|
||||
${_hip_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
bin
|
||||
)
|
||||
|
||||
if(HIP_HIPCC_EXECUTABLE AND NOT EXISTS ${HIP_HIPCC_EXECUTABLE})
|
||||
message(WARNING "Cached or directly specified hipcc executable does not exist.")
|
||||
set(HIP_FOUND FALSE)
|
||||
elseif(HIP_HIPCC_EXECUTABLE)
|
||||
set(HIP_FOUND TRUE)
|
||||
|
||||
set(HIP_VERSION_MAJOR 0)
|
||||
set(HIP_VERSION_MINOR 0)
|
||||
set(HIP_VERSION_PATCH 0)
|
||||
|
||||
# Get version from the output.
|
||||
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} --version
|
||||
OUTPUT_VARIABLE HIP_VERSION_RAW
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# Parse parts.
|
||||
if(HIP_VERSION_RAW MATCHES "HIP version: .*")
|
||||
# Strip the HIP prefix and get list of individual version components.
|
||||
string(REGEX REPLACE
|
||||
".*HIP version: ([.0-9]+).*" "\\1"
|
||||
HIP_SEMANTIC_VERSION "${HIP_VERSION_RAW}")
|
||||
string(REPLACE "." ";" HIP_VERSION_PARTS "${HIP_SEMANTIC_VERSION}")
|
||||
list(LENGTH HIP_VERSION_PARTS NUM_HIP_VERSION_PARTS)
|
||||
|
||||
# Extract components into corresponding variables.
|
||||
if(NUM_HIP_VERSION_PARTS GREATER 0)
|
||||
list(GET HIP_VERSION_PARTS 0 HIP_VERSION_MAJOR)
|
||||
endif()
|
||||
if(NUM_HIP_VERSION_PARTS GREATER 1)
|
||||
list(GET HIP_VERSION_PARTS 1 HIP_VERSION_MINOR)
|
||||
endif()
|
||||
if(NUM_HIP_VERSION_PARTS GREATER 2)
|
||||
list(GET HIP_VERSION_PARTS 2 HIP_VERSION_PATCH)
|
||||
endif()
|
||||
|
||||
# Unset temp variables.
|
||||
unset(NUM_HIP_VERSION_PARTS)
|
||||
unset(HIP_SEMANTIC_VERSION)
|
||||
unset(HIP_VERSION_PARTS)
|
||||
endif()
|
||||
|
||||
# Construct full semantic version.
|
||||
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
|
||||
unset(HIP_VERSION_RAW)
|
||||
else()
|
||||
set(HIP_FOUND FALSE)
|
||||
endif()
|
@@ -24,6 +24,7 @@ import project_source_info
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
@@ -35,7 +36,6 @@ USE_QUIET = (os.environ.get("QUIET", None) is not None)
|
||||
|
||||
CHECKER_IGNORE_PREFIX = [
|
||||
"extern",
|
||||
"intern/moto",
|
||||
]
|
||||
|
||||
CHECKER_BIN = "cppcheck"
|
||||
@@ -47,13 +47,19 @@ CHECKER_ARGS = [
|
||||
"--max-configs=1", # speeds up execution
|
||||
# "--check-config", # when includes are missing
|
||||
"--enable=all", # if you want sixty hundred pedantic suggestions
|
||||
|
||||
# Quiet output, otherwise all defines/includes are printed (overly verbose).
|
||||
# Only enable this for troubleshooting (if defines are not set as expected for example).
|
||||
"--quiet",
|
||||
|
||||
# NOTE: `--cppcheck-build-dir=<dir>` is added later as a temporary directory.
|
||||
]
|
||||
|
||||
if USE_QUIET:
|
||||
CHECKER_ARGS.append("--quiet")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
def cppcheck() -> None:
|
||||
source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
|
||||
source_defines = project_source_info.build_defines_as_args()
|
||||
|
||||
@@ -78,7 +84,10 @@ def main() -> None:
|
||||
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
|
||||
|
||||
sys.stdout.flush()
|
||||
sys.stdout.write("%s " % percent_str)
|
||||
sys.stdout.write("%s %s\n" % (
|
||||
percent_str,
|
||||
os.path.relpath(c, project_source_info.SOURCE_DIR)
|
||||
))
|
||||
|
||||
return subprocess.Popen(cmd)
|
||||
|
||||
@@ -90,5 +99,11 @@ def main() -> None:
|
||||
print("Finished!")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
CHECKER_ARGS.append("--cppcheck-build-dir=" + temp_dir)
|
||||
cppcheck()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@@ -243,7 +243,9 @@ def build_defines_as_args() -> List[str]:
|
||||
# use this module.
|
||||
def queue_processes(
|
||||
process_funcs: Sequence[Tuple[Callable[..., subprocess.Popen[Any]], Tuple[Any, ...]]],
|
||||
*,
|
||||
job_total: int =-1,
|
||||
sleep: float = 0.1,
|
||||
) -> None:
|
||||
""" Takes a list of function arg pairs, each function must return a process
|
||||
"""
|
||||
@@ -271,14 +273,20 @@ def queue_processes(
|
||||
|
||||
if len(processes) <= job_total:
|
||||
break
|
||||
else:
|
||||
time.sleep(0.1)
|
||||
time.sleep(sleep)
|
||||
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
|
||||
processes.append(func(*args))
|
||||
|
||||
# Don't return until all jobs have finished.
|
||||
while 1:
|
||||
processes[:] = [p for p in processes if p.poll() is None]
|
||||
if not processes:
|
||||
break
|
||||
time.sleep(sleep)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if not os.path.exists(join(CMAKE_DIR, "CMakeCache.txt")):
|
||||
|
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
This method enables conversions between Local and Pose space for bones in
|
||||
the middle of updating the armature without having to update dependencies
|
||||
after each change, by manually carrying updated matrices in a recursive walk.
|
||||
"""
|
||||
|
||||
def set_pose_matrices(obj, matrix_map):
|
||||
"Assign pose space matrices of all bones at once, ignoring constraints."
|
||||
|
||||
def rec(pbone, parent_matrix):
|
||||
matrix = matrix_map[pbone.name]
|
||||
|
||||
## Instead of:
|
||||
# pbone.matrix = matrix
|
||||
# bpy.context.view_layer.update()
|
||||
|
||||
# Compute and assign local matrix, using the new parent matrix
|
||||
if pbone.parent:
|
||||
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
|
||||
matrix,
|
||||
pbone.bone.matrix_local,
|
||||
parent_matrix=parent_matrix,
|
||||
parent_matrix_local=pbone.parent.bone.matrix_local,
|
||||
invert=True
|
||||
)
|
||||
else:
|
||||
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
|
||||
matrix,
|
||||
pbone.bone.matrix_local,
|
||||
invert=True
|
||||
)
|
||||
|
||||
# Recursively process children, passing the new matrix through
|
||||
for child in pbone.children:
|
||||
rec(child, matrix)
|
||||
|
||||
# Scan all bone trees from their roots
|
||||
for pbone in obj.pose.bones:
|
||||
if not pbone.parent:
|
||||
rec(pbone, None)
|
@@ -1101,6 +1101,7 @@ context_type_map = {
|
||||
"scene": ("Scene", False),
|
||||
"sculpt_object": ("Object", False),
|
||||
"selectable_objects": ("Object", True),
|
||||
"selected_asset_files": ("FileSelectEntry", True),
|
||||
"selected_bones": ("EditBone", True),
|
||||
"selected_editable_bones": ("EditBone", True),
|
||||
"selected_editable_fcurves": ("FCurve", True),
|
||||
|
1
extern/cuew/include/cuew.h
vendored
1
extern/cuew/include/cuew.h
vendored
@@ -609,6 +609,7 @@ typedef enum cudaError_enum {
|
||||
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
|
||||
CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
|
||||
CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
|
||||
CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
|
||||
CUDA_ERROR_INVALID_SOURCE = 300,
|
||||
CUDA_ERROR_FILE_NOT_FOUND = 301,
|
||||
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
|
||||
|
1
extern/cuew/src/cuew.c
vendored
1
extern/cuew/src/cuew.c
vendored
@@ -736,6 +736,7 @@ const char *cuewErrorString(CUresult result) {
|
||||
case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
|
||||
case CUDA_ERROR_NVLINK_UNCORRECTABLE: return "Nvlink uncorrectable";
|
||||
case CUDA_ERROR_JIT_COMPILER_NOT_FOUND: return "Jit compiler not found";
|
||||
case CUDA_ERROR_UNSUPPORTED_PTX_VERSION: return "Unsupported PTX version";
|
||||
case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
|
||||
case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
|
||||
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
|
||||
|
2
extern/smaa_areatex/smaa_areatex.cpp
vendored
2
extern/smaa_areatex/smaa_areatex.cpp
vendored
@@ -574,6 +574,8 @@ Dbl2 AreaDiag::area(Dbl2 p1, Dbl2 p2, int left)
|
||||
Dbl2 d = p2 - p1;
|
||||
if (d.x == 0.0)
|
||||
return Dbl2(0.0, 1.0);
|
||||
if (d.y == 0.0)
|
||||
return Dbl2(1.0, 0.0);
|
||||
|
||||
double x1 = (double)(1 + left);
|
||||
double x2 = x1 + 1.0;
|
||||
|
@@ -64,6 +64,8 @@ if(WITH_CYCLES_STANDALONE)
|
||||
cycles_standalone.cpp
|
||||
cycles_xml.cpp
|
||||
cycles_xml.h
|
||||
oiio_output_driver.cpp
|
||||
oiio_output_driver.h
|
||||
)
|
||||
add_executable(cycles ${SRC} ${INC} ${INC_SYS})
|
||||
unset(SRC)
|
||||
@@ -73,7 +75,7 @@ if(WITH_CYCLES_STANDALONE)
|
||||
|
||||
if(APPLE)
|
||||
if(WITH_OPENCOLORIO)
|
||||
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit")
|
||||
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit -framework Carbon")
|
||||
endif()
|
||||
if(WITH_OPENIMAGEDENOISE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
|
||||
# OpenImageDenoise uses BNNS from the Accelerate framework.
|
||||
|
@@ -36,6 +36,9 @@
|
||||
#include "util/util_unique_ptr.h"
|
||||
#include "util/util_version.h"
|
||||
|
||||
#include "app/cycles_xml.h"
|
||||
#include "app/oiio_output_driver.h"
|
||||
|
||||
#ifdef WITH_CYCLES_STANDALONE_GUI
|
||||
# include "util/util_view.h"
|
||||
#endif
|
||||
@@ -54,6 +57,7 @@ struct Options {
|
||||
bool quiet;
|
||||
bool show_help, interactive, pause;
|
||||
string output_filepath;
|
||||
string output_pass;
|
||||
} options;
|
||||
|
||||
static void session_print(const string &str)
|
||||
@@ -89,30 +93,6 @@ static void session_print_status()
|
||||
session_print(status);
|
||||
}
|
||||
|
||||
static bool write_render(const uchar *pixels, int w, int h, int channels)
|
||||
{
|
||||
string msg = string_printf("Writing image %s", options.output_path.c_str());
|
||||
session_print(msg);
|
||||
|
||||
unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
|
||||
if (!out) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ImageSpec spec(w, h, channels, TypeDesc::UINT8);
|
||||
if (!out->open(options.output_path, spec)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* conversion for different top/bottom convention */
|
||||
out->write_image(
|
||||
TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
|
||||
|
||||
out->close();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static BufferParams &session_buffer_params()
|
||||
{
|
||||
static BufferParams buffer_params;
|
||||
@@ -147,9 +127,14 @@ static void scene_init()
|
||||
|
||||
static void session_init()
|
||||
{
|
||||
options.session_params.write_render_cb = write_render;
|
||||
options.output_pass = "combined";
|
||||
options.session = new Session(options.session_params, options.scene_params);
|
||||
|
||||
if (!options.output_filepath.empty()) {
|
||||
options.session->set_output_driver(make_unique<OIIOOutputDriver>(
|
||||
options.output_filepath, options.output_pass, session_print));
|
||||
}
|
||||
|
||||
if (options.session_params.background && !options.quiet)
|
||||
options.session->progress.set_update_callback(function_bind(&session_print_status));
|
||||
#ifdef WITH_CYCLES_STANDALONE_GUI
|
||||
@@ -160,6 +145,11 @@ static void session_init()
|
||||
/* load scene */
|
||||
scene_init();
|
||||
|
||||
/* add pass for output. */
|
||||
Pass *pass = options.scene->create_node<Pass>();
|
||||
pass->set_name(ustring(options.output_pass.c_str()));
|
||||
pass->set_type(PASS_COMBINED);
|
||||
|
||||
options.session->reset(options.session_params, session_buffer_params());
|
||||
options.session->start();
|
||||
}
|
||||
|
@@ -333,6 +333,7 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
|
||||
}
|
||||
|
||||
snode = (ShaderNode *)node_type->create(node_type);
|
||||
snode->set_owner(graph);
|
||||
}
|
||||
|
||||
xml_read_node(graph_reader, snode, node);
|
||||
|
71
intern/cycles/app/oiio_output_driver.cpp
Normal file
71
intern/cycles/app/oiio_output_driver.cpp
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "app/oiio_output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
OIIOOutputDriver::OIIOOutputDriver(const string_view filepath,
|
||||
const string_view pass,
|
||||
LogFunction log)
|
||||
: filepath_(filepath), pass_(pass), log_(log)
|
||||
{
|
||||
}
|
||||
|
||||
OIIOOutputDriver::~OIIOOutputDriver()
|
||||
{
|
||||
}
|
||||
|
||||
void OIIOOutputDriver::write_render_tile(const Tile &tile)
|
||||
{
|
||||
/* Only write the full buffer, no intermediate tiles. */
|
||||
if (!(tile.size == tile.full_size)) {
|
||||
return;
|
||||
}
|
||||
|
||||
log_(string_printf("Writing image %s", filepath_.c_str()));
|
||||
|
||||
unique_ptr<ImageOutput> image_output(ImageOutput::create(filepath_));
|
||||
if (image_output == nullptr) {
|
||||
log_("Failed to create image file");
|
||||
return;
|
||||
}
|
||||
|
||||
const int width = tile.size.x;
|
||||
const int height = tile.size.y;
|
||||
|
||||
ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
|
||||
if (!image_output->open(filepath_, spec)) {
|
||||
log_("Failed to create image file");
|
||||
return;
|
||||
}
|
||||
|
||||
vector<float> pixels(width * height * 4);
|
||||
if (!tile.get_pass_pixels(pass_, 4, pixels.data())) {
|
||||
log_("Failed to read render pass pixels");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Manipulate offset and stride to convert from bottom-up to top-down convention. */
|
||||
image_output->write_image(TypeDesc::FLOAT,
|
||||
pixels.data() + (height - 1) * width * 4,
|
||||
AutoStride,
|
||||
-width * 4 * sizeof(float),
|
||||
AutoStride);
|
||||
image_output->close();
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
42
intern/cycles/app/oiio_output_driver.h
Normal file
42
intern/cycles/app/oiio_output_driver.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "render/output_driver.h"
|
||||
|
||||
#include "util/util_function.h"
|
||||
#include "util/util_image.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
#include "util/util_vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class OIIOOutputDriver : public OutputDriver {
|
||||
public:
|
||||
typedef function<void(const string &)> LogFunction;
|
||||
|
||||
OIIOOutputDriver(const string_view filepath, const string_view pass, LogFunction log);
|
||||
virtual ~OIIOOutputDriver();
|
||||
|
||||
void write_render_tile(const Tile &tile) override;
|
||||
|
||||
protected:
|
||||
string filepath_;
|
||||
string pass_;
|
||||
LogFunction log_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -31,13 +31,14 @@ set(INC_SYS
|
||||
set(SRC
|
||||
blender_camera.cpp
|
||||
blender_device.cpp
|
||||
blender_display_driver.cpp
|
||||
blender_image.cpp
|
||||
blender_geometry.cpp
|
||||
blender_gpu_display.cpp
|
||||
blender_light.cpp
|
||||
blender_mesh.cpp
|
||||
blender_object.cpp
|
||||
blender_object_cull.cpp
|
||||
blender_output_driver.cpp
|
||||
blender_particles.cpp
|
||||
blender_curves.cpp
|
||||
blender_logging.cpp
|
||||
@@ -51,10 +52,11 @@ set(SRC
|
||||
|
||||
CCL_api.h
|
||||
blender_device.h
|
||||
blender_gpu_display.h
|
||||
blender_display_driver.h
|
||||
blender_id_map.h
|
||||
blender_image.h
|
||||
blender_object_cull.h
|
||||
blender_output_driver.h
|
||||
blender_sync.h
|
||||
blender_session.h
|
||||
blender_texture.h
|
||||
|
@@ -211,7 +211,6 @@ def list_render_passes(scene, srl):
|
||||
if crl.use_pass_shadow_catcher: yield ("Shadow Catcher", "RGB", 'COLOR')
|
||||
|
||||
# Debug passes.
|
||||
if crl.pass_debug_render_time: yield ("Debug Render Time", "X", 'VALUE')
|
||||
if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
|
||||
|
||||
# Cryptomatte passes.
|
||||
|
@@ -1197,12 +1197,6 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
|
||||
|
||||
class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
|
||||
|
||||
pass_debug_render_time: BoolProperty(
|
||||
name="Debug Render Time",
|
||||
description="Render time in milliseconds per sample and pixel",
|
||||
default=False,
|
||||
update=update_render_passes,
|
||||
)
|
||||
pass_debug_sample_count: BoolProperty(
|
||||
name="Debug Sample Count",
|
||||
description="Number of samples/camera rays per pixel",
|
||||
|
@@ -792,7 +792,6 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
|
||||
col.prop(view_layer, "use_pass_material_index")
|
||||
|
||||
col = layout.column(heading="Debug", align=True)
|
||||
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
|
||||
col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
|
||||
|
||||
layout.prop(view_layer, "pass_alpha_threshold")
|
||||
|
@@ -927,6 +927,9 @@ BufferParams BlenderSync::get_buffer_params(
|
||||
params.height = height;
|
||||
}
|
||||
|
||||
params.window_width = params.width;
|
||||
params.window_height = params.height;
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
|
@@ -14,7 +14,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "blender/blender_gpu_display.h"
|
||||
#include "blender/blender_display_driver.h"
|
||||
|
||||
#include "device/device.h"
|
||||
#include "util/util_logging.h"
|
||||
@@ -273,17 +273,17 @@ uint BlenderDisplaySpaceShader::get_shader_program()
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* BlenderGPUDisplay.
|
||||
* BlenderDisplayDriver.
|
||||
*/
|
||||
|
||||
BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene)
|
||||
BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
|
||||
: b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
|
||||
{
|
||||
/* Create context while on the main thread. */
|
||||
gl_context_create();
|
||||
}
|
||||
|
||||
BlenderGPUDisplay::~BlenderGPUDisplay()
|
||||
BlenderDisplayDriver::~BlenderDisplayDriver()
|
||||
{
|
||||
gl_resources_destroy();
|
||||
}
|
||||
@@ -292,19 +292,18 @@ BlenderGPUDisplay::~BlenderGPUDisplay()
|
||||
* Update procedure.
|
||||
*/
|
||||
|
||||
bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams ¶ms,
|
||||
bool BlenderDisplayDriver::update_begin(const Params ¶ms,
|
||||
int texture_width,
|
||||
int texture_height)
|
||||
{
|
||||
/* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
|
||||
/* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
|
||||
* the texture does not happen at the same time. This is achieved indirectly.
|
||||
*
|
||||
* When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
|
||||
* This same lock is also held when do_draw() is called, which together ensure mutual
|
||||
* exclusion.
|
||||
*
|
||||
* This locking is not performed at the GPU display level, because that would cause lock
|
||||
* inversion. */
|
||||
* This locking is not performed on the Cycles side, because that would cause lock inversion. */
|
||||
if (!gl_context_enable()) {
|
||||
return false;
|
||||
}
|
||||
@@ -361,7 +360,7 @@ bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams ¶ms,
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::do_update_end()
|
||||
void BlenderDisplayDriver::update_end()
|
||||
{
|
||||
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
glFlush();
|
||||
@@ -369,54 +368,18 @@ void BlenderGPUDisplay::do_update_end()
|
||||
gl_context_disable();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture update from CPU buffer.
|
||||
*/
|
||||
|
||||
void BlenderGPUDisplay::do_copy_pixels_to_texture(
|
||||
const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
|
||||
{
|
||||
/* This call copies pixels to a Pixel Buffer Object (PBO) which is much cheaper from CPU time
|
||||
* point of view than to copy data directly to the OpenGL texture.
|
||||
*
|
||||
* The possible downside of this approach is that it might require a higher peak memory when
|
||||
* doing partial updates of the texture (although, in practice even partial updates might peak
|
||||
* with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
|
||||
|
||||
half4 *mapped_rgba_pixels = map_texture_buffer();
|
||||
if (!mapped_rgba_pixels) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width &&
|
||||
pixels_height == texture_.height) {
|
||||
const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height;
|
||||
memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
|
||||
}
|
||||
else {
|
||||
const half4 *rgba_row = rgba_pixels;
|
||||
half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x;
|
||||
for (int y = 0; y < pixels_height;
|
||||
++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) {
|
||||
memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
|
||||
}
|
||||
}
|
||||
|
||||
unmap_texture_buffer();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture buffer mapping.
|
||||
*/
|
||||
|
||||
half4 *BlenderGPUDisplay::do_map_texture_buffer()
|
||||
half4 *BlenderDisplayDriver::map_texture_buffer()
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
|
||||
|
||||
half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
|
||||
glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
|
||||
if (!mapped_rgba_pixels) {
|
||||
LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object.";
|
||||
LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
|
||||
}
|
||||
|
||||
if (texture_.need_clear) {
|
||||
@@ -431,7 +394,7 @@ half4 *BlenderGPUDisplay::do_map_texture_buffer()
|
||||
return mapped_rgba_pixels;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::do_unmap_texture_buffer()
|
||||
void BlenderDisplayDriver::unmap_texture_buffer()
|
||||
{
|
||||
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||
|
||||
@@ -442,9 +405,9 @@ void BlenderGPUDisplay::do_unmap_texture_buffer()
|
||||
* Graphics interoperability.
|
||||
*/
|
||||
|
||||
DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
|
||||
BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
|
||||
{
|
||||
DeviceGraphicsInteropDestination interop_dst;
|
||||
GraphicsInterop interop_dst;
|
||||
|
||||
interop_dst.buffer_width = texture_.buffer_width;
|
||||
interop_dst.buffer_height = texture_.buffer_height;
|
||||
@@ -456,12 +419,12 @@ DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
|
||||
return interop_dst;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::graphics_interop_activate()
|
||||
void BlenderDisplayDriver::graphics_interop_activate()
|
||||
{
|
||||
gl_context_enable();
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::graphics_interop_deactivate()
|
||||
void BlenderDisplayDriver::graphics_interop_deactivate()
|
||||
{
|
||||
gl_context_disable();
|
||||
}
|
||||
@@ -470,17 +433,17 @@ void BlenderGPUDisplay::graphics_interop_deactivate()
|
||||
* Drawing.
|
||||
*/
|
||||
|
||||
void BlenderGPUDisplay::clear()
|
||||
void BlenderDisplayDriver::clear()
|
||||
{
|
||||
texture_.need_clear = true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y)
|
||||
void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
|
||||
{
|
||||
zoom_ = make_float2(zoom_x, zoom_y);
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::do_draw(const GPUDisplayParams ¶ms)
|
||||
void BlenderDisplayDriver::draw(const Params ¶ms)
|
||||
{
|
||||
/* See do_update_begin() for why no locking is required here. */
|
||||
const bool transparent = true; // TODO(sergey): Derive this from Film.
|
||||
@@ -497,7 +460,7 @@ void BlenderGPUDisplay::do_draw(const GPUDisplayParams ¶ms)
|
||||
/* Texture is requested to be cleared and was not yet cleared.
|
||||
*
|
||||
* Do early return which should be equivalent of drawing all-zero texture.
|
||||
* Watchout for the lock though so that the clear happening during update is properly
|
||||
* Watch out for the lock though so that the clear happening during update is properly
|
||||
* synchronized here. */
|
||||
gl_context_mutex_.unlock();
|
||||
return;
|
||||
@@ -584,7 +547,7 @@ void BlenderGPUDisplay::do_draw(const GPUDisplayParams ¶ms)
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_context_create()
|
||||
void BlenderDisplayDriver::gl_context_create()
|
||||
{
|
||||
/* When rendering in viewport there is no render context available via engine.
|
||||
* Check whether own context is to be created here.
|
||||
@@ -613,7 +576,7 @@ void BlenderGPUDisplay::gl_context_create()
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderGPUDisplay::gl_context_enable()
|
||||
bool BlenderDisplayDriver::gl_context_enable()
|
||||
{
|
||||
if (use_gl_context_) {
|
||||
if (!gl_context_) {
|
||||
@@ -628,7 +591,7 @@ bool BlenderGPUDisplay::gl_context_enable()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_context_disable()
|
||||
void BlenderDisplayDriver::gl_context_disable()
|
||||
{
|
||||
if (use_gl_context_) {
|
||||
if (gl_context_) {
|
||||
@@ -641,7 +604,7 @@ void BlenderGPUDisplay::gl_context_disable()
|
||||
RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_context_dispose()
|
||||
void BlenderDisplayDriver::gl_context_dispose()
|
||||
{
|
||||
if (gl_context_) {
|
||||
const bool drw_state = DRW_opengl_context_release();
|
||||
@@ -653,7 +616,7 @@ void BlenderGPUDisplay::gl_context_dispose()
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderGPUDisplay::gl_draw_resources_ensure()
|
||||
bool BlenderDisplayDriver::gl_draw_resources_ensure()
|
||||
{
|
||||
if (!texture_.gl_id) {
|
||||
/* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
|
||||
@@ -680,7 +643,7 @@ bool BlenderGPUDisplay::gl_draw_resources_ensure()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_resources_destroy()
|
||||
void BlenderDisplayDriver::gl_resources_destroy()
|
||||
{
|
||||
gl_context_enable();
|
||||
|
||||
@@ -703,7 +666,7 @@ void BlenderGPUDisplay::gl_resources_destroy()
|
||||
gl_context_dispose();
|
||||
}
|
||||
|
||||
bool BlenderGPUDisplay::gl_texture_resources_ensure()
|
||||
bool BlenderDisplayDriver::gl_texture_resources_ensure()
|
||||
{
|
||||
if (texture_.creation_attempted) {
|
||||
return texture_.is_created;
|
||||
@@ -740,7 +703,7 @@ bool BlenderGPUDisplay::gl_texture_resources_ensure()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::texture_update_if_needed()
|
||||
void BlenderDisplayDriver::texture_update_if_needed()
|
||||
{
|
||||
if (!texture_.need_update) {
|
||||
return;
|
||||
@@ -754,7 +717,7 @@ void BlenderGPUDisplay::texture_update_if_needed()
|
||||
texture_.need_update = false;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams ¶ms)
|
||||
void BlenderDisplayDriver::vertex_buffer_update(const Params ¶ms)
|
||||
{
|
||||
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
|
||||
* rendered. */
|
||||
@@ -767,23 +730,23 @@ void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams ¶ms)
|
||||
|
||||
vpointer[0] = 0.0f;
|
||||
vpointer[1] = 0.0f;
|
||||
vpointer[2] = params.offset.x;
|
||||
vpointer[3] = params.offset.y;
|
||||
vpointer[2] = params.full_offset.x;
|
||||
vpointer[3] = params.full_offset.y;
|
||||
|
||||
vpointer[4] = 1.0f;
|
||||
vpointer[5] = 0.0f;
|
||||
vpointer[6] = (float)params.size.x + params.offset.x;
|
||||
vpointer[7] = params.offset.y;
|
||||
vpointer[6] = (float)params.size.x + params.full_offset.x;
|
||||
vpointer[7] = params.full_offset.y;
|
||||
|
||||
vpointer[8] = 1.0f;
|
||||
vpointer[9] = 1.0f;
|
||||
vpointer[10] = (float)params.size.x + params.offset.x;
|
||||
vpointer[11] = (float)params.size.y + params.offset.y;
|
||||
vpointer[10] = (float)params.size.x + params.full_offset.x;
|
||||
vpointer[11] = (float)params.size.y + params.full_offset.y;
|
||||
|
||||
vpointer[12] = 0.0f;
|
||||
vpointer[13] = 1.0f;
|
||||
vpointer[14] = params.offset.x;
|
||||
vpointer[15] = (float)params.size.y + params.offset.y;
|
||||
vpointer[14] = params.full_offset.x;
|
||||
vpointer[15] = (float)params.size.y + params.full_offset.y;
|
||||
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
}
|
@@ -22,12 +22,14 @@
|
||||
|
||||
#include "RNA_blender_cpp.h"
|
||||
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_thread.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Base class of shader used for GPU display rendering. */
|
||||
/* Base class of shader used for display driver rendering. */
|
||||
class BlenderDisplayShader {
|
||||
public:
|
||||
static constexpr const char *position_attribute_name = "pos";
|
||||
@@ -96,11 +98,11 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
|
||||
uint shader_program_ = 0;
|
||||
};
|
||||
|
||||
/* GPU display implementation which is specific for Blender viewport integration. */
|
||||
class BlenderGPUDisplay : public GPUDisplay {
|
||||
/* Display driver implementation which is specific for Blender viewport integration. */
|
||||
class BlenderDisplayDriver : public DisplayDriver {
|
||||
public:
|
||||
BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene);
|
||||
~BlenderGPUDisplay();
|
||||
BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
|
||||
~BlenderDisplayDriver();
|
||||
|
||||
virtual void graphics_interop_activate() override;
|
||||
virtual void graphics_interop_deactivate() override;
|
||||
@@ -110,22 +112,15 @@ class BlenderGPUDisplay : public GPUDisplay {
|
||||
void set_zoom(float zoom_x, float zoom_y);
|
||||
|
||||
protected:
|
||||
virtual bool do_update_begin(const GPUDisplayParams ¶ms,
|
||||
int texture_width,
|
||||
int texture_height) override;
|
||||
virtual void do_update_end() override;
|
||||
virtual bool update_begin(const Params ¶ms, int texture_width, int texture_height) override;
|
||||
virtual void update_end() override;
|
||||
|
||||
virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
|
||||
int texture_x,
|
||||
int texture_y,
|
||||
int pixels_width,
|
||||
int pixels_height) override;
|
||||
virtual void do_draw(const GPUDisplayParams ¶ms) override;
|
||||
virtual half4 *map_texture_buffer() override;
|
||||
virtual void unmap_texture_buffer() override;
|
||||
|
||||
virtual half4 *do_map_texture_buffer() override;
|
||||
virtual void do_unmap_texture_buffer() override;
|
||||
virtual GraphicsInterop graphics_interop_get() override;
|
||||
|
||||
virtual DeviceGraphicsInteropDestination do_graphics_interop_get() override;
|
||||
virtual void draw(const Params ¶ms) override;
|
||||
|
||||
/* Helper function which allocates new GPU context. */
|
||||
void gl_context_create();
|
||||
@@ -152,13 +147,13 @@ class BlenderGPUDisplay : public GPUDisplay {
|
||||
* This buffer is used to render texture in the viewport.
|
||||
*
|
||||
* NOTE: The buffer needs to be bound. */
|
||||
void vertex_buffer_update(const GPUDisplayParams ¶ms);
|
||||
void vertex_buffer_update(const Params ¶ms);
|
||||
|
||||
BL::RenderEngine b_engine_;
|
||||
|
||||
/* OpenGL context which is used the render engine doesn't have its own. */
|
||||
void *gl_context_ = nullptr;
|
||||
/* The when Blender RenderEngine side context is not available and the GPUDisplay is to create
|
||||
/* The when Blender RenderEngine side context is not available and the DisplayDriver is to create
|
||||
* its own context. */
|
||||
bool use_gl_context_ = false;
|
||||
/* Mutex used to guard the `gl_context_`. */
|
127
intern/cycles/blender/blender_output_driver.cpp
Normal file
127
intern/cycles/blender/blender_output_driver.cpp
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "blender/blender_output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
|
||||
{
|
||||
}
|
||||
|
||||
BlenderOutputDriver::~BlenderOutputDriver()
|
||||
{
|
||||
}
|
||||
|
||||
bool BlenderOutputDriver::read_render_tile(const Tile &tile)
|
||||
{
|
||||
/* Get render result. */
|
||||
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
|
||||
tile.offset.y,
|
||||
tile.size.x,
|
||||
tile.size.y,
|
||||
tile.layer.c_str(),
|
||||
tile.view.c_str());
|
||||
|
||||
/* Can happen if the intersected rectangle gives 0 width or height. */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* layer will be missing if it was disabled in the UI */
|
||||
if (b_single_rlay == b_rr.layers.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
vector<float> pixels(tile.size.x * tile.size.y * 4);
|
||||
|
||||
/* Copy each pass.
|
||||
* TODO:copy only the required ones for better performance? */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
|
||||
}
|
||||
|
||||
b_engine_.end_result(b_rr, false, false, false);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
|
||||
{
|
||||
/* Use final write for preview renders, otherwise render result wouldn't be be updated
|
||||
* quickly on Blender side. For all other cases we use the display driver. */
|
||||
if (b_engine_.is_preview()) {
|
||||
write_render_tile(tile);
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
/* Don't highlight full-frame tile. */
|
||||
if (!(tile.size == tile.full_size)) {
|
||||
b_engine_.tile_highlight_clear_all();
|
||||
b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderOutputDriver::write_render_tile(const Tile &tile)
|
||||
{
|
||||
b_engine_.tile_highlight_clear_all();
|
||||
|
||||
/* Get render result. */
|
||||
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
|
||||
tile.offset.y,
|
||||
tile.size.x,
|
||||
tile.size.y,
|
||||
tile.layer.c_str(),
|
||||
tile.view.c_str());
|
||||
|
||||
/* Can happen if the intersected rectangle gives 0 width or height. */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* Layer will be missing if it was disabled in the UI. */
|
||||
if (b_single_rlay == b_rr.layers.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
vector<float> pixels(tile.size.x * tile.size.y * 4);
|
||||
|
||||
/* Copy each pass. */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
|
||||
memset(&pixels[0], 0, pixels.size() * sizeof(float));
|
||||
}
|
||||
|
||||
b_pass.rect(&pixels[0]);
|
||||
}
|
||||
|
||||
b_engine_.end_result(b_rr, true, false, true);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
40
intern/cycles/blender/blender_output_driver.h
Normal file
40
intern/cycles/blender/blender_output_driver.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "MEM_guardedalloc.h"
|
||||
|
||||
#include "RNA_blender_cpp.h"
|
||||
|
||||
#include "render/output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BlenderOutputDriver : public OutputDriver {
|
||||
public:
|
||||
BlenderOutputDriver(BL::RenderEngine &b_engine);
|
||||
~BlenderOutputDriver();
|
||||
|
||||
virtual void write_render_tile(const Tile &tile) override;
|
||||
virtual bool update_render_tile(const Tile &tile) override;
|
||||
virtual bool read_render_tile(const Tile &tile) override;
|
||||
|
||||
protected:
|
||||
BL::RenderEngine b_engine_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -42,7 +42,8 @@
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
#include "blender/blender_gpu_display.h"
|
||||
#include "blender/blender_display_driver.h"
|
||||
#include "blender/blender_output_driver.h"
|
||||
#include "blender/blender_session.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
@@ -157,11 +158,13 @@ void BlenderSession::create_session()
|
||||
b_v3d, b_rv3d, scene->camera, width, height);
|
||||
session->reset(session_params, buffer_params);
|
||||
|
||||
/* Create GPU display. */
|
||||
/* Create GPU display.
|
||||
* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
|
||||
if (!b_engine.is_preview() && !headless) {
|
||||
unique_ptr<BlenderGPUDisplay> gpu_display = make_unique<BlenderGPUDisplay>(b_engine, b_scene);
|
||||
gpu_display_ = gpu_display.get();
|
||||
session->set_gpu_display(move(gpu_display));
|
||||
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
|
||||
b_scene);
|
||||
display_driver_ = display_driver.get();
|
||||
session->set_display_driver(move(display_driver));
|
||||
}
|
||||
|
||||
/* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform
|
||||
@@ -278,96 +281,6 @@ void BlenderSession::free_session()
|
||||
session = nullptr;
|
||||
}
|
||||
|
||||
void BlenderSession::read_render_tile()
|
||||
{
|
||||
const int2 tile_offset = session->get_render_tile_offset();
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
|
||||
/* get render result */
|
||||
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
|
||||
tile_offset.y,
|
||||
tile_size.x,
|
||||
tile_size.y,
|
||||
b_rlay_name.c_str(),
|
||||
b_rview_name.c_str());
|
||||
|
||||
/* can happen if the intersected rectangle gives 0 width or height */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* layer will be missing if it was disabled in the UI */
|
||||
if (b_single_rlay == b_rr.layers.end())
|
||||
return;
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
vector<float> pixels(tile_size.x * tile_size.y * 4);
|
||||
|
||||
/* Copy each pass.
|
||||
* TODO:copy only the required ones for better performance? */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
session->set_render_tile_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
|
||||
}
|
||||
|
||||
b_engine.end_result(b_rr, false, false, false);
|
||||
}
|
||||
|
||||
void BlenderSession::write_render_tile()
|
||||
{
|
||||
const int2 tile_offset = session->get_render_tile_offset();
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
|
||||
const string_view render_layer_name = session->get_render_tile_layer();
|
||||
const string_view render_view_name = session->get_render_tile_view();
|
||||
|
||||
b_engine.tile_highlight_clear_all();
|
||||
|
||||
/* get render result */
|
||||
BL::RenderResult b_rr = b_engine.begin_result(tile_offset.x,
|
||||
tile_offset.y,
|
||||
tile_size.x,
|
||||
tile_size.y,
|
||||
render_layer_name.c_str(),
|
||||
render_view_name.c_str());
|
||||
|
||||
/* can happen if the intersected rectangle gives 0 width or height */
|
||||
if (b_rr.ptr.data == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderResult::layers_iterator b_single_rlay;
|
||||
b_rr.layers.begin(b_single_rlay);
|
||||
|
||||
/* layer will be missing if it was disabled in the UI */
|
||||
if (b_single_rlay == b_rr.layers.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
BL::RenderLayer b_rlay = *b_single_rlay;
|
||||
|
||||
write_render_result(b_rlay);
|
||||
|
||||
b_engine.end_result(b_rr, true, false, true);
|
||||
}
|
||||
|
||||
void BlenderSession::update_render_tile()
|
||||
{
|
||||
if (!session->has_multiple_render_tiles()) {
|
||||
/* Don't highlight full-frame tile. */
|
||||
return;
|
||||
}
|
||||
|
||||
const int2 tile_offset = session->get_render_tile_offset();
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
|
||||
b_engine.tile_highlight_clear_all();
|
||||
b_engine.tile_highlight_set(tile_offset.x, tile_offset.y, tile_size.x, tile_size.y, true);
|
||||
}
|
||||
|
||||
void BlenderSession::full_buffer_written(string_view filename)
|
||||
{
|
||||
full_buffer_files_.emplace_back(filename);
|
||||
@@ -441,18 +354,8 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
|
||||
return;
|
||||
}
|
||||
|
||||
/* set callback to write out render results */
|
||||
session->write_render_tile_cb = [&]() { write_render_tile(); };
|
||||
|
||||
/* Use final write for preview renders, otherwise render result wouldn't be be updated on Blender
|
||||
* side. */
|
||||
/* TODO(sergey): Investigate whether GPUDisplay can be used for the preview as well. */
|
||||
if (b_engine.is_preview()) {
|
||||
session->update_render_tile_cb = [&]() { write_render_tile(); };
|
||||
}
|
||||
else {
|
||||
session->update_render_tile_cb = [&]() { update_render_tile(); };
|
||||
}
|
||||
/* Create driver to write out render results. */
|
||||
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
|
||||
|
||||
session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); };
|
||||
|
||||
@@ -598,10 +501,13 @@ void BlenderSession::render_frame_finish()
|
||||
path_remove(filename);
|
||||
}
|
||||
|
||||
/* clear callback */
|
||||
session->write_render_tile_cb = function_null;
|
||||
session->update_render_tile_cb = function_null;
|
||||
/* Clear driver. */
|
||||
session->set_output_driver(nullptr);
|
||||
session->full_buffer_written_cb = function_null;
|
||||
|
||||
/* All the files are handled.
|
||||
* Clear the list so that this session can be re-used by Persistent Data. */
|
||||
full_buffer_files_.clear();
|
||||
}
|
||||
|
||||
static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter)
|
||||
@@ -706,9 +612,8 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
||||
pass->set_type(bake_type_to_pass(bake_type, bake_filter));
|
||||
pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));
|
||||
|
||||
session->read_render_tile_cb = [&]() { read_render_tile(); };
|
||||
session->write_render_tile_cb = [&]() { write_render_tile(); };
|
||||
session->set_gpu_display(nullptr);
|
||||
session->set_display_driver(nullptr);
|
||||
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
|
||||
|
||||
if (!session->progress.get_cancel()) {
|
||||
/* Sync scene. */
|
||||
@@ -751,43 +656,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
||||
session->wait();
|
||||
}
|
||||
|
||||
session->read_render_tile_cb = function_null;
|
||||
session->write_render_tile_cb = function_null;
|
||||
}
|
||||
|
||||
void BlenderSession::write_render_result(BL::RenderLayer &b_rlay)
|
||||
{
|
||||
if (!session->copy_render_tile_from_device()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
vector<float> pixels(tile_size.x * tile_size.y * 4);
|
||||
|
||||
/* Copy each pass. */
|
||||
for (BL::RenderPass &b_pass : b_rlay.passes) {
|
||||
if (!session->get_render_tile_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
|
||||
memset(&pixels[0], 0, pixels.size() * sizeof(float));
|
||||
}
|
||||
|
||||
b_pass.rect(&pixels[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderSession::update_render_result(BL::RenderLayer &b_rlay)
|
||||
{
|
||||
if (!session->copy_render_tile_from_device()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int2 tile_size = session->get_render_tile_size();
|
||||
vector<float> pixels(tile_size.x * tile_size.y * 4);
|
||||
|
||||
/* Copy combined pass. */
|
||||
BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
|
||||
if (session->get_render_tile_pixels("Combined", b_combined_pass.channels(), &pixels[0])) {
|
||||
b_combined_pass.rect(&pixels[0]);
|
||||
}
|
||||
session->set_output_driver(nullptr);
|
||||
}
|
||||
|
||||
void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
|
||||
@@ -895,7 +764,7 @@ void BlenderSession::draw(BL::SpaceImageEditor &space_image)
|
||||
}
|
||||
|
||||
BL::Array<float, 2> zoom = space_image.zoom();
|
||||
gpu_display_->set_zoom(zoom[0], zoom[1]);
|
||||
display_driver_->set_zoom(zoom[0], zoom[1]);
|
||||
|
||||
session->draw();
|
||||
}
|
||||
|
@@ -29,7 +29,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BlenderGPUDisplay;
|
||||
class BlenderDisplayDriver;
|
||||
class BlenderSync;
|
||||
class ImageMetaData;
|
||||
class Scene;
|
||||
@@ -70,20 +70,7 @@ class BlenderSession {
|
||||
const int bake_width,
|
||||
const int bake_height);
|
||||
|
||||
void write_render_result(BL::RenderLayer &b_rlay);
|
||||
void write_render_tile();
|
||||
|
||||
void update_render_tile();
|
||||
|
||||
void full_buffer_written(string_view filename);
|
||||
|
||||
/* update functions are used to update display buffer only after sample was rendered
|
||||
* only needed for better visual feedback */
|
||||
void update_render_result(BL::RenderLayer &b_rlay);
|
||||
|
||||
/* read functions for baking input */
|
||||
void read_render_tile();
|
||||
|
||||
/* interactive updates */
|
||||
void synchronize(BL::Depsgraph &b_depsgraph);
|
||||
|
||||
@@ -164,8 +151,8 @@ class BlenderSession {
|
||||
int last_pass_index = -1;
|
||||
} draw_state_;
|
||||
|
||||
/* NOTE: The BlenderSession references the GPU display. */
|
||||
BlenderGPUDisplay *gpu_display_ = nullptr;
|
||||
/* NOTE: The BlenderSession references the display driver. */
|
||||
BlenderDisplayDriver *display_driver_ = nullptr;
|
||||
|
||||
vector<string> full_buffer_files_;
|
||||
};
|
||||
|
@@ -279,7 +279,7 @@ static ShaderNode *add_node(Scene *scene,
|
||||
array<float3> curve_mapping_curves;
|
||||
float min_x, max_x;
|
||||
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true);
|
||||
curvemapping_minmax(mapping, true, &min_x, &max_x);
|
||||
curvemapping_minmax(mapping, 4, &min_x, &max_x);
|
||||
curves->set_min_x(min_x);
|
||||
curves->set_max_x(max_x);
|
||||
curves->set_curves(curve_mapping_curves);
|
||||
@@ -292,12 +292,25 @@ static ShaderNode *add_node(Scene *scene,
|
||||
array<float3> curve_mapping_curves;
|
||||
float min_x, max_x;
|
||||
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false);
|
||||
curvemapping_minmax(mapping, false, &min_x, &max_x);
|
||||
curvemapping_minmax(mapping, 3, &min_x, &max_x);
|
||||
curves->set_min_x(min_x);
|
||||
curves->set_max_x(max_x);
|
||||
curves->set_curves(curve_mapping_curves);
|
||||
node = curves;
|
||||
}
|
||||
else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) {
|
||||
BL::ShaderNodeFloatCurve b_curve_node(b_node);
|
||||
BL::CurveMapping mapping(b_curve_node.mapping());
|
||||
FloatCurveNode *curve = graph->create_node<FloatCurveNode>();
|
||||
array<float> curve_mapping_curve;
|
||||
float min_x, max_x;
|
||||
curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE);
|
||||
curvemapping_minmax(mapping, 1, &min_x, &max_x);
|
||||
curve->set_min_x(min_x);
|
||||
curve->set_max_x(max_x);
|
||||
curve->set_curve(curve_mapping_curve);
|
||||
node = curve;
|
||||
}
|
||||
else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) {
|
||||
RGBRampNode *ramp = graph->create_node<RGBRampNode>();
|
||||
BL::ShaderNodeValToRGB b_ramp_node(b_node);
|
||||
|
@@ -545,8 +545,6 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
|
||||
MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
|
||||
MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER);
|
||||
|
||||
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
|
||||
|
||||
MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
|
||||
MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
|
||||
|
||||
@@ -604,10 +602,6 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
|
||||
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
|
||||
|
||||
/* Debug passes. */
|
||||
if (get_boolean(crl, "pass_debug_render_time")) {
|
||||
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
|
||||
pass_add(scene, PASS_RENDER_TIME, "Debug Render Time");
|
||||
}
|
||||
if (get_boolean(crl, "pass_debug_sample_count")) {
|
||||
b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
|
||||
pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count");
|
||||
|
@@ -171,12 +171,11 @@ static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *m
|
||||
}
|
||||
|
||||
static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
|
||||
bool rgb_curve,
|
||||
int num_curves,
|
||||
float *min_x,
|
||||
float *max_x)
|
||||
{
|
||||
// const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
|
||||
const int num_curves = rgb_curve ? 4 : 3;
|
||||
*min_x = FLT_MAX;
|
||||
*max_x = -FLT_MAX;
|
||||
for (int i = 0; i < num_curves; ++i) {
|
||||
@@ -196,6 +195,28 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
|
||||
}
|
||||
}
|
||||
|
||||
static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
|
||||
array<float> &data,
|
||||
int size)
|
||||
{
|
||||
float min = 0.0f, max = 1.0f;
|
||||
|
||||
curvemapping_minmax(cumap, 1, &min, &max);
|
||||
|
||||
const float range = max - min;
|
||||
|
||||
cumap.update();
|
||||
|
||||
BL::CurveMap map = cumap.curves[0];
|
||||
|
||||
data.resize(size);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
float t = min + (float)i / (float)(size - 1) * range;
|
||||
data[i] = cumap.evaluate(map, t);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
|
||||
array<float3> &data,
|
||||
int size,
|
||||
@@ -214,7 +235,8 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
|
||||
*
|
||||
* There might be some better estimations here tho.
|
||||
*/
|
||||
curvemapping_minmax(cumap, rgb_curve, &min_x, &max_x);
|
||||
const int num_curves = rgb_curve ? 4 : 3;
|
||||
curvemapping_minmax(cumap, num_curves, &min_x, &max_x);
|
||||
|
||||
const float range_x = max_x - min_x;
|
||||
|
||||
|
@@ -50,10 +50,6 @@ struct PackedBVH {
|
||||
array<int4> leaf_nodes;
|
||||
/* object index to BVH node index mapping for instances */
|
||||
array<int> object_node;
|
||||
/* Mapping from primitive index to index in triangle array. */
|
||||
array<uint> prim_tri_index;
|
||||
/* Continuous storage of triangle vertices. */
|
||||
array<float4> prim_tri_verts;
|
||||
/* primitive type - triangle or strand */
|
||||
array<int> prim_type;
|
||||
/* visibility visibilitys for primitives */
|
||||
|
@@ -439,61 +439,20 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility
|
||||
|
||||
/* Triangles */
|
||||
|
||||
void BVH2::pack_triangle(int idx, float4 tri_verts[3])
|
||||
{
|
||||
int tob = pack.prim_object[idx];
|
||||
assert(tob >= 0 && tob < objects.size());
|
||||
const Mesh *mesh = static_cast<const Mesh *>(objects[tob]->get_geometry());
|
||||
|
||||
int tidx = pack.prim_index[idx];
|
||||
Mesh::Triangle t = mesh->get_triangle(tidx);
|
||||
const float3 *vpos = &mesh->verts[0];
|
||||
float3 v0 = vpos[t.v[0]];
|
||||
float3 v1 = vpos[t.v[1]];
|
||||
float3 v2 = vpos[t.v[2]];
|
||||
|
||||
tri_verts[0] = float3_to_float4(v0);
|
||||
tri_verts[1] = float3_to_float4(v1);
|
||||
tri_verts[2] = float3_to_float4(v2);
|
||||
}
|
||||
|
||||
void BVH2::pack_primitives()
|
||||
{
|
||||
const size_t tidx_size = pack.prim_index.size();
|
||||
size_t num_prim_triangles = 0;
|
||||
/* Count number of triangles primitives in BVH. */
|
||||
for (unsigned int i = 0; i < tidx_size; i++) {
|
||||
if ((pack.prim_index[i] != -1)) {
|
||||
if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
|
||||
++num_prim_triangles;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Reserve size for arrays. */
|
||||
pack.prim_tri_index.clear();
|
||||
pack.prim_tri_index.resize(tidx_size);
|
||||
pack.prim_tri_verts.clear();
|
||||
pack.prim_tri_verts.resize(num_prim_triangles * 3);
|
||||
pack.prim_visibility.clear();
|
||||
pack.prim_visibility.resize(tidx_size);
|
||||
/* Fill in all the arrays. */
|
||||
size_t prim_triangle_index = 0;
|
||||
for (unsigned int i = 0; i < tidx_size; i++) {
|
||||
if (pack.prim_index[i] != -1) {
|
||||
int tob = pack.prim_object[i];
|
||||
Object *ob = objects[tob];
|
||||
if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
|
||||
pack_triangle(i, (float4 *)&pack.prim_tri_verts[3 * prim_triangle_index]);
|
||||
pack.prim_tri_index[i] = 3 * prim_triangle_index;
|
||||
++prim_triangle_index;
|
||||
}
|
||||
else {
|
||||
pack.prim_tri_index[i] = -1;
|
||||
}
|
||||
pack.prim_visibility[i] = ob->visibility_for_tracing();
|
||||
}
|
||||
else {
|
||||
pack.prim_tri_index[i] = -1;
|
||||
pack.prim_visibility[i] = 0;
|
||||
}
|
||||
}
|
||||
@@ -522,10 +481,8 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
|
||||
/* reserve */
|
||||
size_t prim_index_size = pack.prim_index.size();
|
||||
size_t prim_tri_verts_size = pack.prim_tri_verts.size();
|
||||
|
||||
size_t pack_prim_index_offset = prim_index_size;
|
||||
size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
|
||||
size_t pack_nodes_offset = nodes_size;
|
||||
size_t pack_leaf_nodes_offset = leaf_nodes_size;
|
||||
size_t object_offset = 0;
|
||||
@@ -535,7 +492,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
|
||||
if (geom->need_build_bvh(params.bvh_layout)) {
|
||||
prim_index_size += bvh->pack.prim_index.size();
|
||||
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
|
||||
nodes_size += bvh->pack.nodes.size();
|
||||
leaf_nodes_size += bvh->pack.leaf_nodes.size();
|
||||
}
|
||||
@@ -545,8 +501,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
pack.prim_type.resize(prim_index_size);
|
||||
pack.prim_object.resize(prim_index_size);
|
||||
pack.prim_visibility.resize(prim_index_size);
|
||||
pack.prim_tri_verts.resize(prim_tri_verts_size);
|
||||
pack.prim_tri_index.resize(prim_index_size);
|
||||
pack.nodes.resize(nodes_size);
|
||||
pack.leaf_nodes.resize(leaf_nodes_size);
|
||||
pack.object_node.resize(objects.size());
|
||||
@@ -559,8 +513,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
int *pack_prim_type = (pack.prim_type.size()) ? &pack.prim_type[0] : NULL;
|
||||
int *pack_prim_object = (pack.prim_object.size()) ? &pack.prim_object[0] : NULL;
|
||||
uint *pack_prim_visibility = (pack.prim_visibility.size()) ? &pack.prim_visibility[0] : NULL;
|
||||
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size()) ? &pack.prim_tri_verts[0] : NULL;
|
||||
uint *pack_prim_tri_index = (pack.prim_tri_index.size()) ? &pack.prim_tri_index[0] : NULL;
|
||||
int4 *pack_nodes = (pack.nodes.size()) ? &pack.nodes[0] : NULL;
|
||||
int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL;
|
||||
float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL;
|
||||
@@ -609,18 +561,14 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
int *bvh_prim_index = &bvh->pack.prim_index[0];
|
||||
int *bvh_prim_type = &bvh->pack.prim_type[0];
|
||||
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
|
||||
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
|
||||
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
|
||||
|
||||
for (size_t i = 0; i < bvh_prim_index_size; i++) {
|
||||
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
|
||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
||||
pack_prim_tri_index[pack_prim_index_offset] = -1;
|
||||
}
|
||||
else {
|
||||
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset;
|
||||
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
|
||||
pack_prim_tri_verts_offset;
|
||||
}
|
||||
|
||||
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
|
||||
@@ -633,15 +581,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
|
||||
}
|
||||
}
|
||||
|
||||
/* Merge triangle vertices data. */
|
||||
if (bvh->pack.prim_tri_verts.size()) {
|
||||
const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
|
||||
memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
|
||||
&bvh->pack.prim_tri_verts[0],
|
||||
prim_tri_size * sizeof(float4));
|
||||
pack_prim_tri_verts_offset += prim_tri_size;
|
||||
}
|
||||
|
||||
/* merge nodes */
|
||||
if (bvh->pack.leaf_nodes.size()) {
|
||||
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
|
||||
|
@@ -67,8 +67,12 @@ BVHBuild::~BVHBuild()
|
||||
|
||||
/* Adding References */
|
||||
|
||||
void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i)
|
||||
void BVHBuild::add_reference_triangles(BoundBox &root,
|
||||
BoundBox ¢er,
|
||||
Mesh *mesh,
|
||||
int object_index)
|
||||
{
|
||||
const PrimitiveType primitive_type = mesh->primitive_type();
|
||||
const Attribute *attr_mP = NULL;
|
||||
if (mesh->has_motion_blur()) {
|
||||
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
@@ -81,7 +85,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
t.bounds_grow(verts, bounds);
|
||||
if (bounds.valid() && t.valid(verts)) {
|
||||
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_TRIANGLE));
|
||||
references.push_back(BVHReference(bounds, j, object_index, primitive_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -101,7 +105,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m
|
||||
t.bounds_grow(vert_steps + step * num_verts, bounds);
|
||||
}
|
||||
if (bounds.valid()) {
|
||||
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE));
|
||||
references.push_back(BVHReference(bounds, j, object_index, primitive_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -140,7 +144,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m
|
||||
if (bounds.valid()) {
|
||||
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||
references.push_back(
|
||||
BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE, prev_time, curr_time));
|
||||
BVHReference(bounds, j, object_index, primitive_type, prev_time, curr_time));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -153,18 +157,14 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i)
|
||||
void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int object_index)
|
||||
{
|
||||
const Attribute *curve_attr_mP = NULL;
|
||||
if (hair->has_motion_blur()) {
|
||||
curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
}
|
||||
|
||||
const PrimitiveType primitive_type =
|
||||
(curve_attr_mP != NULL) ?
|
||||
((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON :
|
||||
PRIMITIVE_MOTION_CURVE_THICK) :
|
||||
((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK);
|
||||
const PrimitiveType primitive_type = hair->primitive_type();
|
||||
|
||||
const size_t num_curves = hair->num_curves();
|
||||
for (uint j = 0; j < num_curves; j++) {
|
||||
@@ -177,7 +177,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds);
|
||||
if (bounds.valid()) {
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
|
||||
references.push_back(BVHReference(bounds, j, i, packed_type));
|
||||
references.push_back(BVHReference(bounds, j, object_index, packed_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -198,7 +198,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
}
|
||||
if (bounds.valid()) {
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
|
||||
references.push_back(BVHReference(bounds, j, i, packed_type));
|
||||
references.push_back(BVHReference(bounds, j, object_index, packed_type));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -254,7 +254,8 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
if (bounds.valid()) {
|
||||
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
|
||||
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
|
||||
references.push_back(BVHReference(bounds, j, i, packed_type, prev_time, curr_time));
|
||||
references.push_back(
|
||||
BVHReference(bounds, j, object_index, packed_type, prev_time, curr_time));
|
||||
root.grow(bounds);
|
||||
center.grow(bounds.center2());
|
||||
}
|
||||
@@ -268,15 +269,18 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair
|
||||
}
|
||||
}
|
||||
|
||||
void BVHBuild::add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i)
|
||||
void BVHBuild::add_reference_geometry(BoundBox &root,
|
||||
BoundBox ¢er,
|
||||
Geometry *geom,
|
||||
int object_index)
|
||||
{
|
||||
if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
add_reference_triangles(root, center, mesh, i);
|
||||
add_reference_triangles(root, center, mesh, object_index);
|
||||
}
|
||||
else if (geom->geometry_type == Geometry::HAIR) {
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
add_reference_curves(root, center, hair, i);
|
||||
add_reference_curves(root, center, hair, object_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -89,20 +89,9 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
|
||||
/* Test if we need to record this transparent intersection. */
|
||||
if (ctx->num_hits < ctx->max_hits || ray->tfar < ctx->max_t) {
|
||||
/* Skip already recorded intersections. */
|
||||
int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
|
||||
|
||||
for (int i = 0; i < num_recorded_hits; ++i) {
|
||||
if (current_isect.object == ctx->isect_s[i].object &&
|
||||
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
|
||||
/* This intersection was already recorded, skip it. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* If maximum number of hits was reached, replace the intersection with the
|
||||
* highest distance. We want to find the N closest intersections. */
|
||||
const int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
|
||||
int isect_index = num_recorded_hits;
|
||||
if (num_recorded_hits + 1 >= ctx->max_hits) {
|
||||
float max_t = ctx->isect_s[0].t;
|
||||
@@ -147,10 +136,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
}
|
||||
else {
|
||||
kernel_embree_convert_hit(kg, ray, hit, ¤t_isect);
|
||||
int object = (current_isect.object == OBJECT_NONE) ?
|
||||
kernel_tex_fetch(__prim_object, current_isect.prim) :
|
||||
current_isect.object;
|
||||
if (ctx->local_object_id != object) {
|
||||
if (ctx->local_object_id != current_isect.object) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
@@ -169,41 +155,49 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
break;
|
||||
}
|
||||
|
||||
/* See triangle_intersect_subsurface() for the native equivalent. */
|
||||
for (int i = min(ctx->max_hits, ctx->local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (ctx->local_isect->hits[i].t == ray->tfar) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LocalIntersection *local_isect = ctx->local_isect;
|
||||
int hit_idx = 0;
|
||||
|
||||
if (ctx->lcg_state) {
|
||||
/* See triangle_intersect_subsurface() for the native equivalent. */
|
||||
for (int i = min(ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
if (local_isect->hits[i].t == ray->tfar) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
++ctx->local_isect->num_hits;
|
||||
if (ctx->local_isect->num_hits <= ctx->max_hits) {
|
||||
hit_idx = ctx->local_isect->num_hits - 1;
|
||||
local_isect->num_hits++;
|
||||
|
||||
if (local_isect->num_hits <= ctx->max_hits) {
|
||||
hit_idx = local_isect->num_hits - 1;
|
||||
}
|
||||
else {
|
||||
/* reservoir sampling: if we are at the maximum number of
|
||||
* hits, randomly replace element or skip it */
|
||||
hit_idx = lcg_step_uint(ctx->lcg_state) % ctx->local_isect->num_hits;
|
||||
hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
|
||||
|
||||
if (hit_idx >= ctx->max_hits) {
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
ctx->local_isect->num_hits = 1;
|
||||
/* Record closest intersection only. */
|
||||
if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
|
||||
*args->valid = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
/* record intersection */
|
||||
ctx->local_isect->hits[hit_idx] = current_isect;
|
||||
ctx->local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
|
||||
local_isect->hits[hit_idx] = current_isect;
|
||||
local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
|
||||
/* This tells Embree to continue tracing. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
@@ -213,21 +207,11 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
|
||||
if (ctx->num_hits < ctx->max_hits) {
|
||||
Intersection current_isect;
|
||||
kernel_embree_convert_hit(kg, ray, hit, ¤t_isect);
|
||||
for (size_t i = 0; i < ctx->num_hits; ++i) {
|
||||
if (current_isect.object == ctx->isect_s[i].object &&
|
||||
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
|
||||
/* This intersection was already recorded, skip it. */
|
||||
*args->valid = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Intersection *isect = &ctx->isect_s[ctx->num_hits];
|
||||
++ctx->num_hits;
|
||||
*isect = current_isect;
|
||||
/* Only primitives from volume object. */
|
||||
uint tri_object = (isect->object == OBJECT_NONE) ?
|
||||
kernel_tex_fetch(__prim_object, isect->prim) :
|
||||
isect->object;
|
||||
uint tri_object = isect->object;
|
||||
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
|
||||
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
--ctx->num_hits;
|
||||
@@ -249,7 +233,7 @@ static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args)
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
|
||||
/* Always ignore backfacing intersections. */
|
||||
/* Always ignore back-facing intersections. */
|
||||
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
|
||||
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
|
||||
*args->valid = 0;
|
||||
@@ -262,7 +246,7 @@ static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArgumen
|
||||
const RTCRay *ray = (RTCRay *)args->ray;
|
||||
RTCHit *hit = (RTCHit *)args->hit;
|
||||
|
||||
/* Always ignore backfacing intersections. */
|
||||
/* Always ignore back-facing intersections. */
|
||||
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
|
||||
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
|
||||
*args->valid = 0;
|
||||
@@ -456,7 +440,7 @@ void BVHEmbree::add_instance(Object *ob, int i)
|
||||
|
||||
void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i)
|
||||
{
|
||||
size_t prim_offset = mesh->optix_prim_offset;
|
||||
size_t prim_offset = mesh->prim_offset;
|
||||
|
||||
const Attribute *attr_mP = NULL;
|
||||
size_t num_motion_steps = 1;
|
||||
@@ -625,7 +609,7 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c
|
||||
|
||||
void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i)
|
||||
{
|
||||
size_t prim_offset = hair->optix_prim_offset;
|
||||
size_t prim_offset = hair->curve_segment_offset;
|
||||
|
||||
const Attribute *attr_mP = NULL;
|
||||
size_t num_motion_steps = 1;
|
||||
@@ -702,7 +686,7 @@ void BVHEmbree::refit(Progress &progress)
|
||||
if (mesh->num_triangles() > 0) {
|
||||
RTCGeometry geom = rtcGetGeometry(scene, geom_id);
|
||||
set_tri_vertex_buffer(geom, mesh, true);
|
||||
rtcSetGeometryUserData(geom, (void *)mesh->optix_prim_offset);
|
||||
rtcSetGeometryUserData(geom, (void *)mesh->prim_offset);
|
||||
rtcCommitGeometry(geom);
|
||||
}
|
||||
}
|
||||
@@ -711,7 +695,7 @@ void BVHEmbree::refit(Progress &progress)
|
||||
if (hair->num_curves() > 0) {
|
||||
RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1);
|
||||
set_curve_vertex_buffer(geom, hair, true);
|
||||
rtcSetGeometryUserData(geom, (void *)hair->optix_prim_offset);
|
||||
rtcSetGeometryUserData(geom, (void *)hair->curve_segment_offset);
|
||||
rtcCommitGeometry(geom);
|
||||
}
|
||||
}
|
||||
|
@@ -521,7 +521,7 @@ endif()
|
||||
if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
|
||||
find_package(CUDA) # Try to auto locate CUDA toolkit
|
||||
if(CUDA_FOUND)
|
||||
message(STATUS "CUDA nvcc = ${CUDA_NVCC_EXECUTABLE}")
|
||||
message(STATUS "Found CUDA ${CUDA_NVCC_EXECUTABLE} (${CUDA_VERSION})")
|
||||
else()
|
||||
message(STATUS "CUDA compiler not found, disabling WITH_CYCLES_CUDA_BINARIES")
|
||||
set(WITH_CYCLES_CUDA_BINARIES OFF)
|
||||
@@ -537,6 +537,16 @@ endif()
|
||||
# HIP
|
||||
###########################################################################
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
find_package(HIP)
|
||||
if(HIP_FOUND)
|
||||
message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})")
|
||||
else()
|
||||
message(STATUS "HIP compiler not found, disabling WITH_CYCLES_HIP_BINARIES")
|
||||
set(WITH_CYCLES_HIP_BINARIES OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT WITH_HIP_DYNLOAD)
|
||||
set(WITH_HIP_DYNLOAD ON)
|
||||
endif()
|
||||
|
@@ -37,14 +37,15 @@ CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADeviceGraphicsInterop::set_destination(
|
||||
const DeviceGraphicsInteropDestination &destination)
|
||||
void CUDADeviceGraphicsInterop::set_display_interop(
|
||||
const DisplayDriver::GraphicsInterop &display_interop)
|
||||
{
|
||||
const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;
|
||||
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
|
||||
display_interop.buffer_height;
|
||||
|
||||
need_clear_ = destination.need_clear;
|
||||
need_clear_ = display_interop.need_clear;
|
||||
|
||||
if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -55,12 +56,12 @@ void CUDADeviceGraphicsInterop::set_destination(
|
||||
}
|
||||
|
||||
const CUresult result = cuGraphicsGLRegisterBuffer(
|
||||
&cu_graphics_resource_, destination.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
|
||||
&cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
|
||||
}
|
||||
|
||||
opengl_pbo_id_ = destination.opengl_pbo_id;
|
||||
opengl_pbo_id_ = display_interop.opengl_pbo_id;
|
||||
buffer_area_ = new_buffer_area;
|
||||
}
|
||||
|
||||
|
@@ -41,7 +41,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
|
||||
CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
|
||||
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
|
||||
|
||||
virtual device_ptr map() override;
|
||||
virtual void unmap() override;
|
||||
|
@@ -16,25 +16,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Information about interoperability destination.
|
||||
* Is provided by the GPUDisplay. */
|
||||
class DeviceGraphicsInteropDestination {
|
||||
public:
|
||||
/* Dimensions of the buffer, in pixels. */
|
||||
int buffer_width = 0;
|
||||
int buffer_height = 0;
|
||||
|
||||
/* OpenGL pixel buffer object. */
|
||||
int opengl_pbo_id = 0;
|
||||
|
||||
/* Clear the entire destination before doing partial write to it. */
|
||||
bool need_clear = false;
|
||||
};
|
||||
|
||||
/* Device-side graphics interoperability support.
|
||||
*
|
||||
* Takes care of holding all the handlers needed by the device to implement interoperability with
|
||||
@@ -46,7 +33,7 @@ class DeviceGraphicsInterop {
|
||||
|
||||
/* Update this device-side graphics interoperability object with the given destination resource
|
||||
* information. */
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) = 0;
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
|
||||
|
||||
virtual device_ptr map() = 0;
|
||||
virtual void unmap() = 0;
|
||||
|
@@ -37,11 +37,15 @@ HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop()
|
||||
}
|
||||
}
|
||||
|
||||
void HIPDeviceGraphicsInterop::set_destination(const DeviceGraphicsInteropDestination &destination)
|
||||
void HIPDeviceGraphicsInterop::set_display_interop(
|
||||
const DisplayDriver::GraphicsInterop &display_interop)
|
||||
{
|
||||
const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;
|
||||
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
|
||||
display_interop.buffer_height;
|
||||
|
||||
if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
need_clear_ = display_interop.need_clear;
|
||||
|
||||
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -52,12 +56,12 @@ void HIPDeviceGraphicsInterop::set_destination(const DeviceGraphicsInteropDestin
|
||||
}
|
||||
|
||||
const hipError_t result = hipGraphicsGLRegisterBuffer(
|
||||
&hip_graphics_resource_, destination.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
|
||||
&hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
|
||||
if (result != hipSuccess) {
|
||||
LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
|
||||
}
|
||||
|
||||
opengl_pbo_id_ = destination.opengl_pbo_id;
|
||||
opengl_pbo_id_ = display_interop.opengl_pbo_id;
|
||||
buffer_area_ = new_buffer_area;
|
||||
}
|
||||
|
||||
@@ -77,6 +81,14 @@ device_ptr HIPDeviceGraphicsInterop::map()
|
||||
hip_device_assert(
|
||||
device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
|
||||
|
||||
if (need_clear_) {
|
||||
hip_device_assert(
|
||||
device_,
|
||||
hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
|
||||
|
||||
need_clear_ = false;
|
||||
}
|
||||
|
||||
return static_cast<device_ptr>(hip_buffer);
|
||||
}
|
||||
|
||||
|
@@ -39,7 +39,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete;
|
||||
HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete;
|
||||
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
|
||||
|
||||
virtual device_ptr map() override;
|
||||
virtual void unmap() override;
|
||||
@@ -53,6 +53,9 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
/* Buffer area in pixels of the corresponding PBO. */
|
||||
int64_t buffer_area_ = 0;
|
||||
|
||||
/* The destination was requested to be cleared. */
|
||||
bool need_clear_ = false;
|
||||
|
||||
hipGraphicsResource hip_graphics_resource_ = nullptr;
|
||||
};
|
||||
|
||||
|
@@ -28,7 +28,7 @@ void HIPDeviceKernels::load(HIPDevice *device)
|
||||
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
|
||||
HIPDeviceKernel &kernel = kernels_[i];
|
||||
|
||||
/* No megakernel used for GPU. */
|
||||
/* No mega-kernel used for GPU. */
|
||||
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
continue;
|
||||
}
|
||||
|
@@ -768,7 +768,13 @@ void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass
|
||||
destination.num_components = 3;
|
||||
destination.pixel_stride = context.buffer_params.pass_stride;
|
||||
|
||||
pass_accessor.get_render_tile_pixels(context.render_buffers, context.buffer_params, destination);
|
||||
BufferParams buffer_params = context.buffer_params;
|
||||
buffer_params.window_x = 0;
|
||||
buffer_params.window_y = 0;
|
||||
buffer_params.window_width = buffer_params.width;
|
||||
buffer_params.window_height = buffer_params.height;
|
||||
|
||||
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
|
||||
}
|
||||
|
||||
bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass)
|
||||
@@ -1246,7 +1252,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
|
||||
build_input.curveArray.indexStrideInBytes = sizeof(int);
|
||||
build_input.curveArray.flag = build_flags;
|
||||
build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset;
|
||||
build_input.curveArray.primitiveIndexOffset = hair->curve_segment_offset;
|
||||
}
|
||||
else {
|
||||
/* Disable visibility test any-hit program, since it is already checked during
|
||||
@@ -1259,7 +1265,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
|
||||
build_input.customPrimitiveArray.flags = &build_flags;
|
||||
build_input.customPrimitiveArray.numSbtRecords = 1;
|
||||
build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset;
|
||||
build_input.customPrimitiveArray.primitiveIndexOffset = hair->curve_segment_offset;
|
||||
}
|
||||
|
||||
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||
@@ -1328,7 +1334,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
* buffers for that purpose. OptiX does not allow this to be zero though, so just pass in
|
||||
* one and rely on that having the same meaning in this case. */
|
||||
build_input.triangleArray.numSbtRecords = 1;
|
||||
build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
|
||||
build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
|
||||
|
||||
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
|
||||
progress.set_error("Failed to build OptiX acceleration structure");
|
||||
@@ -1395,8 +1401,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
instance.transform[5] = 1.0f;
|
||||
instance.transform[10] = 1.0f;
|
||||
|
||||
/* Set user instance ID to object index (but leave low bit blank). */
|
||||
instance.instanceId = ob->get_device_index() << 1;
|
||||
/* Set user instance ID to object index. */
|
||||
instance.instanceId = ob->get_device_index();
|
||||
|
||||
/* Add some of the object visibility bits to the mask.
|
||||
* __prim_visibility contains the combined visibility bits of all instances, so is not
|
||||
@@ -1419,7 +1425,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
}
|
||||
else {
|
||||
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
|
||||
* since it needs to filter out endcaps there).
|
||||
* since it needs to filter out end-caps there).
|
||||
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
|
||||
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
|
||||
*/
|
||||
@@ -1508,9 +1514,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
else {
|
||||
/* Disable instance transform if geometry already has it applied to vertex data. */
|
||||
instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
|
||||
/* Non-instanced objects read ID from 'prim_object', so distinguish
|
||||
* them from instanced objects with the low bit set. */
|
||||
instance.instanceId |= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -27,6 +27,8 @@ set(SRC
|
||||
pass_accessor.cpp
|
||||
pass_accessor_cpu.cpp
|
||||
pass_accessor_gpu.cpp
|
||||
path_trace_display.cpp
|
||||
path_trace_tile.cpp
|
||||
path_trace_work.cpp
|
||||
path_trace_work_cpu.cpp
|
||||
path_trace_work_gpu.cpp
|
||||
@@ -47,6 +49,8 @@ set(SRC_HEADERS
|
||||
pass_accessor.h
|
||||
pass_accessor_cpu.h
|
||||
pass_accessor_gpu.h
|
||||
path_trace_display.h
|
||||
path_trace_tile.h
|
||||
path_trace_work.h
|
||||
path_trace_work_cpu.h
|
||||
path_trace_work_gpu.h
|
||||
|
@@ -289,7 +289,13 @@ class OIDNDenoiseContext {
|
||||
* pixels. */
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, 1.0f, num_samples_);
|
||||
|
||||
pass_accessor.get_render_tile_pixels(render_buffers_, buffer_params_, destination);
|
||||
BufferParams buffer_params = buffer_params_;
|
||||
buffer_params.window_x = 0;
|
||||
buffer_params.window_y = 0;
|
||||
buffer_params.window_width = buffer_params.width;
|
||||
buffer_params.window_height = buffer_params.height;
|
||||
|
||||
pass_accessor.get_render_tile_pixels(render_buffers_, buffer_params, destination);
|
||||
}
|
||||
|
||||
/* Read pass pixels using PassAccessor into a temporary buffer which is owned by the pass.. */
|
||||
|
@@ -149,9 +149,6 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
|
||||
/* Denoised passes store their final pixels, no need in special calculation. */
|
||||
get_pass_float(render_buffers, buffer_params, destination);
|
||||
}
|
||||
else if (type == PASS_RENDER_TIME) {
|
||||
/* TODO(sergey): Needs implementation. */
|
||||
}
|
||||
else if (type == PASS_DEPTH) {
|
||||
get_pass_depth(render_buffers, buffer_params, destination);
|
||||
}
|
||||
|
@@ -99,17 +99,22 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
|
||||
{
|
||||
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
||||
|
||||
const float *buffer_data = render_buffers->buffer.data();
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
|
||||
|
||||
const float *window_data = render_buffers->buffer.data() + buffer_params.window_x * pass_stride +
|
||||
buffer_params.window_y * buffer_row_stride;
|
||||
|
||||
const int pixel_stride = destination.pixel_stride ? destination.pixel_stride :
|
||||
destination.num_components;
|
||||
|
||||
tbb::parallel_for(0, buffer_params.height, [&](int64_t y) {
|
||||
int64_t pixel_index = y * buffer_params.width;
|
||||
for (int64_t x = 0; x < buffer_params.width; ++x, ++pixel_index) {
|
||||
const int64_t input_pixel_offset = pixel_index * buffer_params.pass_stride;
|
||||
const float *buffer = buffer_data + input_pixel_offset;
|
||||
float *pixel = destination.pixels + (pixel_index + destination.offset) * pixel_stride;
|
||||
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
float *pixel = destination.pixels +
|
||||
(y * buffer_params.width + destination.offset) * pixel_stride;
|
||||
|
||||
for (int64_t x = 0; x < buffer_params.window_width;
|
||||
++x, buffer += pass_stride, pixel += pixel_stride) {
|
||||
processor(kfilm_convert, buffer, pixel);
|
||||
}
|
||||
});
|
||||
@@ -123,26 +128,28 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
|
||||
const Destination &destination,
|
||||
const Processor &processor) const
|
||||
{
|
||||
const float *buffer_data = render_buffers->buffer.data();
|
||||
const int64_t pass_stride = buffer_params.pass_stride;
|
||||
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
|
||||
|
||||
const float *window_data = render_buffers->buffer.data() + buffer_params.window_x * pass_stride +
|
||||
buffer_params.window_y * buffer_row_stride;
|
||||
|
||||
half4 *dst_start = destination.pixels_half_rgba + destination.offset;
|
||||
const int destination_stride = destination.stride != 0 ? destination.stride :
|
||||
buffer_params.width;
|
||||
|
||||
tbb::parallel_for(0, buffer_params.height, [&](int64_t y) {
|
||||
int64_t pixel_index = y * buffer_params.width;
|
||||
half4 *dst_row_start = dst_start + y * destination_stride;
|
||||
for (int64_t x = 0; x < buffer_params.width; ++x, ++pixel_index) {
|
||||
const int64_t input_pixel_offset = pixel_index * buffer_params.pass_stride;
|
||||
const float *buffer = buffer_data + input_pixel_offset;
|
||||
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
|
||||
const float *buffer = window_data + y * buffer_row_stride;
|
||||
half4 *pixel = dst_start + y * destination_stride;
|
||||
for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {
|
||||
|
||||
float pixel[4];
|
||||
processor(kfilm_convert, buffer, pixel);
|
||||
float pixel_rgba[4];
|
||||
processor(kfilm_convert, buffer, pixel_rgba);
|
||||
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);
|
||||
|
||||
half4 *pixel_half_rgba = dst_row_start + x;
|
||||
float4_store_half(&pixel_half_rgba->x, make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
|
||||
float4_store_half(&pixel->x,
|
||||
make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@@ -43,10 +43,13 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
||||
KernelFilmConvert kfilm_convert;
|
||||
init_kernel_film_convert(&kfilm_convert, buffer_params, destination);
|
||||
|
||||
const int work_size = buffer_params.width * buffer_params.height;
|
||||
const int work_size = buffer_params.window_width * buffer_params.window_height;
|
||||
|
||||
const int destination_stride = destination.stride != 0 ? destination.stride :
|
||||
buffer_params.width;
|
||||
buffer_params.window_width;
|
||||
|
||||
const int offset = buffer_params.window_x * buffer_params.pass_stride +
|
||||
buffer_params.window_y * buffer_params.stride * buffer_params.pass_stride;
|
||||
|
||||
if (destination.d_pixels) {
|
||||
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
|
||||
@@ -55,8 +58,8 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
||||
const_cast<device_ptr *>(&destination.d_pixels),
|
||||
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
|
||||
const_cast<int *>(&work_size),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.offset),
|
||||
const_cast<int *>(&buffer_params.window_width),
|
||||
const_cast<int *>(&offset),
|
||||
const_cast<int *>(&buffer_params.stride),
|
||||
const_cast<int *>(&destination.offset),
|
||||
const_cast<int *>(&destination_stride)};
|
||||
@@ -70,8 +73,8 @@ void PassAccessorGPU::run_film_convert_kernels(DeviceKernel kernel,
|
||||
const_cast<device_ptr *>(&destination.d_pixels_half_rgba),
|
||||
const_cast<device_ptr *>(&render_buffers->buffer.device_pointer),
|
||||
const_cast<int *>(&work_size),
|
||||
const_cast<int *>(&buffer_params.width),
|
||||
const_cast<int *>(&buffer_params.offset),
|
||||
const_cast<int *>(&buffer_params.window_width),
|
||||
const_cast<int *>(&offset),
|
||||
const_cast<int *>(&buffer_params.stride),
|
||||
const_cast<int *>(&destination.offset),
|
||||
const_cast<int *>(&destination_stride)};
|
||||
|
@@ -19,8 +19,9 @@
|
||||
#include "device/cpu/device.h"
|
||||
#include "device/device.h"
|
||||
#include "integrator/pass_accessor.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
#include "integrator/path_trace_tile.h"
|
||||
#include "integrator/render_scheduler.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/pass.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/tile.h"
|
||||
@@ -67,11 +68,11 @@ PathTrace::PathTrace(Device *device,
|
||||
PathTrace::~PathTrace()
|
||||
{
|
||||
/* Destroy any GPU resource which was used for graphics interop.
|
||||
* Need to have access to the GPUDisplay as it is the only source of drawing context which is
|
||||
* used for interop. */
|
||||
if (gpu_display_) {
|
||||
* Need to have access to the PathTraceDisplay as it is the only source of drawing context which
|
||||
* is used for interop. */
|
||||
if (display_) {
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->destroy_gpu_resources(gpu_display_.get());
|
||||
path_trace_work->destroy_gpu_resources(display_.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -94,7 +95,7 @@ bool PathTrace::ready_to_reset()
|
||||
{
|
||||
/* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
|
||||
* display. Of there is no such display, the logic here will break. */
|
||||
DCHECK(gpu_display_);
|
||||
DCHECK(display_);
|
||||
|
||||
/* The logic here tries to provide behavior which feels the most interactive feel to artists.
|
||||
* General idea is to be able to reset as quickly as possible, while still providing interactive
|
||||
@@ -126,8 +127,8 @@ void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_t
|
||||
/* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
|
||||
* It is requires to inform about reset whenever it happens, so that the redraw state tracking is
|
||||
* properly updated. */
|
||||
if (gpu_display_) {
|
||||
gpu_display_->reset(full_params);
|
||||
if (display_) {
|
||||
display_->reset(full_params);
|
||||
}
|
||||
|
||||
render_state_.has_denoised_result = false;
|
||||
@@ -233,42 +234,53 @@ template<typename Callback>
|
||||
static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
|
||||
const vector<WorkBalanceInfo> &work_balance_infos,
|
||||
const BufferParams &buffer_params,
|
||||
const int overscan,
|
||||
const Callback &callback)
|
||||
{
|
||||
const int num_works = path_trace_works.size();
|
||||
const int height = buffer_params.height;
|
||||
const int window_height = buffer_params.window_height;
|
||||
|
||||
int current_y = 0;
|
||||
for (int i = 0; i < num_works; ++i) {
|
||||
const double weight = work_balance_infos[i].weight;
|
||||
const int slice_height = max(lround(height * weight), 1);
|
||||
const int slice_window_full_y = buffer_params.full_y + buffer_params.window_y + current_y;
|
||||
const int slice_window_height = max(lround(window_height * weight), 1);
|
||||
|
||||
/* Disallow negative values to deal with situations when there are more compute devices than
|
||||
* scan-lines. */
|
||||
const int remaining_height = max(0, height - current_y);
|
||||
const int remaining_window_height = max(0, window_height - current_y);
|
||||
|
||||
BufferParams slice_params = buffer_params;
|
||||
|
||||
slice_params.full_y = max(slice_window_full_y - overscan, buffer_params.full_y);
|
||||
slice_params.window_y = slice_window_full_y - slice_params.full_y;
|
||||
|
||||
BufferParams slide_params = buffer_params;
|
||||
slide_params.full_y = buffer_params.full_y + current_y;
|
||||
if (i < num_works - 1) {
|
||||
slide_params.height = min(slice_height, remaining_height);
|
||||
slice_params.window_height = min(slice_window_height, remaining_window_height);
|
||||
}
|
||||
else {
|
||||
slide_params.height = remaining_height;
|
||||
slice_params.window_height = remaining_window_height;
|
||||
}
|
||||
|
||||
slide_params.update_offset_stride();
|
||||
slice_params.height = slice_params.window_y + slice_params.window_height + overscan;
|
||||
slice_params.height = min(slice_params.height,
|
||||
buffer_params.height + buffer_params.full_y - slice_params.full_y);
|
||||
|
||||
callback(path_trace_works[i].get(), slide_params);
|
||||
slice_params.update_offset_stride();
|
||||
|
||||
current_y += slide_params.height;
|
||||
callback(path_trace_works[i].get(), slice_params);
|
||||
|
||||
current_y += slice_params.window_height;
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::update_allocated_work_buffer_params()
|
||||
{
|
||||
const int overscan = tile_manager_.get_tile_overscan();
|
||||
foreach_sliced_buffer_params(path_trace_works_,
|
||||
work_balance_infos_,
|
||||
big_tile_params_,
|
||||
overscan,
|
||||
[](PathTraceWork *path_trace_work, const BufferParams ¶ms) {
|
||||
RenderBuffers *buffers = path_trace_work->get_render_buffers();
|
||||
buffers->reset(params);
|
||||
@@ -281,6 +293,12 @@ static BufferParams scale_buffer_params(const BufferParams ¶ms, int resoluti
|
||||
|
||||
scaled_params.width = max(1, params.width / resolution_divider);
|
||||
scaled_params.height = max(1, params.height / resolution_divider);
|
||||
|
||||
scaled_params.window_x = params.window_x / resolution_divider;
|
||||
scaled_params.window_y = params.window_y / resolution_divider;
|
||||
scaled_params.window_width = params.window_width / resolution_divider;
|
||||
scaled_params.window_height = params.window_height / resolution_divider;
|
||||
|
||||
scaled_params.full_x = params.full_x / resolution_divider;
|
||||
scaled_params.full_y = params.full_y / resolution_divider;
|
||||
scaled_params.full_width = params.full_width / resolution_divider;
|
||||
@@ -299,9 +317,12 @@ void PathTrace::update_effective_work_buffer_params(const RenderWork &render_wor
|
||||
const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_,
|
||||
resolution_divider);
|
||||
|
||||
const int overscan = tile_manager_.get_tile_overscan();
|
||||
|
||||
foreach_sliced_buffer_params(path_trace_works_,
|
||||
work_balance_infos_,
|
||||
scaled_big_tile_params,
|
||||
overscan,
|
||||
[&](PathTraceWork *path_trace_work, const BufferParams params) {
|
||||
path_trace_work->set_effective_buffer_params(
|
||||
scaled_full_params, scaled_big_tile_params, params);
|
||||
@@ -535,25 +556,35 @@ void PathTrace::denoise(const RenderWork &render_work)
|
||||
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
|
||||
}
|
||||
|
||||
void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
|
||||
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
|
||||
{
|
||||
gpu_display_ = move(gpu_display);
|
||||
output_driver_ = move(driver);
|
||||
}
|
||||
|
||||
void PathTrace::clear_gpu_display()
|
||||
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
|
||||
{
|
||||
if (gpu_display_) {
|
||||
gpu_display_->clear();
|
||||
if (driver) {
|
||||
display_ = make_unique<PathTraceDisplay>(move(driver));
|
||||
}
|
||||
else {
|
||||
display_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::clear_display()
|
||||
{
|
||||
if (display_) {
|
||||
display_->clear();
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::draw()
|
||||
{
|
||||
if (!gpu_display_) {
|
||||
if (!display_) {
|
||||
return;
|
||||
}
|
||||
|
||||
did_draw_after_reset_ |= gpu_display_->draw();
|
||||
did_draw_after_reset_ |= display_->draw();
|
||||
}
|
||||
|
||||
void PathTrace::update_display(const RenderWork &render_work)
|
||||
@@ -562,31 +593,32 @@ void PathTrace::update_display(const RenderWork &render_work)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!gpu_display_ && !tile_buffer_update_cb) {
|
||||
if (!display_ && !output_driver_) {
|
||||
VLOG(3) << "Ignore display update.";
|
||||
return;
|
||||
}
|
||||
|
||||
if (full_params_.width == 0 || full_params_.height == 0) {
|
||||
VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
|
||||
VLOG(3) << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
|
||||
return;
|
||||
}
|
||||
|
||||
const double start_time = time_dt();
|
||||
|
||||
if (tile_buffer_update_cb) {
|
||||
if (output_driver_) {
|
||||
VLOG(3) << "Invoke buffer update callback.";
|
||||
|
||||
tile_buffer_update_cb();
|
||||
PathTraceTile tile(*this);
|
||||
output_driver_->update_render_tile(tile);
|
||||
}
|
||||
|
||||
if (gpu_display_) {
|
||||
if (display_) {
|
||||
VLOG(3) << "Perform copy to GPUDisplay work.";
|
||||
|
||||
const int resolution_divider = render_work.resolution_divider;
|
||||
const int texture_width = max(1, full_params_.width / resolution_divider);
|
||||
const int texture_height = max(1, full_params_.height / resolution_divider);
|
||||
if (!gpu_display_->update_begin(texture_width, texture_height)) {
|
||||
if (!display_->update_begin(texture_width, texture_height)) {
|
||||
LOG(ERROR) << "Error beginning GPUDisplay update.";
|
||||
return;
|
||||
}
|
||||
@@ -600,10 +632,10 @@ void PathTrace::update_display(const RenderWork &render_work)
|
||||
* all works in parallel. */
|
||||
const int num_samples = get_num_samples_in_buffer();
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
|
||||
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
|
||||
}
|
||||
|
||||
gpu_display_->update_end();
|
||||
display_->update_end();
|
||||
}
|
||||
|
||||
render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
|
||||
@@ -753,20 +785,26 @@ bool PathTrace::is_cancel_requested()
|
||||
|
||||
void PathTrace::tile_buffer_write()
|
||||
{
|
||||
if (!tile_buffer_write_cb) {
|
||||
if (!output_driver_) {
|
||||
return;
|
||||
}
|
||||
|
||||
tile_buffer_write_cb();
|
||||
PathTraceTile tile(*this);
|
||||
output_driver_->write_render_tile(tile);
|
||||
}
|
||||
|
||||
void PathTrace::tile_buffer_read()
|
||||
{
|
||||
if (!tile_buffer_read_cb) {
|
||||
if (!device_scene_->data.bake.use) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (tile_buffer_read_cb()) {
|
||||
if (!output_driver_) {
|
||||
return;
|
||||
}
|
||||
|
||||
PathTraceTile tile(*this);
|
||||
if (output_driver_->read_render_tile(tile)) {
|
||||
tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
|
||||
path_trace_work->copy_render_buffers_to_device();
|
||||
});
|
||||
@@ -987,12 +1025,12 @@ bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
|
||||
int2 PathTrace::get_render_tile_size() const
|
||||
{
|
||||
if (full_frame_state_.render_buffers) {
|
||||
return make_int2(full_frame_state_.render_buffers->params.width,
|
||||
full_frame_state_.render_buffers->params.height);
|
||||
return make_int2(full_frame_state_.render_buffers->params.window_width,
|
||||
full_frame_state_.render_buffers->params.window_height);
|
||||
}
|
||||
|
||||
const Tile &tile = tile_manager_.get_current_tile();
|
||||
return make_int2(tile.width, tile.height);
|
||||
return make_int2(tile.window_width, tile.window_height);
|
||||
}
|
||||
|
||||
int2 PathTrace::get_render_tile_offset() const
|
||||
@@ -1002,7 +1040,12 @@ int2 PathTrace::get_render_tile_offset() const
|
||||
}
|
||||
|
||||
const Tile &tile = tile_manager_.get_current_tile();
|
||||
return make_int2(tile.x, tile.y);
|
||||
return make_int2(tile.x + tile.window_x, tile.y + tile.window_y);
|
||||
}
|
||||
|
||||
int2 PathTrace::get_render_size() const
|
||||
{
|
||||
return tile_manager_.get_size();
|
||||
}
|
||||
|
||||
const BufferParams &PathTrace::get_render_tile_params() const
|
||||
|
@@ -31,12 +31,14 @@ CCL_NAMESPACE_BEGIN
|
||||
class AdaptiveSampling;
|
||||
class Device;
|
||||
class DeviceScene;
|
||||
class DisplayDriver;
|
||||
class Film;
|
||||
class RenderBuffers;
|
||||
class RenderScheduler;
|
||||
class RenderWork;
|
||||
class PathTraceDisplay;
|
||||
class OutputDriver;
|
||||
class Progress;
|
||||
class GPUDisplay;
|
||||
class TileManager;
|
||||
|
||||
/* PathTrace class takes care of kernel graph and scheduling on a (multi)device. It takes care of
|
||||
@@ -98,13 +100,16 @@ class PathTrace {
|
||||
* Use this to configure the adaptive sampler before rendering any samples. */
|
||||
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);
|
||||
|
||||
/* Set GPU display which takes care of drawing the render result. */
|
||||
void set_gpu_display(unique_ptr<GPUDisplay> gpu_display);
|
||||
/* Sets output driver for render buffer output. */
|
||||
void set_output_driver(unique_ptr<OutputDriver> driver);
|
||||
|
||||
/* Clear the GPU display by filling it in with all zeroes. */
|
||||
void clear_gpu_display();
|
||||
/* Set display driver for interactive render buffer display. */
|
||||
void set_display_driver(unique_ptr<DisplayDriver> driver);
|
||||
|
||||
/* Perform drawing of the current state of the GPUDisplay. */
|
||||
/* Clear the display buffer by filling it in with all zeroes. */
|
||||
void clear_display();
|
||||
|
||||
/* Perform drawing of the current state of the DisplayDriver. */
|
||||
void draw();
|
||||
|
||||
/* Cancel rendering process as soon as possible, without waiting for full tile to be sampled.
|
||||
@@ -157,6 +162,7 @@ class PathTrace {
|
||||
* instead. */
|
||||
int2 get_render_tile_size() const;
|
||||
int2 get_render_tile_offset() const;
|
||||
int2 get_render_size() const;
|
||||
|
||||
/* Get buffer parameters of the current tile.
|
||||
*
|
||||
@@ -168,18 +174,6 @@ class PathTrace {
|
||||
* times, and so on. */
|
||||
string full_report() const;
|
||||
|
||||
/* Callback which communicates an updates state of the render buffer of the current big tile.
|
||||
* Is called during path tracing to communicate work-in-progress state of the final buffer. */
|
||||
function<void(void)> tile_buffer_update_cb;
|
||||
|
||||
/* Callback which communicates final rendered buffer. Is called after path-tracing is done. */
|
||||
function<void(void)> tile_buffer_write_cb;
|
||||
|
||||
/* Callback which initializes rendered buffer. Is called before path-tracing starts.
|
||||
*
|
||||
* This is used for baking. */
|
||||
function<bool(void)> tile_buffer_read_cb;
|
||||
|
||||
/* Callback which is called to report current rendering progress.
|
||||
*
|
||||
* It is supposed to be cheaper than buffer update/write, hence can be called more often.
|
||||
@@ -252,7 +246,11 @@ class PathTrace {
|
||||
RenderScheduler &render_scheduler_;
|
||||
TileManager &tile_manager_;
|
||||
|
||||
unique_ptr<GPUDisplay> gpu_display_;
|
||||
/* Display driver for interactive render buffer display. */
|
||||
unique_ptr<PathTraceDisplay> display_;
|
||||
|
||||
/* Output driver to write render buffer to. */
|
||||
unique_ptr<OutputDriver> output_driver_;
|
||||
|
||||
/* Per-compute device descriptors of work which is responsible for path tracing on its configured
|
||||
* device. */
|
||||
|
@@ -14,20 +14,25 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "render/gpu_display.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "render/buffers.h"
|
||||
|
||||
#include "util/util_logging.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
void GPUDisplay::reset(const BufferParams &buffer_params)
|
||||
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver))
|
||||
{
|
||||
}
|
||||
|
||||
void PathTraceDisplay::reset(const BufferParams &buffer_params)
|
||||
{
|
||||
thread_scoped_lock lock(mutex_);
|
||||
|
||||
const GPUDisplayParams old_params = params_;
|
||||
const DisplayDriver::Params old_params = params_;
|
||||
|
||||
params_.offset = make_int2(buffer_params.full_x, buffer_params.full_y);
|
||||
params_.full_offset = make_int2(buffer_params.full_x, buffer_params.full_y);
|
||||
params_.full_size = make_int2(buffer_params.full_width, buffer_params.full_height);
|
||||
params_.size = make_int2(buffer_params.width, buffer_params.height);
|
||||
|
||||
@@ -44,7 +49,7 @@ void GPUDisplay::reset(const BufferParams &buffer_params)
|
||||
texture_state_.is_outdated = true;
|
||||
}
|
||||
|
||||
void GPUDisplay::mark_texture_updated()
|
||||
void PathTraceDisplay::mark_texture_updated()
|
||||
{
|
||||
texture_state_.is_outdated = false;
|
||||
texture_state_.is_usable = true;
|
||||
@@ -54,7 +59,7 @@ void GPUDisplay::mark_texture_updated()
|
||||
* Update procedure.
|
||||
*/
|
||||
|
||||
bool GPUDisplay::update_begin(int texture_width, int texture_height)
|
||||
bool PathTraceDisplay::update_begin(int texture_width, int texture_height)
|
||||
{
|
||||
DCHECK(!update_state_.is_active);
|
||||
|
||||
@@ -66,15 +71,15 @@ bool GPUDisplay::update_begin(int texture_width, int texture_height)
|
||||
/* Get parameters within a mutex lock, to avoid reset() modifying them at the same time.
|
||||
* The update itself is non-blocking however, for better performance and to avoid
|
||||
* potential deadlocks due to locks held by the subclass. */
|
||||
GPUDisplayParams params;
|
||||
DisplayDriver::Params params;
|
||||
{
|
||||
thread_scoped_lock lock(mutex_);
|
||||
params = params_;
|
||||
texture_state_.size = make_int2(texture_width, texture_height);
|
||||
}
|
||||
|
||||
if (!do_update_begin(params, texture_width, texture_height)) {
|
||||
LOG(ERROR) << "GPUDisplay implementation could not begin update.";
|
||||
if (!driver_->update_begin(params, texture_width, texture_height)) {
|
||||
LOG(ERROR) << "PathTraceDisplay implementation could not begin update.";
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -83,7 +88,7 @@ bool GPUDisplay::update_begin(int texture_width, int texture_height)
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPUDisplay::update_end()
|
||||
void PathTraceDisplay::update_end()
|
||||
{
|
||||
DCHECK(update_state_.is_active);
|
||||
|
||||
@@ -92,12 +97,12 @@ void GPUDisplay::update_end()
|
||||
return;
|
||||
}
|
||||
|
||||
do_update_end();
|
||||
driver_->update_end();
|
||||
|
||||
update_state_.is_active = false;
|
||||
}
|
||||
|
||||
int2 GPUDisplay::get_texture_size() const
|
||||
int2 PathTraceDisplay::get_texture_size() const
|
||||
{
|
||||
return texture_state_.size;
|
||||
}
|
||||
@@ -106,25 +111,54 @@ int2 GPUDisplay::get_texture_size() const
|
||||
* Texture update from CPU buffer.
|
||||
*/
|
||||
|
||||
void GPUDisplay::copy_pixels_to_texture(
|
||||
void PathTraceDisplay::copy_pixels_to_texture(
|
||||
const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
|
||||
{
|
||||
DCHECK(update_state_.is_active);
|
||||
|
||||
if (!update_state_.is_active) {
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update.";
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of PathTraceDisplay update.";
|
||||
return;
|
||||
}
|
||||
|
||||
mark_texture_updated();
|
||||
do_copy_pixels_to_texture(rgba_pixels, texture_x, texture_y, pixels_width, pixels_height);
|
||||
|
||||
/* This call copies pixels to a mapped texture buffer which is typically much cheaper from CPU
|
||||
* time point of view than to copy data directly to a texture.
|
||||
*
|
||||
* The possible downside of this approach is that it might require a higher peak memory when
|
||||
* doing partial updates of the texture (although, in practice even partial updates might peak
|
||||
* with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
|
||||
half4 *mapped_rgba_pixels = map_texture_buffer();
|
||||
if (!mapped_rgba_pixels) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int texture_width = texture_state_.size.x;
|
||||
const int texture_height = texture_state_.size.y;
|
||||
|
||||
if (texture_x == 0 && texture_y == 0 && pixels_width == texture_width &&
|
||||
pixels_height == texture_height) {
|
||||
const size_t size_in_bytes = sizeof(half4) * texture_width * texture_height;
|
||||
memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
|
||||
}
|
||||
else {
|
||||
const half4 *rgba_row = rgba_pixels;
|
||||
half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_width + texture_x;
|
||||
for (int y = 0; y < pixels_height;
|
||||
++y, rgba_row += pixels_width, mapped_rgba_row += texture_width) {
|
||||
memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
|
||||
}
|
||||
}
|
||||
|
||||
unmap_texture_buffer();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture buffer mapping.
|
||||
*/
|
||||
|
||||
half4 *GPUDisplay::map_texture_buffer()
|
||||
half4 *PathTraceDisplay::map_texture_buffer()
|
||||
{
|
||||
DCHECK(!texture_buffer_state_.is_mapped);
|
||||
DCHECK(update_state_.is_active);
|
||||
@@ -135,11 +169,11 @@ half4 *GPUDisplay::map_texture_buffer()
|
||||
}
|
||||
|
||||
if (!update_state_.is_active) {
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update.";
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of PathTraceDisplay update.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
half4 *mapped_rgba_pixels = do_map_texture_buffer();
|
||||
half4 *mapped_rgba_pixels = driver_->map_texture_buffer();
|
||||
|
||||
if (mapped_rgba_pixels) {
|
||||
texture_buffer_state_.is_mapped = true;
|
||||
@@ -148,7 +182,7 @@ half4 *GPUDisplay::map_texture_buffer()
|
||||
return mapped_rgba_pixels;
|
||||
}
|
||||
|
||||
void GPUDisplay::unmap_texture_buffer()
|
||||
void PathTraceDisplay::unmap_texture_buffer()
|
||||
{
|
||||
DCHECK(texture_buffer_state_.is_mapped);
|
||||
|
||||
@@ -160,14 +194,14 @@ void GPUDisplay::unmap_texture_buffer()
|
||||
texture_buffer_state_.is_mapped = false;
|
||||
|
||||
mark_texture_updated();
|
||||
do_unmap_texture_buffer();
|
||||
driver_->unmap_texture_buffer();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Graphics interoperability.
|
||||
*/
|
||||
|
||||
DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get()
|
||||
DisplayDriver::GraphicsInterop PathTraceDisplay::graphics_interop_get()
|
||||
{
|
||||
DCHECK(!texture_buffer_state_.is_mapped);
|
||||
DCHECK(update_state_.is_active);
|
||||
@@ -175,38 +209,45 @@ DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get()
|
||||
if (texture_buffer_state_.is_mapped) {
|
||||
LOG(ERROR)
|
||||
<< "Attempt to use graphics interoperability mode while the texture buffer is mapped.";
|
||||
return DeviceGraphicsInteropDestination();
|
||||
return DisplayDriver::GraphicsInterop();
|
||||
}
|
||||
|
||||
if (!update_state_.is_active) {
|
||||
LOG(ERROR) << "Attempt to use graphics interoperability outside of GPUDisplay update.";
|
||||
return DeviceGraphicsInteropDestination();
|
||||
LOG(ERROR) << "Attempt to use graphics interoperability outside of PathTraceDisplay update.";
|
||||
return DisplayDriver::GraphicsInterop();
|
||||
}
|
||||
|
||||
/* Assume that interop will write new values to the texture. */
|
||||
mark_texture_updated();
|
||||
|
||||
return do_graphics_interop_get();
|
||||
return driver_->graphics_interop_get();
|
||||
}
|
||||
|
||||
void GPUDisplay::graphics_interop_activate()
|
||||
void PathTraceDisplay::graphics_interop_activate()
|
||||
{
|
||||
driver_->graphics_interop_activate();
|
||||
}
|
||||
|
||||
void GPUDisplay::graphics_interop_deactivate()
|
||||
void PathTraceDisplay::graphics_interop_deactivate()
|
||||
{
|
||||
driver_->graphics_interop_deactivate();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Drawing.
|
||||
*/
|
||||
|
||||
bool GPUDisplay::draw()
|
||||
void PathTraceDisplay::clear()
|
||||
{
|
||||
driver_->clear();
|
||||
}
|
||||
|
||||
bool PathTraceDisplay::draw()
|
||||
{
|
||||
/* Get parameters within a mutex lock, to avoid reset() modifying them at the same time.
|
||||
* The drawing itself is non-blocking however, for better performance and to avoid
|
||||
* potential deadlocks due to locks held by the subclass. */
|
||||
GPUDisplayParams params;
|
||||
DisplayDriver::Params params;
|
||||
bool is_usable;
|
||||
bool is_outdated;
|
||||
|
||||
@@ -218,7 +259,7 @@ bool GPUDisplay::draw()
|
||||
}
|
||||
|
||||
if (is_usable) {
|
||||
do_draw(params);
|
||||
driver_->draw(params);
|
||||
}
|
||||
|
||||
return !is_outdated;
|
@@ -16,52 +16,30 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "device/device_graphics_interop.h"
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_half.h"
|
||||
#include "util/util_thread.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BufferParams;
|
||||
|
||||
/* GPUDisplay class takes care of drawing render result in a viewport. The render result is stored
|
||||
* in a GPU-side texture, which is updated from a path tracer and drawn by an application.
|
||||
/* PathTraceDisplay is used for efficient render buffer display.
|
||||
*
|
||||
* The base GPUDisplay does some special texture state tracking, which allows render Session to
|
||||
* make decisions on whether reset for an updated state is possible or not. This state should only
|
||||
* be tracked in a base class and a particular implementation should not worry about it.
|
||||
* The host applications implements a DisplayDriver, storing a render pass in a GPU-side
|
||||
* textures. This texture is continuously updated by the path tracer and drawn by the host
|
||||
* application.
|
||||
*
|
||||
* The subclasses should only implement the pure virtual methods, which allows them to not worry
|
||||
* about parent method calls, which helps them to be as small and reliable as possible. */
|
||||
* PathTraceDisplay is a wrapper around the DisplayDriver, adding thread safety, state tracking
|
||||
* and error checking. */
|
||||
|
||||
class GPUDisplayParams {
|
||||
class PathTraceDisplay {
|
||||
public:
|
||||
/* Offset of the display within a viewport.
|
||||
* For example, set to a lower-bottom corner of border render in Blender's viewport. */
|
||||
int2 offset = make_int2(0, 0);
|
||||
|
||||
/* Full viewport size.
|
||||
*
|
||||
* NOTE: Is not affected by the resolution divider. */
|
||||
int2 full_size = make_int2(0, 0);
|
||||
|
||||
/* Effective viewport size.
|
||||
* In the case of border render, size of the border rectangle.
|
||||
*
|
||||
* NOTE: Is not affected by the resolution divider. */
|
||||
int2 size = make_int2(0, 0);
|
||||
|
||||
bool modified(const GPUDisplayParams &other) const
|
||||
{
|
||||
return !(offset == other.offset && full_size == other.full_size && size == other.size);
|
||||
}
|
||||
};
|
||||
|
||||
class GPUDisplay {
|
||||
public:
|
||||
GPUDisplay() = default;
|
||||
virtual ~GPUDisplay() = default;
|
||||
PathTraceDisplay(unique_ptr<DisplayDriver> driver);
|
||||
virtual ~PathTraceDisplay() = default;
|
||||
|
||||
/* Reset the display for the new state of render session. Is called whenever session is reset,
|
||||
* which happens on changes like viewport navigation or viewport dimension change.
|
||||
@@ -69,11 +47,6 @@ class GPUDisplay {
|
||||
* This call will configure parameters for a changed buffer and reset the texture state. */
|
||||
void reset(const BufferParams &buffer_params);
|
||||
|
||||
const GPUDisplayParams &get_params() const
|
||||
{
|
||||
return params_;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Update procedure.
|
||||
*
|
||||
@@ -94,7 +67,8 @@ class GPUDisplay {
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture update from CPU buffer.
|
||||
*
|
||||
* NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`.
|
||||
* NOTE: The PathTraceDisplay should be marked for an update being in process with
|
||||
* `update_begin()`.
|
||||
*
|
||||
* Most portable implementation, which must be supported by all platforms. Might not be the most
|
||||
* efficient one.
|
||||
@@ -115,7 +89,8 @@ class GPUDisplay {
|
||||
* This functionality is used to update GPU-side texture content without need to maintain CPU
|
||||
* side buffer on the caller.
|
||||
*
|
||||
* NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`.
|
||||
* NOTE: The PathTraceDisplay should be marked for an update being in process with
|
||||
* `update_begin()`.
|
||||
*
|
||||
* NOTE: Texture buffer can not be mapped while graphics interoperability is active. This means
|
||||
* that `map_texture_buffer()` is not allowed between `graphics_interop_begin()` and
|
||||
@@ -145,14 +120,14 @@ class GPUDisplay {
|
||||
* that `graphics_interop_get()` is not allowed between `map_texture_buffer()` and
|
||||
* `unmap_texture_buffer()` calls. */
|
||||
|
||||
/* Get GPUDisplay graphics interoperability information which acts as a destination for the
|
||||
/* Get PathTraceDisplay graphics interoperability information which acts as a destination for the
|
||||
* device API. */
|
||||
DeviceGraphicsInteropDestination graphics_interop_get();
|
||||
DisplayDriver::GraphicsInterop graphics_interop_get();
|
||||
|
||||
/* (De)activate GPU display for graphics interoperability outside of regular display update
|
||||
* routines. */
|
||||
virtual void graphics_interop_activate();
|
||||
virtual void graphics_interop_deactivate();
|
||||
void graphics_interop_activate();
|
||||
void graphics_interop_deactivate();
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Drawing.
|
||||
@@ -168,42 +143,21 @@ class GPUDisplay {
|
||||
* after clear will write new pixel values for an updating area, leaving everything else zeroed.
|
||||
*
|
||||
* If the GPU display supports graphics interoperability then the zeroing the display is to be
|
||||
* delegated to the device via the `DeviceGraphicsInteropDestination`. */
|
||||
virtual void clear() = 0;
|
||||
* delegated to the device via the `DisplayDriver::GraphicsInterop`. */
|
||||
void clear();
|
||||
|
||||
/* Draw the current state of the texture.
|
||||
*
|
||||
* Returns true if this call did draw an updated state of the texture. */
|
||||
bool draw();
|
||||
|
||||
protected:
|
||||
/* Implementation-specific calls which subclasses are to implement.
|
||||
* These `do_foo()` method corresponds to their `foo()` calls, but they are purely virtual to
|
||||
* simplify their particular implementation. */
|
||||
virtual bool do_update_begin(const GPUDisplayParams ¶ms,
|
||||
int texture_width,
|
||||
int texture_height) = 0;
|
||||
virtual void do_update_end() = 0;
|
||||
|
||||
virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
|
||||
int texture_x,
|
||||
int texture_y,
|
||||
int pixels_width,
|
||||
int pixels_height) = 0;
|
||||
|
||||
virtual half4 *do_map_texture_buffer() = 0;
|
||||
virtual void do_unmap_texture_buffer() = 0;
|
||||
|
||||
/* Note that this might be called in parallel to do_update_begin() and do_update_end(),
|
||||
* the subclass is responsible for appropriate mutex locks to avoid multiple threads
|
||||
* editing and drawing the texture at the same time. */
|
||||
virtual void do_draw(const GPUDisplayParams ¶ms) = 0;
|
||||
|
||||
virtual DeviceGraphicsInteropDestination do_graphics_interop_get() = 0;
|
||||
|
||||
private:
|
||||
/* Display driver implemented by the host application. */
|
||||
unique_ptr<DisplayDriver> driver_;
|
||||
|
||||
/* Current display parameters */
|
||||
thread_mutex mutex_;
|
||||
GPUDisplayParams params_;
|
||||
DisplayDriver::Params params_;
|
||||
|
||||
/* Mark texture as its content has been updated.
|
||||
* Used from places which knows that the texture content has been brought up-to-date, so that the
|
107
intern/cycles/integrator/path_trace_tile.cpp
Normal file
107
intern/cycles/integrator/path_trace_tile.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "integrator/path_trace_tile.h"
|
||||
#include "integrator/pass_accessor_cpu.h"
|
||||
#include "integrator/path_trace.h"
|
||||
|
||||
#include "render/buffers.h"
|
||||
#include "render/film.h"
|
||||
#include "render/pass.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
PathTraceTile::PathTraceTile(PathTrace &path_trace)
|
||||
: OutputDriver::Tile(path_trace.get_render_tile_offset(),
|
||||
path_trace.get_render_tile_size(),
|
||||
path_trace.get_render_size(),
|
||||
path_trace.get_render_tile_params().layer,
|
||||
path_trace.get_render_tile_params().view),
|
||||
path_trace_(path_trace),
|
||||
copied_from_device_(false)
|
||||
{
|
||||
}
|
||||
|
||||
bool PathTraceTile::get_pass_pixels(const string_view pass_name,
|
||||
const int num_channels,
|
||||
float *pixels) const
|
||||
{
|
||||
/* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
|
||||
* is happening while this function runs. */
|
||||
|
||||
if (!copied_from_device_) {
|
||||
/* Copy from device on demand. */
|
||||
path_trace_.copy_render_tile_from_device();
|
||||
const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
|
||||
}
|
||||
|
||||
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
|
||||
|
||||
const BufferPass *pass = buffer_params.find_pass(pass_name);
|
||||
if (pass == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool has_denoised_result = path_trace_.has_denoised_result();
|
||||
if (pass->mode == PassMode::DENOISED && !has_denoised_result) {
|
||||
pass = buffer_params.find_pass(pass->type);
|
||||
if (pass == nullptr) {
|
||||
/* Happens when denoised result pass is requested but is never written by the kernel. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
pass = buffer_params.get_actual_display_pass(pass);
|
||||
|
||||
const float exposure = buffer_params.exposure;
|
||||
const int num_samples = path_trace_.get_num_render_tile_samples();
|
||||
|
||||
PassAccessor::PassAccessInfo pass_access_info(*pass);
|
||||
pass_access_info.use_approximate_shadow_catcher = buffer_params.use_approximate_shadow_catcher;
|
||||
pass_access_info.use_approximate_shadow_catcher_background =
|
||||
pass_access_info.use_approximate_shadow_catcher && !buffer_params.use_transparent_background;
|
||||
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
|
||||
const PassAccessor::Destination destination(pixels, num_channels);
|
||||
|
||||
return path_trace_.get_render_tile_pixels(pass_accessor, destination);
|
||||
}
|
||||
|
||||
bool PathTraceTile::set_pass_pixels(const string_view pass_name,
|
||||
const int num_channels,
|
||||
const float *pixels) const
|
||||
{
|
||||
/* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification
|
||||
* is happening while this function runs. */
|
||||
|
||||
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
|
||||
const BufferPass *pass = buffer_params.find_pass(pass_name);
|
||||
if (!pass) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const float exposure = buffer_params.exposure;
|
||||
const int num_samples = 1;
|
||||
|
||||
const PassAccessor::PassAccessInfo pass_access_info(*pass);
|
||||
PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples);
|
||||
PassAccessor::Source source(pixels, num_channels);
|
||||
|
||||
return path_trace_.set_render_tile_pixels(pass_accessor, source);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
43
intern/cycles/integrator/path_trace_tile.h
Normal file
43
intern/cycles/integrator/path_trace_tile.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "render/output_driver.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* PathTraceTile
|
||||
*
|
||||
* Implementation of OutputDriver::Tile interface for path tracer. */
|
||||
|
||||
class PathTrace;
|
||||
|
||||
class PathTraceTile : public OutputDriver::Tile {
|
||||
public:
|
||||
PathTraceTile(PathTrace &path_trace);
|
||||
|
||||
bool get_pass_pixels(const string_view pass_name, const int num_channels, float *pixels) const;
|
||||
bool set_pass_pixels(const string_view pass_name,
|
||||
const int num_channels,
|
||||
const float *pixels) const;
|
||||
|
||||
private:
|
||||
PathTrace &path_trace_;
|
||||
bool copied_from_device_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
@@ -16,12 +16,12 @@
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/path_trace_display.h"
|
||||
#include "integrator/path_trace_work.h"
|
||||
#include "integrator/path_trace_work_cpu.h"
|
||||
#include "integrator/path_trace_work_gpu.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/film.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "kernel/kernel_types.h"
|
||||
@@ -134,7 +134,8 @@ void PathTraceWork::copy_from_denoised_render_buffers(const RenderBuffers *rende
|
||||
bool PathTraceWork::get_render_tile_pixels(const PassAccessor &pass_accessor,
|
||||
const PassAccessor::Destination &destination)
|
||||
{
|
||||
const int offset_y = effective_buffer_params_.full_y - effective_big_tile_params_.full_y;
|
||||
const int offset_y = (effective_buffer_params_.full_y + effective_buffer_params_.window_y) -
|
||||
(effective_big_tile_params_.full_y + effective_big_tile_params_.window_y);
|
||||
const int width = effective_buffer_params_.width;
|
||||
|
||||
PassAccessor::Destination slice_destination = destination;
|
||||
@@ -185,14 +186,16 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
|
||||
return pass_access_info;
|
||||
}
|
||||
|
||||
PassAccessor::Destination PathTraceWork::get_gpu_display_destination_template(
|
||||
const GPUDisplay *gpu_display) const
|
||||
PassAccessor::Destination PathTraceWork::get_display_destination_template(
|
||||
const PathTraceDisplay *display) const
|
||||
{
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
|
||||
const int2 display_texture_size = gpu_display->get_texture_size();
|
||||
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x;
|
||||
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y;
|
||||
const int2 display_texture_size = display->get_texture_size();
|
||||
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x +
|
||||
effective_buffer_params_.window_x;
|
||||
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y +
|
||||
effective_buffer_params_.window_y;
|
||||
|
||||
destination.offset = texture_y * display_texture_size.x + texture_x;
|
||||
destination.stride = display_texture_size.x;
|
||||
|
@@ -28,7 +28,7 @@ class BufferParams;
|
||||
class Device;
|
||||
class DeviceScene;
|
||||
class Film;
|
||||
class GPUDisplay;
|
||||
class PathTraceDisplay;
|
||||
class RenderBuffers;
|
||||
|
||||
class PathTraceWork {
|
||||
@@ -83,11 +83,9 @@ class PathTraceWork {
|
||||
* noisy pass mode will be passed here when it is known that the buffer does not have denoised
|
||||
* passes yet (because denoiser did not run). If the denoised pass is requested and denoiser is
|
||||
* not used then this function will fall-back to the noisy pass instead. */
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) = 0;
|
||||
virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) = 0;
|
||||
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) = 0;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) = 0;
|
||||
|
||||
/* Copy data from/to given render buffers.
|
||||
* Will copy pixels from a corresponding place (from multi-device point of view) of the render
|
||||
@@ -162,8 +160,8 @@ class PathTraceWork {
|
||||
|
||||
/* Get destination which offset and stride are configured so that writing to it will write to a
|
||||
* proper location of GPU display texture, taking current tile and device slice into account. */
|
||||
PassAccessor::Destination get_gpu_display_destination_template(
|
||||
const GPUDisplay *gpu_display) const;
|
||||
PassAccessor::Destination get_display_destination_template(
|
||||
const PathTraceDisplay *display) const;
|
||||
|
||||
/* Device which will be used for path tracing.
|
||||
* Note that it is an actual render device (and never is a multi-device). */
|
||||
|
@@ -22,9 +22,9 @@
|
||||
#include "kernel/kernel_path_state.h"
|
||||
|
||||
#include "integrator/pass_accessor_cpu.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "render/buffers.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "util/util_atomic.h"
|
||||
@@ -161,14 +161,14 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
|
||||
}
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
half4 *rgba_half = gpu_display->map_texture_buffer();
|
||||
half4 *rgba_half = display->map_texture_buffer();
|
||||
if (!rgba_half) {
|
||||
/* TODO(sergey): Look into using copy_to_gpu_display() if mapping failed. Might be needed for
|
||||
* some implementations of GPUDisplay which can not map memory? */
|
||||
/* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for
|
||||
* some implementations of PathTraceDisplay which can not map memory? */
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -178,7 +178,7 @@ void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);
|
||||
|
||||
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
destination.pixels_half_rgba = rgba_half;
|
||||
|
||||
tbb::task_arena local_arena = local_tbb_arena_create(device_);
|
||||
@@ -186,10 +186,10 @@ void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination);
|
||||
});
|
||||
|
||||
gpu_display->unmap_texture_buffer();
|
||||
display->unmap_texture_buffer();
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::destroy_gpu_resources(GPUDisplay * /*gpu_display*/)
|
||||
void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/)
|
||||
{
|
||||
}
|
||||
|
||||
|
@@ -50,10 +50,10 @@ class PathTraceWorkCPU : public PathTraceWork {
|
||||
int start_sample,
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
|
||||
|
||||
virtual bool copy_render_buffers_from_device() override;
|
||||
virtual bool copy_render_buffers_to_device() override;
|
||||
|
@@ -15,14 +15,15 @@
|
||||
*/
|
||||
|
||||
#include "integrator/path_trace_work_gpu.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/pass_accessor_gpu.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_string.h"
|
||||
#include "util/util_tbb.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
@@ -30,6 +31,38 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
static size_t estimate_single_state_size()
|
||||
{
|
||||
size_t state_size = 0;
|
||||
|
||||
#define KERNEL_STRUCT_BEGIN(name) for (int array_index = 0;; array_index++) {
|
||||
#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
|
||||
#define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) state_size += sizeof(type);
|
||||
#define KERNEL_STRUCT_END(name) \
|
||||
break; \
|
||||
}
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
|
||||
if (array_index >= gpu_array_size - 1) { \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
/* TODO(sergey): Look into better estimation for fields which depend on scene features. Maybe
|
||||
* maximum state calculation should happen as `alloc_work_memory()`, so that we can react to an
|
||||
* updated scene state here.
|
||||
* For until then use common value. Currently this size is only used for logging, but is weak to
|
||||
* rely on this. */
|
||||
#define KERNEL_STRUCT_VOLUME_STACK_SIZE 4
|
||||
#include "kernel/integrator/integrator_state_template.h"
|
||||
#undef KERNEL_STRUCT_BEGIN
|
||||
#undef KERNEL_STRUCT_MEMBER
|
||||
#undef KERNEL_STRUCT_ARRAY_MEMBER
|
||||
#undef KERNEL_STRUCT_END
|
||||
#undef KERNEL_STRUCT_END_ARRAY
|
||||
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
|
||||
|
||||
return state_size;
|
||||
}
|
||||
|
||||
PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
||||
Film *film,
|
||||
DeviceScene *device_scene,
|
||||
@@ -46,8 +79,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
||||
queued_paths_(device, "queued_paths", MEM_READ_WRITE),
|
||||
num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
|
||||
work_tiles_(device, "work_tiles", MEM_READ_WRITE),
|
||||
gpu_display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
|
||||
max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
|
||||
display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
|
||||
max_num_paths_(queue_->num_concurrent_states(estimate_single_state_size())),
|
||||
min_num_active_paths_(queue_->num_concurrent_busy_states()),
|
||||
max_active_path_index_(0)
|
||||
{
|
||||
@@ -96,16 +129,27 @@ void PathTraceWorkGPU::alloc_integrator_soa()
|
||||
break; \
|
||||
}
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \
|
||||
if (array_index == gpu_array_size - 1) { \
|
||||
if (array_index >= gpu_array_size - 1) { \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
#define KERNEL_STRUCT_VOLUME_STACK_SIZE (device_scene_->data.volume_stack_size)
|
||||
#include "kernel/integrator/integrator_state_template.h"
|
||||
#undef KERNEL_STRUCT_BEGIN
|
||||
#undef KERNEL_STRUCT_MEMBER
|
||||
#undef KERNEL_STRUCT_ARRAY_MEMBER
|
||||
#undef KERNEL_STRUCT_END
|
||||
#undef KERNEL_STRUCT_END_ARRAY
|
||||
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
|
||||
|
||||
if (VLOG_IS_ON(3)) {
|
||||
size_t total_soa_size = 0;
|
||||
for (auto &&soa_memory : integrator_state_soa_) {
|
||||
total_soa_size += soa_memory->memory_size();
|
||||
}
|
||||
|
||||
VLOG(3) << "GPU SoA state size: " << string_human_readable_size(total_soa_size);
|
||||
}
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::alloc_integrator_queue()
|
||||
@@ -652,7 +696,7 @@ int PathTraceWorkGPU::get_num_active_paths()
|
||||
bool PathTraceWorkGPU::should_use_graphics_interop()
|
||||
{
|
||||
/* There are few aspects with the graphics interop when using multiple devices caused by the fact
|
||||
* that the GPUDisplay has a single texture:
|
||||
* that the PathTraceDisplay has a single texture:
|
||||
*
|
||||
* CUDA will return `CUDA_ERROR_NOT_SUPPORTED` from `cuGraphicsGLRegisterBuffer()` when
|
||||
* attempting to register OpenGL PBO which has been mapped. Which makes sense, because
|
||||
@@ -678,9 +722,9 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
|
||||
return interop_use_;
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
if (device_->have_error()) {
|
||||
/* Don't attempt to update GPU display if the device has errors: the error state will make
|
||||
@@ -694,7 +738,7 @@ void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
}
|
||||
|
||||
if (should_use_graphics_interop()) {
|
||||
if (copy_to_gpu_display_interop(gpu_display, pass_mode, num_samples)) {
|
||||
if (copy_to_display_interop(display, pass_mode, num_samples)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -703,66 +747,64 @@ void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
interop_use_ = false;
|
||||
}
|
||||
|
||||
copy_to_gpu_display_naive(gpu_display, pass_mode, num_samples);
|
||||
copy_to_display_naive(display, pass_mode, num_samples);
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
const int full_x = effective_buffer_params_.full_x;
|
||||
const int full_y = effective_buffer_params_.full_y;
|
||||
const int width = effective_buffer_params_.width;
|
||||
const int height = effective_buffer_params_.height;
|
||||
const int final_width = buffers_->params.width;
|
||||
const int final_height = buffers_->params.height;
|
||||
const int width = effective_buffer_params_.window_width;
|
||||
const int height = effective_buffer_params_.window_height;
|
||||
const int final_width = buffers_->params.window_width;
|
||||
const int final_height = buffers_->params.window_height;
|
||||
|
||||
const int texture_x = full_x - effective_full_params_.full_x;
|
||||
const int texture_y = full_y - effective_full_params_.full_y;
|
||||
const int texture_x = full_x - effective_full_params_.full_x + effective_buffer_params_.window_x;
|
||||
const int texture_y = full_y - effective_full_params_.full_y + effective_buffer_params_.window_y;
|
||||
|
||||
/* Re-allocate display memory if needed, and make sure the device pointer is allocated.
|
||||
*
|
||||
* NOTE: allocation happens to the final resolution so that no re-allocation happens on every
|
||||
* change of the resolution divider. However, if the display becomes smaller, shrink the
|
||||
* allocated memory as well. */
|
||||
if (gpu_display_rgba_half_.data_width != final_width ||
|
||||
gpu_display_rgba_half_.data_height != final_height) {
|
||||
gpu_display_rgba_half_.alloc(final_width, final_height);
|
||||
if (display_rgba_half_.data_width != final_width ||
|
||||
display_rgba_half_.data_height != final_height) {
|
||||
display_rgba_half_.alloc(final_width, final_height);
|
||||
/* TODO(sergey): There should be a way to make sure device-side memory is allocated without
|
||||
* transferring zeroes to the device. */
|
||||
queue_->zero_to_device(gpu_display_rgba_half_);
|
||||
queue_->zero_to_device(display_rgba_half_);
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
destination.d_pixels_half_rgba = gpu_display_rgba_half_.device_pointer;
|
||||
destination.d_pixels_half_rgba = display_rgba_half_.device_pointer;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
|
||||
queue_->copy_from_device(gpu_display_rgba_half_);
|
||||
queue_->copy_from_device(display_rgba_half_);
|
||||
queue_->synchronize();
|
||||
|
||||
gpu_display->copy_pixels_to_texture(
|
||||
gpu_display_rgba_half_.data(), texture_x, texture_y, width, height);
|
||||
display->copy_pixels_to_texture(display_rgba_half_.data(), texture_x, texture_y, width, height);
|
||||
}
|
||||
|
||||
bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
if (!device_graphics_interop_) {
|
||||
device_graphics_interop_ = queue_->graphics_interop_create();
|
||||
}
|
||||
|
||||
const DeviceGraphicsInteropDestination graphics_interop_dst =
|
||||
gpu_display->graphics_interop_get();
|
||||
device_graphics_interop_->set_destination(graphics_interop_dst);
|
||||
const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get();
|
||||
device_graphics_interop_->set_display_interop(graphics_interop_dst);
|
||||
|
||||
const device_ptr d_rgba_half = device_graphics_interop_->map();
|
||||
if (!d_rgba_half) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
destination.d_pixels_half_rgba = d_rgba_half;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
@@ -772,14 +814,14 @@ bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
|
||||
return true;
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::destroy_gpu_resources(GPUDisplay *gpu_display)
|
||||
void PathTraceWorkGPU::destroy_gpu_resources(PathTraceDisplay *display)
|
||||
{
|
||||
if (!device_graphics_interop_) {
|
||||
return;
|
||||
}
|
||||
gpu_display->graphics_interop_activate();
|
||||
display->graphics_interop_activate();
|
||||
device_graphics_interop_ = nullptr;
|
||||
gpu_display->graphics_interop_deactivate();
|
||||
display->graphics_interop_deactivate();
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::get_render_tile_film_pixels(const PassAccessor::Destination &destination,
|
||||
|
@@ -48,10 +48,10 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
int start_sample,
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
|
||||
|
||||
virtual bool copy_render_buffers_from_device() override;
|
||||
virtual bool copy_render_buffers_to_device() override;
|
||||
@@ -88,16 +88,16 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
|
||||
int get_num_active_paths();
|
||||
|
||||
/* Check whether graphics interop can be used for the GPUDisplay update. */
|
||||
/* Check whether graphics interop can be used for the PathTraceDisplay update. */
|
||||
bool should_use_graphics_interop();
|
||||
|
||||
/* Naive implementation of the `copy_to_gpu_display()` which performs film conversion on the
|
||||
* device, then copies pixels to the host and pushes them to the `gpu_display`. */
|
||||
void copy_to_gpu_display_naive(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
|
||||
/* Naive implementation of the `copy_to_display()` which performs film conversion on the
|
||||
* device, then copies pixels to the host and pushes them to the `display`. */
|
||||
void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
|
||||
|
||||
/* Implementation of `copy_to_gpu_display()` which uses driver's OpenGL/GPU interoperability
|
||||
/* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability
|
||||
* functionality, avoiding copy of pixels to the host. */
|
||||
bool copy_to_gpu_display_interop(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
|
||||
bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
|
||||
|
||||
/* Synchronously run film conversion kernel and store display result in the given destination. */
|
||||
void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
|
||||
@@ -139,9 +139,9 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
/* Temporary buffer for passing work tiles to kernel. */
|
||||
device_vector<KernelWorkTile> work_tiles_;
|
||||
|
||||
/* Temporary buffer used by the copy_to_gpu_display() whenever graphics interoperability is not
|
||||
/* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not
|
||||
* available. Is allocated on-demand. */
|
||||
device_vector<half4> gpu_display_rgba_half_;
|
||||
device_vector<half4> display_rgba_half_;
|
||||
|
||||
unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;
|
||||
|
||||
|
@@ -344,7 +344,7 @@ class RenderScheduler {
|
||||
/* Number of rendered samples on top of the start sample. */
|
||||
int num_rendered_samples = 0;
|
||||
|
||||
/* Point in time the latest GPUDisplay work has been scheduled. */
|
||||
/* Point in time the latest PathTraceDisplay work has been scheduled. */
|
||||
double last_display_update_time = 0.0;
|
||||
/* Value of -1 means display was never updated. */
|
||||
int last_display_update_sample = -1;
|
||||
|
@@ -404,16 +404,27 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
-cuda-toolkit-dir "${cuda_toolkit_root_dir}"
|
||||
DEPENDS ${kernel_sources} cycles_cubin_cc)
|
||||
else()
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_file}
|
||||
COMMAND ${cuda_nvcc_executable}
|
||||
set(_cuda_nvcc_args
|
||||
-arch=${arch}
|
||||
${CUDA_NVCC_FLAGS}
|
||||
--${format}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
|
||||
--ptxas-options="-v"
|
||||
${cuda_flags}
|
||||
DEPENDS ${kernel_sources})
|
||||
${cuda_flags})
|
||||
|
||||
if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_file}
|
||||
COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
|
||||
DEPENDS ${kernel_sources})
|
||||
else()
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_file}
|
||||
COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args}
|
||||
DEPENDS ${kernel_sources})
|
||||
endif()
|
||||
|
||||
unset(_cuda_nvcc_args)
|
||||
endif()
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND cuda_cubins ${cuda_file})
|
||||
@@ -472,20 +483,10 @@ endif()
|
||||
|
||||
# HIP module
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES)
|
||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
# 64 bit only
|
||||
set(HIP_BITS 64)
|
||||
|
||||
# HIP version
|
||||
execute_process(COMMAND ${HIP_HIPCC_EXECUTABLE} "--version" OUTPUT_VARIABLE HIPCC_OUT)
|
||||
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" HIP_VERSION_MAJOR "${HIPCC_OUT}")
|
||||
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" HIP_VERSION_MINOR "${HIPCC_OUT}")
|
||||
set(HIP_VERSION "${HIP_VERSION_MAJOR}${HIP_VERSION_MINOR}")
|
||||
|
||||
|
||||
message(WARNING
|
||||
"HIP version ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR} detected")
|
||||
|
||||
# build for each arch
|
||||
set(hip_sources device/hip/kernel.cpp
|
||||
${SRC_HEADERS}
|
||||
@@ -542,23 +543,24 @@ if(WITH_CYCLES_HIP_BINARIES)
|
||||
-D WITH_NANOVDB
|
||||
-I "${NANOVDB_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${hip_file}
|
||||
COMMAND ${HIP_HIPCC_EXECUTABLE}
|
||||
-arch=${arch}
|
||||
${HIP_HIPCC_FLAGS}
|
||||
--${format}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
|
||||
${hip_flags}
|
||||
DEPENDS ${kernel_sources})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND hip_fatbins ${hip_file})
|
||||
endmacro()
|
||||
|
||||
set(prev_arch "none")
|
||||
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
|
||||
set(hip_hipcc_executable ${HIP_HIPCC_EXECUTABLE})
|
||||
set(hip_toolkit_root_dir ${HIP_TOOLKIT_ROOT_DIR})
|
||||
if(DEFINED hip_hipcc_executable AND DEFINED hip_toolkit_root_dir)
|
||||
# Compile regular kernel
|
||||
CYCLES_HIP_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${hip_sources}" FALSE)
|
||||
|
||||
if(WITH_CYCLES_HIP_BUILD_SERIAL)
|
||||
set(prev_arch ${arch})
|
||||
endif()
|
||||
|
||||
unset(hip_hipcc_executable)
|
||||
unset(hip_toolkit_root_dir)
|
||||
endif()
|
||||
# Compile regular kernel
|
||||
CYCLES_HIP_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${hip_sources}" FALSE)
|
||||
endforeach()
|
||||
|
||||
add_custom_target(cycles_kernel_hip ALL DEPENDS ${hip_fatbins})
|
||||
|
@@ -106,9 +106,6 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg,
|
||||
const RTCHit *hit,
|
||||
Intersection *isect)
|
||||
{
|
||||
bool is_hair = hit->geomID & 1;
|
||||
isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
|
||||
isect->v = is_hair ? hit->v : hit->u;
|
||||
isect->t = ray->tfar;
|
||||
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
|
||||
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
|
||||
@@ -121,27 +118,37 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg,
|
||||
else {
|
||||
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
|
||||
isect->object = OBJECT_NONE;
|
||||
isect->object = hit->geomID / 2;
|
||||
}
|
||||
|
||||
const bool is_hair = hit->geomID & 1;
|
||||
if (is_hair) {
|
||||
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, isect->prim);
|
||||
isect->type = segment.type;
|
||||
isect->prim = segment.prim;
|
||||
isect->u = hit->u;
|
||||
isect->v = hit->v;
|
||||
}
|
||||
else {
|
||||
isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type;
|
||||
isect->u = 1.0f - hit->v - hit->u;
|
||||
isect->v = hit->u;
|
||||
}
|
||||
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
|
||||
}
|
||||
|
||||
ccl_device_inline void kernel_embree_convert_sss_hit(const KernelGlobals *kg,
|
||||
const RTCRay *ray,
|
||||
const RTCHit *hit,
|
||||
Intersection *isect,
|
||||
int local_object_id)
|
||||
ccl_device_inline void kernel_embree_convert_sss_hit(
|
||||
const KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
|
||||
{
|
||||
isect->u = 1.0f - hit->v - hit->u;
|
||||
isect->v = hit->u;
|
||||
isect->t = ray->tfar;
|
||||
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
|
||||
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
|
||||
rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
|
||||
rtcGetGeometry(kernel_data.bvh.scene, object * 2));
|
||||
isect->prim = hit->primID +
|
||||
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
|
||||
isect->object = local_object_id;
|
||||
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
|
||||
isect->object = object;
|
||||
isect->type = kernel_tex_fetch(__objects, object).primitive_type;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -130,7 +130,6 @@ ccl_device_inline
|
||||
if (prim_addr >= 0) {
|
||||
const int prim_addr2 = __float_as_int(leaf.y);
|
||||
const uint type = __float_as_int(leaf.w);
|
||||
const uint p_type = type & PRIMITIVE_ALL;
|
||||
|
||||
/* pop */
|
||||
node_addr = traversal_stack[stack_ptr];
|
||||
@@ -138,14 +137,15 @@ ccl_device_inline
|
||||
|
||||
/* primitive intersection */
|
||||
while (prim_addr < prim_addr2) {
|
||||
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
|
||||
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
|
||||
(type & PRIMITIVE_ALL));
|
||||
bool hit;
|
||||
|
||||
/* todo: specialized intersect functions which don't fill in
|
||||
* isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
|
||||
* might give a few % performance improvement */
|
||||
|
||||
switch (p_type) {
|
||||
switch (type & PRIMITIVE_ALL) {
|
||||
case PRIMITIVE_TRIANGLE: {
|
||||
hit = triangle_intersect(
|
||||
kg, isect, P, dir, isect_t, visibility, object, prim_addr);
|
||||
@@ -163,17 +163,20 @@ ccl_device_inline
|
||||
case PRIMITIVE_MOTION_CURVE_THICK:
|
||||
case PRIMITIVE_CURVE_RIBBON:
|
||||
case PRIMITIVE_MOTION_CURVE_RIBBON: {
|
||||
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
hit = curve_intersect(kg,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
isect_t,
|
||||
visibility,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type);
|
||||
if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) {
|
||||
const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr);
|
||||
if (ray->time < prim_time.x || ray->time > prim_time.y) {
|
||||
hit = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const int curve_object = kernel_tex_fetch(__prim_object, prim_addr);
|
||||
const int curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||
hit = curve_intersect(
|
||||
kg, isect, P, dir, isect_t, curve_object, curve_prim, ray->time, curve_type);
|
||||
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
@@ -165,18 +165,18 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg,
|
||||
case PRIMITIVE_CURVE_RIBBON:
|
||||
case PRIMITIVE_MOTION_CURVE_RIBBON: {
|
||||
for (; prim_addr < prim_addr2; prim_addr++) {
|
||||
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
|
||||
const bool hit = curve_intersect(kg,
|
||||
isect,
|
||||
P,
|
||||
dir,
|
||||
isect->t,
|
||||
visibility,
|
||||
object,
|
||||
prim_addr,
|
||||
ray->time,
|
||||
curve_type);
|
||||
if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) {
|
||||
const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr);
|
||||
if (ray->time < prim_time.x || ray->time > prim_time.y) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
const int curve_object = kernel_tex_fetch(__prim_object, prim_addr);
|
||||
const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||
const int curve_type = kernel_tex_fetch(__prim_type, prim_addr);
|
||||
const bool hit = curve_intersect(
|
||||
kg, isect, P, dir, isect->t, curve_object, curve_prim, ray->time, curve_type);
|
||||
if (hit) {
|
||||
/* shadow ray early termination */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE)
|
||||
|
@@ -118,19 +118,18 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
|
||||
ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *ccl_restrict kg,
|
||||
const Intersection *ccl_restrict isect)
|
||||
{
|
||||
const int prim = kernel_tex_fetch(__prim_index, isect->prim);
|
||||
const int prim = isect->prim;
|
||||
int shader = 0;
|
||||
|
||||
#ifdef __HAIR__
|
||||
if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE)
|
||||
if (isect->type & PRIMITIVE_ALL_TRIANGLE)
|
||||
#endif
|
||||
{
|
||||
shader = kernel_tex_fetch(__tri_shader, prim);
|
||||
}
|
||||
#ifdef __HAIR__
|
||||
else {
|
||||
float4 str = kernel_tex_fetch(__curves, prim);
|
||||
shader = __float_as_int(str.z);
|
||||
shader = kernel_tex_fetch(__curves, prim).shader_id;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -138,21 +137,19 @@ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *cc
|
||||
}
|
||||
|
||||
ccl_device_forceinline int intersection_get_shader_from_isect_prim(
|
||||
const KernelGlobals *ccl_restrict kg, const int isect_prim)
|
||||
const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type)
|
||||
{
|
||||
const int prim = kernel_tex_fetch(__prim_index, isect_prim);
|
||||
int shader = 0;
|
||||
|
||||
#ifdef __HAIR__
|
||||
if (kernel_tex_fetch(__prim_type, isect_prim) & PRIMITIVE_ALL_TRIANGLE)
|
||||
if (isect_type & PRIMITIVE_ALL_TRIANGLE)
|
||||
#endif
|
||||
{
|
||||
shader = kernel_tex_fetch(__tri_shader, prim);
|
||||
}
|
||||
#ifdef __HAIR__
|
||||
else {
|
||||
float4 str = kernel_tex_fetch(__curves, prim);
|
||||
shader = __float_as_int(str.z);
|
||||
shader = kernel_tex_fetch(__curves, prim).shader_id;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -162,25 +159,13 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim(
|
||||
ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg,
|
||||
const Intersection *ccl_restrict isect)
|
||||
{
|
||||
return intersection_get_shader_from_isect_prim(kg, isect->prim);
|
||||
}
|
||||
|
||||
ccl_device_forceinline int intersection_get_object(const KernelGlobals *ccl_restrict kg,
|
||||
const Intersection *ccl_restrict isect)
|
||||
{
|
||||
if (isect->object != OBJECT_NONE) {
|
||||
return isect->object;
|
||||
}
|
||||
|
||||
return kernel_tex_fetch(__prim_object, isect->prim);
|
||||
return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type);
|
||||
}
|
||||
|
||||
ccl_device_forceinline int intersection_get_object_flags(const KernelGlobals *ccl_restrict kg,
|
||||
const Intersection *ccl_restrict isect)
|
||||
{
|
||||
const int object = intersection_get_object(kg, isect);
|
||||
|
||||
return kernel_tex_fetch(__object_flag, object);
|
||||
return kernel_tex_fetch(__object_flag, isect->object);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
@@ -35,21 +35,25 @@ static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledDiffuseBsdf),
|
||||
"PrincipledDiffuseBsdf is too large!");
|
||||
|
||||
ccl_device float3 calculate_principled_diffuse_brdf(
|
||||
const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf)
|
||||
const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float *pdf)
|
||||
{
|
||||
float NdotL = dot(N, L);
|
||||
float NdotV = dot(N, V);
|
||||
|
||||
if (NdotL <= 0 || NdotV <= 0) {
|
||||
*pdf = 0.0f;
|
||||
if (NdotL <= 0) {
|
||||
return make_float3(0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
float LdotH = dot(L, H);
|
||||
float NdotV = dot(N, V);
|
||||
|
||||
/* H = normalize(L + V); // Bissector of an angle between L and V
|
||||
* LH2 = 2 * dot(L, H)^2 = 2cos(x)^2 = cos(2x) + 1 = dot(L, V) + 1,
|
||||
* half-angle x between L and V is at most 90 deg
|
||||
*/
|
||||
float LH2 = dot(L, V) + 1;
|
||||
|
||||
float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV);
|
||||
const float Fd90 = 0.5f + 2.0f * LdotH * LdotH * bsdf->roughness;
|
||||
float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV);
|
||||
const float Fd90 = 0.5f + LH2 * bsdf->roughness;
|
||||
float Fd = (1.0f - FL + Fd90 * FL) * (1.0f - FV + Fd90 * FV);
|
||||
|
||||
float value = M_1_PI_F * NdotL * Fd;
|
||||
|
||||
@@ -72,11 +76,10 @@ ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc,
|
||||
float3 N = bsdf->N;
|
||||
float3 V = I; // outgoing
|
||||
float3 L = omega_in; // incoming
|
||||
float3 H = normalize(L + V);
|
||||
|
||||
if (dot(N, omega_in) > 0.0f) {
|
||||
*pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
|
||||
return calculate_principled_diffuse_brdf(bsdf, N, V, L, H, pdf);
|
||||
return calculate_principled_diffuse_brdf(bsdf, N, V, L, pdf);
|
||||
}
|
||||
else {
|
||||
*pdf = 0.0f;
|
||||
@@ -112,9 +115,7 @@ ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc,
|
||||
sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
|
||||
|
||||
if (dot(Ng, *omega_in) > 0) {
|
||||
float3 H = normalize(I + *omega_in);
|
||||
|
||||
*eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf);
|
||||
*eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, pdf);
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
// TODO: find a better approximation for the diffuse bounce
|
||||
|
@@ -424,8 +424,12 @@ ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *k
|
||||
return;
|
||||
}
|
||||
|
||||
const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride;
|
||||
ccl_global const float *buffer = render_buffer + render_buffer_offset;
|
||||
const int x = render_pixel_index % width;
|
||||
const int y = render_pixel_index / width;
|
||||
|
||||
ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
|
||||
y * stride * kfilm_convert->pass_stride;
|
||||
|
||||
ccl_global float *pixel = pixels +
|
||||
(render_pixel_index + dst_offset) * kfilm_convert->pixel_stride;
|
||||
|
||||
@@ -451,17 +455,17 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
|
||||
return;
|
||||
}
|
||||
|
||||
const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * kfilm_convert->pass_stride;
|
||||
ccl_global const float *buffer = render_buffer + render_buffer_offset;
|
||||
const int x = render_pixel_index % width;
|
||||
const int y = render_pixel_index / width;
|
||||
|
||||
ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
|
||||
y * stride * kfilm_convert->pass_stride;
|
||||
|
||||
float pixel[4];
|
||||
processor(kfilm_convert, buffer, pixel);
|
||||
|
||||
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
|
||||
|
||||
const int x = render_pixel_index % width;
|
||||
const int y = render_pixel_index / width;
|
||||
|
||||
ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
|
||||
float4_store_half((ccl_global half *)out, make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
|
||||
}
|
||||
|
@@ -41,22 +41,15 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
|
||||
return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2());
|
||||
}
|
||||
|
||||
template<bool always = false> ccl_device_forceinline uint get_object_id()
|
||||
ccl_device_forceinline int get_object_id()
|
||||
{
|
||||
#ifdef __OBJECT_MOTION__
|
||||
/* Always get the the instance ID from the TLAS.
|
||||
/* Always get the the instance ID from the TLAS
|
||||
* There might be a motion transform node between TLAS and BLAS which does not have one. */
|
||||
uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
|
||||
return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
|
||||
#else
|
||||
uint object = optixGetInstanceId();
|
||||
return optixGetInstanceId();
|
||||
#endif
|
||||
/* Choose between always returning object ID or only for instances. */
|
||||
if (always || (object & 1) == 0)
|
||||
/* Can just remove the low bit since instance always contains object ID. */
|
||||
return object >> 1;
|
||||
else
|
||||
/* Set to OBJECT_NONE if this is not an instanced object. */
|
||||
return OBJECT_NONE;
|
||||
}
|
||||
|
||||
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
|
||||
@@ -108,7 +101,7 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
|
||||
#endif
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
const uint object = get_object_id<true>();
|
||||
const int object = get_object_id();
|
||||
if (object != optixGetPayload_4() /* local_object */) {
|
||||
/* Only intersect with matching object. */
|
||||
return optixIgnoreIntersection();
|
||||
@@ -152,21 +145,23 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit()
|
||||
local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
const int prim = optixGetPrimitiveIndex();
|
||||
|
||||
Intersection *isect = &local_isect->hits[hit];
|
||||
isect->t = optixGetRayTmax();
|
||||
isect->prim = optixGetPrimitiveIndex();
|
||||
isect->prim = prim;
|
||||
isect->object = get_object_id();
|
||||
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
|
||||
isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type;
|
||||
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
isect->u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
isect->v = barycentrics.x;
|
||||
|
||||
/* Record geometric normal. */
|
||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim);
|
||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0));
|
||||
const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1));
|
||||
const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
|
||||
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
|
||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0));
|
||||
const float3 tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1));
|
||||
const float3 tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
|
||||
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
||||
|
||||
/* Continue tracing (without this the trace call would return after the first hit). */
|
||||
@@ -179,25 +174,32 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
bool ignore_intersection = false;
|
||||
|
||||
const uint prim = optixGetPrimitiveIndex();
|
||||
int prim = optixGetPrimitiveIndex();
|
||||
const uint object = get_object_id();
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
|
||||
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
|
||||
ignore_intersection = true;
|
||||
}
|
||||
# endif
|
||||
|
||||
float u = 0.0f, v = 0.0f;
|
||||
int type = 0;
|
||||
if (optixIsTriangleHit()) {
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
v = barycentrics.x;
|
||||
type = kernel_tex_fetch(__objects, object).primitive_type;
|
||||
}
|
||||
# ifdef __HAIR__
|
||||
else {
|
||||
u = __uint_as_float(optixGetAttribute_0());
|
||||
v = __uint_as_float(optixGetAttribute_1());
|
||||
|
||||
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
|
||||
type = segment.type;
|
||||
prim = segment.prim;
|
||||
|
||||
/* Filter out curve endcaps. */
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
ignore_intersection = true;
|
||||
@@ -245,8 +247,8 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
|
||||
isect->v = v;
|
||||
isect->t = optixGetRayTmax();
|
||||
isect->prim = prim;
|
||||
isect->object = get_object_id();
|
||||
isect->type = kernel_tex_fetch(__prim_type, prim);
|
||||
isect->object = object;
|
||||
isect->type = type;
|
||||
|
||||
# ifdef __TRANSPARENT_SHADOWS__
|
||||
/* Detect if this surface has a shader with transparent shadows. */
|
||||
@@ -274,15 +276,14 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test()
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint object = get_object_id();
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
const uint prim = optixGetPrimitiveIndex();
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
|
||||
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint object = get_object_id<true>();
|
||||
if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
@@ -301,9 +302,9 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
|
||||
#endif
|
||||
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
const uint prim = optixGetPrimitiveIndex();
|
||||
const uint object = get_object_id();
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) {
|
||||
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
|
||||
return optixIgnoreIntersection();
|
||||
}
|
||||
|
||||
@@ -316,28 +317,39 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
|
||||
|
||||
extern "C" __global__ void __closesthit__kernel_optix_hit()
|
||||
{
|
||||
const int object = get_object_id();
|
||||
const int prim = optixGetPrimitiveIndex();
|
||||
|
||||
optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
|
||||
optixSetPayload_3(optixGetPrimitiveIndex());
|
||||
optixSetPayload_4(get_object_id());
|
||||
/* Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index. */
|
||||
optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex()));
|
||||
optixSetPayload_4(object);
|
||||
|
||||
if (optixIsTriangleHit()) {
|
||||
const float2 barycentrics = optixGetTriangleBarycentrics();
|
||||
optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
|
||||
optixSetPayload_2(__float_as_uint(barycentrics.x));
|
||||
optixSetPayload_3(prim);
|
||||
optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type);
|
||||
}
|
||||
else {
|
||||
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim);
|
||||
optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
|
||||
optixSetPayload_2(optixGetAttribute_1());
|
||||
optixSetPayload_3(segment.prim);
|
||||
optixSetPayload_5(segment.type);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __HAIR__
|
||||
ccl_device_inline void optix_intersection_curve(const uint prim, const uint type)
|
||||
ccl_device_inline void optix_intersection_curve(const int prim, const int type)
|
||||
{
|
||||
const uint object = get_object_id<true>();
|
||||
const int object = get_object_id();
|
||||
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = optixGetPayload_4();
|
||||
if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
float3 P = optixGetObjectRayOrigin();
|
||||
float3 dir = optixGetObjectRayDirection();
|
||||
@@ -358,7 +370,7 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
|
||||
if (isect.t != FLT_MAX)
|
||||
isect.t *= len;
|
||||
|
||||
if (curve_intersect(NULL, &isect, P, dir, isect.t, visibility, object, prim, time, type)) {
|
||||
if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
|
||||
optixReportIntersection(isect.t / len,
|
||||
type & PRIMITIVE_ALL,
|
||||
__float_as_int(isect.u), /* Attribute_0 */
|
||||
@@ -368,9 +380,9 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type
|
||||
|
||||
extern "C" __global__ void __intersection__curve_ribbon()
|
||||
{
|
||||
const uint prim = optixGetPrimitiveIndex();
|
||||
const uint type = kernel_tex_fetch(__prim_type, prim);
|
||||
|
||||
const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, optixGetPrimitiveIndex());
|
||||
const int prim = segment.prim;
|
||||
const int type = segment.type;
|
||||
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
|
||||
optix_intersection_curve(prim, type);
|
||||
}
|
||||
|
@@ -34,8 +34,8 @@ ccl_device float curve_attribute_float(const KernelGlobals *kg,
|
||||
float *dy)
|
||||
{
|
||||
if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
|
||||
@@ -76,8 +76,8 @@ ccl_device float2 curve_attribute_float2(const KernelGlobals *kg,
|
||||
float2 *dy)
|
||||
{
|
||||
if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0);
|
||||
@@ -122,8 +122,8 @@ ccl_device float3 curve_attribute_float3(const KernelGlobals *kg,
|
||||
float3 *dy)
|
||||
{
|
||||
if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
|
||||
@@ -164,8 +164,8 @@ ccl_device float4 curve_attribute_float4(const KernelGlobals *kg,
|
||||
float4 *dy)
|
||||
{
|
||||
if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0);
|
||||
@@ -206,8 +206,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd)
|
||||
float r = 0.0f;
|
||||
|
||||
if (sd->type & PRIMITIVE_ALL_CURVE) {
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float4 P_curve[2];
|
||||
@@ -231,8 +231,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd)
|
||||
|
||||
ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const ShaderData *sd)
|
||||
{
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
KernelCurve curve = kernel_tex_fetch(__curves, sd->prim);
|
||||
int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
float4 P_curve[2];
|
||||
|
@@ -630,33 +630,19 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg,
|
||||
const float3 P,
|
||||
const float3 dir,
|
||||
const float tmax,
|
||||
uint visibility,
|
||||
int object,
|
||||
int curveAddr,
|
||||
int prim,
|
||||
float time,
|
||||
int type)
|
||||
{
|
||||
const bool is_motion = (type & PRIMITIVE_ALL_MOTION);
|
||||
|
||||
# ifndef __KERNEL_OPTIX__ /* See OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */
|
||||
if (is_motion && kernel_data.bvh.use_bvh_steps) {
|
||||
const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr);
|
||||
if (time < prim_time.x || time > prim_time.y) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
KernelCurve kcurve = kernel_tex_fetch(__curves, prim);
|
||||
|
||||
int segment = PRIMITIVE_UNPACK_SEGMENT(type);
|
||||
int prim = kernel_tex_fetch(__prim_index, curveAddr);
|
||||
|
||||
float4 v00 = kernel_tex_fetch(__curves, prim);
|
||||
|
||||
int k0 = __float_as_int(v00.x) + segment;
|
||||
int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
int ka = max(k0 - 1, __float_as_int(v00.x));
|
||||
int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
|
||||
int ka = max(k0 - 1, kcurve.first_key);
|
||||
int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1);
|
||||
|
||||
float4 curve[4];
|
||||
if (!is_motion) {
|
||||
@@ -666,21 +652,14 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg,
|
||||
curve[3] = kernel_tex_fetch(__curve_keys, kb);
|
||||
}
|
||||
else {
|
||||
int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
|
||||
motion_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, curve);
|
||||
motion_curve_keys(kg, object, prim, time, ka, k0, k1, kb, curve);
|
||||
}
|
||||
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
if (!(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)) {
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) {
|
||||
/* todo: adaptive number of subdivisions could help performance here. */
|
||||
const int subdivisions = kernel_data.bvh.curve_subdivisions;
|
||||
if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) {
|
||||
isect->prim = curveAddr;
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = type;
|
||||
return true;
|
||||
@@ -690,7 +669,7 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg,
|
||||
}
|
||||
else {
|
||||
if (curve_intersect_recursive(P, dir, tmax, curve, isect)) {
|
||||
isect->prim = curveAddr;
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = type;
|
||||
return true;
|
||||
@@ -708,7 +687,7 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
|
||||
const int isect_object,
|
||||
const int isect_prim)
|
||||
{
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_inverse_transform(kg, sd);
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
@@ -716,14 +695,12 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
|
||||
D = safe_normalize_len(D, &t);
|
||||
}
|
||||
|
||||
int prim = kernel_tex_fetch(__prim_index, isect_prim);
|
||||
float4 v00 = kernel_tex_fetch(__curves, prim);
|
||||
KernelCurve kcurve = kernel_tex_fetch(__curves, isect_prim);
|
||||
|
||||
int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
int k1 = k0 + 1;
|
||||
|
||||
int ka = max(k0 - 1, __float_as_int(v00.x));
|
||||
int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1);
|
||||
int ka = max(k0 - 1, kcurve.first_key);
|
||||
int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1);
|
||||
|
||||
float4 P_curve[4];
|
||||
|
||||
@@ -780,15 +757,13 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg,
|
||||
sd->dPdv = cross(dPdu, sd->Ng);
|
||||
# endif
|
||||
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd);
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
|
||||
sd->P = P;
|
||||
|
||||
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
|
||||
sd->shader = __float_as_int(curvedata.z);
|
||||
sd->shader = kernel_tex_fetch(__curves, sd->prim).shader_id;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -72,9 +72,9 @@ ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg,
|
||||
{
|
||||
if (step == numsteps) {
|
||||
/* center step: regular vertex location */
|
||||
verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
|
||||
verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
|
||||
verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
|
||||
verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
|
||||
verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
|
||||
verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
|
||||
}
|
||||
else {
|
||||
/* center step not store in this array */
|
||||
|
@@ -44,7 +44,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg,
|
||||
float3 verts[3])
|
||||
{
|
||||
#ifdef __INTERSECTION_REFINE__
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
if (UNLIKELY(t == 0.0f)) {
|
||||
return P;
|
||||
}
|
||||
@@ -70,7 +70,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg,
|
||||
/* Compute refined position. */
|
||||
P = P + D * rt;
|
||||
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd);
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
@@ -106,7 +106,7 @@ ccl_device_inline
|
||||
return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts);
|
||||
# else
|
||||
# ifdef __INTERSECTION_REFINE__
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_inverse_transform(kg, sd);
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
@@ -128,7 +128,7 @@ ccl_device_inline
|
||||
|
||||
P = P + D * rt;
|
||||
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd);
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
@@ -186,8 +186,9 @@ ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg,
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = prim_addr;
|
||||
isect->object = object;
|
||||
isect->prim = prim;
|
||||
isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) :
|
||||
object;
|
||||
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
return true;
|
||||
}
|
||||
@@ -288,8 +289,8 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg,
|
||||
isect->t = t;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
isect->prim = prim_addr;
|
||||
isect->object = object;
|
||||
isect->prim = prim;
|
||||
isect->object = local_object;
|
||||
isect->type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
|
||||
/* Record geometric normal. */
|
||||
|
@@ -37,7 +37,7 @@ ccl_device void shader_setup_object_transforms(const KernelGlobals *ccl_restrict
|
||||
#endif
|
||||
|
||||
/* TODO: break this up if it helps reduce register pressure to load data from
|
||||
* global memory as we write it to shaderdata. */
|
||||
* global memory as we write it to shader-data. */
|
||||
ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict kg,
|
||||
ShaderData *ccl_restrict sd,
|
||||
const Ray *ccl_restrict ray,
|
||||
@@ -52,10 +52,9 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k
|
||||
sd->v = isect->v;
|
||||
sd->ray_length = isect->t;
|
||||
sd->type = isect->type;
|
||||
sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) :
|
||||
isect->object;
|
||||
sd->object = isect->object;
|
||||
sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
|
||||
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
|
||||
sd->prim = isect->prim;
|
||||
sd->lamp = LAMP_NONE;
|
||||
sd->flag = 0;
|
||||
|
||||
@@ -103,7 +102,7 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k
|
||||
|
||||
sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
|
||||
|
||||
if (isect->object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
/* instance transform */
|
||||
object_normal_transform_auto(kg, sd, &sd->N);
|
||||
object_normal_transform_auto(kg, sd, &sd->Ng);
|
||||
|
@@ -29,9 +29,9 @@ ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd
|
||||
{
|
||||
/* load triangle vertices */
|
||||
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim);
|
||||
const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
|
||||
const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
|
||||
const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
|
||||
const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
|
||||
const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
|
||||
const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
|
||||
|
||||
/* return normal */
|
||||
if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
|
||||
@@ -54,9 +54,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg,
|
||||
{
|
||||
/* load triangle vertices */
|
||||
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
|
||||
float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
|
||||
float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
|
||||
float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
|
||||
float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
|
||||
float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
|
||||
float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
|
||||
/* compute point */
|
||||
float t = 1.0f - u - v;
|
||||
*P = (u * v0 + v * v1 + t * v2);
|
||||
@@ -78,9 +78,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg,
|
||||
ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, float3 P[3])
|
||||
{
|
||||
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
|
||||
P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
|
||||
P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
|
||||
P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
|
||||
P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
|
||||
P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
|
||||
P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
|
||||
}
|
||||
|
||||
/* Triangle vertex locations and vertex normals */
|
||||
@@ -91,9 +91,9 @@ ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg,
|
||||
float3 N[3])
|
||||
{
|
||||
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
|
||||
P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
|
||||
P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
|
||||
P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
|
||||
P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
|
||||
P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
|
||||
P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
|
||||
N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x));
|
||||
N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y));
|
||||
N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z));
|
||||
@@ -145,9 +145,9 @@ ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg,
|
||||
{
|
||||
/* fetch triangle vertex coordinates */
|
||||
const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
|
||||
const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0));
|
||||
const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1));
|
||||
const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2));
|
||||
const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0));
|
||||
const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1));
|
||||
const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2));
|
||||
|
||||
/* compute derivatives of P w.r.t. uv */
|
||||
*dPdu = (p0 - p2);
|
||||
|
@@ -35,13 +35,14 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg,
|
||||
int object,
|
||||
int prim_addr)
|
||||
{
|
||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
|
||||
const int prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
|
||||
#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||
const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex];
|
||||
const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex];
|
||||
#else
|
||||
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
|
||||
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
|
||||
const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
|
||||
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
|
||||
#endif
|
||||
float t, u, v;
|
||||
if (ray_triangle_intersect(P,
|
||||
@@ -64,8 +65,9 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg,
|
||||
if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
|
||||
#endif
|
||||
{
|
||||
isect->prim = prim_addr;
|
||||
isect->object = object;
|
||||
isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) :
|
||||
object;
|
||||
isect->prim = prim;
|
||||
isect->type = PRIMITIVE_TRIANGLE;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
@@ -102,13 +104,14 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
|
||||
}
|
||||
}
|
||||
|
||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
|
||||
const int prim = kernel_tex_fetch(__prim_index, prim_addr);
|
||||
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w;
|
||||
# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||
const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex];
|
||||
const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex];
|
||||
# else
|
||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)),
|
||||
tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)),
|
||||
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
|
||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)),
|
||||
tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)),
|
||||
tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
|
||||
# endif
|
||||
float t, u, v;
|
||||
if (!ray_triangle_intersect(P,
|
||||
@@ -167,8 +170,8 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
|
||||
|
||||
/* Record intersection. */
|
||||
Intersection *isect = &local_isect->hits[hit];
|
||||
isect->prim = prim_addr;
|
||||
isect->object = object;
|
||||
isect->prim = prim;
|
||||
isect->object = local_object;
|
||||
isect->type = PRIMITIVE_TRIANGLE;
|
||||
isect->u = u;
|
||||
isect->v = v;
|
||||
@@ -176,9 +179,9 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg,
|
||||
|
||||
/* Record geometric normal. */
|
||||
# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
|
||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)),
|
||||
tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)),
|
||||
tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2));
|
||||
const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)),
|
||||
tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)),
|
||||
tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2));
|
||||
# endif
|
||||
local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
||||
|
||||
@@ -206,7 +209,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg,
|
||||
const int isect_prim)
|
||||
{
|
||||
#ifdef __INTERSECTION_REFINE__
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
if (UNLIKELY(t == 0.0f)) {
|
||||
return P;
|
||||
}
|
||||
@@ -219,10 +222,10 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg,
|
||||
|
||||
P = P + D * t;
|
||||
|
||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim);
|
||||
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
|
||||
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
|
||||
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w;
|
||||
const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
|
||||
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
|
||||
float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
|
||||
float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
|
||||
float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
|
||||
@@ -239,7 +242,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg,
|
||||
P = P + D * rt;
|
||||
}
|
||||
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd);
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
@@ -265,7 +268,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg,
|
||||
/* t is always in world space with OptiX. */
|
||||
return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim);
|
||||
#else
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_inverse_transform(kg, sd);
|
||||
|
||||
P = transform_point(&tfm, P);
|
||||
@@ -276,10 +279,10 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg,
|
||||
P = P + D * t;
|
||||
|
||||
# ifdef __INTERSECTION_REFINE__
|
||||
const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim);
|
||||
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0),
|
||||
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2);
|
||||
const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w;
|
||||
const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0),
|
||||
tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1),
|
||||
tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2);
|
||||
float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
|
||||
float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
|
||||
float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
|
||||
@@ -297,7 +300,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg,
|
||||
}
|
||||
# endif /* __INTERSECTION_REFINE__ */
|
||||
|
||||
if (isect_object != OBJECT_NONE) {
|
||||
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
const Transform tfm = object_get_transform(kg, sd);
|
||||
P = transform_point(&tfm, P);
|
||||
}
|
||||
|
@@ -109,9 +109,17 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
|
||||
}
|
||||
|
||||
/* Position and normal on triangle. */
|
||||
const int object = kernel_data.bake.object_index;
|
||||
float3 P, Ng;
|
||||
int shader;
|
||||
triangle_point_normal(kg, kernel_data.bake.object_index, prim, u, v, &P, &Ng, &shader);
|
||||
triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
|
||||
|
||||
const int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
|
||||
P = transform_point_auto(&tfm, P);
|
||||
}
|
||||
|
||||
if (kernel_data.film.pass_background != PASS_UNUSED) {
|
||||
/* Environment baking. */
|
||||
|
||||
@@ -130,8 +138,13 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
|
||||
}
|
||||
else {
|
||||
/* Surface baking. */
|
||||
const float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) :
|
||||
Ng;
|
||||
float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) : Ng;
|
||||
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
|
||||
N = normalize(transform_direction_transposed(&itfm, N));
|
||||
Ng = normalize(transform_direction_transposed(&itfm, Ng));
|
||||
}
|
||||
|
||||
/* Setup ray. */
|
||||
Ray ray ccl_optional_struct_init;
|
||||
@@ -143,6 +156,12 @@ ccl_device bool integrator_init_from_bake(INTEGRATOR_STATE_ARGS,
|
||||
/* Setup differentials. */
|
||||
float3 dPdu, dPdv;
|
||||
triangle_dPdudv(kg, prim, &dPdu, &dPdv);
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
|
||||
dPdu = transform_direction(&tfm, dPdu);
|
||||
dPdv = transform_direction(&tfm, dPdv);
|
||||
}
|
||||
|
||||
differential3 dP;
|
||||
dP.dx = dPdu * dudx + dPdv * dvdx;
|
||||
dP.dy = dPdu * dudy + dPdv * dvdy;
|
||||
|
@@ -123,7 +123,7 @@ ccl_device_forceinline void integrator_intersect_shader_next_kernel(
|
||||
#ifdef __SHADOW_CATCHER__
|
||||
const int object_flags = intersection_get_object_flags(kg, isect);
|
||||
if (kernel_shadow_catcher_split(INTEGRATOR_STATE_PASS, object_flags)) {
|
||||
if (kernel_data.film.use_approximate_shadow_catcher && !kernel_data.background.transparent) {
|
||||
if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) {
|
||||
INTEGRATOR_STATE_WRITE(path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
|
||||
if (use_raytrace_kernel) {
|
||||
@@ -160,10 +160,7 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS)
|
||||
if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) {
|
||||
ray.t = kernel_data.integrator.ao_bounces_distance;
|
||||
|
||||
const int last_object = last_isect_object != OBJECT_NONE ?
|
||||
last_isect_object :
|
||||
kernel_tex_fetch(__prim_object, last_isect_prim);
|
||||
const float object_ao_distance = kernel_tex_fetch(__objects, last_object).ao_distance;
|
||||
const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance;
|
||||
if (object_ao_distance != 0.0f) {
|
||||
ray.t = object_ao_distance;
|
||||
}
|
||||
|
@@ -38,10 +38,13 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
|
||||
volume_ray.P = from_P;
|
||||
volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t);
|
||||
|
||||
/* Store to avoid global fetches on every intersection step. */
|
||||
const uint volume_stack_size = kernel_data.volume_stack_size;
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
Intersection hits[2 * VOLUME_STACK_SIZE + 1];
|
||||
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
|
||||
uint num_hits = scene_intersect_volume_all(
|
||||
kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY);
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, PATH_RAY_ALL_VISIBILITY);
|
||||
if (num_hits > 0) {
|
||||
Intersection *isect = hits;
|
||||
|
||||
@@ -55,7 +58,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(INTEGRATOR_STATE_A
|
||||
#else
|
||||
Intersection isect;
|
||||
int step = 0;
|
||||
while (step < 2 * VOLUME_STACK_SIZE &&
|
||||
while (step < 2 * volume_stack_size &&
|
||||
scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
|
||||
shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect);
|
||||
volume_stack_enter_exit(INTEGRATOR_STATE_PASS, stack_sd);
|
||||
@@ -91,12 +94,15 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
|
||||
stack_index++;
|
||||
}
|
||||
|
||||
/* Store to avoid global fetches on every intersection step. */
|
||||
const uint volume_stack_size = kernel_data.volume_stack_size;
|
||||
|
||||
#ifdef __VOLUME_RECORD_ALL__
|
||||
Intersection hits[2 * VOLUME_STACK_SIZE + 1];
|
||||
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
|
||||
uint num_hits = scene_intersect_volume_all(
|
||||
kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility);
|
||||
kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
|
||||
if (num_hits > 0) {
|
||||
int enclosed_volumes[VOLUME_STACK_SIZE];
|
||||
int enclosed_volumes[MAX_VOLUME_STACK_SIZE];
|
||||
Intersection *isect = hits;
|
||||
|
||||
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
|
||||
@@ -121,7 +127,7 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (need_add && stack_index < VOLUME_STACK_SIZE - 1) {
|
||||
if (need_add && stack_index < volume_stack_size - 1) {
|
||||
const VolumeStack new_entry = {stack_sd->object, stack_sd->shader};
|
||||
integrator_state_write_volume_stack(INTEGRATOR_STATE_PASS, stack_index, new_entry);
|
||||
++stack_index;
|
||||
@@ -136,11 +142,12 @@ ccl_device void integrator_intersect_volume_stack(INTEGRATOR_STATE_ARGS)
|
||||
}
|
||||
}
|
||||
#else
|
||||
int enclosed_volumes[VOLUME_STACK_SIZE];
|
||||
/* CUDA does not support defintion of a variable size arrays, so use the maximum possible. */
|
||||
int enclosed_volumes[MAX_VOLUME_STACK_SIZE];
|
||||
int step = 0;
|
||||
|
||||
while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 &&
|
||||
step < 2 * VOLUME_STACK_SIZE) {
|
||||
while (stack_index < volume_stack_size - 1 && enclosed_index < volume_stack_size - 1 &&
|
||||
step < 2 * volume_stack_size) {
|
||||
Intersection isect;
|
||||
if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
|
||||
break;
|
||||
|
@@ -192,7 +192,8 @@ ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS,
|
||||
INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND;
|
||||
|
||||
const int isect_prim = INTEGRATOR_STATE(isect, prim);
|
||||
const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim);
|
||||
const int isect_type = INTEGRATOR_STATE(isect, type);
|
||||
const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type);
|
||||
const int shader_flags = kernel_tex_fetch(__shaders, shader).flags;
|
||||
|
||||
if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) {
|
||||
|
@@ -59,8 +59,6 @@ CCL_NAMESPACE_BEGIN
|
||||
*
|
||||
* TODO: these could be made dynamic depending on the features used in the scene. */
|
||||
|
||||
#define INTEGRATOR_VOLUME_STACK_SIZE VOLUME_STACK_SIZE
|
||||
|
||||
#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024
|
||||
#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4
|
||||
|
||||
@@ -85,12 +83,14 @@ typedef struct IntegratorStateCPU {
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
|
||||
} \
|
||||
name[cpu_size];
|
||||
#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
|
||||
#include "kernel/integrator/integrator_state_template.h"
|
||||
#undef KERNEL_STRUCT_BEGIN
|
||||
#undef KERNEL_STRUCT_MEMBER
|
||||
#undef KERNEL_STRUCT_ARRAY_MEMBER
|
||||
#undef KERNEL_STRUCT_END
|
||||
#undef KERNEL_STRUCT_END_ARRAY
|
||||
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
|
||||
} IntegratorStateCPU;
|
||||
|
||||
/* Path Queue
|
||||
@@ -114,12 +114,14 @@ typedef struct IntegratorStateGPU {
|
||||
#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
|
||||
} \
|
||||
name[gpu_size];
|
||||
#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
|
||||
#include "kernel/integrator/integrator_state_template.h"
|
||||
#undef KERNEL_STRUCT_BEGIN
|
||||
#undef KERNEL_STRUCT_MEMBER
|
||||
#undef KERNEL_STRUCT_ARRAY_MEMBER
|
||||
#undef KERNEL_STRUCT_END
|
||||
#undef KERNEL_STRUCT_END_ARRAY
|
||||
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
|
||||
|
||||
/* Count number of queued kernels. */
|
||||
IntegratorQueueCounter *queue_counter;
|
||||
|
@@ -107,7 +107,9 @@ KERNEL_STRUCT_END(subsurface)
|
||||
KERNEL_STRUCT_BEGIN(volume_stack)
|
||||
KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME)
|
||||
KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
|
||||
KERNEL_STRUCT_END_ARRAY(volume_stack, INTEGRATOR_VOLUME_STACK_SIZE, INTEGRATOR_VOLUME_STACK_SIZE)
|
||||
KERNEL_STRUCT_END_ARRAY(volume_stack,
|
||||
KERNEL_STRUCT_VOLUME_STACK_SIZE,
|
||||
KERNEL_STRUCT_VOLUME_STACK_SIZE)
|
||||
|
||||
/********************************* Shadow Path State **************************/
|
||||
|
||||
@@ -163,5 +165,5 @@ KERNEL_STRUCT_BEGIN(shadow_volume_stack)
|
||||
KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME)
|
||||
KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
|
||||
KERNEL_STRUCT_END_ARRAY(shadow_volume_stack,
|
||||
INTEGRATOR_VOLUME_STACK_SIZE,
|
||||
INTEGRATOR_VOLUME_STACK_SIZE)
|
||||
KERNEL_STRUCT_VOLUME_STACK_SIZE,
|
||||
KERNEL_STRUCT_VOLUME_STACK_SIZE)
|
||||
|
@@ -155,7 +155,7 @@ ccl_device_forceinline void integrator_state_read_shadow_isect(INTEGRATOR_STATE_
|
||||
ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(INTEGRATOR_STATE_ARGS)
|
||||
{
|
||||
if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) {
|
||||
for (int i = 0; i < INTEGRATOR_VOLUME_STACK_SIZE; i++) {
|
||||
for (int i = 0; i < kernel_data.volume_stack_size; i++) {
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, object) = INTEGRATOR_STATE_ARRAY(
|
||||
volume_stack, i, object);
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(shadow_volume_stack, i, shader) = INTEGRATOR_STATE_ARRAY(
|
||||
@@ -223,6 +223,8 @@ ccl_device_inline void integrator_state_copy_only(const IntegratorState to_state
|
||||
while (index < gpu_array_size) \
|
||||
;
|
||||
|
||||
# define KERNEL_STRUCT_VOLUME_STACK_SIZE kernel_data.volume_stack_size
|
||||
|
||||
# include "kernel/integrator/integrator_state_template.h"
|
||||
|
||||
# undef KERNEL_STRUCT_BEGIN
|
||||
@@ -230,6 +232,7 @@ ccl_device_inline void integrator_state_copy_only(const IntegratorState to_state
|
||||
# undef KERNEL_STRUCT_ARRAY_MEMBER
|
||||
# undef KERNEL_STRUCT_END
|
||||
# undef KERNEL_STRUCT_END_ARRAY
|
||||
# undef KERNEL_STRUCT_VOLUME_STACK_SIZE
|
||||
}
|
||||
|
||||
ccl_device_inline void integrator_state_move(const IntegratorState to_state,
|
||||
|
@@ -577,7 +577,7 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS)
|
||||
# ifdef __VOLUME__
|
||||
/* Update volume stack if needed. */
|
||||
if (kernel_data.integrator.use_volumes) {
|
||||
const int object = intersection_get_object(kg, &ss_isect.hits[0]);
|
||||
const int object = ss_isect.hits[0].object;
|
||||
const int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
|
||||
if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
|
||||
|
@@ -72,7 +72,7 @@ ccl_device void volume_stack_enter_exit(INTEGRATOR_STATE_ARGS,
|
||||
}
|
||||
|
||||
/* If we exceed the stack limit, ignore. */
|
||||
if (i >= VOLUME_STACK_SIZE - 1) {
|
||||
if (i >= kernel_data.volume_stack_size - 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@@ -186,8 +186,8 @@ ccl_device_inline float _shader_bsdf_multi_eval(const KernelGlobals *kg,
|
||||
float sum_sample_weight,
|
||||
const uint light_shader_flags)
|
||||
{
|
||||
/* this is the veach one-sample model with balance heuristic, some pdf
|
||||
* factors drop out when using balance heuristic weighting */
|
||||
/* This is the veach one-sample model with balance heuristic,
|
||||
* some PDF factors drop out when using balance heuristic weighting. */
|
||||
for (int i = 0; i < sd->num_closure; i++) {
|
||||
const ShaderClosure *sc = &sd->closure[i];
|
||||
|
||||
@@ -780,8 +780,8 @@ ccl_device_inline void shader_eval_volume(INTEGRATOR_STATE_CONST_ARGS,
|
||||
break;
|
||||
}
|
||||
|
||||
/* setup shaderdata from stack. it's mostly setup already in
|
||||
* shader_setup_from_volume, this switching should be quick */
|
||||
/* Setup shader-data from stack. it's mostly setup already in
|
||||
* shader_setup_from_volume, this switching should be quick. */
|
||||
sd->object = entry.object;
|
||||
sd->lamp = LAMP_NONE;
|
||||
sd->shader = entry.shader;
|
||||
|
@@ -18,11 +18,9 @@
|
||||
# define KERNEL_TEX(type, name)
|
||||
#endif
|
||||
|
||||
/* bvh */
|
||||
/* BVH2, not used for OptiX or Embree. */
|
||||
KERNEL_TEX(float4, __bvh_nodes)
|
||||
KERNEL_TEX(float4, __bvh_leaf_nodes)
|
||||
KERNEL_TEX(float4, __prim_tri_verts)
|
||||
KERNEL_TEX(uint, __prim_tri_index)
|
||||
KERNEL_TEX(uint, __prim_type)
|
||||
KERNEL_TEX(uint, __prim_visibility)
|
||||
KERNEL_TEX(uint, __prim_index)
|
||||
@@ -46,10 +44,12 @@ KERNEL_TEX(float4, __tri_vnormal)
|
||||
KERNEL_TEX(uint4, __tri_vindex)
|
||||
KERNEL_TEX(uint, __tri_patch)
|
||||
KERNEL_TEX(float2, __tri_patch_uv)
|
||||
KERNEL_TEX(float4, __tri_verts)
|
||||
|
||||
/* curves */
|
||||
KERNEL_TEX(float4, __curves)
|
||||
KERNEL_TEX(KernelCurve, __curves)
|
||||
KERNEL_TEX(float4, __curve_keys)
|
||||
KERNEL_TEX(KernelCurveSegment, __curve_segments)
|
||||
|
||||
/* patches */
|
||||
KERNEL_TEX(uint, __patches)
|
||||
|
@@ -61,8 +61,6 @@ CCL_NAMESPACE_BEGIN
|
||||
#define ID_NONE (0.0f)
|
||||
#define PASS_UNUSED (~0)
|
||||
|
||||
#define VOLUME_STACK_SIZE 4
|
||||
|
||||
/* Kernel features */
|
||||
#define __SOBOL__
|
||||
#define __DPDU__
|
||||
@@ -190,7 +188,7 @@ enum SamplingPattern {
|
||||
SAMPLING_NUM_PATTERNS,
|
||||
};
|
||||
|
||||
/* these flags values correspond to raytypes in osl.cpp, so keep them in sync! */
|
||||
/* These flags values correspond to `raytypes` in `osl.cpp`, so keep them in sync! */
|
||||
|
||||
enum PathRayFlag {
|
||||
/* --------------------------------------------------------------------
|
||||
@@ -360,7 +358,6 @@ typedef enum PassType {
|
||||
PASS_MATERIAL_ID,
|
||||
PASS_MOTION,
|
||||
PASS_MOTION_WEIGHT,
|
||||
PASS_RENDER_TIME,
|
||||
PASS_CRYPTOMATTE,
|
||||
PASS_AOV_COLOR,
|
||||
PASS_AOV_VALUE,
|
||||
@@ -609,6 +606,12 @@ typedef struct AttributeDescriptor {
|
||||
# define MAX_CLOSURE __MAX_CLOSURE__
|
||||
#endif
|
||||
|
||||
#ifndef __MAX_VOLUME_STACK_SIZE__
|
||||
# define MAX_VOLUME_STACK_SIZE 32
|
||||
#else
|
||||
# define MAX_VOLUME_STACK_SIZE __MAX_VOLUME_STACK_SIZE__
|
||||
#endif
|
||||
|
||||
#define MAX_VOLUME_CLOSURE 8
|
||||
|
||||
/* This struct is the base class for all closures. The common members are
|
||||
@@ -863,9 +866,6 @@ typedef struct VolumeStack {
|
||||
|
||||
/* Struct to gather multiple nearby intersections. */
|
||||
typedef struct LocalIntersection {
|
||||
Ray ray;
|
||||
float3 weight[LOCAL_MAX_HITS];
|
||||
|
||||
int num_hits;
|
||||
struct Intersection hits[LOCAL_MAX_HITS];
|
||||
float3 Ng[LOCAL_MAX_HITS];
|
||||
@@ -1224,7 +1224,7 @@ typedef struct KernelData {
|
||||
uint kernel_features;
|
||||
uint max_closures;
|
||||
uint max_shaders;
|
||||
uint pad;
|
||||
uint volume_stack_size;
|
||||
|
||||
KernelCamera cam;
|
||||
KernelFilm film;
|
||||
@@ -1267,10 +1267,25 @@ typedef struct KernelObject {
|
||||
|
||||
float ao_distance;
|
||||
|
||||
float pad1, pad2;
|
||||
uint visibility;
|
||||
int primitive_type;
|
||||
} KernelObject;
|
||||
static_assert_align(KernelObject, 16);
|
||||
|
||||
typedef struct KernelCurve {
|
||||
int shader_id;
|
||||
int first_key;
|
||||
int num_keys;
|
||||
int type;
|
||||
} KernelCurve;
|
||||
static_assert_align(KernelCurve, 16);
|
||||
|
||||
typedef struct KernelCurveSegment {
|
||||
int prim;
|
||||
int type;
|
||||
} KernelCurveSegment;
|
||||
static_assert_align(KernelCurveSegment, 8);
|
||||
|
||||
typedef struct KernelSpotLight {
|
||||
float radius;
|
||||
float invarea;
|
||||
|
@@ -110,6 +110,7 @@ ustring OSLRenderServices::u_curve_thickness("geom:curve_thickness");
|
||||
ustring OSLRenderServices::u_curve_length("geom:curve_length");
|
||||
ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal");
|
||||
ustring OSLRenderServices::u_curve_random("geom:curve_random");
|
||||
ustring OSLRenderServices::u_normal_map_normal("geom:normal_map_normal");
|
||||
ustring OSLRenderServices::u_path_ray_length("path:ray_length");
|
||||
ustring OSLRenderServices::u_path_ray_depth("path:ray_depth");
|
||||
ustring OSLRenderServices::u_path_diffuse_depth("path:diffuse_depth");
|
||||
@@ -985,8 +986,18 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobals *kg,
|
||||
float3 f = curve_tangent_normal(kg, sd);
|
||||
return set_attribute_float3(f, type, derivatives, val);
|
||||
}
|
||||
else
|
||||
else if (name == u_normal_map_normal) {
|
||||
if (sd->type & PRIMITIVE_ALL_TRIANGLE) {
|
||||
float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v);
|
||||
return set_attribute_float3(f, type, derivatives, val);
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool OSLRenderServices::get_background_attribute(const KernelGlobals *kg,
|
||||
|
@@ -297,6 +297,7 @@ class OSLRenderServices : public OSL::RendererServices {
|
||||
static ustring u_curve_length;
|
||||
static ustring u_curve_tangent_normal;
|
||||
static ustring u_curve_random;
|
||||
static ustring u_normal_map_normal;
|
||||
static ustring u_path_ray_length;
|
||||
static ustring u_path_ray_depth;
|
||||
static ustring u_path_diffuse_depth;
|
||||
|
@@ -41,6 +41,7 @@ set(SRC_OSL
|
||||
node_vector_displacement.osl
|
||||
node_emission.osl
|
||||
node_environment_texture.osl
|
||||
node_float_curve.osl
|
||||
node_fresnel.osl
|
||||
node_gamma.osl
|
||||
node_geometry.osl
|
||||
|
32
intern/cycles/kernel/shaders/node_float_curve.osl
Normal file
32
intern/cycles/kernel/shaders/node_float_curve.osl
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright 2011-2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "node_ramp_util.h"
|
||||
#include "stdcycles.h"
|
||||
|
||||
shader node_float_curve(float ramp[] = {0.0},
|
||||
float min_x = 0.0,
|
||||
float max_x = 1.0,
|
||||
float ValueIn = 0.0,
|
||||
float Factor = 0.0,
|
||||
output float ValueOut = 0.0)
|
||||
{
|
||||
float c = (ValueIn - min_x) / (max_x - min_x);
|
||||
|
||||
ValueOut = rgb_ramp_lookup(ramp, c, 1, 1);
|
||||
|
||||
ValueOut = mix(ValueIn, ValueOut, Factor);
|
||||
}
|
@@ -45,7 +45,7 @@ shader node_normal_map(normal NormalIn = N,
|
||||
|
||||
// get _unnormalized_ interpolated normal and tangent
|
||||
if (getattribute(attr_name, tangent) && getattribute(attr_sign_name, tangent_sign) &&
|
||||
(!is_smooth || getattribute("geom:N", ninterp))) {
|
||||
(!is_smooth || getattribute("geom:normal_map_normal", ninterp))) {
|
||||
// apply normal map
|
||||
vector B = tangent_sign * cross(ninterp, tangent);
|
||||
Normal = normalize(mcolor[0] * tangent + mcolor[1] * B + mcolor[2] * ninterp);
|
||||
|
@@ -493,11 +493,13 @@ ccl_device void svm_eval_nodes(INTEGRATOR_STATE_CONST_ARGS,
|
||||
case NODE_IES:
|
||||
svm_node_ies(kg, sd, stack, node);
|
||||
break;
|
||||
|
||||
case NODE_RGB_CURVES:
|
||||
case NODE_VECTOR_CURVES:
|
||||
offset = svm_node_curves(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_FLOAT_CURVE:
|
||||
offset = svm_node_curve(kg, sd, stack, node, offset);
|
||||
break;
|
||||
case NODE_TANGENT:
|
||||
svm_node_tangent(kg, sd, stack, node);
|
||||
break;
|
||||
|
@@ -182,17 +182,17 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
|
||||
float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
|
||||
|
||||
/* Create ray. */
|
||||
Ray *ray = &isect.ray;
|
||||
ray->P = sd->P + disk_N * disk_height + disk_P;
|
||||
ray->D = -disk_N;
|
||||
ray->t = 2.0f * disk_height;
|
||||
ray->dP = differential_zero_compact();
|
||||
ray->dD = differential_zero_compact();
|
||||
ray->time = sd->time;
|
||||
Ray ray ccl_optional_struct_init;
|
||||
ray.P = sd->P + disk_N * disk_height + disk_P;
|
||||
ray.D = -disk_N;
|
||||
ray.t = 2.0f * disk_height;
|
||||
ray.dP = differential_zero_compact();
|
||||
ray.dD = differential_zero_compact();
|
||||
ray.time = sd->time;
|
||||
|
||||
/* Intersect with the same object. if multiple intersections are found it
|
||||
* will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
|
||||
scene_intersect_local(kg, ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
|
||||
scene_intersect_local(kg, &ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
|
||||
|
||||
int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
|
||||
|
||||
@@ -201,23 +201,20 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
|
||||
float3 hit_P;
|
||||
if (sd->type & PRIMITIVE_TRIANGLE) {
|
||||
hit_P = triangle_refine_local(
|
||||
kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim);
|
||||
kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim);
|
||||
}
|
||||
# ifdef __OBJECT_MOTION__
|
||||
else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
|
||||
float3 verts[3];
|
||||
motion_triangle_vertices(
|
||||
kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts);
|
||||
motion_triangle_vertices(kg, sd->object, isect.hits[hit].prim, sd->time, verts);
|
||||
hit_P = motion_triangle_refine_local(
|
||||
kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim, verts);
|
||||
kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim, verts);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
/* Get geometric normal. */
|
||||
float3 hit_Ng = isect.Ng[hit];
|
||||
int object = (isect.hits[hit].object == OBJECT_NONE) ?
|
||||
kernel_tex_fetch(__prim_object, isect.hits[hit].prim) :
|
||||
isect.hits[hit].object;
|
||||
int object = isect.hits[hit].object;
|
||||
int object_flag = kernel_tex_fetch(__object_flag, object);
|
||||
if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
|
||||
hit_Ng = -hit_Ng;
|
||||
@@ -225,7 +222,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS,
|
||||
|
||||
/* Compute smooth normal. */
|
||||
float3 N = hit_Ng;
|
||||
int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim);
|
||||
int prim = isect.hits[hit].prim;
|
||||
int shader = kernel_tex_fetch(__tri_shader, prim);
|
||||
|
||||
if (shader & SHADER_SMOOTH_NORMAL) {
|
||||
|
@@ -85,6 +85,9 @@ ccl_device_noinline int svm_node_closure_bsdf(
|
||||
}
|
||||
|
||||
float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N;
|
||||
if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
|
||||
N = ensure_valid_reflection(sd->Ng, sd->I, N);
|
||||
}
|
||||
|
||||
float param1 = (stack_valid(param1_offset)) ? stack_load_float(stack, param1_offset) :
|
||||
__uint_as_float(node.z);
|
||||
@@ -166,6 +169,9 @@ ccl_device_noinline int svm_node_closure_bsdf(
|
||||
float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ?
|
||||
stack_load_float3(stack, data_cn_ssr.x) :
|
||||
sd->N;
|
||||
if (!(sd->type & PRIMITIVE_ALL_CURVE)) {
|
||||
clearcoat_normal = ensure_valid_reflection(sd->Ng, sd->I, clearcoat_normal);
|
||||
}
|
||||
float3 subsurface_radius = stack_valid(data_cn_ssr.y) ?
|
||||
stack_load_float3(stack, data_cn_ssr.y) :
|
||||
make_float3(1.0f, 1.0f, 1.0f);
|
||||
|
@@ -200,43 +200,43 @@ ccl_device float svm_math(NodeMathType type, float a, float b, float c)
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate color in range 800..12000 using an approximation
|
||||
* a/x+bx+c for R and G and ((at + b)t + c)t + d) for B
|
||||
* Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
|
||||
* which is enough to get the same 8 bit/channel color.
|
||||
*/
|
||||
|
||||
ccl_static_constant float blackbody_table_r[6][3] = {
|
||||
{2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f},
|
||||
{3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f},
|
||||
{4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f},
|
||||
{4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f},
|
||||
{4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f},
|
||||
{3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f},
|
||||
};
|
||||
|
||||
ccl_static_constant float blackbody_table_g[6][3] = {
|
||||
{-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f},
|
||||
{-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f},
|
||||
{-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f},
|
||||
{-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f},
|
||||
{-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f},
|
||||
{-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f},
|
||||
};
|
||||
|
||||
ccl_static_constant float blackbody_table_b[6][4] = {
|
||||
{0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */
|
||||
{0.0f, 0.0f, 0.0f, 0.0f},
|
||||
{0.0f, 0.0f, 0.0f, 0.0f},
|
||||
{-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f},
|
||||
{-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f},
|
||||
{6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f},
|
||||
};
|
||||
|
||||
ccl_device float3 svm_math_blackbody_color(float t)
|
||||
{
|
||||
/* TODO(lukas): Reimplement in XYZ. */
|
||||
|
||||
/* Calculate color in range 800..12000 using an approximation
|
||||
* a/x+bx+c for R and G and ((at + b)t + c)t + d) for B
|
||||
* Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
|
||||
* which is enough to get the same 8 bit/channel color.
|
||||
*/
|
||||
|
||||
const float blackbody_table_r[6][3] = {
|
||||
{2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f},
|
||||
{3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f},
|
||||
{4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f},
|
||||
{4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f},
|
||||
{4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f},
|
||||
{3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f},
|
||||
};
|
||||
|
||||
const float blackbody_table_g[6][3] = {
|
||||
{-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f},
|
||||
{-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f},
|
||||
{-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f},
|
||||
{-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f},
|
||||
{-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f},
|
||||
{-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f},
|
||||
};
|
||||
|
||||
const float blackbody_table_b[6][4] = {
|
||||
{0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */
|
||||
{0.0f, 0.0f, 0.0f, 0.0f},
|
||||
{0.0f, 0.0f, 0.0f, 0.0f},
|
||||
{-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f},
|
||||
{-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f},
|
||||
{6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f},
|
||||
};
|
||||
|
||||
if (t >= 12000.0f) {
|
||||
return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
|
||||
}
|
||||
|
@@ -21,6 +21,48 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
|
||||
|
||||
ccl_device_inline float fetch_float(const KernelGlobals *kg, int offset)
|
||||
{
|
||||
uint4 node = kernel_tex_fetch(__svm_nodes, offset);
|
||||
return __uint_as_float(node.x);
|
||||
}
|
||||
|
||||
ccl_device_inline float float_ramp_lookup(const KernelGlobals *kg,
|
||||
int offset,
|
||||
float f,
|
||||
bool interpolate,
|
||||
bool extrapolate,
|
||||
int table_size)
|
||||
{
|
||||
if ((f < 0.0f || f > 1.0f) && extrapolate) {
|
||||
float t0, dy;
|
||||
if (f < 0.0f) {
|
||||
t0 = fetch_float(kg, offset);
|
||||
dy = t0 - fetch_float(kg, offset + 1);
|
||||
f = -f;
|
||||
}
|
||||
else {
|
||||
t0 = fetch_float(kg, offset + table_size - 1);
|
||||
dy = t0 - fetch_float(kg, offset + table_size - 2);
|
||||
f = f - 1.0f;
|
||||
}
|
||||
return t0 + dy * f * (table_size - 1);
|
||||
}
|
||||
|
||||
f = saturate(f) * (table_size - 1);
|
||||
|
||||
/* clamp int as well in case of NaN */
|
||||
int i = clamp(float_to_int(f), 0, table_size - 1);
|
||||
float t = f - (float)i;
|
||||
|
||||
float a = fetch_float(kg, offset + i);
|
||||
|
||||
if (interpolate && t > 0.0f)
|
||||
a = (1.0f - t) * a + t * fetch_float(kg, offset + i + 1);
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
ccl_device_inline float4 rgb_ramp_lookup(const KernelGlobals *kg,
|
||||
int offset,
|
||||
float f,
|
||||
@@ -105,6 +147,30 @@ ccl_device_noinline int svm_node_curves(
|
||||
return offset;
|
||||
}
|
||||
|
||||
ccl_device_noinline int svm_node_curve(
|
||||
const KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int offset)
|
||||
{
|
||||
uint fac_offset, value_in_offset, out_offset;
|
||||
svm_unpack_node_uchar3(node.y, &fac_offset, &value_in_offset, &out_offset);
|
||||
|
||||
uint table_size = read_node(kg, &offset).x;
|
||||
|
||||
float fac = stack_load_float(stack, fac_offset);
|
||||
float in = stack_load_float(stack, value_in_offset);
|
||||
|
||||
const float min = __int_as_float(node.z), max = __int_as_float(node.w);
|
||||
const float range = max - min;
|
||||
const float relpos = (in - min) / range;
|
||||
|
||||
float v = float_ramp_lookup(kg, offset, relpos, true, true, table_size);
|
||||
|
||||
in = (1.0f - fac) * in + fac * v;
|
||||
stack_store_float(stack, out_offset, in);
|
||||
|
||||
offset += table_size;
|
||||
return offset;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __SVM_RAMP_H__ */
|
||||
|
@@ -347,8 +347,6 @@ ccl_device_noinline void svm_node_normal_map(const KernelGlobals *kg,
|
||||
N = safe_normalize(sd->N + (N - sd->N) * strength);
|
||||
}
|
||||
|
||||
N = ensure_valid_reflection(sd->Ng, sd->I, N);
|
||||
|
||||
if (is_zero(N)) {
|
||||
N = sd->N;
|
||||
}
|
||||
|
@@ -122,6 +122,7 @@ typedef enum ShaderNodeType {
|
||||
NODE_AOV_START,
|
||||
NODE_AOV_COLOR,
|
||||
NODE_AOV_VALUE,
|
||||
NODE_FLOAT_CURVE,
|
||||
/* NOTE: for best OpenCL performance, item definition in the enum must
|
||||
* match the switch case order in svm.h. */
|
||||
} ShaderNodeType;
|
||||
|
@@ -34,44 +34,44 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Wavelength to RGB */
|
||||
|
||||
// CIE colour matching functions xBar, yBar, and zBar for
|
||||
// wavelengths from 380 through 780 nanometers, every 5
|
||||
// nanometers. For a wavelength lambda in this range:
|
||||
// cie_colour_match[(lambda - 380) / 5][0] = xBar
|
||||
// cie_colour_match[(lambda - 380) / 5][1] = yBar
|
||||
// cie_colour_match[(lambda - 380) / 5][2] = zBar
|
||||
ccl_static_constant float cie_colour_match[81][3] = {
|
||||
{0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f},
|
||||
{0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f},
|
||||
{0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f},
|
||||
{0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f},
|
||||
{0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f},
|
||||
{0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f},
|
||||
{0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f},
|
||||
{0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f},
|
||||
{0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f},
|
||||
{0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f},
|
||||
{0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f},
|
||||
{0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f},
|
||||
{0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f},
|
||||
{0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f},
|
||||
{1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f},
|
||||
{1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f},
|
||||
{0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f},
|
||||
{0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f},
|
||||
{0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f},
|
||||
{0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f},
|
||||
{0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f},
|
||||
{0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f},
|
||||
{0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f},
|
||||
{0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f},
|
||||
{0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f},
|
||||
{0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f},
|
||||
{0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}};
|
||||
|
||||
ccl_device_noinline void svm_node_wavelength(
|
||||
const KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out)
|
||||
{
|
||||
// CIE colour matching functions xBar, yBar, and zBar for
|
||||
// wavelengths from 380 through 780 nanometers, every 5
|
||||
// nanometers. For a wavelength lambda in this range:
|
||||
// cie_colour_match[(lambda - 380) / 5][0] = xBar
|
||||
// cie_colour_match[(lambda - 380) / 5][1] = yBar
|
||||
// cie_colour_match[(lambda - 380) / 5][2] = zBar
|
||||
const float cie_colour_match[81][3] = {
|
||||
{0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f},
|
||||
{0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f},
|
||||
{0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f},
|
||||
{0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f},
|
||||
{0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f},
|
||||
{0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f},
|
||||
{0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f},
|
||||
{0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f},
|
||||
{0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f},
|
||||
{0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f},
|
||||
{0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f},
|
||||
{0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f},
|
||||
{0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f},
|
||||
{0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f},
|
||||
{1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f},
|
||||
{1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f},
|
||||
{0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f},
|
||||
{0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f},
|
||||
{0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f},
|
||||
{0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f},
|
||||
{0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f},
|
||||
{0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f},
|
||||
{0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f},
|
||||
{0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f},
|
||||
{0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f},
|
||||
{0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f},
|
||||
{0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}};
|
||||
|
||||
float lambda_nm = stack_load_float(stack, wavelength);
|
||||
float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80
|
||||
int i = float_to_int(ii);
|
||||
|
@@ -35,7 +35,6 @@ set(SRC
|
||||
denoising.cpp
|
||||
film.cpp
|
||||
geometry.cpp
|
||||
gpu_display.cpp
|
||||
graph.cpp
|
||||
hair.cpp
|
||||
image.cpp
|
||||
@@ -78,9 +77,10 @@ set(SRC_HEADERS
|
||||
colorspace.h
|
||||
constant_fold.h
|
||||
denoising.h
|
||||
display_driver.h
|
||||
output_driver.h
|
||||
film.h
|
||||
geometry.h
|
||||
gpu_display.h
|
||||
graph.h
|
||||
hair.h
|
||||
image.h
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user