Cycles: build Light Tree in parallel #105862

Merged
Weizhen Huang merged 4 commits from weizhen/blender:parallel_light_tree into main 2023-03-20 18:02:23 +01:00
4 changed files with 132 additions and 62 deletions
Showing only changes of commit 2539258099 - Show all commits

View File

@ -7,6 +7,7 @@
#include "scene/film.h"
#include "scene/integrator.h"
#include "scene/light.h"
#include "scene/light_tree.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/scene.h"
@ -557,31 +558,43 @@ void LightManager::device_update_tree(Device *,
}
/* First initialize the light tree's nodes. */
const vector<LightTreeNode> &linearized_bvh = light_tree.get_nodes();
KernelLightTreeNode *light_tree_nodes = dscene->light_tree_nodes.alloc(linearized_bvh.size());
KernelLightTreeNode *light_tree_nodes = dscene->light_tree_nodes.alloc(light_tree.size());
KernelLightTreeEmitter *light_tree_emitters = dscene->light_tree_emitters.alloc(
light_prims.size());
for (int index = 0; index < linearized_bvh.size(); index++) {
const LightTreeNode &node = linearized_bvh[index];
light_tree_nodes[index].energy = node.energy;
/* Copy the light tree nodes to an array in the device. */
/* The nodes are arranged in a depth-first order, meaning the left child of each inner node
* always comes immediately after that inner node in the array, so that we only need to store the
* index of the right child.
* To do so, we repeatedly move to the left child of the current node until we reach the leftmost
* descendant, while keeping track of the right child of each node we visited by storing the
* pointer in the `right_node_stack`.
* Once finished visiting the left subtree, we retrieve the last stored pointer from
* `right_node_stack`, assign it to its parent (retrieved from `left_index_stack`), and repeat
* the process from there. */
int left_index_stack[32]; /* sizeof(bit_trail) * 8 == 32 */
LightTreeNode *right_node_stack[32];
int stack_id = 0;
const LightTreeNode *node = light_tree.get_root();
for (int index = 0; index < light_tree.size(); index++) {
light_tree_nodes[index].energy = node->energy;
light_tree_nodes[index].bbox.min = node.bbox.min;
light_tree_nodes[index].bbox.max = node.bbox.max;
light_tree_nodes[index].bbox.min = node->bbox.min;
light_tree_nodes[index].bbox.max = node->bbox.max;
light_tree_nodes[index].bcone.axis = node.bcone.axis;
light_tree_nodes[index].bcone.theta_o = node.bcone.theta_o;
light_tree_nodes[index].bcone.theta_e = node.bcone.theta_e;
light_tree_nodes[index].bcone.axis = node->bcone.axis;
light_tree_nodes[index].bcone.theta_o = node->bcone.theta_o;
light_tree_nodes[index].bcone.theta_e = node->bcone.theta_e;
light_tree_nodes[index].bit_trail = node.bit_trail;
light_tree_nodes[index].num_prims = node.num_prims;
light_tree_nodes[index].bit_trail = node->bit_trail;
light_tree_nodes[index].num_prims = node->num_prims;
/* Here we need to make a distinction between interior and leaf nodes. */
if (node.is_leaf()) {
light_tree_nodes[index].child_index = -node.first_prim_index;
if (node->is_leaf()) {
light_tree_nodes[index].child_index = -node->first_prim_index;
for (int i = 0; i < node.num_prims; i++) {
int emitter_index = i + node.first_prim_index;
for (int i = 0; i < node->num_prims; i++) {
int emitter_index = i + node->first_prim_index;
LightTreePrimitive &prim = light_prims[emitter_index];
light_tree_emitters[emitter_index].energy = prim.energy;
@ -628,12 +641,23 @@ void LightManager::device_update_tree(Device *,
light_tree_emitters[emitter_index].emission_sampling = EMISSION_SAMPLING_FRONT_BACK;
light_array[~prim.prim_id] = emitter_index;
}
light_tree_emitters[emitter_index].parent_index = index;
}
/* Retrieve from the stacks. */
if (stack_id == 0) {
break;
}
stack_id--;
light_tree_nodes[left_index_stack[stack_id]].child_index = index + 1;
node = right_node_stack[stack_id];
}
else {
light_tree_nodes[index].child_index = node.right_child_index;
/* Fill in the stacks. */
left_index_stack[stack_id] = index;
right_node_stack[stack_id] = node->children[right];
node = node->children[left];
stack_id++;
}
}

View File

@ -10,7 +10,6 @@
/* included as Light::set_shader defined through NODE_SOCKET_API does not select
* the right Node::set overload as it does not know that Shader is a Node */
#include "scene/light_tree.h"
#include "scene/shader.h"
#include "util/ies.h"

View File

@ -207,44 +207,42 @@ LightTree::LightTree(vector<LightTreePrimitive> &prims,
max_lights_in_leaf_ = max_lights_in_leaf;
const int num_prims = prims.size();
const int num_local_lights = num_prims - num_distant_lights;
/* The number of nodes is estimated to be twice the number of primitives. */
nodes_.reserve(2 * num_prims);
nodes_.emplace_back(); /* root node */
recursive_build(0, num_local_lights, prims, 0, 1); /* build tree */
nodes_[0].make_interior(nodes_.size());
root = create_node(BoundBox::empty, OrientationBounds::empty, 0.0f, 0);
/* All local lights are grouped to the left child as an inner node. */
recursive_build<left>(root, 0, num_local_lights, &prims, 0, 1);
task_pool.wait_work();
/* All distant lights are grouped to one node (right child of the root node) */
OrientationBounds bcone = OrientationBounds::empty;
float energy_total = 0.0;
/* All distant lights are grouped to the right child as a leaf node. */
for (int i = num_local_lights; i < num_prims; i++) {
const LightTreePrimitive &prim = prims.at(i);
bcone = merge(bcone, prim.bcone);
energy_total += prim.energy;
}
nodes_.emplace_back(BoundBox::empty, bcone, energy_total, 1);
nodes_.back().make_leaf(num_local_lights, num_distant_lights);
LightTreeNode *distant_node = create_node(BoundBox::empty, bcone, energy_total, 1);
distant_node->make_leaf(num_local_lights, num_distant_lights);
nodes_.shrink_to_fit();
root->children[right] = distant_node;
}
const vector<LightTreeNode> &LightTree::get_nodes() const
{
return nodes_;
}
int LightTree::recursive_build(
int start, int end, vector<LightTreePrimitive> &prims, uint bit_trail, int depth)
template<Child child>
brecht marked this conversation as resolved Outdated

Is there a reason this is a template argument instead of a regular function argument?

It's only used for parent->children[child], not worth specializing the function for as far as I can see.

Is there a reason this is a template argument instead of a regular function argument? It's only used for `parent->children[child]`, not worth specializing the function for as far as I can see.

It's also used for recursive_build<left>(current_node, start, middle, prims, bit_trail, depth + 1);, I think it looks better than recursive_build(0, current_node, start, middle, prims, bit_trail, depth + 1);

It's also used for `recursive_build<left>(current_node, start, middle, prims, bit_trail, depth + 1);`, I think it looks better than `recursive_build(0, current_node, start, middle, prims, bit_trail, depth + 1);`

I think this would look ok too?

recursive_build(left, current_node, start, middle, prims, bit_trail, depth + 1);

Generally there should be a performance reason for using templates like this. Not a big deal for any individual function, but still would rather not do it here.

I think this would look ok too? ``` recursive_build(left, current_node, start, middle, prims, bit_trail, depth + 1); ``` Generally there should be a performance reason for using templates like this. Not a big deal for any individual function, but still would rather not do it here.
void LightTree::recursive_build(LightTreeNode *parent,
const int start,
const int end,
vector<LightTreePrimitive> *prims,
const uint bit_trail,
const int depth)
{
BoundBox bbox = BoundBox::empty;
OrientationBounds bcone = OrientationBounds::empty;
BoundBox centroid_bounds = BoundBox::empty;
float energy_total = 0.0;
int current_index = nodes_.size();
float energy_total = 0.0f;
const int num_prims = end - start;
for (int i = start; i < end; i++) {
const LightTreePrimitive &prim = prims.at(i);
const LightTreePrimitive &prim = (*prims)[i];
bbox.grow(prim.bbox);
bcone = merge(bcone, prim.bcone);
centroid_bounds.grow(prim.centroid);
@ -252,7 +250,8 @@ int LightTree::recursive_build(
energy_total += prim.energy;
}
nodes_.emplace_back(bbox, bcone, energy_total, bit_trail);
LightTreeNode *current_node = create_node(bbox, bcone, energy_total, bit_trail);
parent->children[child] = current_node;
const bool try_splitting = num_prims > 1 && len(centroid_bounds.size()) > 0.0f;
int split_dim = -1, split_bucket = 0, num_left_prims = 0;
@ -261,7 +260,7 @@ int LightTree::recursive_build(
/* Find the best place to split the primitives into 2 nodes.
* If the best split cost is no better than making a leaf node, make a leaf instead. */
const float min_cost = min_split_saoh(
centroid_bounds, start, end, bbox, bcone, split_dim, split_bucket, num_left_prims, prims);
centroid_bounds, start, end, bbox, bcone, split_dim, split_bucket, num_left_prims, *prims);
should_split = num_prims > max_lights_in_leaf_ || min_cost < energy_total;
}
if (should_split) {
@ -271,9 +270,9 @@ int LightTree::recursive_build(
/* Partition the primitives between start and end based on the split dimension and bucket
* calculated by `split_saoh` */
middle = start + num_left_prims;
std::nth_element(prims.begin() + start,
prims.begin() + middle,
prims.begin() + end,
std::nth_element(prims->begin() + start,
prims->begin() + middle,
prims->begin() + end,
[split_dim](const LightTreePrimitive &l, const LightTreePrimitive &r) {
return l.centroid[split_dim] < r.centroid[split_dim];
});
@ -283,15 +282,31 @@ int LightTree::recursive_build(
middle = (start + end) / 2;
}
[[maybe_unused]] int left_index = recursive_build(start, middle, prims, bit_trail, depth + 1);
int right_index = recursive_build(middle, end, prims, bit_trail | (1u << depth), depth + 1);
assert(left_index == current_index + 1);
nodes_[current_index].make_interior(right_index);
/* Recursively build the left branch. */
if (middle - start > MIN_PRIMS_PER_THREAD) {
task_pool.push([=] {
recursive_build<left>(current_node, start, middle, prims, bit_trail, depth + 1);
});
}
else {
recursive_build<left>(current_node, start, middle, prims, bit_trail, depth + 1);
}
/* Recursively build the right branch. */
if (end - middle > MIN_PRIMS_PER_THREAD) {
task_pool.push([=] {
recursive_build<right>(
current_node, middle, end, prims, bit_trail | (1u << depth), depth + 1);
});
}
else {
recursive_build<right>(
current_node, middle, end, prims, bit_trail | (1u << depth), depth + 1);
}
}
else {
nodes_[current_index].make_leaf(start, num_prims);
current_node->make_leaf(start, num_prims);
}
return current_index;
}
float LightTree::min_split_saoh(const BoundBox &centroid_bbox,

View File

@ -8,6 +8,7 @@
#include "scene/scene.h"
#include "util/boundbox.h"
#include "util/task.h"
#include "util/types.h"
#include "util/vector.h"
@ -52,6 +53,12 @@ OrientationBounds merge(const OrientationBounds &cone_a, const OrientationBounds
* The light tree construction is based on PBRT's BVH construction.
*/
/* Left or right child of an inner node. */
enum Child {
brecht marked this conversation as resolved Outdated

This name is too generic to put in the global Cycles namespace. Should be either defined inside the LightTree class, or named LightTreeChild.

This name is too generic to put in the global Cycles namespace. Should be either defined inside the `LightTree` class, or named `LightTreeChild`.
left = 0,
right = 1,
};
/* Light Tree Primitive
* Struct that indexes into the scene's triangle and light arrays. */
struct LightTreePrimitive {
@ -95,11 +102,11 @@ struct LightTreeNode {
OrientationBounds bcone;
float energy;
uint bit_trail;
int num_prims = -1;
union {
int first_prim_index; /* leaf nodes contain an index to first primitive. */
int right_child_index; /* interior nodes contain an index to second child. */
};
int num_prims = -1; /* The number of primitives a leaf node stores. A negative
number indicates it is an inner node. */
int first_prim_index; /* Leaf nodes contain an index to first primitive. */
LightTreeNode *children[2]; /* Inner node. */
LightTreeNode() = default;
LightTreeNode(const BoundBox &bbox,
@ -115,10 +122,6 @@ struct LightTreeNode {
this->first_prim_index = first_prim_index;
this->num_prims = num_prims;
}
void make_interior(const int &right_child_index)
{
this->right_child_index = right_child_index;
}
inline bool is_leaf() const
{
@ -131,7 +134,8 @@ struct LightTreeNode {
* BVH-like data structure that keeps track of lights
* and considers additional orientation and energy information */
class LightTree {
vector<LightTreeNode> nodes_;
LightTreeNode *root;
atomic<int> num_nodes = 0;
uint max_lights_in_leaf_;
public:
@ -139,11 +143,39 @@ class LightTree {
const int &num_distant_lights,
uint max_lights_in_leaf);
const vector<LightTreeNode> &get_nodes() const;
/* Returns the current value of the `num_nodes` counter, i.e. the number of nodes that have been
 * created through `create_node()`. */
int size() const
{
return num_nodes;
};
/* Returns the pointer to the root node of the tree; all other nodes are reached from it through
 * `LightTreeNode::children`. */
LightTreeNode *get_root() const
{
return root;
};
/* NOTE: Always use this function to create a new node so the number of nodes is in sync. */
LightTreeNode *create_node(const BoundBox &bbox,
const OrientationBounds &bcone,
const float &energy,
const uint &bit_trial)
{
num_nodes++;
return new LightTreeNode(bbox, bcone, energy, bit_trial);
brecht marked this conversation as resolved Outdated

We try to avoid new and delete in new code to avoid potential memory allocation bugs. Instead unique_ptr and make_unique can be used.

We try to avoid `new` and `delete` in new code to avoid potential memory allocation bugs. Instead `unique_ptr` and `make_unique` can be used.
}
private:
int recursive_build(
int start, int end, vector<LightTreePrimitive> &prims, uint bit_trail, int depth);
/* Task pool used to build subtrees in parallel. */
TaskPool task_pool;
/* Only push a new task to the pool when more than this number of primitives are to be processed. */
enum { MIN_PRIMS_PER_THREAD = 4096 };
template<Child child>
void recursive_build(LightTreeNode *parent,
int start,
int end,
vector<LightTreePrimitive> *prims,
uint bit_trail,
int depth);
float min_split_saoh(const BoundBox &centroid_bbox,
int start,
int end,