Cycles: build Light Tree in parallel #105862

Merged
Weizhen Huang merged 4 commits from weizhen/blender:parallel_light_tree into main 2023-03-20 18:02:23 +01:00
4 changed files with 131 additions and 62 deletions

View File

@ -7,6 +7,7 @@
#include "scene/film.h"
#include "scene/integrator.h"
#include "scene/light.h"
#include "scene/light_tree.h"
#include "scene/mesh.h"
#include "scene/object.h"
#include "scene/scene.h"
@ -557,31 +558,43 @@ void LightManager::device_update_tree(Device *,
}
/* First initialize the light tree's nodes. */
const vector<LightTreeNode> &linearized_bvh = light_tree.get_nodes();
KernelLightTreeNode *light_tree_nodes = dscene->light_tree_nodes.alloc(linearized_bvh.size());
KernelLightTreeNode *light_tree_nodes = dscene->light_tree_nodes.alloc(light_tree.size());
KernelLightTreeEmitter *light_tree_emitters = dscene->light_tree_emitters.alloc(
light_prims.size());
for (int index = 0; index < linearized_bvh.size(); index++) {
const LightTreeNode &node = linearized_bvh[index];
light_tree_nodes[index].energy = node.energy;
/* Copy the light tree nodes to an array in the device. */
/* The nodes are arranged in a depth-first order, meaning the left child of each inner node
* always comes immediately after that inner node in the array, so that we only need to store the
* index of the right child.
* To do so, we repeatedly move to the left child of the current node until we reach the leftmost
* descendant, while keeping track of the right child of each node we visited by storing the
* pointer in the `right_node_stack`.
* Once finished visiting the left subtree, we retrieve the last stored pointer from
* `right_node_stack`, assign it to its parent (retrieved from `left_index_stack`), and repeat
* the process from there. */
int left_index_stack[32]; /* sizeof(bit_trail) * 8 == 32 */
LightTreeNode *right_node_stack[32];
int stack_id = 0;
const LightTreeNode *node = light_tree.get_root();
for (int index = 0; index < light_tree.size(); index++) {
light_tree_nodes[index].energy = node->energy;
light_tree_nodes[index].bbox.min = node.bbox.min;
light_tree_nodes[index].bbox.max = node.bbox.max;
light_tree_nodes[index].bbox.min = node->bbox.min;
light_tree_nodes[index].bbox.max = node->bbox.max;
light_tree_nodes[index].bcone.axis = node.bcone.axis;
light_tree_nodes[index].bcone.theta_o = node.bcone.theta_o;
light_tree_nodes[index].bcone.theta_e = node.bcone.theta_e;
light_tree_nodes[index].bcone.axis = node->bcone.axis;
light_tree_nodes[index].bcone.theta_o = node->bcone.theta_o;
light_tree_nodes[index].bcone.theta_e = node->bcone.theta_e;
light_tree_nodes[index].bit_trail = node.bit_trail;
light_tree_nodes[index].num_prims = node.num_prims;
light_tree_nodes[index].bit_trail = node->bit_trail;
light_tree_nodes[index].num_prims = node->num_prims;
/* Here we need to make a distinction between interior and leaf nodes. */
if (node.is_leaf()) {
light_tree_nodes[index].child_index = -node.first_prim_index;
if (node->is_leaf()) {
light_tree_nodes[index].child_index = -node->first_prim_index;
for (int i = 0; i < node.num_prims; i++) {
int emitter_index = i + node.first_prim_index;
for (int i = 0; i < node->num_prims; i++) {
int emitter_index = i + node->first_prim_index;
LightTreePrimitive &prim = light_prims[emitter_index];
light_tree_emitters[emitter_index].energy = prim.energy;
@ -628,12 +641,23 @@ void LightManager::device_update_tree(Device *,
light_tree_emitters[emitter_index].emission_sampling = EMISSION_SAMPLING_FRONT_BACK;
light_array[~prim.prim_id] = emitter_index;
}
light_tree_emitters[emitter_index].parent_index = index;
}
/* Retrieve from the stacks. */
if (stack_id == 0) {
break;
}
stack_id--;
light_tree_nodes[left_index_stack[stack_id]].child_index = index + 1;
node = right_node_stack[stack_id];
}
else {
light_tree_nodes[index].child_index = node.right_child_index;
/* Fill in the stacks. */
left_index_stack[stack_id] = index;
right_node_stack[stack_id] = node->children[LightTree::right].get();
node = node->children[LightTree::left].get();
stack_id++;
}
}

View File

@ -10,7 +10,6 @@
/* included as Light::set_shader defined through NODE_SOCKET_API does not select
* the right Node::set overload as it does not know that Shader is a Node */
#include "scene/light_tree.h"
#include "scene/shader.h"
#include "util/ies.h"

View File

@ -207,44 +207,41 @@ LightTree::LightTree(vector<LightTreePrimitive> &prims,
max_lights_in_leaf_ = max_lights_in_leaf;
const int num_prims = prims.size();
const int num_local_lights = num_prims - num_distant_lights;
/* The amount of nodes is estimated to be twice the amount of primitives */
nodes_.reserve(2 * num_prims);
nodes_.emplace_back(); /* root node */
recursive_build(0, num_local_lights, prims, 0, 1); /* build tree */
nodes_[0].make_interior(nodes_.size());
root = create_node(BoundBox::empty, OrientationBounds::empty, 0.0f, 0);
/* All local lights are grouped to the left child as an inner node. */
recursive_build(left, root.get(), 0, num_local_lights, &prims, 0, 1);
task_pool.wait_work();
/* All distant lights are grouped to one node (right child of the root node) */
OrientationBounds bcone = OrientationBounds::empty;
float energy_total = 0.0;
/* All distant lights are grouped to the right child as a leaf node. */
for (int i = num_local_lights; i < num_prims; i++) {
const LightTreePrimitive &prim = prims.at(i);
bcone = merge(bcone, prim.bcone);
energy_total += prim.energy;
}
nodes_.emplace_back(BoundBox::empty, bcone, energy_total, 1);
nodes_.back().make_leaf(num_local_lights, num_distant_lights);
nodes_.shrink_to_fit();
root->children[right] = create_node(BoundBox::empty, bcone, energy_total, 1);
root->children[right]->make_leaf(num_local_lights, num_distant_lights);
}
const vector<LightTreeNode> &LightTree::get_nodes() const
{
return nodes_;
}
int LightTree::recursive_build(
int start, int end, vector<LightTreePrimitive> &prims, uint bit_trail, int depth)
void LightTree::recursive_build(const Child child,
LightTreeNode *parent,
brecht marked this conversation as resolved Outdated

Is there a reason this is a template argument instead of a regular function argument?

It's only used for parent->children[child], not worth specializing the function for as far as I can see.

Is there a reason this is a template argument instead of a regular function argument? It's only used for `parent->children[child]`, not worth specializing the function for as far as I can see.

It's also used for recursive_build<left>(current_node, start, middle, prims, bit_trail, depth + 1);, I think it looks better than recursive_build(0, current_node, start, middle, prims, bit_trail, depth + 1);

It's also used for `recursive_build<left>(current_node, start, middle, prims, bit_trail, depth + 1);`, I think it looks better than `recursive_build(0, current_node, start, middle, prims, bit_trail, depth + 1);`

I think this would look ok too?

recursive_build(left, current_node, start, middle, prims, bit_trail, depth + 1);

Generally there should be a performance reason for using templates like this. Not a big deal for any individual function, but still would rather not do it here.

I think this would look ok too? ``` recursive_build(left, current_node, start, middle, prims, bit_trail, depth + 1); ``` Generally there should be a performance reason for using templates like this. Not a big deal for any individual function, but still would rather not do it here.
const int start,
const int end,
vector<LightTreePrimitive> *prims,
const uint bit_trail,
const int depth)
{
BoundBox bbox = BoundBox::empty;
OrientationBounds bcone = OrientationBounds::empty;
BoundBox centroid_bounds = BoundBox::empty;
float energy_total = 0.0;
int current_index = nodes_.size();
float energy_total = 0.0f;
const int num_prims = end - start;
for (int i = start; i < end; i++) {
const LightTreePrimitive &prim = prims.at(i);
const LightTreePrimitive &prim = (*prims)[i];
bbox.grow(prim.bbox);
bcone = merge(bcone, prim.bcone);
centroid_bounds.grow(prim.centroid);
@ -252,7 +249,8 @@ int LightTree::recursive_build(
energy_total += prim.energy;
}
nodes_.emplace_back(bbox, bcone, energy_total, bit_trail);
parent->children[child] = create_node(bbox, bcone, energy_total, bit_trail);
LightTreeNode *current_node = parent->children[child].get();
const bool try_splitting = num_prims > 1 && len(centroid_bounds.size()) > 0.0f;
int split_dim = -1, split_bucket = 0, num_left_prims = 0;
@ -261,7 +259,7 @@ int LightTree::recursive_build(
/* Find the best place to split the primitives into 2 nodes.
* If the best split cost is no better than making a leaf node, make a leaf instead. */
const float min_cost = min_split_saoh(
centroid_bounds, start, end, bbox, bcone, split_dim, split_bucket, num_left_prims, prims);
centroid_bounds, start, end, bbox, bcone, split_dim, split_bucket, num_left_prims, *prims);
should_split = num_prims > max_lights_in_leaf_ || min_cost < energy_total;
}
if (should_split) {
@ -271,9 +269,9 @@ int LightTree::recursive_build(
/* Partition the primitives between start and end based on the split dimension and bucket
* calculated by `min_split_saoh` */
middle = start + num_left_prims;
std::nth_element(prims.begin() + start,
prims.begin() + middle,
prims.begin() + end,
std::nth_element(prims->begin() + start,
prims->begin() + middle,
prims->begin() + end,
[split_dim](const LightTreePrimitive &l, const LightTreePrimitive &r) {
return l.centroid[split_dim] < r.centroid[split_dim];
});
@ -283,15 +281,31 @@ int LightTree::recursive_build(
middle = (start + end) / 2;
}
[[maybe_unused]] int left_index = recursive_build(start, middle, prims, bit_trail, depth + 1);
int right_index = recursive_build(middle, end, prims, bit_trail | (1u << depth), depth + 1);
assert(left_index == current_index + 1);
nodes_[current_index].make_interior(right_index);
/* Recursively build the left branch. */
if (middle - start > MIN_PRIMS_PER_THREAD) {
task_pool.push([=] {
recursive_build(left, current_node, start, middle, prims, bit_trail, depth + 1);
});
}
else {
recursive_build(left, current_node, start, middle, prims, bit_trail, depth + 1);
}
/* Recursively build the right branch. */
if (end - middle > MIN_PRIMS_PER_THREAD) {
task_pool.push([=] {
recursive_build(
right, current_node, middle, end, prims, bit_trail | (1u << depth), depth + 1);
});
}
else {
recursive_build(
right, current_node, middle, end, prims, bit_trail | (1u << depth), depth + 1);
}
}
else {
nodes_[current_index].make_leaf(start, num_prims);
current_node->make_leaf(start, num_prims);
}
return current_index;
}
float LightTree::min_split_saoh(const BoundBox &centroid_bbox,

View File

@ -8,6 +8,7 @@
#include "scene/scene.h"
#include "util/boundbox.h"
#include "util/task.h"
#include "util/types.h"
#include "util/vector.h"
@ -95,11 +96,11 @@ struct LightTreeNode {
OrientationBounds bcone;
float energy;
uint bit_trail;
int num_prims = -1;
union {
int first_prim_index; /* leaf nodes contain an index to first primitive. */
int right_child_index; /* interior nodes contain an index to second child. */
};
int num_prims = -1; /* The number of primitives a leaf node stores. A negative
number indicates it is an inner node. */
int first_prim_index; /* Leaf nodes contain an index to first primitive. */
unique_ptr<LightTreeNode> children[2]; /* Inner node. */
LightTreeNode() = default;
LightTreeNode(const BoundBox &bbox,
@ -115,10 +116,6 @@ struct LightTreeNode {
this->first_prim_index = first_prim_index;
this->num_prims = num_prims;
}
void make_interior(const int &right_child_index)
{
this->right_child_index = right_child_index;
}
inline bool is_leaf() const
{
@ -131,19 +128,54 @@ struct LightTreeNode {
* BVH-like data structure that keeps track of lights
* and considers additional orientation and energy information */
class LightTree {
vector<LightTreeNode> nodes_;
unique_ptr<LightTreeNode> root;
atomic<int> num_nodes = 0;
uint max_lights_in_leaf_;
public:
/* Left or right child of an inner node. */
enum Child {
left = 0,
right = 1,
};
LightTree(vector<LightTreePrimitive> &prims,
const int &num_distant_lights,
uint max_lights_in_leaf);
const vector<LightTreeNode> &get_nodes() const;
/* Number of nodes counted so far, read from the `num_nodes` counter. */
int size() const
{
  const int node_count = num_nodes;
  return node_count;
}
/* Raw pointer to the node held by `root`; the tree keeps ownership. */
LightTreeNode *get_root() const
{
  LightTreeNode *const root_node = root.get();
  return root_node;
}
/* NOTE: Always use this function to create a new node so the number of nodes is in sync. */
unique_ptr<LightTreeNode> create_node(const BoundBox &bbox,
const OrientationBounds &bcone,
const float &energy,
const uint &bit_trial)
{
num_nodes++;
return make_unique<LightTreeNode>(bbox, bcone, energy, bit_trial);
brecht marked this conversation as resolved Outdated

We try to avoid new and delete in new code to avoid potential memory allocation bugs. Instead unique_ptr and make_unique can be used.

We try to avoid `new` and `delete` in new code to avoid potential memory allocation bugs. Instead `unique_ptr` and `make_unique` can be used.
}
private:
int recursive_build(
int start, int end, vector<LightTreePrimitive> &prims, uint bit_trail, int depth);
/* Task pool used to build subtrees in parallel. */
TaskPool task_pool;
/* Do not spawn a thread unless more than this number of primitives are to be processed. */
enum { MIN_PRIMS_PER_THREAD = 4096 };
void recursive_build(Child child,
LightTreeNode *parent,
int start,
int end,
vector<LightTreePrimitive> *prims,
uint bit_trail,
int depth);
float min_split_saoh(const BoundBox &centroid_bbox,
int start,
int end,