WIP: Cycles: Parallelize copying geometry attributes from Blender #106694

Closed
Hans Goudey wants to merge 5 commits from HooglyBoogly:cycles-attribute-copy-parallel into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
4 changed files with 115 additions and 68 deletions

View File

@ -910,6 +910,8 @@ static void export_hair_curves(Scene *scene,
const bool need_motion,
const float motion_scale)
{
scoped_timer timer;
const int num_keys = b_curves.points.length();
const int num_curves = b_curves.curves.length();
@ -993,6 +995,8 @@ static void export_hair_curves(Scene *scene,
}
attr_create_generic(scene, hair, b_curves, need_motion, motion_scale);
std::cout << time_human_readable_from_seconds(timer.get_time()) << '\n';
}
static void export_hair_curves_motion(Hair *hair, BL::Curves b_curves, int motion_step)

View File

@ -247,25 +247,29 @@ static void fill_generic_attribute(BL::Mesh &b_mesh,
return;
}
const int *poly_offsets = static_cast<const int *>(b_mesh.polygons[0].ptr.data);
for (int i = 0; i < polys_num; i++) {
const int poly_start = poly_offsets[i];
const int poly_size = poly_offsets[i + 1] - poly_start;
for (int j = 0; j < poly_size; j++) {
*data = get_value_at_index(poly_start + j);
data++;
parallel_for(blocked_range<int>(0, polys_num, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
const int poly_start = poly_offsets[i];
const int poly_size = poly_offsets[i + 1] - poly_start;
for (int j = 0; j < poly_size; j++) {
*data = get_value_at_index(poly_start + j);
data++;
}
}
}
});
}
else {
const int tris_num = b_mesh.loop_triangles.length();
const MLoopTri *looptris = static_cast<const MLoopTri *>(
b_mesh.loop_triangles[0].ptr.data);
for (int i = 0; i < tris_num; i++) {
const MLoopTri &tri = looptris[i];
data[i * 3 + 0] = get_value_at_index(tri.tri[0]);
data[i * 3 + 1] = get_value_at_index(tri.tri[1]);
data[i * 3 + 2] = get_value_at_index(tri.tri[2]);
}
parallel_for(blocked_range<int>(0, tris_num, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
const MLoopTri &tri = looptris[i];
data[i * 3 + 0] = get_value_at_index(tri.tri[0]);
data[i * 3 + 1] = get_value_at_index(tri.tri[1]);
data[i * 3 + 2] = get_value_at_index(tri.tri[2]);
}
});
}
break;
}
@ -998,6 +1002,8 @@ static void create_mesh(Scene *scene,
const bool subdivision = false,
const bool subdivide_uvs = true)
{
scoped_timer timer;
const int numverts = b_mesh.vertices.length();
const int polys_num = b_mesh.polygons.length();
int numfaces = (!subdivision) ? b_mesh.loop_triangles.length() : b_mesh.polygons.length();
@ -1051,10 +1057,12 @@ static void create_mesh(Scene *scene,
if (subdivision || !(use_loop_normals && corner_normals)) {
const float(*b_vert_normals)[3] = static_cast<const float(*)[3]>(
b_mesh.vertex_normals[0].ptr.data);
for (int i = 0; i < numverts; i++) {
const float *b_vert_normal = b_vert_normals[i];
N[i] = make_float3(b_vert_normal[0], b_vert_normal[1], b_vert_normal[2]);
}
parallel_for(blocked_range<int>(0, numverts, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
const float *b_vert_normal = b_vert_normals[i];
N[i] = make_float3(b_vert_normal[0], b_vert_normal[1], b_vert_normal[2]);
}
});
}
/* create generated coordinates from undeformed coordinates */
@ -1087,19 +1095,23 @@ static void create_mesh(Scene *scene,
int *shader = mesh->get_shader().data();
const MLoopTri *looptris = static_cast<const MLoopTri *>(b_mesh.loop_triangles[0].ptr.data);
for (int i = 0; i < numtris; i++) {
const MLoopTri &tri = looptris[i];
triangles[i * 3 + 0] = corner_verts[tri.tri[0]];
triangles[i * 3 + 1] = corner_verts[tri.tri[1]];
triangles[i * 3 + 2] = corner_verts[tri.tri[2]];
}
parallel_for(blocked_range<int>(0, numtris, 8196), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
const MLoopTri &tri = looptris[i];
triangles[i * 3 + 0] = corner_verts[tri.tri[0]];
triangles[i * 3 + 1] = corner_verts[tri.tri[1]];
triangles[i * 3 + 2] = corner_verts[tri.tri[2]];
}
});
if (material_indices) {
const int *looptri_polys = static_cast<const int *>(
b_mesh.loop_triangle_polygons[0].ptr.data);
for (int i = 0; i < numtris; i++) {
shader[i] = clamp_material_index(material_indices[looptri_polys[i]]);
}
parallel_for(blocked_range<int>(0, numtris, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
shader[i] = clamp_material_index(material_indices[looptri_polys[i]]);
}
});
}
else {
std::fill(shader, shader + numtris, 0);
@ -1108,24 +1120,28 @@ static void create_mesh(Scene *scene,
if (sharp_faces && !(use_loop_normals && corner_normals)) {
const int *looptri_polys = static_cast<const int *>(
b_mesh.loop_triangle_polygons[0].ptr.data);
for (int i = 0; i < numtris; i++) {
smooth[i] = !sharp_faces[looptri_polys[i]];
}
parallel_for(blocked_range<int>(0, numtris, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
smooth[i] = !sharp_faces[looptri_polys[i]];
}
});
}
else {
std::fill(smooth, smooth + numtris, true);
}
if (use_loop_normals && corner_normals) {
for (int i = 0; i < numtris; i++) {
const MLoopTri &tri = looptris[i];
for (int i = 0; i < 3; i++) {
const int corner = tri.tri[i];
const int vert = corner_verts[corner];
const float *normal = corner_normals[corner];
N[vert] = make_float3(normal[0], normal[1], normal[2]);
parallel_for(blocked_range<int>(0, numtris, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
const MLoopTri &tri = looptris[i];
for (int i = 0; i < 3; i++) {
const int corner = tri.tri[i];
const int vert = corner_verts[corner];
const float *normal = corner_normals[corner];
N[vert] = make_float3(normal[0], normal[1], normal[2]);
}
}
}
});
}
mesh->tag_triangles_modified();
@ -1207,6 +1223,8 @@ static void create_mesh(Scene *scene,
*tfm = transform_translate(-loc) * transform_scale(size);
}
std::cout << time_human_readable_from_seconds(timer.get_time()) << '\n';
}
static void create_subd_mesh(Scene *scene,

View File

@ -13,6 +13,7 @@
#include "util/color.h"
#include "util/foreach.h"
#include "util/hash.h"
#include "util/tbb.h"
CCL_NAMESPACE_BEGIN
@ -88,9 +89,11 @@ static void copy_attributes(PointCloud *pointcloud,
const bool *src = static_cast<const bool *>(b_bool_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
for (int i = 0; i < num_points; i++) {
data[i] = float(src[i]);
}
parallel_for(blocked_range<int>(0, num_points, 8192), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = float(src[i]);
}
});
break;
}
case BL::Attribute::data_type_INT: {
@ -98,9 +101,11 @@ static void copy_attributes(PointCloud *pointcloud,
const int *src = static_cast<const int *>(b_int_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeFloat, element);
float *data = attr->data_float();
for (int i = 0; i < num_points; i++) {
data[i] = float(src[i]);
}
parallel_for(blocked_range<int>(0, num_points, 8192), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = float(src[i]);
}
});
break;
}
case BL::Attribute::data_type_INT32_2D: {
@ -108,9 +113,11 @@ static void copy_attributes(PointCloud *pointcloud,
const int2 *src = static_cast<const int2 *>(b_int2_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeFloat2, element);
float2 *data = attr->data_float2();
for (int i = 0; i < num_points; i++) {
data[i] = make_float2(float(src[i][0]), float(src[i][1]));
}
parallel_for(blocked_range<int>(0, num_points, 8192), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = make_float2(float(src[i][0]), float(src[i][1]));
}
});
break;
}
case BL::Attribute::data_type_FLOAT_VECTOR: {
@ -118,9 +125,11 @@ static void copy_attributes(PointCloud *pointcloud,
const float(*src)[3] = static_cast<const float(*)[3]>(b_vector_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeVector, element);
float3 *data = attr->data_float3();
for (int i = 0; i < num_points; i++) {
data[i] = make_float3(src[i][0], src[i][1], src[i][2]);
}
parallel_for(blocked_range<int>(0, num_points, 8192), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = make_float3(src[i][0], src[i][1], src[i][2]);
}
});
break;
}
case BL::Attribute::data_type_BYTE_COLOR: {
@ -128,12 +137,14 @@ static void copy_attributes(PointCloud *pointcloud,
const uchar(*src)[4] = static_cast<const uchar(*)[4]>(b_color_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeRGBA, element);
float4 *data = attr->data_float4();
for (int i = 0; i < num_points; i++) {
data[i] = make_float4(color_srgb_to_linear(byte_to_float(src[i][0])),
color_srgb_to_linear(byte_to_float(src[i][1])),
color_srgb_to_linear(byte_to_float(src[i][2])),
color_srgb_to_linear(byte_to_float(src[i][3])));
}
parallel_for(blocked_range<int>(0, num_points, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = color_srgb_to_linear(make_float4(byte_to_float(src[i][0]),
byte_to_float(src[i][1]),
byte_to_float(src[i][2]),
byte_to_float(src[i][3])));
}
});
break;
}
case BL::Attribute::data_type_FLOAT_COLOR: {
@ -141,9 +152,11 @@ static void copy_attributes(PointCloud *pointcloud,
const float(*src)[4] = static_cast<const float(*)[4]>(b_color_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeRGBA, element);
float4 *data = attr->data_float4();
for (int i = 0; i < num_points; i++) {
data[i] = make_float4(src[i][0], src[i][1], src[i][2], src[i][3]);
}
parallel_for(blocked_range<int>(0, num_points, 8192), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = make_float4(src[i][0], src[i][1], src[i][2], src[i][3]);
}
});
break;
}
case BL::Attribute::data_type_FLOAT2: {
@ -151,9 +164,11 @@ static void copy_attributes(PointCloud *pointcloud,
const float(*src)[2] = static_cast<const float(*)[2]>(b_float2_attribute.data[0].ptr.data);
Attribute *attr = attributes.add(name, TypeFloat2, element);
float2 *data = attr->data_float2();
for (int i = 0; i < num_points; i++) {
data[i] = make_float2(src[i][0], src[i][1]);
}
parallel_for(blocked_range<int>(0, num_points, 8192), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = make_float2(src[i][0], src[i][1]);
}
});
break;
}
default:
@ -207,15 +222,19 @@ static void export_pointcloud(Scene *scene,
const bool need_motion,
const float motion_scale)
{
scoped_timer timer;
const int num_points = b_pointcloud.points.length();
pointcloud->resize(num_points);
const float(*b_attr_position)[3] = find_position_attribute(b_pointcloud);
float3 *points = pointcloud->get_points().data();
for (int i = 0; i < num_points; i++) {
points[i] = make_float3(b_attr_position[i][0], b_attr_position[i][1], b_attr_position[i][2]);
}
parallel_for(blocked_range<int>(0, num_points, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
points[i] = make_float3(b_attr_position[i][0], b_attr_position[i][1], b_attr_position[i][2]);
}
});
const float *b_attr_radius = find_radius_attribute(b_pointcloud);
float *radius = pointcloud->get_radius().data();
@ -232,12 +251,16 @@ static void export_pointcloud(Scene *scene,
if (pointcloud->need_attribute(scene, ATTR_STD_POINT_RANDOM)) {
Attribute *attr_random = pointcloud->attributes.add(ATTR_STD_POINT_RANDOM);
float *data = attr_random->data_float();
for (int i = 0; i < num_points; i++) {
data[i] = hash_uint2_to_float(i, 0);
}
parallel_for(blocked_range<int>(0, num_points, 4096), [&](const blocked_range<int> &r) {
for (int i = r.begin(); i != r.end(); i++) {
data[i] = hash_uint2_to_float(i, 0);
}
});
}
copy_attributes(pointcloud, b_pointcloud, need_motion, motion_scale);
std::cout << time_human_readable_from_seconds(timer.get_time()) << '\n';
}
static void export_pointcloud_motion(PointCloud *pointcloud,
@ -293,7 +316,7 @@ static void export_pointcloud_motion(PointCloud *pointcloud,
void BlenderSync::sync_pointcloud(PointCloud *pointcloud, BObjectInfo &b_ob_info)
{
size_t old_numpoints = pointcloud->num_points();
int old_numpoints = pointcloud->num_points();
array<Node *> used_shaders = pointcloud->get_used_shaders();

View File

@ -11,6 +11,7 @@
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_for_each.h>
#include <tbb/parallel_invoke.h>
#include <tbb/task_arena.h>
#include <tbb/task_group.h>
@ -26,6 +27,7 @@ using tbb::blocked_range;
using tbb::enumerable_thread_specific;
using tbb::parallel_for;
using tbb::parallel_for_each;
using tbb::parallel_invoke;
static inline void thread_capture_fp_settings()
{