Volume rendering:

* Multithreaded volume light cache

While the render process itself is multithreaded, the light cache pre-process 
previously wasn't (painfully noticed this the other week rendering on some 
borrowed octocore nodes!). This commit adds threading, similar to the tiled render - 
it divides the light cache's voxel grid into 3d parts and renders them with the 
available threads.

This makes the most significant difference on shots where the light cache pre-
process is the bottleneck, so shots with either several lights, or a high res light 
cache, or both. On this file (3 lights, light cache res 120), on my Core 2 Duo it now 
renders in 27 seconds compared to 49 previously.

http://mke3.net/blender/devel/rendering/volumetrics/threaded_cache.jpg
This commit is contained in:
2008-12-22 20:28:02 +00:00
parent 7124d321d8
commit be1d06a2c5
2 changed files with 168 additions and 183 deletions

View File

@@ -35,6 +35,7 @@
#include "BLI_blenlib.h"
#include "BLI_arithb.h"
#include "BLI_threads.h"
#include "PIL_time.h"
@@ -47,6 +48,9 @@
#include "renderdatabase.h"
#include "volumetric.h"
#include "BKE_global.h"
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
/* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
/* only to be used here in this file, it's for speed */
@@ -207,11 +211,7 @@ static void lightcache_filter(float *cache, int res)
}
}
/* Precache a volume into a 3D voxel grid.
* The voxel grid is stored in the ObjectInstanceRen,
* in camera space, aligned with the ObjectRen's bounding box.
* Resolution is defined by the user.
*/
void vol_precache_objectinstance(Render *re, ObjectInstanceRen *obi, Material *ma, float *bbmin, float *bbmax)
{
int x, y, z;
@@ -325,29 +325,20 @@ void vol_precache_objectinstance(Render *re, ObjectInstanceRen *obi, Material *m
}
#if 0
typedef struct VolPrecachePart {
struct VolPrecachePart *next, *prev;
int num;
int minx, maxx;
int miny, maxy;
int minz, maxz;
int res;
float bbmin[3], voxel[3];
struct RayTree *tree;
struct ShadeInput *shi;
struct ObjectInstanceRen *obi;
int done;
} VolPrecachePart;
#if 0 // debug stuff
static void *vol_precache_part_test(void *data)
{
VolPrecachePart *vpt = (VolPrecachePart *)data;
VolPrecachePart *pa = data;
printf("part number: %d \n", vpt->num);
printf("part number: %d \n", pa->num);
printf("done: %d \n", pa->done);
printf("x min: %d x max: %d \n", pa->minx, pa->maxx);
printf("y min: %d y max: %d \n", pa->miny, pa->maxy);
printf("z min: %d z max: %d \n", pa->minz, pa->maxz);
return 0;
return NULL;
}
#endif
/* Iterate over the 3d voxel grid, and fill the voxels with scattering information
*
@@ -357,28 +348,24 @@ static void *vol_precache_part_test(void *data)
*/
static void *vol_precache_part(void *data)
{
VolPrecachePart *vpt = (VolPrecachePart *)data;
ObjectInstanceRen *obi = vpt->obi;
RayTree *tree = vpt->tree;
ShadeInput *shi = vpt->shi;
float scatter_col[3] = {0.f, 0.f, 0.f};
VolPrecachePart *pa = (VolPrecachePart *)data;
ObjectInstanceRen *obi = pa->obi;
RayTree *tree = pa->tree;
ShadeInput *shi = pa->shi;
float density, scatter_col[3] = {0.f, 0.f, 0.f};
float co[3];
int x, y, z;
const int res=vpt->res, res_2=vpt->res*vpt->res, res_3=vpt->res*vpt->res*vpt->res;
const int res=pa->res, res_2=pa->res*pa->res, res_3=pa->res*pa->res*pa->res;
const float stepsize = vol_get_stepsize(shi, STEPSIZE_VIEW);
res = vpt->res;
res_2 = res*res;
res_3 = res*res*res;
for (x= vpt->minx; x < vpt->maxx; x++) {
co[0] = vpt->bbmin[0] + (vpt->voxel[0] * x);
for (x= pa->minx; x < pa->maxx; x++) {
co[0] = pa->bbmin[0] + (pa->voxel[0] * x);
for (y= vpt->miny; y < vpt->maxy; y++) {
co[1] = vpt->bbmin[1] + (vpt->voxel[1] * y);
for (y= pa->miny; y < pa->maxy; y++) {
co[1] = pa->bbmin[1] + (pa->voxel[1] * y);
for (z=vpt->minz; z < vpt->maxz; z++) {
co[2] = vpt->bbmin[2] + (vpt->voxel[2] * z);
for (z=pa->minz; z < pa->maxz; z++) {
co[2] = pa->bbmin[2] + (pa->voxel[2] * z);
// don't bother if the point is not inside the volume mesh
if (!point_inside_obi(tree, obi, co)) {
@@ -397,14 +384,17 @@ static void *vol_precache_part(void *data)
}
}
pa->done = 1;
return 0;
}
static void precache_setup_shadeinput(Render *re, ObjectInstanceRen *obi, Material *ma, ShadeInput *shi)
{
float view[3] = {0.0,0.0,-1.0};
memset(&shi, 0, sizeof(ShadeInput));
memset(shi, 0, sizeof(ShadeInput));
shi->depth= 1;
shi->mask= 1;
shi->mat = ma;
@@ -417,61 +407,102 @@ static void precache_setup_shadeinput(Render *re, ObjectInstanceRen *obi, Materi
VECCOPY(shi->view, view);
}
static void precache_init_parts(ListBase *precache_parts, RayTree *tree, ShadeInput *shi, ObjectInstanceRen *obi, float *bbmin, float *bbmax, int res)
static void precache_init_parts(Render *re, RayTree *tree, ShadeInput *shi, ObjectInstanceRen *obi, float *bbmin, float *bbmax, int res, int totthread, int *parts)
{
int i;
int i=0, x, y, z;
float voxel[3];
int sizex, sizey, sizez;
int minx, maxx;
int miny, maxy;
int minz, maxz;
BLI_freelistN(&re->volume_precache_parts);
/* currently we just subdivide the box, number of threads per side */
parts[0] = parts[1] = parts[2] = totthread;
VecSubf(voxel, bbmax, bbmin);
if ((voxel[0] < FLT_EPSILON) || (voxel[1] < FLT_EPSILON) || (voxel[2] < FLT_EPSILON))
return;
VecMulf(voxel, 1.0f/res);
for(i=0; i < totparts; i++) {
VolPrecachePart *pa= MEM_callocN(sizeof(VolPrecachePart), "new precache part");
pa->done = 0;
pa->num = i;
for (x=0; x < parts[0]; x++) {
sizex = ceil(res / (float)parts[0]);
minx = x * sizex;
maxx = minx + sizex;
maxx = (maxx>res)?res:maxx;
pa->res = res;
VECCOPY(pa->bbmin, bbmin);
VECCOPY(precache_parts[j].voxel, voxel);
precache_parts[j].tree = tree;
precache_parts[j].shi = shi;
precache_parts[j].obi = obi;
BLI_addtail(precache_parts, pa);
for (y=0; y < parts[1]; y++) {
sizey = ceil(res / (float)parts[1]);
miny = y * sizey;
maxy = miny + sizey;
maxy = (maxy>res)?res:maxy;
for (z=0; z < parts[2]; z++) {
VolPrecachePart *pa= MEM_callocN(sizeof(VolPrecachePart), "new precache part");
sizez = ceil(res / (float)parts[2]);
minz = z * sizez;
maxz = minz + sizez;
maxz = (maxz>res)?res:maxz;
pa->done = 0;
pa->working = 0;
pa->num = i;
pa->tree = tree;
pa->shi = shi;
pa->obi = obi;
VECCOPY(pa->bbmin, bbmin);
VECCOPY(pa->voxel, voxel);
pa->res = res;
pa->minx = minx; pa->maxx = maxx;
pa->miny = miny; pa->maxy = maxy;
pa->minz = minz; pa->maxz = maxz;
BLI_addtail(&re->volume_precache_parts, pa);
i++;
}
}
}
}
static VolPrecachePart *precache_get_new_part(Render *re)
{
VolPrecachePart *pa, *nextpa=NULL;
for (pa = re->volume_precache_parts.first; pa; pa=pa->next)
{
if (pa->done==0 && pa->working==0) {
nextpa = pa;
break;
}
}
return nextpa;
}
/* Precache a volume into a 3D voxel grid.
* The voxel grid is stored in the ObjectInstanceRen,
* in camera space, aligned with the ObjectRen's bounding box.
* Resolution is defined by the user.
*/
void vol_precache_objectinstance_threads(Render *re, ObjectInstanceRen *obi, Material *ma, float *bbmin, float *bbmax)
{
int x, y, z;
float co[3], voxel[3], scatter_col[3];
ShadeInput shi;
float density;
float stepsize;
float resf, res_3f;
int res_2, res_3;
int edgeparts=2;
ListBase threads, precache_parts;
int cont= 1;
int xparts, yparts, zparts;
float part[3];
int totthread = re->r.threads;
int totparts = edgeparts*edgeparts*edgeparts;
VolPrecachePart *nextpa;
int j;
float i = 1.0f;
double time, lasttime= PIL_check_seconds_timer();
const int res = ma->vol_precache_resolution;
VolPrecachePart *nextpa, *pa;
RayTree *tree;
ShadeInput shi;
ListBase threads;
const int res = ma->vol_precache_resolution;
int parts[3], totparts;
int caching=1, counter=0;
int totthread = re->r.threads;
double time, lasttime= PIL_check_seconds_timer();
R = *re;
@@ -480,119 +511,62 @@ void vol_precache_objectinstance_threads(Render *re, ObjectInstanceRen *obi, Mat
tree = create_raytree_obi(obi, bbmin, bbmax);
if (!tree) return;
obi->volume_precache = MEM_callocN(sizeof(float)*res_3*3, "volume light cache");
obi->volume_precache = MEM_callocN(sizeof(float)*res*res*res*3, "volume light cache");
/* Need a shadeinput to calculate scattering */
precache_setup_shadeinput(re, obi, ma, &shi);
precache_init_parts(&precache_parts, tree, shi, obi, bbmin, bbmax, res);
precache_init_parts(re, tree, &shi, obi, bbmin, bbmax, res, totthread, parts);
totparts = parts[0] * parts[1] * parts[2];
BLI_init_threads(&threads, vol_precache_part, totthread);
nextpa = precache_get_new_part(precache_threads);
while(cont) {
while(caching) {
if(BLI_available_threads(&threads) && !(re->test_break())) {
precache_get_new_part(
// get new job (data pointer)
for(j=0; j < totparts; j++) {
if (!precache_threads[j].done) {
// tag job 'processed
precache_threads[j].done = 1;
}
nextpa = precache_get_new_part(re);
if (nextpa) {
nextpa->working = 1;
BLI_insert_thread(&threads, nextpa);
}
BLI_insert_thread(&threads, precache_get_new_part(precache_threads));
}
else PIL_sleep_ms(50);
// find if a job is ready, this the do_something_func() should write in job somewhere
cont= 0;
for(go over all jobs)
if(job is ready) {
if(job was not removed) {
BLI_remove_thread(&lb, job);
}
}
else cont= 1;
caching=0;
counter=0;
for(pa= re->volume_precache_parts.first; pa; pa= pa->next) {
if(pa->done) {
counter++;
BLI_remove_thread(&threads, pa);
} else
caching = 1;
}
// conditions to exit loop
if(if escape loop event) {
if(BLI_available_threadslots(&lb)==maxthreads)
break;
}
}
BLI_end_threads(&threads);
//
/* Iterate over the 3d voxel grid, and fill the voxels with scattering information
*
* It's stored in memory as 3 big float grids next to each other, one for each RGB channel.
* I'm guessing the memory alignment may work out better this way for the purposes
* of doing linear interpolation, but I haven't actually tested this theory! :)
*/
/*
for (x=0; x < res; x++) {
co[0] = bbmin[0] + (voxel[0] * x);
for (y=0; y < res; y++) {
co[1] = bbmin[1] + (voxel[1] * y);
for (z=0; z < res; z++) {
co[2] = bbmin[2] + (voxel[2] * z);
time= PIL_check_seconds_timer();
i++;
// display progress every second
if(re->test_break()) {
if(tree) {
RE_ray_tree_free(tree);
tree= NULL;
}
return;
}
if(time-lasttime>1.0f) {
char str[64];
sprintf(str, "Precaching volume: %d%%", (int)(100.0f * (i / res_3f)));
re->i.infostr= str;
re->stats_draw(&re->i);
re->i.infostr= NULL;
lasttime= time;
}
// don't bother if the point is not inside the volume mesh
if (!point_inside_obi(tree, obi, co)) {
obi->volume_precache[0*res_3 + x*res_2 + y*res + z] = -1.0f;
obi->volume_precache[1*res_3 + x*res_2 + y*res + z] = -1.0f;
obi->volume_precache[2*res_3 + x*res_2 + y*res + z] = -1.0f;
continue;
}
density = vol_get_density(&shi, co);
vol_get_scattering(&shi, scatter_col, co, stepsize, density);
obi->volume_precache[0*res_3 + x*res_2 + y*res + z] = scatter_col[0];
obi->volume_precache[1*res_3 + x*res_2 + y*res + z] = scatter_col[1];
obi->volume_precache[2*res_3 + x*res_2 + y*res + z] = scatter_col[2];
}
if (re->test_break() && BLI_available_threads(&threads)==totthread)
caching=0;
time= PIL_check_seconds_timer();
if(time-lasttime>1.0f) {
char str[64];
sprintf(str, "Precaching volume: %d%%", (int)(100.0f * ((float)counter / (float)totparts)));
re->i.infostr= str;
re->stats_draw(&re->i);
re->i.infostr= NULL;
lasttime= time;
}
}
*/
BLI_end_threads(&threads);
BLI_freelistN(&re->volume_precache_parts);
if(tree) {
RE_ray_tree_free(tree);
tree= NULL;
}
lightcache_filter(obi->volume_precache, res);
}
#endif
/* loop through all objects (and their associated materials)
* marked for pre-caching in convertblender.c, and pre-cache them */
@@ -605,7 +579,8 @@ void volume_precache(Render *re)
if (vo->ma->vol_shadeflag & MA_VOL_PRECACHESHADING) {
for(obi= re->instancetable.first; obi; obi= obi->next) {
if (obi->obr == vo->obr) {
vol_precache_objectinstance(re, obi, vo->ma, obi->obr->boundbox[0], obi->obr->boundbox[1]);
if (G.rt==10) vol_precache_objectinstance(re, obi, vo->ma, obi->obr->boundbox[0], obi->obr->boundbox[1]);
else vol_precache_objectinstance_threads(re, obi, vo->ma, obi->obr->boundbox[0], obi->obr->boundbox[1]);
}
}
}
@@ -624,7 +599,7 @@ void free_volume_precache(Render *re)
MEM_freeN(obi->volume_precache);
}
BLI_freelistN(&re->vol_precache_obs);
BLI_freelistN(&re->volumes);
}
int point_inside_volume_objectinstance(ObjectInstanceRen *obi, float *co)