2011-02-23 10:52:22 +00:00
|
|
|
/*
|
2009-07-11 22:29:53 +00:00
|
|
|
* ***** BEGIN GPL LICENSE BLOCK *****
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with this program; if not, write to the Free Software Foundation,
|
2010-02-12 13:34:04 +00:00
|
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
2009-07-11 22:29:53 +00:00
|
|
|
*
|
|
|
|
|
* The Original Code is Copyright (C) 2009 Blender Foundation.
|
|
|
|
|
* All rights reserved.
|
|
|
|
|
*
|
|
|
|
|
* The Original Code is: all of this file.
|
|
|
|
|
*
|
|
|
|
|
* Contributor(s): André Pinto.
|
|
|
|
|
*
|
|
|
|
|
* ***** END GPL LICENSE BLOCK *****
|
2009-07-11 22:13:01 +00:00
|
|
|
*/
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases where
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This is
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Internal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
|
2011-02-27 19:31:27 +00:00
|
|
|
/** \file blender/render/intern/raytrace/bvh.h
|
|
|
|
|
* \ingroup render
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases where
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This is
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Internal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
#include "MEM_guardedalloc.h"
|
|
|
|
|
|
|
|
|
|
#include "BLI_math.h"
|
|
|
|
|
|
2009-09-13 20:59:25 +00:00
|
|
|
#include "raycounter.h"
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases where
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This is
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Internal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
#include "rayintersection.h"
|
|
|
|
|
#include "rayobject.h"
|
2009-08-29 17:24:45 +00:00
|
|
|
#include "rayobject_hint.h"
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases where
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This is
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Internal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
#include "rayobject_rtbuild.h"
|
2011-01-07 18:36:47 +00:00
|
|
|
|
2009-08-29 17:24:45 +00:00
|
|
|
#include <assert.h>
|
2009-10-06 00:28:07 +00:00
|
|
|
|
|
|
|
|
#ifdef __SSE__
|
2009-08-06 17:45:51 +00:00
|
|
|
#include <xmmintrin.h>
|
2009-10-06 00:28:07 +00:00
|
|
|
#endif
|
2009-08-05 21:09:41 +00:00
|
|
|
|
2012-02-17 18:59:41 +00:00
|
|
|
#ifndef __BVH_H__
|
|
|
|
|
#define __BVH_H__
|
2009-08-11 00:33:51 +00:00
|
|
|
|
2009-10-06 00:28:07 +00:00
|
|
|
#ifdef __SSE__
|
2009-08-07 01:42:51 +00:00
|
|
|
inline int test_bb_group4(__m128 *bb_group, const Isect *isec)
|
2009-08-05 21:09:41 +00:00
|
|
|
{
|
2009-08-07 00:51:41 +00:00
|
|
|
const __m128 tmin0 = _mm_setzero_ps();
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases with
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Interal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
const __m128 tmax0 = _mm_set_ps1(isec->dist);
|
|
|
|
|
|
|
|
|
|
float start[3], idot_axis[3];
|
|
|
|
|
copy_v3_v3(start, isec->start);
|
|
|
|
|
copy_v3_v3(idot_axis, isec->idot_axis);
|
|
|
|
|
|
2012-05-15 18:45:20 +00:00
|
|
|
const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[0]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
|
|
|
|
|
const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[1]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
|
|
|
|
|
const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[2]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
|
|
|
|
|
const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[3]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
|
|
|
|
|
const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[4]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
|
|
|
|
|
const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[5]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
|
2009-08-06 20:20:40 +00:00
|
|
|
|
2009-08-07 00:51:41 +00:00
|
|
|
return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
|
2009-08-05 21:09:41 +00:00
|
|
|
}
|
2009-10-06 00:28:07 +00:00
|
|
|
#endif
|
2009-08-05 21:09:41 +00:00
|
|
|
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases where
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This is
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Internal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
/*
|
|
|
|
|
* Tests whether the ray hits the bounding volume of the given node within isec->dist (returns 1 on hit, 0 otherwise)
|
|
|
|
|
* Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe
|
|
|
|
|
* [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9]
|
|
|
|
|
*/
|
2012-06-28 12:32:06 +00:00
|
|
|
static inline int rayobject_bb_intersect_test(const Isect *isec, const float *_bb)
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases with
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Interal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
{
|
|
|
|
|
const float *bb = _bb;
|
|
|
|
|
|
|
|
|
|
float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
|
|
|
|
|
float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
|
|
|
|
|
float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
|
|
|
|
|
float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
|
|
|
|
|
float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
|
|
|
|
|
float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
|
|
|
|
|
|
|
|
|
|
RE_RC_COUNT(isec->raycounter->bb.test);
|
|
|
|
|
|
2012-07-29 18:14:20 +00:00
|
|
|
if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
|
|
|
|
|
if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return 0;
|
2012-04-28 06:31:57 +00:00
|
|
|
if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;
|
2012-10-21 05:46:41 +00:00
|
|
|
RE_RC_COUNT(isec->raycounter->bb.hit);
|
Raytrace modifications from the Render Branch.
These should not have any effect on render results, except in some cases with
you have overlapping faces, where the noise seems to be slightly reduced.
There are some performance improvements, for simple scenes I wouldn't expect
more than 5-10% to be cut off the render time, for sintel scenes we got about
50% on average, that's with millions of polygons on intel quad cores. This
because memory access / cache misses were the main bottleneck for those scenes,
and the optimizations improve that.
Interal changes:
* Remove RE_raytrace.h, raytracer is now only used by render engine again.
* Split non-public parts rayobject.h into rayobject_internal.h, hopefully
makes it clearer how the API is used.
* Added rayintersection.h to contain some of the stuff from RE_raytrace.h
* Change Isect.vec/labda to Isect.dir/dist, previously vec was sometimes
normalized and sometimes not, confusing... now dir is always normalized
and dist contains the distance.
* Change VECCOPY and similar to BLI_math functions.
* Force inlining of auxiliary functions for ray-triangle/quad intersection,
helps a few percentages.
* Reorganize svbvh code so all the traversal functions are in one file
* Don't do test for root so that push_childs can be inlined
* Make shadow a template parameter so it doesn't need to be runtime checked
* Optimization in raytree building, was computing bounding boxes more often
than necessary.
* Leave out logf() factor in SAH, makes tree build quicker with no
noticeable influence on raytracing on performance?
* Set max childs to 4, simplifies traversal code a bit, but also seems
to help slightly in general.
* Store child pointers and child bb just as fixed arrays of size 4 in nodes,
nearly all nodes have this many children, so overall it actually reduces
memory usage a bit and avoids a pointer indirection.
2011-02-05 13:41:29 +00:00
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
2009-07-11 22:13:01 +00:00
|
|
|
|
|
|
|
|
/* bvh tree generics */
|
|
|
|
|
/* Queues the ray object `ob` on the tree's builder (obj->builder) via rtbuild_add(). */
template<class Tree>
static void bvh_add(Tree *obj, RayObject *ob)
{
	rtbuild_add(obj->builder, ob);
}
|
|
|
|
|
|
2009-08-11 00:33:51 +00:00
|
|
|
/*
 * A node pointer counts as a leaf when RE_rayobject_isAligned() reports it as
 * not aligned; the traversal code in this file casts such pointers to RayObject *.
 */
template<class Node>
inline bool is_leaf(Node *node)
{
	const bool aligned = RE_rayobject_isAligned(node);
	return !aligned;
}
|
|
|
|
|
|
2009-07-11 22:13:01 +00:00
|
|
|
/* Declaration only: no generic definition appears in the visible part of this header
 * (presumably each tree type provides its own - confirm). */
template<class Tree> static void bvh_done(Tree *obj);
|
|
|
|
|
|
|
|
|
|
/*
 * Releases a tree: its builder (when set), its node arena (when set),
 * and finally the tree object itself.
 */
template<class Tree>
static void bvh_free(Tree *obj)
{
	if (obj->builder) {
		rtbuild_free(obj->builder);
	}

	if (obj->node_arena) {
		BLI_memarena_free(obj->node_arena);
	}

	MEM_freeN(obj);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Merges the bounding box of the tree's root into min/max.
 * A tree without a root leaves min/max untouched.
 */
template<class Tree>
static void bvh_bb(Tree *obj, float *min, float *max)
{
	if (!obj->root) {
		return;
	}

	bvh_node_merge_bb(obj->root, min, max);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Returns the cost stored on the tree; asserts that it is not negative. */
template<class Tree>
static float bvh_cost(Tree *obj)
{
	const float cost = obj->cost;
	assert(cost >= 0.0f);
	return cost;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* bvh tree nodes generics */
|
|
|
|
|
template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec)
|
|
|
|
|
{
|
2012-05-15 18:45:20 +00:00
|
|
|
return rayobject_bb_intersect_test(isec, (const float *)node->bb);
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class Node>
|
2012-05-12 22:34:20 +00:00
|
|
|
static inline void bvh_node_merge_bb(Node *node, float min[3], float max[3])
|
2009-07-11 22:13:01 +00:00
|
|
|
{
|
2012-04-28 06:31:57 +00:00
|
|
|
if (is_leaf(node)) {
|
2012-04-29 17:11:40 +00:00
|
|
|
RE_rayobject_merge_bb((RayObject *)node, min, max);
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else {
|
2012-05-12 22:34:20 +00:00
|
|
|
DO_MIN(node->bb, min);
|
|
|
|
|
DO_MAX(node->bb + 3, max);
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2011-10-17 06:39:13 +00:00
|
|
|
* traverse a BVH (iteratively) looking for a ray hit using a local stack
|
2009-07-11 22:13:01 +00:00
|
|
|
*/
|
|
|
|
|
/* Declared here so the stack traversals below can call it; stack_pos is taken by
 * reference (expected to be advanced as children are pushed - definition not visible here). */
template<class Node> static inline void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos);
|
|
|
|
|
|
2012-04-29 15:47:02 +00:00
|
|
|
template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT, bool SHADOW>
|
2009-07-11 22:13:01 +00:00
|
|
|
static int bvh_node_stack_raycast(Node *root, Isect *isec)
|
|
|
|
|
{
|
|
|
|
|
Node *stack[MAX_STACK_SIZE];
|
|
|
|
|
int hit = 0, stack_pos = 0;
|
|
|
|
|
|
2012-04-28 06:31:57 +00:00
|
|
|
if (!TEST_ROOT && !is_leaf(root))
|
2009-07-12 18:04:10 +00:00
|
|
|
bvh_node_push_childs(root, isec, stack, stack_pos);
|
|
|
|
|
else
|
|
|
|
|
stack[stack_pos++] = root;
|
|
|
|
|
|
2012-04-28 06:31:57 +00:00
|
|
|
while (stack_pos) {
|
2009-07-11 22:13:01 +00:00
|
|
|
Node *node = stack[--stack_pos];
|
2012-04-28 06:31:57 +00:00
|
|
|
if (!is_leaf(node)) {
|
2012-04-29 15:47:02 +00:00
|
|
|
if (bvh_node_hit_test(node, isec)) {
|
2009-07-11 22:13:01 +00:00
|
|
|
bvh_node_push_childs(node, isec, stack, stack_pos);
|
|
|
|
|
assert(stack_pos <= MAX_STACK_SIZE);
|
|
|
|
|
}
|
|
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else {
|
2012-05-15 18:45:20 +00:00
|
|
|
hit |= RE_rayobject_intersect( (RayObject *)node, isec);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (SHADOW && hit) return hit;
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return hit;
|
2009-08-06 17:45:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2009-10-06 00:28:07 +00:00
|
|
|
#ifdef __SSE__
|
2009-08-06 17:45:51 +00:00
|
|
|
/*
|
|
|
|
|
* Generic SIMD bvh recursion
|
|
|
|
|
* this was created to be able to use any simd (with the cost of some memmoves)
|
|
|
|
|
* it can take advantage of any SIMD width and doesn't need any special tree care
|
|
|
|
|
*/
|
2012-04-29 15:47:02 +00:00
|
|
|
template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT>
|
2009-08-06 17:45:51 +00:00
|
|
|
static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
|
|
|
|
|
{
|
|
|
|
|
Node *stack[MAX_STACK_SIZE];
|
|
|
|
|
|
|
|
|
|
int hit = 0, stack_pos = 0;
|
|
|
|
|
|
2012-04-28 06:31:57 +00:00
|
|
|
if (!TEST_ROOT) {
|
|
|
|
|
if (!is_leaf(root)) {
|
|
|
|
|
if (!is_leaf(root->child))
|
2009-08-06 17:45:51 +00:00
|
|
|
bvh_node_push_childs(root, isec, stack, stack_pos);
|
|
|
|
|
else
|
2012-05-15 18:45:20 +00:00
|
|
|
return RE_rayobject_intersect( (RayObject *)root->child, isec);
|
2009-08-06 17:45:51 +00:00
|
|
|
}
|
|
|
|
|
else
|
2012-05-15 18:45:20 +00:00
|
|
|
return RE_rayobject_intersect( (RayObject *)root, isec);
|
2009-08-06 17:45:51 +00:00
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else {
|
|
|
|
|
if (!is_leaf(root))
|
2009-08-06 17:45:51 +00:00
|
|
|
stack[stack_pos++] = root;
|
|
|
|
|
else
|
2012-05-15 18:45:20 +00:00
|
|
|
return RE_rayobject_intersect( (RayObject *)root, isec);
|
2009-08-06 17:45:51 +00:00
|
|
|
}
|
|
|
|
|
|
2012-04-28 06:31:57 +00:00
|
|
|
while (true) {
|
2009-08-06 17:45:51 +00:00
|
|
|
//Use SIMD 4
|
2012-04-28 06:31:57 +00:00
|
|
|
if (stack_pos >= 4) {
|
2009-08-07 00:51:41 +00:00
|
|
|
__m128 t_bb[6];
|
2012-05-15 18:45:20 +00:00
|
|
|
Node *t_node[4];
|
2009-08-07 00:51:41 +00:00
|
|
|
|
2009-08-06 17:45:51 +00:00
|
|
|
stack_pos -= 4;
|
2009-08-07 01:42:51 +00:00
|
|
|
|
|
|
|
|
/* prepare the 4BB for SIMD */
|
2012-05-15 18:45:20 +00:00
|
|
|
t_node[0] = stack[stack_pos + 0]->child;
|
|
|
|
|
t_node[1] = stack[stack_pos + 1]->child;
|
|
|
|
|
t_node[2] = stack[stack_pos + 2]->child;
|
|
|
|
|
t_node[3] = stack[stack_pos + 3]->child;
|
|
|
|
|
|
|
|
|
|
const float *bb0 = stack[stack_pos + 0]->bb;
|
|
|
|
|
const float *bb1 = stack[stack_pos + 1]->bb;
|
|
|
|
|
const float *bb2 = stack[stack_pos + 2]->bb;
|
|
|
|
|
const float *bb3 = stack[stack_pos + 3]->bb;
|
|
|
|
|
|
|
|
|
|
const __m128 x0y0x1y1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1, 0, 1, 0) );
|
|
|
|
|
const __m128 x2y2x3y3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1, 0, 1, 0) );
|
|
|
|
|
t_bb[0] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2, 0, 2, 0) );
|
|
|
|
|
t_bb[1] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3, 1, 3, 1) );
|
|
|
|
|
|
|
|
|
|
const __m128 z0X0z1X1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3, 2, 3, 2) );
|
|
|
|
|
const __m128 z2X2z3X3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3, 2, 3, 2) );
|
|
|
|
|
t_bb[2] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2, 0, 2, 0) );
|
|
|
|
|
t_bb[3] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3, 1, 3, 1) );
|
|
|
|
|
|
|
|
|
|
const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps(_mm_load_ps(bb0 + 4), _mm_load_ps(bb1 + 4), _MM_SHUFFLE(1, 0, 1, 0) );
|
|
|
|
|
const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps(_mm_load_ps(bb2 + 4), _mm_load_ps(bb3 + 4), _MM_SHUFFLE(1, 0, 1, 0) );
|
|
|
|
|
t_bb[4] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2, 0, 2, 0) );
|
|
|
|
|
t_bb[5] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3, 1, 3, 1) );
|
2012-03-09 18:28:30 +00:00
|
|
|
#if 0
|
2012-05-15 18:45:20 +00:00
|
|
|
for (int i = 0; i < 4; i++)
|
2009-08-06 17:45:51 +00:00
|
|
|
{
|
2012-05-15 18:45:20 +00:00
|
|
|
Node *t = stack[stack_pos + i];
|
2009-08-11 00:33:51 +00:00
|
|
|
assert(!is_leaf(t));
|
2009-08-06 17:45:51 +00:00
|
|
|
|
2012-05-15 18:45:20 +00:00
|
|
|
float *bb = ((float *)t_bb) + i;
|
|
|
|
|
bb[4 * 0] = t->bb[0];
|
|
|
|
|
bb[4 * 1] = t->bb[1];
|
|
|
|
|
bb[4 * 2] = t->bb[2];
|
|
|
|
|
bb[4 * 3] = t->bb[3];
|
|
|
|
|
bb[4 * 4] = t->bb[4];
|
|
|
|
|
bb[4 * 5] = t->bb[5];
|
2009-08-06 17:45:51 +00:00
|
|
|
t_node[i] = t->child;
|
|
|
|
|
}
|
2012-03-09 18:28:30 +00:00
|
|
|
#endif
|
2009-09-06 19:14:06 +00:00
|
|
|
RE_RC_COUNT(isec->raycounter->simd_bb.test);
|
2012-05-15 18:45:20 +00:00
|
|
|
int res = test_bb_group4(t_bb, isec);
|
2009-07-11 22:13:01 +00:00
|
|
|
|
2012-04-28 06:31:57 +00:00
|
|
|
for (int i = 0; i < 4; i++)
|
2012-05-15 18:45:20 +00:00
|
|
|
if (res & (1 << i)) {
|
|
|
|
|
RE_RC_COUNT(isec->raycounter->simd_bb.hit);
|
|
|
|
|
if (!is_leaf(t_node[i])) {
|
|
|
|
|
for (Node *t = t_node[i]; t; t = t->sibling) {
|
|
|
|
|
assert(stack_pos < MAX_STACK_SIZE);
|
|
|
|
|
stack[stack_pos++] = t;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
hit |= RE_rayobject_intersect( (RayObject *)t_node[i], isec);
|
|
|
|
|
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
|
2009-08-06 17:45:51 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else if (stack_pos > 0) {
|
2009-08-06 17:45:51 +00:00
|
|
|
Node *node = stack[--stack_pos];
|
2009-08-11 00:33:51 +00:00
|
|
|
assert(!is_leaf(node));
|
2009-08-06 17:45:51 +00:00
|
|
|
|
2012-04-29 15:47:02 +00:00
|
|
|
if (bvh_node_hit_test(node, isec)) {
|
2012-04-28 06:31:57 +00:00
|
|
|
if (!is_leaf(node->child)) {
|
2009-08-06 17:45:51 +00:00
|
|
|
bvh_node_push_childs(node, isec, stack, stack_pos);
|
|
|
|
|
assert(stack_pos <= MAX_STACK_SIZE);
|
|
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else {
|
2012-05-15 18:45:20 +00:00
|
|
|
hit |= RE_rayobject_intersect( (RayObject *)node->child, isec);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
|
2009-08-06 17:45:51 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else break;
|
|
|
|
|
}
|
|
|
|
|
return hit;
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
2009-10-06 00:28:07 +00:00
|
|
|
#endif
|
2009-07-11 22:13:01 +00:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* recursively traverse a BVH looking for a ray hit using the system stack
|
|
|
|
|
*/
|
2012-03-09 18:28:30 +00:00
|
|
|
#if 0
|
2009-07-11 22:13:01 +00:00
|
|
|
template<class Node>
|
|
|
|
|
static int bvh_node_raycast(Node *node, Isect *isec)
|
|
|
|
|
{
|
|
|
|
|
int hit = 0;
|
2012-04-28 06:31:57 +00:00
|
|
|
if (bvh_test_node(node, isec))
|
2009-07-11 22:13:01 +00:00
|
|
|
{
|
2012-04-28 06:31:57 +00:00
|
|
|
if (isec->idot_axis[node->split_axis] > 0.0f)
|
2009-07-11 22:13:01 +00:00
|
|
|
{
|
|
|
|
|
int i;
|
2012-05-15 18:45:20 +00:00
|
|
|
for (i = 0; i < BVH_NCHILDS; i++)
|
2012-04-28 06:31:57 +00:00
|
|
|
if (!is_leaf(node->child[i]))
|
2009-07-11 22:13:01 +00:00
|
|
|
{
|
2012-04-28 06:31:57 +00:00
|
|
|
if (node->child[i] == 0) break;
|
2009-07-11 22:13:01 +00:00
|
|
|
|
|
|
|
|
hit |= bvh_node_raycast(node->child[i], isec);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
2012-05-15 18:45:20 +00:00
|
|
|
else {
|
|
|
|
|
hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
|
|
|
|
}
|
2012-05-15 18:45:20 +00:00
|
|
|
else {
|
2009-07-11 22:13:01 +00:00
|
|
|
int i;
|
2012-05-15 18:45:20 +00:00
|
|
|
for (i = BVH_NCHILDS - 1; i >= 0; i--)
|
2012-04-28 06:31:57 +00:00
|
|
|
if (!is_leaf(node->child[i]))
|
2009-07-11 22:13:01 +00:00
|
|
|
{
|
2012-04-28 06:31:57 +00:00
|
|
|
if (node->child[i])
|
2009-07-11 22:13:01 +00:00
|
|
|
{
|
|
|
|
|
hit |= dfs_raycast(node->child[i], isec);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
|
|
|
|
}
|
2012-05-15 18:45:20 +00:00
|
|
|
else {
|
|
|
|
|
hit |= RE_rayobject_intersect( (RayObject *)node->child[i], isec);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (hit && isec->mode == RE_RAY_SHADOW) return hit;
|
2009-07-11 22:13:01 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return hit;
|
|
|
|
|
}
|
2012-03-09 18:28:30 +00:00
|
|
|
#endif
|
2009-08-11 00:33:51 +00:00
|
|
|
|
2012-04-29 15:47:02 +00:00
|
|
|
template<class Node, class HintObject>
|
2013-05-08 12:55:05 +00:00
|
|
|
static void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject *hintObject)
|
2009-08-29 17:24:45 +00:00
|
|
|
{
|
2012-04-29 17:11:40 +00:00
|
|
|
assert(hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE);
|
2009-08-29 17:24:45 +00:00
|
|
|
|
2012-04-28 06:31:57 +00:00
|
|
|
if (is_leaf(node)) {
|
2012-05-15 18:45:20 +00:00
|
|
|
hint->stack[hint->size++] = (RayObject *)node;
|
2009-08-29 17:24:45 +00:00
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else {
|
2009-08-29 17:24:45 +00:00
|
|
|
int childs = count_childs(node);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (hint->size + reserve_space + childs <= RE_RAY_LCTS_MAX_SIZE) {
|
2012-05-15 18:45:20 +00:00
|
|
|
int result = hint_test_bb(hintObject, node->bb, node->bb + 3);
|
2012-04-28 06:31:57 +00:00
|
|
|
if (result == HINT_RECURSE) {
|
2009-08-29 17:24:45 +00:00
|
|
|
/* We are 100% sure the ray will be pass inside this node */
|
|
|
|
|
bvh_dfs_make_hint_push_siblings(node->child, hint, reserve_space, hintObject);
|
|
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else if (result == HINT_ACCEPT) {
|
2012-05-15 18:45:20 +00:00
|
|
|
hint->stack[hint->size++] = (RayObject *)node;
|
2009-08-29 17:24:45 +00:00
|
|
|
}
|
|
|
|
|
}
|
2012-04-28 06:31:57 +00:00
|
|
|
else {
|
2012-05-15 18:45:20 +00:00
|
|
|
hint->stack[hint->size++] = (RayObject *)node;
|
2009-08-29 17:24:45 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* bvh_get_api: declaration only; bvh_create_tree() stores its result in rayobj.api. */
template<class Tree>
|
2012-05-15 18:45:20 +00:00
|
|
|
static RayObjectAPI *bvh_get_api(int maxstacksize);
|
2009-08-29 17:24:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class Tree, int DFS_STACK_SIZE>
|
|
|
|
|
static inline RayObject *bvh_create_tree(int size)
|
|
|
|
|
{
|
2012-05-15 18:45:20 +00:00
|
|
|
Tree *obj = (Tree *)MEM_callocN(sizeof(Tree), "BVHTree");
|
2012-04-29 17:11:40 +00:00
|
|
|
assert(RE_rayobject_isAligned(obj)); /* RayObject API assumes real data to be 4-byte aligned */
|
2009-08-29 17:24:45 +00:00
|
|
|
|
|
|
|
|
obj->rayobj.api = bvh_get_api<Tree>(DFS_STACK_SIZE);
|
|
|
|
|
obj->root = NULL;
|
|
|
|
|
|
|
|
|
|
obj->node_arena = NULL;
|
2012-05-15 18:45:20 +00:00
|
|
|
obj->builder = rtbuild_create(size);
|
2009-08-29 17:24:45 +00:00
|
|
|
|
2012-05-15 18:45:20 +00:00
|
|
|
return RE_rayobject_unalignRayAPI((RayObject *) obj);
|
2009-08-29 17:24:45 +00:00
|
|
|
}
|
|
|
|
|
|
2009-08-11 00:33:51 +00:00
|
|
|
#endif
|