*Added a tree structure with a variable number of children per node, but with grouped children (for SIMD)

*SIMD support for the first 4*N children of each node
*Some bvh code organized
This commit is contained in:
2009-08-11 00:33:51 +00:00
parent 4ce316ee0f
commit 5d40c1b597
4 changed files with 523 additions and 192 deletions

View File

@@ -28,6 +28,9 @@
*/
#include <xmmintrin.h>
#ifndef RE_RAYTRACE_BVH_H
#define RE_RAYTRACE_BVH_H
inline int test_bb_group4(__m128 *bb_group, const Isect *isec)
{
@@ -53,6 +56,12 @@ template<class Tree> static void bvh_add(Tree *obj, RayObject *ob)
rtbuild_add( obj->builder, ob );
}
/*
 * Tells whether a node pointer designates a leaf.
 * A pointer counts as a leaf exactly when RayObject_isAligned() rejects it.
 */
template<class Node>
inline bool is_leaf(Node *node)
{
	if(RayObject_isAligned(node))
		return false;

	return true;
}
template<class Tree> static void bvh_done(Tree *obj);
template<class Tree>
@@ -93,14 +102,14 @@ template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec
template<class Node>
static void bvh_node_merge_bb(Node *node, float *min, float *max)
{
if(RayObject_isAligned(node))
if(is_leaf(node))
{
DO_MIN(node->bb , min);
DO_MAX(node->bb+3, max);
RE_rayobject_merge_bb( (RayObject*)node, min, max);
}
else
{
RE_rayobject_merge_bb( (RayObject*)node, min, max);
DO_MIN(node->bb , min);
DO_MAX(node->bb+3, max);
}
}
@@ -117,7 +126,7 @@ static int bvh_node_stack_raycast(Node *root, Isect *isec)
Node *stack[MAX_STACK_SIZE];
int hit = 0, stack_pos = 0;
if(!TEST_ROOT && RayObject_isAligned(root))
if(!TEST_ROOT && !is_leaf(root))
bvh_node_push_childs(root, isec, stack, stack_pos);
else
stack[stack_pos++] = root;
@@ -125,7 +134,7 @@ static int bvh_node_stack_raycast(Node *root, Isect *isec)
while(stack_pos)
{
Node *node = stack[--stack_pos];
if(RayObject_isAligned(node))
if(!is_leaf(node))
{
if(bvh_node_hit_test(node,isec))
{
@@ -157,9 +166,9 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
if(!TEST_ROOT)
{
if(RayObject_isAligned(root))
if(!is_leaf(root))
{
if(RayObject_isAligned(root->child))
if(!is_leaf(root->child))
bvh_node_push_childs(root, isec, stack, stack_pos);
else
return RE_rayobject_intersect( (RayObject*)root->child, isec);
@@ -169,7 +178,7 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
}
else
{
if(RayObject_isAligned(root))
if(!is_leaf(root))
stack[stack_pos++] = root;
else
return RE_rayobject_intersect( (RayObject*)root, isec);
@@ -214,7 +223,7 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
for(int i=0; i<4; i++)
{
Node *t = stack[stack_pos+i];
assert(RayObject_isAligned(t));
assert(!is_leaf(t));
float *bb = ((float*)t_bb)+i;
bb[4*0] = t->bb[0];
@@ -237,7 +246,7 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
if(res & (1<<i))
{
RE_RC_COUNT(isec->raycounter->bb.hit);
if(RayObject_isAligned(t_node[i]))
if(!is_leaf(t_node[i]))
{
for(Node *t=t_node[i]; t; t=t->sibling)
{
@@ -255,11 +264,11 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
else if(stack_pos > 0)
{
Node *node = stack[--stack_pos];
assert(RayObject_isAligned(node));
assert(!is_leaf(node));
if(bvh_node_hit_test(node,isec))
{
if(RayObject_isAligned(node->child))
if(!is_leaf(node->child))
{
bvh_node_push_childs(node, isec, stack, stack_pos);
assert(stack_pos <= MAX_STACK_SIZE);
@@ -291,7 +300,7 @@ static int bvh_node_raycast(Node *node, Isect *isec)
{
int i;
for(i=0; i<BVH_NCHILDS; i++)
if(RayObject_isAligned(node->child[i]))
if(!is_leaf(node->child[i]))
{
if(node->child[i] == 0) break;
@@ -308,7 +317,7 @@ static int bvh_node_raycast(Node *node, Isect *isec)
{
int i;
for(i=BVH_NCHILDS-1; i>=0; i--)
if(RayObject_isAligned(node->child[i]))
if(!is_leaf(node->child[i]))
{
if(node->child[i])
{
@@ -326,3 +335,5 @@ static int bvh_node_raycast(Node *node, Isect *isec)
return hit;
}
*/
#endif

View File

@@ -26,6 +26,12 @@
*
* ***** END GPL LICENSE BLOCK *****
*/
#define RE_USE_HINT (0)
static int tot_pushup = 0;
static int tot_pushdown = 0;
static int tot_hints = 0;
extern "C"
{
#include <assert.h>
@@ -41,22 +47,21 @@ extern "C"
#include "rayobject_hint.h"
#include "reorganize.h"
#include "bvh.h"
#include "svbvh.h"
#include <queue>
#define BVHNode VBVHNode
#define BVHTree VBVHTree
#define RE_DO_HINTS (0)
#define RAY_BB_TEST_COST (0.2f)
#define DFS_STACK_SIZE 256
//#define DYNAMIC_ALLOC_BB
//#define rtbuild_split rtbuild_mean_split_largest_axis /* objects mean split on the longest axis, childs BB are allowed to overlap */
//#define rtbuild_split rtbuild_median_split_largest_axis /* space median split on the longest axis, childs BB are allowed to overlap */
#define rtbuild_split rtbuild_heuristic_object_split /* split objects using heuristic */
struct BVHNode
struct VBVHNode
{
#ifdef DYNAMIC_ALLOC_BB
float *bb;
@@ -64,15 +69,15 @@ struct BVHNode
float bb[6];
#endif
BVHNode *child;
BVHNode *sibling;
VBVHNode *child;
VBVHNode *sibling;
};
struct BVHTree
struct VBVHTree
{
RayObject rayobj;
BVHNode *root;
SVBVHNode *root;
MemArena *node_arena;
@@ -81,6 +86,54 @@ struct BVHTree
};
/*
 * Rebuilds a child/sibling tree of OldNode as a tree of VBVHNode.
 * Every new inner node is allocated from tree->node_arena; leaf pointers
 * are passed through unchanged.
 */
template<class Tree,class OldNode>
struct Reorganize_VBVH
{
	Tree *tree;

	Reorganize_VBVH(Tree *t) : tree(t)
	{
	}

	/* Allocates one VBVHNode from the tree's arena; its fields are not initialized here. */
	VBVHNode *create_node()
	{
		return (VBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode));
	}

	/* Copies the 6 bounding box floats of `old` into `node`. */
	void copy_bb(VBVHNode *node, OldNode *old)
	{
		for(int k=0; k<6; k++)
			node->bb[k] = old->bb[k];
	}

	/*
	 * Returns the converted counterpart of `old`.
	 * A leaf is returned as-is (cast only). For an inner node the children
	 * are converted in sibling order and chained through ->sibling; the
	 * chain stops right after the first child that converts to a leaf.
	 */
	VBVHNode *transform(OldNode *old)
	{
		if(is_leaf(old))
			return (VBVHNode*)old;

		VBVHNode *result = create_node();
		result->sibling = 0;
		copy_bb(result, old);

		/* `link` is the pointer slot the next converted child has to be stored in */
		VBVHNode **link = &result->child;
		OldNode *source = old->child;
		while(source)
		{
			VBVHNode *converted = transform(source);
			*link = converted;

			/* a leaf has no sibling slot to continue the chain from */
			if(is_leaf(converted))
				return result;

			link = &converted->sibling;
			source = source->sibling;
		}

		*link = 0;
		return result;
	}
};
/*
* Push nodes (used on dfs)
*/
@@ -89,7 +142,7 @@ inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, i
{
Node *child = node->child;
if(!RayObject_isAligned(child))
if(is_leaf(child))
{
stack[stack_pos++] = child;
}
@@ -99,7 +152,7 @@ inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, i
{
//Skips BB tests on primitives
/*
if(!RayObject_isAligned(child->child))
if(is_leaf(child->child))
stack[stack_pos++] = child->child;
else
*/
@@ -113,9 +166,9 @@ inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, i
/*
* BVH done
*/
static BVHNode *bvh_new_node(BVHTree *tree)
static VBVHNode *bvh_new_node(VBVHTree *tree)
{
BVHNode *node = (BVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(BVHNode));
VBVHNode *node = (VBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode));
if( (((intptr_t)node) & (0x0f)) != 0 )
{
@@ -132,70 +185,7 @@ static BVHNode *bvh_new_node(BVHTree *tree)
return node;
}
/*
 * Returns bb_area() of the bounding box obtained by merging the builder's
 * contents (through rtbuild_merge_bb) into a freshly initialized box.
 */
template<class Builder>
float rtbuild_area(Builder *builder)
{
	float bounds[6];
	float *lower = bounds;
	float *upper = bounds + 3;

	INIT_MINMAX(lower, upper);
	rtbuild_merge_bb(builder, lower, upper);

	return bb_area(lower, upper);
}
template<class Node>
void bvh_update_bb(Node *node)
{
INIT_MINMAX(node->bb, node->bb+3);
Node *child = node->child;
while(child)
{
bvh_node_merge_bb(child, node->bb, node->bb+3);
if(RayObject_isAligned(child))
child = child->sibling;
else
child = 0;
}
}
static int tot_pushup = 0;
static int tot_pushdown = 0;
static int tot_hints = 0;
/*
 * Pushdown
 * For every child of `parent` that passes RayObject_isAligned(), looks for a
 * sibling `i` accepted by bb_fits_inside(i box, child box) whose own first
 * child also passes RayObject_isAligned(); when found, the child is unlinked
 * from parent's list and becomes the first child of `i`.
 * Afterwards recurses into the children that remain under `parent`.
 * Every move increments tot_pushdown.
 * NOTE(review): bb_fits_inside is defined elsewhere; presumably it tests
 * that the second box lies inside the first - confirm.
 */
template<class Node>
void pushdown(Node *parent)
{
// s_child is the pointer slot that currently refers to `child`
Node **s_child = &parent->child;
Node * child = parent->child;
while(child && RayObject_isAligned(child))
{
// saved up front because child->sibling is overwritten when `child` is moved
Node *next = child->sibling;
Node **next_s_child = &child->sibling;
//assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3));
for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling)
if(child != i && bb_fits_inside(i->bb, i->bb+3, child->bb, child->bb+3) && RayObject_isAligned(i->child))
{
// todo optimize (should the one with the smallest area?)
// float ia = bb_area(i->bb, i->bb+3)
// if(child->i)
// unlink `child` from parent's list and insert it at the head of i's children
*s_child = child->sibling;
child->sibling = i->child;
i->child = child;
// `child` left this list, so the slot referring to the next element stays the same
next_s_child = s_child;
tot_pushdown++;
break;
}
child = next;
s_child = next_s_child;
}
for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling)
pushdown( i );
}
template<class Node>
int count_childs(Node *parent)
@@ -204,7 +194,7 @@ int count_childs(Node *parent)
for(Node *i = parent->child; i; i = i->sibling)
{
n++;
if(!RayObject_isAligned(i))
if(is_leaf(i))
break;
}
@@ -220,39 +210,6 @@ void append_sibling(Node *node, Node *sibling)
node->sibling = sibling;
}
/*
 * Pushup
 * Replaces a child of `parent` by that child's own children whenever the
 * estimated cost of testing them directly (nchilds) is lower than the cost
 * of keeping the child ((child area / parent area) * nchilds + 1) and the
 * child has at least 2 children. Then recurses into the resulting children.
 * Every replacement increments tot_pushup.
 */
template<class Node>
void pushup(Node *parent)
{
float p_area = bb_area(parent->bb, parent->bb+3);
// prev is the pointer slot that refers to the node currently being examined
Node **prev = &parent->child;
for(Node *child = parent->child; RayObject_isAligned(child) && child; )
{
float c_area = bb_area(child->bb, child->bb+3) ;
int nchilds = count_childs(child);
float original_cost = (c_area / p_area)*nchilds + 1;
float flatten_cost = nchilds;
if(flatten_cost < original_cost && nchilds >= 2)
{
// NOTE(review): append_sibling is defined elsewhere; presumably it attaches
// child->child at the end of child's sibling chain - confirm
append_sibling(child, child->child);
// drop `child` itself from the list; the loop continues with its successor
child = child->sibling;
*prev = child;
// *prev = child->child;
// append_sibling( *prev, child->sibling );
// child = *prev;
tot_pushup++;
}
else
{
// keep `child` and advance both the slot and the cursor
*prev = child;
prev = &(*prev)->sibling;
child = *prev;
}
}
for(Node *child = parent->child; RayObject_isAligned(child) && child; child = child->sibling)
pushup(child);
}
template<class Tree, class Node, class Builder>
Node *bvh_rearrange(Tree *tree, Builder *builder)
@@ -264,7 +221,7 @@ Node *bvh_rearrange(Tree *tree, Builder *builder)
Node *node = bvh_new_node(tree);
INIT_MINMAX(node->bb, node->bb+3);
rtbuild_merge_bb(builder, node->bb, node->bb+3);
node->child = (BVHNode*) rtbuild_get_primitive( builder, 0 );
node->child = (VBVHNode*) rtbuild_get_primitive( builder, 0 );
return node;
}
else
@@ -292,30 +249,8 @@ Node *bvh_rearrange(Tree *tree, Builder *builder)
}
}
template<class Node>
float bvh_refit(Node *node)
{
if(!RayObject_isAligned(node)) return 0;
if(!RayObject_isAligned(node->child)) return 0;
float total = 0;
for(Node *child = node->child; child; child = child->sibling)
total += bvh_refit(child);
float old_area = bb_area(node->bb, node->bb+3);
INIT_MINMAX(node->bb, node->bb+3);
for(Node *child = node->child; child; child = child->sibling)
{
DO_MIN(child->bb, node->bb);
DO_MAX(child->bb+3, node->bb+3);
}
total += old_area - bb_area(node->bb, node->bb+3);
return total;
}
template<>
void bvh_done<BVHTree>(BVHTree *obj)
void bvh_done<VBVHTree>(VBVHTree *obj)
{
rtbuild_done(obj->builder);
@@ -323,18 +258,47 @@ void bvh_done<BVHTree>(BVHTree *obj)
if(needed_nodes > BLI_MEMARENA_STD_BUFSIZE)
needed_nodes = BLI_MEMARENA_STD_BUFSIZE;
obj->node_arena = BLI_memarena_new(needed_nodes);
BLI_memarena_use_malloc(obj->node_arena);
BLI_memarena_use_align(obj->node_arena, 16);
MemArena *arena1 = BLI_memarena_new(needed_nodes);
BLI_memarena_use_malloc(arena1);
BLI_memarena_use_align(arena1, 16);
obj->node_arena = arena1;
obj->root = bvh_rearrange<BVHTree,BVHNode,RTBuilder>( obj, obj->builder );
reorganize(obj->root);
remove_useless(obj->root, &obj->root);
printf("refit: %f\n", bvh_refit(obj->root) );
pushup(obj->root);
pushdown(obj->root);
// obj->root = memory_rearrange(obj->root);
VBVHNode *root = bvh_rearrange<VBVHTree,VBVHNode,RTBuilder>( obj, obj->builder );
reorganize(root);
remove_useless(root, &root);
printf("refit: %f\n", bvh_refit(root) );
pushup(root);
pushdown(root);
//Memory re-organize
if(0)
{
MemArena *arena2 = BLI_memarena_new(needed_nodes);
BLI_memarena_use_malloc(arena2);
BLI_memarena_use_align(arena2, 16);
obj->node_arena = arena2;
root = Reorganize_VBVH<VBVHTree,VBVHNode>(obj).transform(root);
BLI_memarena_free(arena1);
}
if(1)
{
MemArena *arena2 = BLI_memarena_new(needed_nodes);
BLI_memarena_use_malloc(arena2);
BLI_memarena_use_align(arena2, 16);
obj->node_arena = arena2;
obj->root = Reorganize_SVBVH<VBVHTree,VBVHNode>(obj).transform(root);
BLI_memarena_free(arena1);
}
/*
{
obj->root = root;
}
*/
obj->cost = 1.0;
rtbuild_free( obj->builder );
@@ -342,8 +306,9 @@ void bvh_done<BVHTree>(BVHTree *obj)
}
template<int StackSize>
int intersect(BVHTree *obj, Isect* isec)
int intersect(VBVHTree *obj, Isect* isec)
{
/*
if(RE_DO_HINTS && isec->hint)
{
LCTSHint *lcts = (LCTSHint*)isec->hint;
@@ -352,9 +317,9 @@ int intersect(BVHTree *obj, Isect* isec)
int hit = 0;
for(int i=0; i<lcts->size; i++)
{
BVHNode *node = (BVHNode*)lcts->stack[i];
VBVHNode *node = (VBVHNode*)lcts->stack[i];
if(RayObject_isAligned(node))
hit |= bvh_node_stack_raycast_simd<BVHNode,StackSize,true>(node, isec);
hit |= bvh_node_stack_raycast<VBVHNode,StackSize,true>(node, isec);
else
hit |= RE_rayobject_intersect( (RayObject*)node, isec );
@@ -365,9 +330,10 @@ int intersect(BVHTree *obj, Isect* isec)
return hit;
}
else
*/
{
if(RayObject_isAligned(obj->root))
return bvh_node_stack_raycast_simd<BVHNode,StackSize,false>(obj->root, isec);
return bvh_node_stack_raycast<SVBVHNode,StackSize,false>( obj->root, isec);
else
return RE_rayobject_intersect( (RayObject*) obj->root, isec );
}
@@ -395,7 +361,7 @@ void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject
{
assert( hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE );
if(!RayObject_isAligned(node))
if(is_leaf(node))
{
hint->stack[hint->size++] = (RayObject*)node;
}
@@ -425,25 +391,26 @@ void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject
template<class Tree>
void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *min, float *max)
{
if(RE_DO_HINTS)
/*
if(RE_USE_HINT)
{
HintBB bb;
VECCOPY(bb.bb, min);
VECCOPY(bb.bb+3, max);
hint->size = 0;
bvh_dfs_make_hint( tree->root, hint, 0, &bb );
tot_hints++;
}
else
*/
{
hint->size = 0;
hint->stack[hint->size++] = (RayObject*)tree->root;
tot_hints++;
hint->size = 0;
hint->stack[hint->size++] = (RayObject*)tree->root;
}
}
void bfree(BVHTree *tree)
void bfree(VBVHTree *tree)
{
if(tot_pushup + tot_pushdown + tot_hints + tot_moves)
{
@@ -460,47 +427,40 @@ void bfree(BVHTree *tree)
}
/* the cast to function pointer is needed to work around a gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */
template<int STACK_SIZE>
template<class Tree,int STACK_SIZE>
static RayObjectAPI make_api()
{
static RayObjectAPI api =
{
(RE_rayobject_raycast_callback) ((int(*)(BVHTree*,Isect*)) &intersect<STACK_SIZE>),
(RE_rayobject_add_callback) ((void(*)(BVHTree*,RayObject*)) &bvh_add<BVHTree>),
(RE_rayobject_done_callback) ((void(*)(BVHTree*)) &bvh_done<BVHTree>),
// (RE_rayobject_free_callback) ((void(*)(BVHTree*)) &bvh_free<BVHTree>),
(RE_rayobject_free_callback) ((void(*)(BVHTree*)) &bfree),
(RE_rayobject_merge_bb_callback)((void(*)(BVHTree*,float*,float*)) &bvh_bb<BVHTree>),
(RE_rayobject_cost_callback) ((float(*)(BVHTree*)) &bvh_cost<BVHTree>),
(RE_rayobject_hint_bb_callback) ((void(*)(BVHTree*,LCTSHint*,float*,float*)) &bvh_hint_bb<BVHTree>)
(RE_rayobject_raycast_callback) ((int(*)(Tree*,Isect*)) &intersect<STACK_SIZE>),
(RE_rayobject_add_callback) ((void(*)(Tree*,RayObject*)) &bvh_add<Tree>),
(RE_rayobject_done_callback) ((void(*)(Tree*)) &bvh_done<Tree>),
// (RE_rayobject_free_callback) ((void(*)(Tree*)) &bvh_free<Tree>),
(RE_rayobject_free_callback) ((void(*)(Tree*)) &bfree),
(RE_rayobject_merge_bb_callback)((void(*)(Tree*,float*,float*)) &bvh_bb<Tree>),
(RE_rayobject_cost_callback) ((float(*)(Tree*)) &bvh_cost<Tree>),
(RE_rayobject_hint_bb_callback) ((void(*)(Tree*,LCTSHint*,float*,float*)) &bvh_hint_bb<Tree>)
};
return api;
}
template<class Tree>
static RayObjectAPI* get_api(int maxstacksize)
{
// static RayObjectAPI bvh_api16 = make_api<16>();
// static RayObjectAPI bvh_api32 = make_api<32>();
// static RayObjectAPI bvh_api64 = make_api<64>();
static RayObjectAPI bvh_api128 = make_api<128>();
static RayObjectAPI bvh_api256 = make_api<256>();
static RayObjectAPI bvh_api256 = make_api<Tree,1024>();
// if(maxstacksize <= 16 ) return &bvh_api16;
// if(maxstacksize <= 32 ) return &bvh_api32;
// if(maxstacksize <= 64 ) return &bvh_api64;
if(maxstacksize <= 128) return &bvh_api128;
if(maxstacksize <= 256) return &bvh_api256;
if(maxstacksize <= 1024) return &bvh_api256;
assert(maxstacksize <= 256);
return 0;
}
RayObject *RE_rayobject_vbvh_create(int size)
{
BVHTree *obj= (BVHTree*)MEM_callocN(sizeof(BVHTree), "BVHTree");
VBVHTree *obj= (VBVHTree*)MEM_callocN(sizeof(VBVHTree), "VBVHTree");
assert( RayObject_isAligned(obj) ); /* RayObject API assumes real data to be 4-byte aligned */
obj->rayobj.api = get_api(DFS_STACK_SIZE);
obj->rayobj.api = get_api<VBVHTree>(DFS_STACK_SIZE);
obj->root = NULL;
obj->node_arena = NULL;
@@ -508,3 +468,27 @@ RayObject *RE_rayobject_vbvh_create(int size)
return RayObject_unalignRayAPI((RayObject*) obj);
}
/* SVBVH */
/* No-op overload for VBVHNode: `hint` is left untouched. */
template<class HintObject>
void bvh_dfs_make_hint(VBVHNode *node, LCTSHint *hint, int reserve_space, HintObject *hintObject)
{
}
/*
RayObject *RE_rayobject_svbvh_create(int size)
{
SVBVHTree *obj= (SVBVHTree*)MEM_callocN(sizeof(SVBVHTree), "SVBVHTree");
assert( RayObject_isAligned(obj) ); // RayObject API assumes real data to be 4-byte aligned
obj->rayobj.api = get_api<SVBVHTree>(DFS_STACK_SIZE);
obj->root = NULL;
obj->node_arena = NULL;
obj->builder = rtbuild_create( size );
return RayObject_unalignRayAPI((RayObject*) obj);
}
*/

View File

@@ -130,9 +130,115 @@ void remove_useless(Node *node, Node **new_node)
}
if(node->child)
{
if(RayObject_isAligned(node->child) && node->child->child == 0)
if(RayObject_isAligned(node->child) && node->child->sibling == 0)
*new_node = node->child;
}
else if(node->child == 0)
*new_node = 0;
}
/*
 * Minimizes the expected number of BB tests by collapsing nodes.
 * It uses a surface area heuristic to decide whether a node should be collapsed:
 * a child of `parent` is replaced by its own children when testing them
 * directly (cost: nchilds) is cheaper than keeping the child
 * (cost: (child area / parent area) * nchilds + 1) and the child has at
 * least 2 children. Every replacement increments tot_pushup.
 */
template<class Node>
void pushup(Node *parent)
{
float p_area = bb_area(parent->bb, parent->bb+3);
// prev is the pointer slot that refers to the node currently being examined
Node **prev = &parent->child;
for(Node *child = parent->child; RayObject_isAligned(child) && child; )
{
float c_area = bb_area(child->bb, child->bb+3) ;
int nchilds = count_childs(child);
float original_cost = (c_area / p_area)*nchilds + 1;
float flatten_cost = nchilds;
if(flatten_cost < original_cost && nchilds >= 2)
{
// NOTE(review): append_sibling is defined elsewhere; presumably it attaches
// child->child at the end of child's sibling chain - confirm
append_sibling(child, child->child);
// drop `child` itself from the list; the loop continues with its successor
child = child->sibling;
*prev = child;
// *prev = child->child;
// append_sibling( *prev, child->sibling );
// child = *prev;
tot_pushup++;
}
else
{
// keep `child` and advance both the slot and the cursor
*prev = child;
prev = &(*prev)->sibling;
child = *prev;
}
}
// recurse into whatever children `parent` ended up with
for(Node *child = parent->child; RayObject_isAligned(child) && child; child = child->sibling)
pushup(child);
}
/*
 * Pushdown
 * Makes sure no child fits inside any of its siblings: a child accepted by
 * bb_fits_inside(sibling box, child box) is unlinked from parent's list and
 * becomes the first child of that sibling, provided the sibling's own first
 * child passes RayObject_isAligned(). Every move increments tot_pushdown.
 * Afterwards recurses into the children that remain under `parent`.
 */
template<class Node>
void pushdown(Node *parent)
{
// s_child is the pointer slot that currently refers to `child`
Node **s_child = &parent->child;
Node * child = parent->child;
while(child && RayObject_isAligned(child))
{
// saved up front because child->sibling is overwritten when `child` is moved
Node *next = child->sibling;
Node **next_s_child = &child->sibling;
//assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3));
for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling)
if(child != i && bb_fits_inside(i->bb, i->bb+3, child->bb, child->bb+3) && RayObject_isAligned(i->child))
{
// todo optimize (should the one with the smallest area?)
// float ia = bb_area(i->bb, i->bb+3)
// if(child->i)
// unlink `child` from parent's list and insert it at the head of i's children
*s_child = child->sibling;
child->sibling = i->child;
i->child = child;
// `child` left this list, so the slot referring to the next element stays the same
next_s_child = s_child;
tot_pushdown++;
break;
}
child = next;
s_child = next_s_child;
}
for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling)
pushdown( i );
}
/*
* BVH refit
* reajust nodes BB (useful if nodes childs where modified)
*/
template<class Node>
float bvh_refit(Node *node)
{
if(is_leaf(node)) return 0;
if(is_leaf(node->child)) return 0;
float total = 0;
for(Node *child = node->child; child; child = child->sibling)
total += bvh_refit(child);
float old_area = bb_area(node->bb, node->bb+3);
INIT_MINMAX(node->bb, node->bb+3);
for(Node *child = node->child; child; child = child->sibling)
{
DO_MIN(child->bb, node->bb);
DO_MAX(child->bb+3, node->bb+3);
}
total += old_area - bb_area(node->bb, node->bb+3);
return total;
}

View File

@@ -0,0 +1,230 @@
/**
* $Id$
*
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The Original Code is Copyright (C) 2009 Blender Foundation.
* All rights reserved.
*
* The Original Code is: all of this file.
*
* Contributor(s): André Pinto.
*
* ***** END GPL LICENSE BLOCK *****
*/
#ifndef RE_RAYTRACE_SVBVH_H
#define RE_RAYTRACE_SVBVH_H
#define SVBVH_SIMD 1
#include "bvh.h"
#include <stdio.h>
/*
 * Inner node with a variable number of children.
 * The node stores the bounding boxes of its children, not a box of its own.
 */
struct SVBVHNode
{
// number of children, i.e. number of entries in `child` and of boxes in `bb`
int nchilds;
//Array of bb, array of childs
// bb holds 6 floats per child (6*nchilds floats in total); Reorganize_SVBVH::prepare_for_simd
// rewrites every complete group of 4 boxes into a transposed layout
float *bb;
// child pointers; Reorganize_SVBVH::transform may store leaf pointers (see is_leaf) here
SVBVHNode **child;
};
/*
 * SVBVHNode specialization: always reports a hit.
 * An SVBVHNode carries only the bounding boxes of its children, and those
 * are tested in bvh_node_push_childs<SVBVHNode>.
 */
template<>
inline int bvh_node_hit_test<SVBVHNode>(SVBVHNode *node, Isect *isec)
{
return 1;
}
/*
 * SVBVHNode specialization: pushes onto `stack` every child of `node` whose
 * bounding box passes the ray test, advancing `stack_pos` accordingly.
 * With SVBVH_SIMD enabled, complete groups of four boxes are tested with one
 * test_bb_group4() call; the remaining children are tested one at a time.
 * No bound check is performed on `stack` here.
 */
template<>
inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHNode **stack, int &stack_pos)
{
	const int total = node->nchilds;
	int next = 0;

	if(SVBVH_SIMD)
	{
		for(; next+4 <= total; next += 4)
		{
			/* bit `lane` of the mask is set when box next+lane passed the test */
			const int mask = test_bb_group4( (__m128*) (node->bb+6*next), isec );

			/* one call stands for four box tests */
			for(int lane=0; lane<4; lane++)
			{
				RE_RC_COUNT(isec->raycounter->bb.test);
			}

			for(int lane=0; lane<4; lane++)
			{
				if(mask & (1<<lane))
				{
					stack[stack_pos++] = node->child[next+lane];
					RE_RC_COUNT(isec->raycounter->bb.hit);
				}
			}
		}
	}

	/* leftover children (all of them when SVBVH_SIMD is disabled) */
	for(; next < total; next++)
	{
		if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*next))
			stack[stack_pos++] = node->child[next];
	}
}
/* Tree object whose nodes are SVBVHNode. */
struct SVBVHTree
{
// embedded RayObject header
RayObject rayobj;
// root of the node hierarchy
SVBVHNode *root;
// NOTE(review): presumably the arena the nodes are allocated from, as
// Reorganize_SVBVH does through tree->node_arena - confirm
MemArena *node_arena;
float cost;
RTBuilder *builder;
};
/*
 * Converts a child/sibling tree of OldNode into a tree of SVBVHNode, where
 * each node stores arrays with the bounding boxes and pointers of all its
 * children. Memory comes from tree->node_arena.
 * While converting it gathers statistics about the number of children per
 * node; they are printed when the object is destroyed.
 */
template<class Tree,class OldNode>
struct Reorganize_SVBVH
{
	/* number of buckets of the children-per-node histogram */
	enum { HISTOGRAM_SIZE = 16 };

	Tree *tree;

	float childs_per_node;                 /* sum of the children counts of all created nodes */
	int nodes_with_childs[HISTOGRAM_SIZE]; /* [n] = created nodes having exactly n children */
	int nodes;                             /* number of created nodes */

	Reorganize_SVBVH(Tree *t)
	{
		tree = t;
		nodes = 0;
		childs_per_node = 0;

		for(int i=0; i<HISTOGRAM_SIZE; i++)
			nodes_with_childs[i] = 0;
	}

	/* Prints the gathered statistics. */
	~Reorganize_SVBVH()
	{
		/* transform() creates no node at all when it returns through one of its
		 * leaf shortcuts (e.g. a tree made of a single primitive), so `nodes`
		 * can still be 0 here: report 0 instead of dividing by zero */
		const float average = (nodes > 0) ? childs_per_node / nodes : 0.0f;
		printf("%f childs per node\n", average);

		for(int i=0; i<HISTOGRAM_SIZE; i++)
		{
			const float ratio = (nodes > 0) ? nodes_with_childs[i]/float(nodes) : 0.0f;
			printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes, ratio);
		}
	}

	/*
	 * Allocates a node with room for `nchilds` bounding boxes (6 floats each)
	 * and `nchilds` child pointers. The arrays are left uninitialized.
	 */
	SVBVHNode *create_node(int nchilds)
	{
		SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode));
		node->nchilds = nchilds;
		node->bb = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
		node->child= (SVBVHNode**)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode*)*nchilds);

		return node;
	}

	/* Copies one bounding box (6 floats) from `old_bb` to `bb`. */
	void copy_bb(float *bb, float *old_bb)
	{
		std::copy( old_bb, old_bb+6, bb );
	}

	/*
	 * Rewrites every complete group of four boxes of `node` in place, from
	 * box-major order (6 floats per box) to component-major order: for each
	 * of the 6 components the 4 values of the group are stored contiguously.
	 * Boxes past the last complete group are left untouched.
	 */
	void prepare_for_simd(SVBVHNode *node)
	{
		int i=0;
		while(i+4 <= node->nchilds)
		{
			/* the transpose is done in place, so work from a copy of the group */
			float vec_tmp[4*6];
			float *res = node->bb+6*i;
			std::copy( node->bb+6*i, node->bb+6*(i+4), vec_tmp);

			for(int j=0; j<6; j++)
			{
				res[4*j+0] = vec_tmp[6*0+j];
				res[4*j+1] = vec_tmp[6*1+j];
				res[4*j+2] = vec_tmp[6*2+j];
				res[4*j+3] = vec_tmp[6*3+j];
			}
			/* NOTE(review): the disabled variant below offsets vec_tmp by i, although
			 * vec_tmp only holds the current group; fix that before enabling it */
/*
			const float *bb0 = vec_tmp+6*(i+0);
			const float *bb1 = vec_tmp+6*(i+1);
			const float *bb2 = vec_tmp+6*(i+2);
			const float *bb3 = vec_tmp+6*(i+3);

			//memmoves could be memory alligned
			const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(1,0,1,0) );
			const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(1,0,1,0) );
			_mm_store_ps( node->bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
			_mm_store_ps( node->bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );

			const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(3,2,3,2) );
			const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(3,2,3,2) );
			_mm_store_ps( node->bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
			_mm_store_ps( node->bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );

			const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+4), _mm_loadu_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) );
			const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+4), _mm_loadu_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) );
			_mm_store_ps( node->bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
			_mm_store_ps( node->bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
 */

			i += 4;
		}
	}

	/*
	 * Returns the converted counterpart of `old`.
	 * - a leaf is returned as-is (cast only);
	 * - a node whose first child is a leaf is replaced by that leaf;
	 * - otherwise a new SVBVHNode is created and its children are converted
	 *   recursively; they are stored in reverse sibling order (the index
	 *   counts down from nchilds-1).
	 */
	SVBVHNode *transform(OldNode *old)
	{
		if(is_leaf(old))
			return (SVBVHNode*)old;
		if(is_leaf(old->child))
			return (SVBVHNode*)old->child;

		int nchilds = count_childs(old);
		SVBVHNode *node = create_node(nchilds);

		childs_per_node += nchilds;
		nodes++;
		if(nchilds < HISTOGRAM_SIZE)
			nodes_with_childs[nchilds]++;

		int i=nchilds;
		for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
		{
			i--;
			node->child[i] = transform(o_child);
			if(is_leaf(o_child))
			{
				/* a leaf has no bb member: compute its box through the RayObject API,
				 * and stop because it has no sibling pointer to follow either */
				float bb[6];
				INIT_MINMAX(bb, bb+3);
				RE_rayobject_merge_bb( (RayObject*)o_child, bb, bb+3);
				copy_bb(node->bb+i*6, bb);
				break;
			}
			else
			{
				copy_bb(node->bb+i*6, o_child->bb);
			}
		}
		/* every slot counted by count_childs() must have been filled */
		assert( i == 0 );

		if(SVBVH_SIMD)
			prepare_for_simd(node);

		return node;
	}
};
#endif