Made rtbuild object_heuristic_splitter faster
I think the cost is something like this: the old version was 4*n*log(n) + 3*(n*6); the new version is (2*n*log(n) + 3*(n*6)) * f, with f < 1. Still missing: changing the sorting function to an introsort instead of qsort. Other options, such as bucket sort, may be worth trying (for very large trees).
This commit is contained in:
		@@ -80,6 +80,14 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#define INIT_MINMAX2(min, max) { (min)[0]= (min)[1]= 1.0e30f; (max)[0]= (max)[1]= -1.0e30f; }
 | 
					#define INIT_MINMAX2(min, max) { (min)[0]= (min)[1]= 1.0e30f; (max)[0]= (max)[1]= -1.0e30f; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define DO_MIN(vec, min) { if( (min)[0]>(vec)[0] ) (min)[0]= (vec)[0];      \
 | 
				
			||||||
 | 
												  if( (min)[1]>(vec)[1] ) (min)[1]= (vec)[1];   \
 | 
				
			||||||
 | 
												  if( (min)[2]>(vec)[2] ) (min)[2]= (vec)[2]; } \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define DO_MAX(vec, max) { if( (max)[0]<(vec)[0] ) (max)[0]= (vec)[0];		\
 | 
				
			||||||
 | 
												  if( (max)[1]<(vec)[1] ) (max)[1]= (vec)[1];	\
 | 
				
			||||||
 | 
												  if( (max)[2]<(vec)[2] ) (max)[2]= (vec)[2]; } \
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define DO_MINMAX(vec, min, max) { if( (min)[0]>(vec)[0] ) (min)[0]= (vec)[0]; \
 | 
					#define DO_MINMAX(vec, min, max) { if( (min)[0]>(vec)[0] ) (min)[0]= (vec)[0]; \
 | 
				
			||||||
							  if( (min)[1]>(vec)[1] ) (min)[1]= (vec)[1]; \
 | 
												  if( (min)[1]>(vec)[1] ) (min)[1]= (vec)[1]; \
 | 
				
			||||||
							  if( (min)[2]>(vec)[2] ) (min)[2]= (vec)[2]; \
 | 
												  if( (min)[2]>(vec)[2] ) (min)[2]= (vec)[2]; \
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -52,6 +52,8 @@ typedef struct RTBuilder
 | 
				
			|||||||
	
 | 
						
 | 
				
			||||||
	/* child partitions calculated during splitting */
 | 
						/* child partitions calculated during splitting */
 | 
				
			||||||
	int child_offset[RTBUILD_MAX_CHILDS+1];
 | 
						int child_offset[RTBUILD_MAX_CHILDS+1];
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						int child_sorted_axis; /* -1 if not sorted */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} RTBuilder;
 | 
					} RTBuilder;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -102,9 +102,8 @@ static void bih_free(BIHTree *obj)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
static void bih_bb(BIHTree *obj, float *min, float *max)
 | 
					static void bih_bb(BIHTree *obj, float *min, float *max)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	//TODO only half operations needed
 | 
						DO_MIN(obj->bb[0], min);
 | 
				
			||||||
	DO_MINMAX(obj->bb[0], min, max);
 | 
						DO_MAX(obj->bb[1], max);
 | 
				
			||||||
	DO_MINMAX(obj->bb[1], min, max);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
@@ -213,8 +212,8 @@ static BIHNode *bih_rearrange(BIHTree *tree, RTBuilder *builder, int nid, float
 | 
				
			|||||||
			parent->bi[i][0] = cbb[parent->split_axis];
 | 
								parent->bi[i][0] = cbb[parent->split_axis];
 | 
				
			||||||
			parent->bi[i][1] = cbb[parent->split_axis+3];
 | 
								parent->bi[i][1] = cbb[parent->split_axis+3];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			DO_MINMAX(cbb  , bb, bb+3);
 | 
								DO_MIN(cbb  , bb);
 | 
				
			||||||
			DO_MINMAX(cbb+3, bb, bb+3);
 | 
								DO_MAX(cbb+3, bb+3);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		for(; i<BIH_NCHILDS; i++)
 | 
							for(; i<BIH_NCHILDS; i++)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -110,8 +110,8 @@ static void RayObject_blibvh_add(RayObject *o, RayObject *ob)
 | 
				
			|||||||
	INIT_MINMAX(min_max, min_max+3);
 | 
						INIT_MINMAX(min_max, min_max+3);
 | 
				
			||||||
	RE_rayobject_merge_bb(ob, min_max, min_max+3);
 | 
						RE_rayobject_merge_bb(ob, min_max, min_max+3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	DO_MINMAX(min_max  , obj->bb[0], obj->bb[1]);
 | 
						DO_MIN(min_max  , obj->bb[0]);
 | 
				
			||||||
	DO_MINMAX(min_max+3, obj->bb[0], obj->bb[1]);
 | 
						DO_MAX(min_max+3, obj->bb[1]);
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	BLI_bvhtree_insert(obj->bvh, (int)ob, min_max, 2 );	
 | 
						BLI_bvhtree_insert(obj->bvh, (int)ob, min_max, 2 );	
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -135,6 +135,6 @@ static void RayObject_blibvh_free(RayObject *o)
 | 
				
			|||||||
static void RayObject_blibvh_bb(RayObject *o, float *min, float *max)
 | 
					static void RayObject_blibvh_bb(RayObject *o, float *min, float *max)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	BVHObject *obj = (BVHObject*)o;
 | 
						BVHObject *obj = (BVHObject*)o;
 | 
				
			||||||
	DO_MINMAX( obj->bb[0], min, max );
 | 
						DO_MIN( obj->bb[0], min );
 | 
				
			||||||
	DO_MINMAX( obj->bb[1], min, max );
 | 
						DO_MAX( obj->bb[1], max );
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -133,9 +133,8 @@ static void bvh_merge_bb(BVHNode *node, float *min, float *max)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
	if(RayObject_isAligned(node))
 | 
						if(RayObject_isAligned(node))
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		//TODO only half operations needed
 | 
							DO_MIN(node->bb  , min);
 | 
				
			||||||
		DO_MINMAX(node->bb  , min, max);
 | 
							DO_MAX(node->bb+3, max);
 | 
				
			||||||
		DO_MINMAX(node->bb+3, min, max);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -19,6 +19,7 @@ static void rtbuild_init(RTBuilder *b, RayObject **begin, RayObject **end)
 | 
				
			|||||||
	b->begin = begin;
 | 
						b->begin = begin;
 | 
				
			||||||
	b->end   = end;
 | 
						b->end   = end;
 | 
				
			||||||
	b->split_axis = 0;
 | 
						b->split_axis = 0;
 | 
				
			||||||
 | 
						b->child_sorted_axis = -1;
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	for(i=0; i<RTBUILD_MAX_CHILDS; i++)
 | 
						for(i=0; i<RTBUILD_MAX_CHILDS; i++)
 | 
				
			||||||
		b->child_offset[i] = 0;
 | 
							b->child_offset[i] = 0;
 | 
				
			||||||
@@ -46,6 +47,7 @@ void rtbuild_add(RTBuilder *b, RayObject *o)
 | 
				
			|||||||
RTBuilder* rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp)
 | 
					RTBuilder* rtbuild_get_child(RTBuilder *b, int child, RTBuilder *tmp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	rtbuild_init( tmp, b->begin + b->child_offset[child], b->begin + b->child_offset[child+1] );
 | 
						rtbuild_init( tmp, b->begin + b->child_offset[child], b->begin + b->child_offset[child+1] );
 | 
				
			||||||
 | 
						tmp->child_sorted_axis = b->child_sorted_axis;
 | 
				
			||||||
	return tmp;
 | 
						return tmp;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -254,12 +256,14 @@ float bb_volume(float *min, float *max)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
float bb_area(float *min, float *max)
 | 
					float bb_area(float *min, float *max)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	float sub[3];
 | 
						float sub[3], a;
 | 
				
			||||||
	sub[0] = max[0]-min[0];
 | 
						sub[0] = max[0]-min[0];
 | 
				
			||||||
	sub[1] = max[1]-min[1];
 | 
						sub[1] = max[1]-min[1];
 | 
				
			||||||
	sub[2] = max[2]-min[2];
 | 
						sub[2] = max[2]-min[2];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return (sub[0]*sub[1] + sub[0]*sub[2] + sub[1]*sub[2])*2;
 | 
						a = (sub[0]*sub[1] + sub[0]*sub[2] + sub[1]*sub[2])*2;
 | 
				
			||||||
 | 
						assert(a >= 0.0);
 | 
				
			||||||
 | 
						return a;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds)
 | 
					int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds)
 | 
				
			||||||
@@ -274,7 +278,7 @@ int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds)
 | 
				
			|||||||
	else
 | 
						else
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		float bcost = FLT_MAX;
 | 
							float bcost = FLT_MAX;
 | 
				
			||||||
		int i, axis, baxis, boffset;
 | 
							int i, axis, baxis, boffset, k, try_axis[3];
 | 
				
			||||||
		CostObject *cost   = MEM_mallocN( sizeof(CostObject)*size, "RTBuilder.HeuristicObjectSplitter" );
 | 
							CostObject *cost   = MEM_mallocN( sizeof(CostObject)*size, "RTBuilder.HeuristicObjectSplitter" );
 | 
				
			||||||
		float      *acc_bb = MEM_mallocN( sizeof(float)*6*size, "RTBuilder.HeuristicObjectSplitterBB" );
 | 
							float      *acc_bb = MEM_mallocN( sizeof(float)*6*size, "RTBuilder.HeuristicObjectSplitterBB" );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -285,7 +289,20 @@ int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds)
 | 
				
			|||||||
			RE_rayobject_merge_bb(b->begin[i], cost[i].bb, cost[i].bb+3);
 | 
								RE_rayobject_merge_bb(b->begin[i], cost[i].bb, cost[i].bb+3);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		
 | 
							
 | 
				
			||||||
		for(axis=0; axis<3; axis++)
 | 
							if(b->child_sorted_axis >= 0 && b->child_sorted_axis < 3)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								try_axis[0] = b->child_sorted_axis;
 | 
				
			||||||
 | 
								try_axis[1] = (b->child_sorted_axis+1)%3;
 | 
				
			||||||
 | 
								try_axis[2] = (b->child_sorted_axis+2)%3;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								try_axis[0] = 0;
 | 
				
			||||||
 | 
								try_axis[1] = 1;
 | 
				
			||||||
 | 
								try_axis[2] = 2;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							
 | 
				
			||||||
 | 
							for(axis=try_axis[k=0]; k<3; axis=try_axis[++k])
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			float other_bb[6];
 | 
								float other_bb[6];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -295,37 +312,54 @@ int rtbuild_heuristic_object_split(RTBuilder *b, int nchilds)
 | 
				
			|||||||
				float *bb = acc_bb+i*6;
 | 
									float *bb = acc_bb+i*6;
 | 
				
			||||||
				if(i == size-1)
 | 
									if(i == size-1)
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					INIT_MINMAX( bb, bb+3 );
 | 
										VECCOPY(bb, cost[i].bb);
 | 
				
			||||||
 | 
										VECCOPY(bb+3, cost[i].bb+3);
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				else
 | 
									else
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					VECCOPY( bb, bb+6 );
 | 
										bb[0] = MIN2(cost[i].bb[0], bb[6+0]);
 | 
				
			||||||
					VECCOPY( bb+3, bb+6+3 );
 | 
										bb[1] = MIN2(cost[i].bb[1], bb[6+1]);
 | 
				
			||||||
 | 
										bb[2] = MIN2(cost[i].bb[2], bb[6+2]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										bb[3] = MAX2(cost[i].bb[3], bb[6+3]);
 | 
				
			||||||
 | 
										bb[4] = MAX2(cost[i].bb[4], bb[6+4]);
 | 
				
			||||||
 | 
										bb[5] = MAX2(cost[i].bb[5], bb[6+5]);
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				RE_rayobject_merge_bb( cost[i].obj, bb, bb+3 );
 | 
					 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			
 | 
								
 | 
				
			||||||
			INIT_MINMAX(other_bb, other_bb+3);
 | 
								INIT_MINMAX(other_bb, other_bb+3);
 | 
				
			||||||
			DO_MINMAX( cost[0].bb, other_bb, other_bb+3 );
 | 
								DO_MIN( cost[0].bb,   other_bb   );
 | 
				
			||||||
			DO_MINMAX( cost[0].bb+3, other_bb, other_bb+3 );
 | 
								DO_MAX( cost[0].bb+3, other_bb+3 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			for(i=1; i<size; i++)
 | 
								for(i=1; i<size; i++)
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				//Worst case heuristic (cost of each child is linear)
 | 
									//Worst case heuristic (cost of each child is linear)
 | 
				
			||||||
				float hcost = bb_area(other_bb, other_bb+3)*(i+log(i)) + bb_area(acc_bb+i*6, acc_bb+i*6+3)*(size-i+log(size-i));
 | 
									float hcost, left_side, right_side;
 | 
				
			||||||
				if(hcost < bcost)
 | 
									
 | 
				
			||||||
 | 
									left_side = bb_area(other_bb, other_bb+3)*(i+logf(i));
 | 
				
			||||||
 | 
									right_side= bb_area(acc_bb+i*6, acc_bb+i*6+3)*(size-i+logf(size-i));
 | 
				
			||||||
 | 
									
 | 
				
			||||||
 | 
									if(left_side > bcost) break;	//No way we can find a better heuristic in this axis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									hcost = left_side+right_side;
 | 
				
			||||||
 | 
									if( hcost < bcost
 | 
				
			||||||
 | 
									|| (hcost == bcost && axis < baxis)) //this makes sure the tree built is the same whatever is the order of the sorting axis
 | 
				
			||||||
				{
 | 
									{
 | 
				
			||||||
					bcost = hcost;
 | 
										bcost = hcost;
 | 
				
			||||||
					baxis = axis;
 | 
										baxis = axis;
 | 
				
			||||||
					boffset = i;
 | 
										boffset = i;
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				DO_MINMAX( cost[i].bb, other_bb, other_bb+3 );
 | 
									DO_MIN( cost[i].bb,   other_bb   );
 | 
				
			||||||
				DO_MINMAX( cost[i].bb+3, other_bb, other_bb+3 );
 | 
									DO_MAX( cost[i].bb+3, other_bb+3 );
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								
 | 
				
			||||||
 | 
								if(baxis == axis)
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
 | 
									for(i=0; i<size; i++)
 | 
				
			||||||
 | 
										b->begin[i] = cost[i].obj;
 | 
				
			||||||
 | 
									b->child_sorted_axis = axis;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		costobject_sort(cost, cost+size, baxis);
 | 
					 | 
				
			||||||
		for(i=0; i<size; i++)
 | 
					 | 
				
			||||||
			b->begin[i] = cost[i].obj;
 | 
					 | 
				
			||||||
			
 | 
								
 | 
				
			||||||
		b->child_offset[0] = 0;
 | 
							b->child_offset[0] = 0;
 | 
				
			||||||
		b->child_offset[1] = boffset;
 | 
							b->child_offset[1] = boffset;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user