Render Monster support: (part 1)

Removed all limitations on the maximum number of threads from the render
code. The only define for this is now in BLI_threads.h (BLENDER_MAX_THREADS),
currently set to 8.
Note that each thread renders an entire tile and also allocates the
buffers for that tile, so more threads might work better with smaller
tiles.

IMPORTANT: the node system won't work yet with more than 2 threads! So don't
try material nodes or compositing with over 2 threads. I'll commit support
for that later today.

What does work now (or at least should work :) is AO and soft shadows.
This commit is contained in:
2006-11-29 17:01:09 +00:00
parent 35f3682f7c
commit 6543d3e220
14 changed files with 157 additions and 162 deletions

View File

@@ -211,6 +211,7 @@ Scene *add_scene(char *name)
sce->r.yplay= 480; sce->r.yplay= 480;
sce->r.freqplay= 60; sce->r.freqplay= 60;
sce->r.depth= 32; sce->r.depth= 32;
sce->r.threads= 1;
sce->r.stereomode = 1; // no stereo sce->r.stereomode = 1; // no stereo

View File

@@ -34,12 +34,17 @@
/* one custom lock available now. can be extended */ /* one custom lock available now. can be extended */
#define LOCK_CUSTOM1 1 #define LOCK_CUSTOM1 1
void BLI_init_threads (ListBase *threadbase, void *(*do_thread)(void *), int tot); /* for tables, button in UI, etc */
int BLI_available_threads(ListBase *threadbase); #define BLENDER_MAX_THREADS 8
int BLI_available_thread_index(ListBase *threadbase);
void BLI_insert_thread (ListBase *threadbase, void *callerdata); struct ListBase;
void BLI_remove_thread (ListBase *threadbase, void *callerdata);
void BLI_end_threads (ListBase *threadbase); void BLI_init_threads (struct ListBase *threadbase, void *(*do_thread)(void *), int tot);
int BLI_available_threads(struct ListBase *threadbase);
int BLI_available_thread_index(struct ListBase *threadbase);
void BLI_insert_thread (struct ListBase *threadbase, void *callerdata);
void BLI_remove_thread (struct ListBase *threadbase, void *callerdata);
void BLI_end_threads (struct ListBase *threadbase);
void BLI_lock_thread (int type); void BLI_lock_thread (int type);
void BLI_unlock_thread (int type); void BLI_unlock_thread (int type);

View File

@@ -36,6 +36,8 @@
#include "MEM_guardedalloc.h" #include "MEM_guardedalloc.h"
#include "PIL_time.h" #include "PIL_time.h"
#include "BLI_threads.h"
#include "BLI_rand.h" #include "BLI_rand.h"
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
@@ -163,15 +165,14 @@ void BLI_array_randomize(void *data, int elemSize, int numElems, unsigned int se
} }
/* ********* for threaded random ************** */ /* ********* for threaded random ************** */
#define MAX_RNG_THREADS 16
static RNG rng_tab[MAX_RNG_THREADS]; static RNG rng_tab[BLENDER_MAX_THREADS];
void BLI_thread_srandom(int thread, unsigned int seed) void BLI_thread_srandom(int thread, unsigned int seed)
{ {
extern unsigned char hash[]; // noise.c extern unsigned char hash[]; // noise.c
if(thread >= MAX_RNG_THREADS) if(thread >= BLENDER_MAX_THREADS)
thread= 0; thread= 0;
rng_seed(&rng_tab[thread], seed + hash[seed & 255]); rng_seed(&rng_tab[thread], seed + hash[seed & 255]);

View File

@@ -6012,8 +6012,7 @@ static void do_versions(FileData *fd, Library *lib, Main *main)
BPoint *bp; BPoint *bp;
int a; int a;
sc= main->screen.first; for(sc= main->screen.first; sc; sc= sc->id.next) {
while(sc) {
ScrArea *sa; ScrArea *sa;
sa= sc->areabase.first; sa= sc->areabase.first;
while(sa) { while(sa) {
@@ -6028,12 +6027,17 @@ static void do_versions(FileData *fd, Library *lib, Main *main)
} }
sa = sa->next; sa = sa->next;
} }
sc = sc->id.next;
} }
for(sce= main->scene.first; sce; sce= sce->id.next) { for(sce= main->scene.first; sce; sce= sce->id.next) {
if (sce->toolsettings->select_thresh == 0.0f) if (sce->toolsettings->select_thresh == 0.0f)
sce->toolsettings->select_thresh= 0.01f; sce->toolsettings->select_thresh= 0.01f;
if (sce->r.threads==0) {
if (sce->r.mode & R_THREADS)
sce->r.threads= 2;
else
sce->r.threads= 1;
}
} }
/* add default radius values to old curve points */ /* add default radius values to old curve points */

View File

@@ -151,7 +151,7 @@ typedef struct RenderData {
int cfra, sfra, efra; /* fames as in 'images' */ int cfra, sfra, efra; /* fames as in 'images' */
int images, framapto, pad3; int images, framapto, pad3;
short flag, pad1; short flag, threads;
float ctime; /* use for calcutions */ float ctime; /* use for calcutions */
float framelen, blurfac; float framelen, blurfac;
@@ -514,6 +514,7 @@ typedef struct Scene {
#define R_GAUSS 0x20000 #define R_GAUSS 0x20000
/* fbuf obsolete... */ /* fbuf obsolete... */
#define R_FBUF 0x40000 #define R_FBUF 0x40000
/* threads obsolete... is there for old files */
#define R_THREADS 0x80000 #define R_THREADS 0x80000
#define R_SPEED 0x100000 #define R_SPEED 0x100000

View File

@@ -102,7 +102,7 @@ typedef struct World {
/* ambient occlusion */ /* ambient occlusion */
float aodist, aodistfac, aoenergy, aobias; float aodist, aodistfac, aoenergy, aobias;
short aomode, aosamp, aomix, aocolor; short aomode, aosamp, aomix, aocolor;
float *aosphere; float *aosphere, *aotables;
struct Ipo *ipo; struct Ipo *ipo;
struct MTex *mtex[10]; struct MTex *mtex[10];

View File

@@ -37,6 +37,8 @@
#include "DNA_object_types.h" #include "DNA_object_types.h"
#include "DNA_vec_types.h" #include "DNA_vec_types.h"
#include "BLI_threads.h"
#include "RE_pipeline.h" #include "RE_pipeline.h"
#include "RE_shader_ext.h" /* TexResult, ShadeResult, ShadeInput */ #include "RE_shader_ext.h" /* TexResult, ShadeResult, ShadeInput */
@@ -48,8 +50,6 @@ struct GHash;
#define TABLEINITSIZE 1024 #define TABLEINITSIZE 1024
#define LAMPINITSIZE 256 #define LAMPINITSIZE 256
/* hardcoded maximum now, for optimize tables */
#define RE_MAXTHREAD 2
typedef struct SampleTables typedef struct SampleTables
{ {
@@ -77,6 +77,7 @@ typedef struct RenderPart
short sample, nr; /* sample can be used by zbuffers, nr is partnr */ short sample, nr; /* sample can be used by zbuffers, nr is partnr */
short thread; /* thread id */ short thread; /* thread id */
char *clipflag; /* clipflags for part zbuffering */
} RenderPart; } RenderPart;
typedef struct Octree { typedef struct Octree {
@@ -209,7 +210,7 @@ typedef struct ShadBuf {
ListBase buffers; ListBase buffers;
/* irregular shadowbufer, result stored per thread */ /* irregular shadowbufer, result stored per thread */
struct ISBData *isb_result[RE_MAXTHREAD]; struct ISBData *isb_result[BLENDER_MAX_THREADS];
} ShadBuf; } ShadBuf;
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
@@ -326,7 +327,7 @@ typedef struct LampRen
float bias; float bias;
short ray_samp, ray_sampy, ray_sampz, ray_samp_type, area_shape, ray_totsamp; short ray_samp, ray_sampy, ray_sampz, ray_samp_type, area_shape, ray_totsamp;
short xold1, yold1, xold2, yold2; /* last jitter table for area lights */ short xold[BLENDER_MAX_THREADS], yold[BLENDER_MAX_THREADS]; /* last jitter table for area lights */
float area_size, area_sizey, area_sizez; float area_size, area_sizey, area_sizez;
struct ShadBuf *shb; struct ShadBuf *shb;
@@ -347,7 +348,7 @@ typedef struct LampRen
short YF_glowtype; short YF_glowtype;
/* ray optim */ /* ray optim */
VlakRen *vlr_last[RE_MAXTHREAD]; VlakRen *vlr_last[BLENDER_MAX_THREADS];
struct MTex *mtex[MAX_MTEX]; struct MTex *mtex[MAX_MTEX];
} LampRen; } LampRen;

View File

@@ -46,6 +46,7 @@
struct HaloRen; struct HaloRen;
struct ShadeInput; struct ShadeInput;
struct ShadeResult; struct ShadeResult;
struct World;
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
@@ -109,7 +110,7 @@ extern void ray_shadow(ShadeInput *, LampRen *, float *);
extern void ray_trace(ShadeInput *, ShadeResult *); extern void ray_trace(ShadeInput *, ShadeResult *);
extern void ray_ao(ShadeInput *, float *); extern void ray_ao(ShadeInput *, float *);
extern void init_jitter_plane(LampRen *lar); extern void init_jitter_plane(LampRen *lar);
extern void init_ao_sphere(float *sphere, int tot, int iter); extern void init_ao_sphere(struct World *wrld);
#endif /* RENDER_EXT_H */ #endif /* RENDER_EXT_H */

View File

@@ -2974,6 +2974,11 @@ void RE_Database_Free(Render *re)
re->wrld.aosphere= NULL; re->wrld.aosphere= NULL;
re->scene->world->aosphere= NULL; re->scene->world->aosphere= NULL;
} }
if(re->wrld.aotables) {
MEM_freeN(re->wrld.aotables);
re->wrld.aotables= NULL;
re->scene->world->aotables= NULL;
}
if(re->r.mode & R_RAYTRACE) freeoctree(re); if(re->r.mode & R_RAYTRACE) freeoctree(re);
@@ -3220,11 +3225,8 @@ void RE_Database_FromScene(Render *re, Scene *scene, int use_camera_view)
} }
init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */ init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */
if( (re->wrld.mode & WO_AMB_OCC) && (re->r.mode & R_RAYTRACE) ) { if( (re->wrld.mode & WO_AMB_OCC) && (re->r.mode & R_RAYTRACE) )
re->wrld.aosphere= MEM_mallocN(2*3*re->wrld.aosamp*re->wrld.aosamp*sizeof(float), "AO sphere"); init_ao_sphere(&re->wrld);
/* we make twice the amount of samples, because only a hemisphere is used */
init_ao_sphere(re->wrld.aosphere, 2*re->wrld.aosamp*re->wrld.aosamp, 16);
}
/* still bad... doing all */ /* still bad... doing all */
init_render_textures(re); init_render_textures(re);
@@ -3894,11 +3896,8 @@ void RE_Database_Baking(Render *re, Scene *scene, int type)
} }
init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */ init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */
if( (re->wrld.mode & WO_AMB_OCC) && (re->r.mode & R_RAYTRACE) ) { if( (re->wrld.mode & WO_AMB_OCC) && (re->r.mode & R_RAYTRACE) )
re->wrld.aosphere= MEM_mallocN(2*3*re->wrld.aosamp*re->wrld.aosamp*sizeof(float), "AO sphere"); init_ao_sphere(&re->wrld);
/* we make twice the amount of samples, because only a hemisphere is used */
init_ao_sphere(re->wrld.aosphere, 2*re->wrld.aosamp*re->wrld.aosamp, 16);
}
/* still bad... doing all */ /* still bad... doing all */
init_render_textures(re); init_render_textures(re);

View File

@@ -1100,7 +1100,7 @@ static void threaded_tile_processor(Render *re)
RenderPart *pa, *nextpa; RenderPart *pa, *nextpa;
RenderResult *rr; RenderResult *rr;
rctf viewplane= re->viewplane; rctf viewplane= re->viewplane;
int maxthreads, rendering=1, counter= 1, drawtimer=0, hasdrawn, minx=0; int rendering=1, counter= 1, drawtimer=0, hasdrawn, minx=0;
/* first step; the entire render result, or prepare exr buffer saving */ /* first step; the entire render result, or prepare exr buffer saving */
free_render_result(re->result); free_render_result(re->result);
@@ -1123,11 +1123,7 @@ static void threaded_tile_processor(Render *re)
IMB_exrtile_begin_write(rr->exrhandle, str, rr->rectx, rr->recty, rr->rectx/re->xparts, rr->recty/re->yparts); IMB_exrtile_begin_write(rr->exrhandle, str, rr->rectx, rr->recty, rr->rectx/re->xparts, rr->recty/re->yparts);
} }
if(re->r.mode & R_THREADS) BLI_init_threads(&threads, do_part_thread, re->r.threads);
maxthreads= RE_MAXTHREAD; /* should become button value too */
else maxthreads= 1;
BLI_init_threads(&threads, do_part_thread, maxthreads);
/* assuming no new data gets added to dbase... */ /* assuming no new data gets added to dbase... */
R= *re; R= *re;
@@ -1154,7 +1150,7 @@ static void threaded_tile_processor(Render *re)
nextpa= find_next_part(re, minx); nextpa= find_next_part(re, minx);
} }
else if(re->r.mode & R_PANORAMA) { else if(re->r.mode & R_PANORAMA) {
if(nextpa==NULL && BLI_available_threads(&threads)==maxthreads) if(nextpa==NULL && BLI_available_threads(&threads)==re->r.threads)
nextpa= find_next_pano_slice(re, &minx, &viewplane); nextpa= find_next_pano_slice(re, &minx, &viewplane);
else { else {
PIL_sleep_ms(50); PIL_sleep_ms(50);
@@ -1195,7 +1191,7 @@ static void threaded_tile_processor(Render *re)
drawtimer= 0; drawtimer= 0;
/* on break, wait for all slots to get freed */ /* on break, wait for all slots to get freed */
if( (g_break=re->test_break()) && BLI_available_threads(&threads)==maxthreads) if( (g_break=re->test_break()) && BLI_available_threads(&threads)==re->r.threads)
rendering= 0; rendering= 0;
} }
@@ -1223,9 +1219,7 @@ void RE_TileProcessor(Render *re, int firsttile)
re->i.starttime= PIL_check_seconds_timer(); re->i.starttime= PIL_check_seconds_timer();
//if(re->r.mode & R_THREADS)
// threaded_tile_processor(re); // threaded_tile_processor(re);
//else
render_tile_processor(re, firsttile); render_tile_processor(re, firsttile);
re->i.lastframetime= PIL_check_seconds_timer()- re->i.starttime; re->i.lastframetime= PIL_check_seconds_timer()- re->i.starttime;

View File

@@ -1762,7 +1762,10 @@ void init_jitter_plane(LampRen *lar)
float *fp; float *fp;
int x, iter=12, tot= lar->ray_totsamp; int x, iter=12, tot= lar->ray_totsamp;
fp=lar->jitter= MEM_mallocN(4*tot*2*sizeof(float), "lamp jitter tab"); /* at least 4, or max threads+1 tables */
if(BLENDER_MAX_THREADS < 4) x= 4;
else x= BLENDER_MAX_THREADS+1;
fp= lar->jitter= MEM_mallocN(x*tot*2*sizeof(float), "lamp jitter tab");
/* set per-lamp fixed seed */ /* set per-lamp fixed seed */
BLI_srandom(tot); BLI_srandom(tot);
@@ -1780,7 +1783,7 @@ void init_jitter_plane(LampRen *lar)
} }
} }
/* create the dithered tables */ /* create the dithered tables (could just check lamp type!) */
jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5, 0.0); jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5, 0.0);
jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5, 0.5); jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5, 0.5);
jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0, 0.5); jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0, 0.5);
@@ -1795,20 +1798,13 @@ static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys)
if(lar->ray_samp_type & LA_SAMP_JITTER) { if(lar->ray_samp_type & LA_SAMP_JITTER) {
/* made it threadsafe */ /* made it threadsafe */
if(thread & 1) {
if(lar->xold1!=xs || lar->yold1!=ys) { if(lar->xold[thread]!=xs || lar->yold[thread]!=ys) {
jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(1), BLI_thread_frand(1)); jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread));
lar->xold1= xs; lar->yold1= ys; lar->xold[thread]= xs;
} lar->yold[thread]= ys;
return lar->jitter+2*tot;
}
else {
if(lar->xold2!=xs || lar->yold2!=ys) {
jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(0), BLI_thread_frand(0));
lar->xold2= xs; lar->yold2= ys;
}
return lar->jitter+4*tot;
} }
return lar->jitter+2*(thread+1)*tot;
} }
if(lar->ray_samp_type & LA_SAMP_DITHER) { if(lar->ray_samp_type & LA_SAMP_DITHER) {
return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1)); return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));
@@ -2029,45 +2025,52 @@ static void DS_energy(float *sphere, int tot, float *vec)
/* called from convertBlenderScene.c */ /* called from convertBlenderScene.c */
/* creates an equally distributed spherical sample pattern */ /* creates an equally distributed spherical sample pattern */
void init_ao_sphere(float *sphere, int tot, int iter) /* and allocates threadsafe memory */
void init_ao_sphere(World *wrld)
{ {
float *fp; float *fp;
int a; int a, tot, iter= 16;
/* we make twice the amount of samples, because only a hemisphere is used */
tot= 2*wrld->aosamp*wrld->aosamp;
wrld->aosphere= MEM_mallocN(3*tot*sizeof(float), "AO sphere");
/* fixed random */
BLI_srandom(tot); BLI_srandom(tot);
/* init */ /* init */
fp= sphere; fp= wrld->aosphere;
for(a=0; a<tot; a++, fp+= 3) { for(a=0; a<tot; a++, fp+= 3) {
RandomSpherical(fp); RandomSpherical(fp);
} }
while(iter--) { while(iter--) {
for(a=0, fp= sphere; a<tot; a++, fp+= 3) { for(a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) {
DS_energy(sphere, tot, fp); DS_energy(wrld->aosphere, tot, fp);
} }
} }
/* tables */
wrld->aotables= MEM_mallocN(BLENDER_MAX_THREADS*3*tot*sizeof(float), "AO tables");
} }
/* give per thread a table, we have to compare xs ys because of way OSA works... */
static float *threadsafe_table_sphere(int test, int thread, int xs, int ys) static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot)
{ {
static float sphere1[2*3*256]; static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS];
static float sphere2[2*3*256]; static int firsttime= 1;
static int xs1=-1, xs2=-1, ys1=-1, ys2=-1;
if(thread & 1) { if(firsttime) {
if(xs==xs1 && ys==ys1) return sphere1; memset(xso, 255, sizeof(xso));
if(test) return NULL; memset(yso, 255, sizeof(yso));
xs1= xs; ys1= ys; firsttime= 0;
return sphere1;
} }
else {
if(xs==xs2 && ys==ys2) return sphere2; if(xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3;
if(test) return NULL; if(test) return NULL;
xs2= xs; ys2= ys; xso[thread]= xs; yso[thread]= ys;
return sphere2; return R.wrld.aotables+ thread*tot*3;
}
} }
static float *sphere_sampler(int type, int resol, int thread, int xs, int ys) static float *sphere_sampler(int type, int resol, int thread, int xs, int ys)
@@ -2097,9 +2100,9 @@ static float *sphere_sampler(int type, int resol, int thread, int xs, int ys)
float ang, *vec1; float ang, *vec1;
int a; int a;
sphere= threadsafe_table_sphere(1, thread, xs, ys); // returns table if xs and ys were equal to last call sphere= threadsafe_table_sphere(1, thread, xs, ys, tot); // returns table if xs and ys were equal to last call
if(sphere==NULL) { if(sphere==NULL) {
sphere= threadsafe_table_sphere(0, thread, xs, ys); sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
// random rotation // random rotation
ang= BLI_thread_frand(thread); ang= BLI_thread_frand(thread);
@@ -2239,7 +2242,7 @@ void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
/* only when not mir tracing, first hit optimm */ /* only when not mir tracing, first hit optimm */
if(shi->depth==0) if(shi->depth==0)
isec.vlr_last= lar->vlr_last[shi->thread & 1]; isec.vlr_last= lar->vlr_last[shi->thread];
else else
isec.vlr_last= NULL; isec.vlr_last= NULL;
@@ -2352,7 +2355,7 @@ void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
/* for first hit optim, set last interesected shadow face */ /* for first hit optim, set last interesected shadow face */
if(shi->depth==0) if(shi->depth==0)
lar->vlr_last[shi->thread & 1]= isec.vlr_last; lar->vlr_last[shi->thread]= isec.vlr_last;
} }

View File

@@ -3315,6 +3315,7 @@ void zbufshadeDA_tile(RenderPart *pa)
/* free all */ /* free all */
MEM_freeN(pa->rectp); pa->rectp= NULL; MEM_freeN(pa->rectp); pa->rectp= NULL;
MEM_freeN(pa->rectz); pa->rectz= NULL; MEM_freeN(pa->rectz); pa->rectz= NULL;
MEM_freeN(pa->clipflag); pa->clipflag= NULL;
/* display active layer */ /* display active layer */
rr->renrect.ymin=rr->renrect.ymax= 0; rr->renrect.ymin=rr->renrect.ymax= 0;
@@ -3462,6 +3463,7 @@ void zbufshade_tile(RenderPart *pa)
MEM_freeN(pa->rectp); pa->rectp= NULL; MEM_freeN(pa->rectp); pa->rectp= NULL;
MEM_freeN(pa->rectz); pa->rectz= NULL; MEM_freeN(pa->rectz); pa->rectz= NULL;
MEM_freeN(pa->clipflag); pa->clipflag= NULL;
} }
/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */
@@ -3900,10 +3902,10 @@ static void *do_bake_thread(void *bs_v)
/* returns 0 if nothing was handled */ /* returns 0 if nothing was handled */
int RE_bake_shade_all_selected(Render *re, int type) int RE_bake_shade_all_selected(Render *re, int type)
{ {
BakeShade handles[RE_MAXTHREAD]; BakeShade handles[BLENDER_MAX_THREADS];
ListBase threads; ListBase threads;
Image *ima; Image *ima;
int a, vdone=0, maxthreads= 1; int a, vdone=0;
/* initialize static vars */ /* initialize static vars */
get_next_bake_face(NULL); get_next_bake_face(NULL);
@@ -3916,14 +3918,10 @@ int RE_bake_shade_all_selected(Render *re, int type)
R= *re; R= *re;
R.bakebuf= NULL; R.bakebuf= NULL;
if(re->r.mode & R_THREADS) BLI_init_threads(&threads, do_bake_thread, re->r.threads);
maxthreads= RE_MAXTHREAD; /* should become button value too */
else maxthreads= 1;
BLI_init_threads(&threads, do_bake_thread, maxthreads);
/* get the threads running */ /* get the threads running */
for(a=0; a<maxthreads; a++) { for(a=0; a<re->r.threads; a++) {
/* set defaults in handles */ /* set defaults in handles */
memset(&handles[a], 0, sizeof(BakeShade)); memset(&handles[a], 0, sizeof(BakeShade));
handles[a].shi.lay= re->scene->lay; handles[a].shi.lay= re->scene->lay;
@@ -3935,11 +3933,11 @@ int RE_bake_shade_all_selected(Render *re, int type)
/* wait for everything to be done */ /* wait for everything to be done */
a= 0; a= 0;
while(a!=maxthreads) { while(a!=re->r.threads) {
PIL_sleep_ms(50); PIL_sleep_ms(50);
for(a=0; a<maxthreads; a++) for(a=0; a<re->r.threads; a++)
if(handles[a].ready==0) if(handles[a].ready==0)
break; break;
} }
@@ -3955,7 +3953,7 @@ int RE_bake_shade_all_selected(Render *re, int type)
} }
/* calculate return value */ /* calculate return value */
for(a=0; a<maxthreads; a++) { for(a=0; a<re->r.threads; a++) {
vdone+= handles[a].vdone; vdone+= handles[a].vdone;
zbuf_free_span(handles[a].zspan); zbuf_free_span(handles[a].zspan);

View File

@@ -1685,67 +1685,41 @@ void set_part_zbuf_clipflag(RenderPart *pa)
{ {
VertRen *ver=NULL; VertRen *ver=NULL;
float minx, miny, maxx, maxy, wco; float minx, miny, maxx, maxy, wco;
unsigned short clipclear;
int v; int v;
char *clipflag;
/* flags stored in part now */
clipflag= pa->clipflag= MEM_mallocN(R.totvert+1, "part clipflags");
minx= (2*pa->disprect.xmin - R.winx-1)/(float)R.winx; minx= (2*pa->disprect.xmin - R.winx-1)/(float)R.winx;
maxx= (2*pa->disprect.xmax - R.winx+1)/(float)R.winx; maxx= (2*pa->disprect.xmax - R.winx+1)/(float)R.winx;
miny= (2*pa->disprect.ymin - R.winy-1)/(float)R.winy; miny= (2*pa->disprect.ymin - R.winy-1)/(float)R.winy;
maxy= (2*pa->disprect.ymax - R.winy+1)/(float)R.winy; maxy= (2*pa->disprect.ymax - R.winy+1)/(float)R.winy;
/* supports up to 4 threads this way */ for(v=0; v<R.totvert; v++, clipflag++) {
clipclear= ~(15 << 4*(pa->thread & 3));
/* extra security to prevent access to same data */
BLI_lock_thread(LOCK_CUSTOM1);
for(v=0; v<R.totvert; v++) {
if((v & 255)==0) if((v & 255)==0)
ver= RE_findOrAddVert(&R, v); ver= RE_findOrAddVert(&R, v);
else ver++; else ver++;
wco= ver->ho[3]; wco= ver->ho[3];
ver->flag &= clipclear;
switch(pa->thread & 3) { *clipflag= 0;
case 0: if( ver->ho[0] > maxx*wco) *clipflag |= 1;
if( ver->ho[0] > maxx*wco) ver->flag |= 1; else if( ver->ho[0]< minx*wco) *clipflag |= 2;
else if( ver->ho[0]< minx*wco) ver->flag |= 2; if( ver->ho[1] > maxy*wco) *clipflag |= 4;
if( ver->ho[1] > maxy*wco) ver->flag |= 4; else if( ver->ho[1]< miny*wco) *clipflag |= 8;
else if( ver->ho[1]< miny*wco) ver->flag |= 8;
break;
case 1:
if( ver->ho[0] > maxx*wco) ver->flag |= 16;
else if( ver->ho[0]< minx*wco) ver->flag |= 32;
if( ver->ho[1] > maxy*wco) ver->flag |= 64;
else if( ver->ho[1]< miny*wco) ver->flag |= 128;
break;
case 2:
if( ver->ho[0] > maxx*wco) ver->flag |= 256;
else if( ver->ho[0]< minx*wco) ver->flag |= 512;
if( ver->ho[1] > maxy*wco) ver->flag |= 1024;
else if( ver->ho[1]< miny*wco) ver->flag |= 2048;
break;
case 3:
if( ver->ho[0] > maxx*wco) ver->flag |= 4096;
else if( ver->ho[0]< minx*wco) ver->flag |= 8192;
if( ver->ho[1] > maxy*wco) ver->flag |= 16384;
else if( ver->ho[1]< miny*wco) ver->flag |= 32768;
break;
} }
}
BLI_unlock_thread(LOCK_CUSTOM1);
} }
void zbuffer_solid(RenderPart *pa, unsigned int lay, short layflag) void zbuffer_solid(RenderPart *pa, unsigned int lay, short layflag)
{ {
ZSpan zspan; ZSpan zspan;
VlakRen *vlr= NULL; VlakRen *vlr= NULL;
VertRen *v1, *v2, *v3, *v4;
Material *ma=0; Material *ma=0;
int v, zvlnr; int v, zvlnr;
unsigned short clipmask;
short nofill=0, env=0, wire=0, all_z= layflag & SCE_LAY_ALL_Z; short nofill=0, env=0, wire=0, all_z= layflag & SCE_LAY_ALL_Z;
char *clipflag= pa->clipflag;
zbuf_alloc_span(&zspan, pa->rectx, pa->recty); zbuf_alloc_span(&zspan, pa->rectx, pa->recty);
@@ -1779,9 +1753,6 @@ void zbuffer_solid(RenderPart *pa, unsigned int lay, short layflag)
zspan.zbuffunc= zbuffillGL4; zspan.zbuffunc= zbuffillGL4;
zspan.zbuflinefunc= zbufline; zspan.zbuflinefunc= zbufline;
/* part clipflag, threaded */
clipmask= (15 << 4*(pa->thread & 3));
for(v=0; v<R.totvlak; v++) { for(v=0; v<R.totvlak; v++) {
if((v & 255)==0) vlr= R.blovl[v>>8]; if((v & 255)==0) vlr= R.blovl[v>>8];
@@ -1813,11 +1784,17 @@ void zbuffer_solid(RenderPart *pa, unsigned int lay, short layflag)
if(nofill==0) { if(nofill==0) {
unsigned short partclip; unsigned short partclip;
/* partclipping doesn't need viewplane clipping */ v1= vlr->v1;
if(vlr->v4) partclip= vlr->v1->flag & vlr->v2->flag & vlr->v3->flag & vlr->v4->flag; v2= vlr->v2;
else partclip= vlr->v1->flag & vlr->v2->flag & vlr->v3->flag; v3= vlr->v3;
v4= vlr->v4;
if((partclip & clipmask)==0) { /* partclipping doesn't need viewplane clipping */
partclip= clipflag[v1->index] & clipflag[v2->index] & clipflag[v3->index];
if(v4)
partclip &= clipflag[v4->index];
if(partclip==0) {
if(env) zvlnr= -1; if(env) zvlnr= -1;
else zvlnr= v+1; else zvlnr= v+1;
@@ -1825,14 +1802,14 @@ void zbuffer_solid(RenderPart *pa, unsigned int lay, short layflag)
if(wire) zbufclipwire(&zspan, zvlnr, vlr); if(wire) zbufclipwire(&zspan, zvlnr, vlr);
else { else {
/* strands allow to be filled in as quad */ /* strands allow to be filled in as quad */
if(vlr->v4 && (vlr->flag & R_STRAND)) { if(v4 && (vlr->flag & R_STRAND)) {
zbufclip4(&zspan, zvlnr, vlr->v1->ho, vlr->v2->ho, vlr->v3->ho, vlr->v4->ho, vlr->v1->clip, vlr->v2->clip, vlr->v3->clip, vlr->v4->clip); zbufclip4(&zspan, zvlnr, v1->ho, v2->ho, v3->ho, v4->ho, v1->clip, v2->clip, v3->clip, v4->clip);
} }
else { else {
zbufclip(&zspan, zvlnr, vlr->v1->ho, vlr->v2->ho, vlr->v3->ho, vlr->v1->clip, vlr->v2->clip, vlr->v3->clip); zbufclip(&zspan, zvlnr, v1->ho, v2->ho, v3->ho, v1->clip, v2->clip, v3->clip);
if(vlr->v4) { if(v4) {
if(zvlnr>0) zvlnr+= RE_QUAD_OFFS; if(zvlnr>0) zvlnr+= RE_QUAD_OFFS;
zbufclip(&zspan, zvlnr, vlr->v1->ho, vlr->v3->ho, vlr->v4->ho, vlr->v1->clip, vlr->v3->clip, vlr->v4->clip); zbufclip(&zspan, zvlnr, v1->ho, v3->ho, v4->ho, v1->clip, v3->clip, v4->clip);
} }
} }
} }
@@ -2561,9 +2538,10 @@ static void zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, u
ZSpan zspan; ZSpan zspan;
Material *ma=NULL; Material *ma=NULL;
VlakRen *vlr=NULL; VlakRen *vlr=NULL;
VertRen *v1, *v2, *v3, *v4;
float vec[3], hoco[4], mul, zval, fval; float vec[3], hoco[4], mul, zval, fval;
int v, zvlnr, zsample, dofill= 0; int v, zvlnr, zsample, dofill= 0;
unsigned short clipmask; char *clipflag= pa->clipflag;
zbuf_alloc_span(&zspan, pa->rectx, pa->recty); zbuf_alloc_span(&zspan, pa->rectx, pa->recty);
@@ -2580,9 +2558,6 @@ static void zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, u
zspan.zbuffunc= zbuffillAc4; zspan.zbuffunc= zbuffillAc4;
zspan.zbuflinefunc= zbuflineAc; zspan.zbuflinefunc= zbuflineAc;
/* part clipflag, 4 threads */
clipmask= (15 << 4*(pa->thread & 3));
for(zsample=0; zsample<R.osa || R.osa==0; zsample++) { for(zsample=0; zsample<R.osa || R.osa==0; zsample++) {
copyto_abufz(pa, zspan.arectz, zsample); /* init zbuffer */ copyto_abufz(pa, zspan.arectz, zsample); /* init zbuffer */
@@ -2618,17 +2593,23 @@ static void zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, u
if((vlr->flag & R_VISIBLE) && (vlr->lay & lay)) { if((vlr->flag & R_VISIBLE) && (vlr->lay & lay)) {
unsigned short partclip; unsigned short partclip;
/* partclipping doesn't need viewplane clipping */ v1= vlr->v1;
if(vlr->v4) partclip= vlr->v1->flag & vlr->v2->flag & vlr->v3->flag & vlr->v4->flag; v2= vlr->v2;
else partclip= vlr->v1->flag & vlr->v2->flag & vlr->v3->flag; v3= vlr->v3;
v4= vlr->v4;
if((partclip & clipmask)==0) { /* partclipping doesn't need viewplane clipping */
partclip= clipflag[v1->index] & clipflag[v2->index] & clipflag[v3->index];
if(v4)
partclip &= clipflag[v4->index];
if(partclip==0) {
/* a little advantage for transp rendering (a z offset) */ /* a little advantage for transp rendering (a z offset) */
if( ma->zoffs != 0.0) { if( ma->zoffs != 0.0) {
mul= 0x7FFFFFFF; mul= 0x7FFFFFFF;
zval= mul*(1.0+vlr->v1->ho[2]/vlr->v1->ho[3]); zval= mul*(1.0+v1->ho[2]/v1->ho[3]);
VECCOPY(vec, vlr->v1->co); VECCOPY(vec, v1->co);
/* z is negative, otherwise its being clipped */ /* z is negative, otherwise its being clipped */
vec[2]-= ma->zoffs; vec[2]-= ma->zoffs;
projectverto(vec, R.winmat, hoco); projectverto(vec, R.winmat, hoco);
@@ -2642,14 +2623,14 @@ static void zbuffer_abuf(RenderPart *pa, APixstr *APixbuf, ListBase *apsmbase, u
if(ma->mode & (MA_WIRE)) zbufclipwire(&zspan, zvlnr, vlr); if(ma->mode & (MA_WIRE)) zbufclipwire(&zspan, zvlnr, vlr);
else { else {
if(vlr->v4 && (vlr->flag & R_STRAND)) { if(v4 && (vlr->flag & R_STRAND)) {
zbufclip4(&zspan, zvlnr, vlr->v1->ho, vlr->v2->ho, vlr->v3->ho, vlr->v4->ho, vlr->v1->clip, vlr->v2->clip, vlr->v3->clip, vlr->v4->clip); zbufclip4(&zspan, zvlnr, v1->ho, v2->ho, v3->ho, v4->ho, v1->clip, v2->clip, v3->clip, v4->clip);
} }
else { else {
zbufclip(&zspan, zvlnr, vlr->v1->ho, vlr->v2->ho, vlr->v3->ho, vlr->v1->clip, vlr->v2->clip, vlr->v3->clip); zbufclip(&zspan, zvlnr, v1->ho, v2->ho, v3->ho, v1->clip, v2->clip, v3->clip);
if(vlr->v4) { if(v4) {
zvlnr+= RE_QUAD_OFFS; zvlnr+= RE_QUAD_OFFS;
zbufclip(&zspan, zvlnr, vlr->v1->ho, vlr->v3->ho, vlr->v4->ho, vlr->v1->clip, vlr->v3->clip, vlr->v4->clip); zbufclip(&zspan, zvlnr, v1->ho, v3->ho, v4->ho, v1->clip, v3->clip, v4->clip);
} }
} }
} }

View File

@@ -78,15 +78,21 @@
/* -----includes for this file specific----- */ /* -----includes for this file specific----- */
#include "DNA_image_types.h" #include "DNA_image_types.h"
#include "BKE_writeavi.h" #include "BKE_writeavi.h"
#include "BKE_writeffmpeg.h" #include "BKE_writeffmpeg.h"
#include "BKE_image.h" #include "BKE_image.h"
#include "BLI_threads.h"
#include "BIF_editsound.h"
#include "BIF_writeimage.h" #include "BIF_writeimage.h"
#include "BIF_writeavicodec.h" #include "BIF_writeavicodec.h"
#include "BIF_editsound.h"
#include "BSE_seqaudio.h" #include "BSE_seqaudio.h"
#include "BSE_headerbuttons.h" #include "BSE_headerbuttons.h"
#include "butspace.h" // own module #include "butspace.h" // own module
#ifdef WITH_QUICKTIME #ifdef WITH_QUICKTIME
@@ -1186,7 +1192,7 @@ static void render_panel_output(void)
uiBlockSetCol(block, TH_BUT_SETTING1); uiBlockSetCol(block, TH_BUT_SETTING1);
uiDefButBitS(block, TOG, R_BACKBUF, B_NOP,"Backbuf", 10, 94, 80, 20, &G.scene->r.bufflag, 0, 0, 0, 0, "Enable/Disable use of Backbuf image"); uiDefButBitS(block, TOG, R_BACKBUF, B_NOP,"Backbuf", 10, 94, 80, 20, &G.scene->r.bufflag, 0, 0, 0, 0, "Enable/Disable use of Backbuf image");
uiDefButBitI(block, TOG, R_THREADS, B_NOP,"Threads", 10, 68, 80, 20, &G.scene->r.mode, 0, 0, 0, 0, "Enable/Disable render in two threads"); uiDefButS(block, NUM, B_NOP, "Threads:", 10, 68, 120, 20, &G.scene->r.threads, 1, BLENDER_MAX_THREADS, 0, 0, "Amount of threads for render");
uiBlockSetCol(block, TH_AUTO); uiBlockSetCol(block, TH_AUTO);
uiBlockBeginAlign(block); uiBlockBeginAlign(block);