This repository has been archived on 2023-10-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
blender-archive/source/blender/render/intern/source/shadbuf.c
T
Ton Roosendaal fe036a0538 Added new malloc type in our MEM module; using the unix feature 'mmap'.
In Orange we've been fighting the past weeks with memory usage a lot...
at the moment incredible huge scenes are being rendered, with multiple
layers and all compositing, stressing limits of memory a lot.
I had hoped that less frequently used blocks would be swapped away
nicely, so fragmented memory could survive. Unfortunately (in OSX) the
malloc range is limited to 2 GB only (upped half of address space).
Other OS's have a limit too, but typically larger afaik.

Now here's mmap to the rescue! It has a very nice feature to map to
a virtual (non existing) file, allowing to allocate disk-mapped memory
on the fly. For as long there's real memory it works nearly as fast as
a regular malloc, and when you go to the swap boundary, it knows nicely
what to swap first.

The upcoming commit will use mmap for all large memory blocks, like
the composit stack, render layers, lamp buffers and images. Tested here
on my 1 GB system, and compositing huge images with a total of 2.5 gig
still works acceptable here. :)

http://www.blender.org/bf/memory.jpg
This is a silly composit test, using 64 MB images with a load of nodes.
Check the header print... the (2323.33M) is the mmap disk-cache in use.

BTW: note that is still limited to the virtual address space of 4 GB.

The new call is:
MEM_mapalloc()

Per definition, mmap() returns zero'ed memory, so a calloc isn't required.

For Windows there's no mmap() available, but I'm pretty sure there's an
equivalent. Windows gurus here are invited to insert that here in code! At
the moment it's nicely ifdeffed, so for Windows the mmap defaults to a
regular alloc.
2006-02-16 17:51:01 +00:00

738 lines
17 KiB
C

/*
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
* All rights reserved.
*
* Contributor(s): 2004-2006, Blender Foundation
*
* ***** END GPL LICENSE BLOCK *****
*/
#include <math.h>
#include <string.h>
#include "MTC_matrixops.h"
#include "MEM_guardedalloc.h"
#include "DNA_lamp_types.h"
#include "BKE_global.h"
#include "BKE_utildefines.h"
#include "BLI_arithb.h"
#include "BLI_blenlib.h"
#include "BLI_jitter.h"
#include "renderpipeline.h"
#include "render_types.h"
#include "renderdatabase.h"
#include "shadbuf.h"
#include "zbuf.h"
/* XXX, could be better implemented... this is for endian issues
*/
#if defined(__sgi) || defined(__sparc) || defined(__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
#define RCOMP 3
#define GCOMP 2
#define BCOMP 1
#define ACOMP 0
#else
#define RCOMP 0
#define GCOMP 1
#define BCOMP 2
#define ACOMP 3
#endif
/* ------------------------------------------------------------------------- */
/* initshadowbuf() in convertBlenderScene.c */
/* ------------------------------------------------------------------------- */
static void copy_to_ztile(int *rectz, int size, int x1, int y1, int tile, char *r1)
{
int len4, *rz;
int x2, y2;
x2= x1+tile;
y2= y1+tile;
if(x2>=size) x2= size-1;
if(y2>=size) y2= size-1;
if(x1>=x2 || y1>=y2) return;
len4= 4*(x2- x1);
rz= rectz + size*y1 + x1;
for(; y1<y2; y1++) {
memcpy(r1, rz, len4);
rz+= size;
r1+= len4;
}
}
#if 0
static int sizeoflampbuf(ShadBuf *shb)
{
int num,count=0;
char *cp;
cp= shb->cbuf;
num= (shb->size*shb->size)/256;
while(num--) count+= *(cp++);
return 256*count;
}
#endif
/* not threadsafe... */
static float *give_jitter_tab(int samp)
{
/* these are all possible jitter tables, takes up some
* 12k, not really bad!
* For soft shadows, it saves memory and render time
*/
static int tab[17]={1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256};
static float jit[1496][2];
static char ctab[17]= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
int a, offset=0;
if(samp<2) samp= 2;
else if(samp>16) samp= 16;
for(a=0; a<samp-1; a++) offset+= tab[a];
if(ctab[samp]==0) {
BLI_initjit(jit[offset], samp*samp);
ctab[samp]= 1;
}
return jit[offset];
}
static void make_jitter_weight_tab(ShadBuf *shb, short filtertype)
{
float *jit, totw= 0.0f;
int a, tot=shb->samp*shb->samp;
shb->weight= MEM_mallocN(sizeof(float)*tot, "weight tab lamp");
for(jit= shb->jit, a=0; a<tot; a++, jit+=2) {
if(filtertype==LA_SHADBUF_TENT)
shb->weight[a]= 0.71f - sqrt(jit[0]*jit[0] + jit[1]*jit[1]);
else if(filtertype==LA_SHADBUF_GAUSS)
shb->weight[a]= RE_filter_value(R_FILTER_GAUSS, 1.8f*sqrt(jit[0]*jit[0] + jit[1]*jit[1]));
else
shb->weight[a]= 1.0f;
totw+= shb->weight[a];
}
totw= 1.0f/totw;
for(a=0; a<tot; a++) {
shb->weight[a]*= totw;
}
}
/* create Z tiles (for compression): this system is 24 bits!!! */
static void compress_shadowbuf(ShadBuf *shb, int *rectz, int square)
{
ShadSampleBuf *shsample;
float dist;
unsigned long *ztile;
int *rz, *rz1, verg, verg1, size= shb->size;
int a, x, y, minx, miny, byt1, byt2;
char *rc, *rcline, *ctile, *zt;
shsample= MEM_mallocN( sizeof(ShadSampleBuf), "shad sample buf");
BLI_addtail(&shb->buffers, shsample);
shsample->zbuf= MEM_mallocN( sizeof(unsigned long)*(size*size)/256, "initshadbuf2");
shsample->cbuf= MEM_callocN( (size*size)/256, "initshadbuf3");
ztile= shsample->zbuf;
ctile= shsample->cbuf;
/* help buffer */
rcline= MEM_mallocN(256*4+sizeof(int), "makeshadbuf2");
for(y=0; y<size; y+=16) {
if(y< size/2) miny= y+15-size/2;
else miny= y-size/2;
for(x=0; x<size; x+=16) {
/* is tile within spotbundle? */
a= size/2;
if(x< a) minx= x+15-a;
else minx= x-a;
dist= sqrt( (float)(minx*minx+miny*miny) );
if(square==0 && dist>(float)(a+12)) { /* 12, tested with a onlyshadow lamp */
a= 256; verg= 0; /* 0x80000000; */ /* 0x7FFFFFFF; */
rz1= (&verg)+1;
}
else {
copy_to_ztile(rectz, size, x, y, 16, rcline);
rz1= (int *)rcline;
verg= (*rz1 & 0xFFFFFF00);
for(a=0;a<256;a++,rz1++) {
if( (*rz1 & 0xFFFFFF00) !=verg) break;
}
}
if(a==256) { /* complete empty tile */
*ctile= 0;
*ztile= *(rz1-1);
}
else {
/* ACOMP etc. are defined to work L/B endian */
rc= rcline;
rz1= (int *)rcline;
verg= rc[ACOMP];
verg1= rc[BCOMP];
rc+= 4;
byt1= 1; byt2= 1;
for(a=1;a<256;a++,rc+=4) {
byt1 &= (verg==rc[ACOMP]);
byt2 &= (verg1==rc[BCOMP]);
if(byt1==0) break;
}
if(byt1 && byt2) { /* only store byte */
*ctile= 1;
*ztile= (unsigned long)MEM_mallocN(256+4, "tile1");
rz= (int *)*ztile;
*rz= *rz1;
zt= (char *)(rz+1);
rc= rcline;
for(a=0; a<256; a++, zt++, rc+=4) *zt= rc[GCOMP];
}
else if(byt1) { /* only store short */
*ctile= 2;
*ztile= (unsigned long)MEM_mallocN(2*256+4,"Tile2");
rz= (int *)*ztile;
*rz= *rz1;
zt= (char *)(rz+1);
rc= rcline;
for(a=0; a<256; a++, zt+=2, rc+=4) {
zt[0]= rc[BCOMP];
zt[1]= rc[GCOMP];
}
}
else { /* store triple */
*ctile= 3;
*ztile= (unsigned long)MEM_mallocN(3*256,"Tile3");
zt= (char *)*ztile;
rc= rcline;
for(a=0; a<256; a++, zt+=3, rc+=4) {
zt[0]= rc[ACOMP];
zt[1]= rc[BCOMP];
zt[2]= rc[GCOMP];
}
}
}
ztile++;
ctile++;
}
}
MEM_freeN(rcline);
}
void makeshadowbuf(Render *re, LampRen *lar)
{
ShadBuf *shb= lar->shb;
float wsize, *jitbuf, twozero[2]= {0.0f, 0.0f};
int *rectz, samples;
/* jitter, weights */
shb->jit= give_jitter_tab(shb->samp);
make_jitter_weight_tab(shb, lar->filtertype);
shb->totbuf= lar->buffers;
if(shb->totbuf==4) jitbuf= give_jitter_tab(2);
else if(shb->totbuf==9) jitbuf= give_jitter_tab(3);
else jitbuf= twozero;
/* matrices and window: in winmat the transformation is being put,
transforming from observer view to lamp view, including lamp window matrix */
wsize= shb->pixsize*(shb->size/2.0);
i_window(-wsize, wsize, -wsize, wsize, shb->d, shb->clipend, shb->winmat);
MTC_Mat4MulMat4(shb->persmat, shb->viewmat, shb->winmat);
/* temp, will be restored */
MTC_Mat4SwapMat4(shb->persmat, re->winmat);
/* zbuffering */
rectz= MEM_mapallocN(sizeof(int)*shb->size*shb->size, "makeshadbuf");
project_renderdata(re, projectvert, 0, 0);
for(samples=0; samples<shb->totbuf; samples++) {
zbuffer_shadow(re, lar, rectz, shb->size, jitbuf[2*samples], jitbuf[2*samples+1]);
/* create Z tiles (for compression): this system is 24 bits!!! */
compress_shadowbuf(shb, rectz, lar->mode & LA_SQUARE);
}
MEM_freeN(rectz);
/* old matrix back */
MTC_Mat4SwapMat4(shb->persmat, re->winmat);
/* printf("lampbuf %d\n", sizeoflampbuf(shb)); */
}
void freeshadowbuf(LampRen *lar)
{
if(lar->shb) {
ShadBuf *shb= lar->shb;
ShadSampleBuf *shsample;
int b, v;
v= (shb->size*shb->size)/256;
for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
unsigned long *ztile= shsample->zbuf;
char *ctile= shsample->cbuf;
for(b=0; b<v; b++, ztile++, ctile++)
if(*ctile) MEM_freeN((void *) *ztile);
MEM_freeN(shsample->zbuf);
MEM_freeN(shsample->cbuf);
}
BLI_freelistN(&shb->buffers);
if(shb->weight) MEM_freeN(shb->weight);
MEM_freeN(lar->shb);
lar->shb= NULL;
}
}
static int firstreadshadbuf(ShadBuf *shb, ShadSampleBuf *shsample, int **rz, int xs, int ys, int nr)
{
/* return a 1 if fully compressed shadbuf-tile && z==const */
int ofs;
char *ct;
/* always test borders of shadowbuffer */
if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
/* calc z */
ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
ct= shsample->cbuf+ofs;
if(*ct==0) {
if(nr==0) {
*rz= *( (int **)(shsample->zbuf+ofs) );
return 1;
}
else if(*rz!= *( (int **)(shsample->zbuf+ofs) )) return 0;
return 1;
}
return 0;
}
/* return 1.0 : fully in light */
static float readshadowbuf(ShadBuf *shb, ShadSampleBuf *shsample, int bias, int xs, int ys, int zs)
{
float temp;
int *rz, ofs;
int zsamp=0;
char *ct, *cz;
/* simpleclip */
/* if(xs<0 || ys<0) return 1.0; */
/* if(xs>=shb->size || ys>=shb->size) return 1.0; */
/* always test borders of shadowbuffer */
if(xs<0) xs= 0; else if(xs>=shb->size) xs= shb->size-1;
if(ys<0) ys= 0; else if(ys>=shb->size) ys= shb->size-1;
/* calc z */
ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
ct= shsample->cbuf+ofs;
rz= *( (int **)(shsample->zbuf+ofs) );
if(*ct==3) {
ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
cz= (char *)&zsamp;
cz[ACOMP]= ct[0];
cz[BCOMP]= ct[1];
cz[GCOMP]= ct[2];
}
else if(*ct==2) {
ct= ((char *)rz);
ct+= 4+2*16*(ys & 15)+2*(xs & 15);
zsamp= *rz;
cz= (char *)&zsamp;
cz[BCOMP]= ct[0];
cz[GCOMP]= ct[1];
}
else if(*ct==1) {
ct= ((char *)rz);
ct+= 4+16*(ys & 15)+(xs & 15);
zsamp= *rz;
cz= (char *)&zsamp;
cz[GCOMP]= ct[0];
}
else {
/* got warning on this from DEC alpha (64 bits).... */
/* but it's working code! (ton) */
zsamp= (int) rz;
}
/* if(zsamp >= 0x7FFFFE00) return 1.0; */ /* no shaduw when sampling at eternal distance */
if(zsamp > zs) return 1.0; /* absolute no shadow */
else if( zsamp < zs-bias) return 0.0 ; /* absolute in shadow */
else { /* soft area */
temp= ( (float)(zs- zsamp) )/(float)bias;
return 1.0 - temp*temp;
}
}
/* the externally called shadow testing (reading) function */
/* return 1.0: no shadow at all */
float testshadowbuf(ShadBuf *shb, float *rco, float *dxco, float *dyco, float inp)
{
ShadSampleBuf *shsample;
float fac, co[4], dx[3], dy[3], shadfac=0.0f;
float xs1,ys1, siz, *jit, *weight, xres, yres;
int xs, ys, zs, bias, *rz;
short a, num;
if(inp <= 0.0f) return 0.0f;
/* rotate renderco en osaco */
siz= 0.5f*(float)shb->size;
VECCOPY(co, rco);
co[3]= 1.0f;
MTC_Mat4MulVec4fl(shb->persmat, co); /* rational hom co */
xs1= siz*(1.0f+co[0]/co[3]);
ys1= siz*(1.0f+co[1]/co[3]);
/* Clip for z: clipsta and clipend clip values of the shadow buffer. We
* can test for -1.0/1.0 because of the properties of the
* coordinate transformations. */
fac= (co[2]/co[3]);
if(fac>=1.0f) {
return 0.0f;
} else if(fac<= -1.0f) {
return 1.0f;
}
zs= ((float)0x7FFFFFFF)*fac;
/* take num*num samples, increase area with fac */
num= shb->samp*shb->samp;
fac= shb->soft;
/* with inp==1.0, bias is half the size. correction value was 1.1, giving errors
on cube edges, with one side being almost frontal lighted (ton) */
bias= (1.5f-inp*inp)*shb->bias;
if(num==1) {
for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
shadfac += readshadowbuf(shb, shsample, bias, (int)xs1, (int)ys1, zs);
return shadfac/(float)shb->totbuf;
}
/* calculate filter size */
VECCOPY(co, dxco);
co[3]= 1.0;
MTC_Mat4Mul3Vecfl(shb->persmat, co);
dx[0]= co[0]/co[3];
dx[1]= co[1]/co[3];
VECCOPY(co, dyco);
co[3]= 1.0;
MTC_Mat4Mul3Vecfl(shb->persmat, co);
dy[0]= co[0]/co[3];
dy[1]= co[1]/co[3];
xres= fac*( fabs(dx[0])+fabs(dy[0]) );
yres= fac*( fabs(dx[1])+fabs(dy[1]) );
if(xres<fac) xres= fac;
if(yres<fac) yres= fac;
xs1-= (xres)/2;
ys1-= (yres)/2;
if(xres<16.0f && yres<16.0f) {
shsample= shb->buffers.first;
if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)ys1, 0)) {
if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)ys1, 1)) {
if(firstreadshadbuf(shb, shsample, &rz, (int)xs1, (int)(ys1+yres), 1)) {
if(firstreadshadbuf(shb, shsample, &rz, (int)(xs1+xres), (int)(ys1+yres), 1)) {
return readshadowbuf(shb, shsample, bias,(int)xs1, (int)ys1, zs);
}
}
}
}
}
for(shsample= shb->buffers.first; shsample; shsample= shsample->next) {
jit= shb->jit;
weight= shb->weight;
for(a=num; a>0; a--, jit+=2, weight++) {
/* instead of jit i tried random: ugly! */
/* note: the plus 0.5 gives best sampling results, jit goes from -0.5 to 0.5 */
/* xs1 and ys1 are already corrected to be corner of sample area */
xs= xs1 + xres*(jit[0] + 0.5f);
ys= ys1 + yres*(jit[1] + 0.5f);
shadfac+= *weight * readshadowbuf(shb, shsample, bias, xs, ys, zs);
}
}
/* Renormalizes for the sample number: */
return shadfac/(float)shb->totbuf;
}
/* different function... sampling behind clipend can be LIGHT, bias is negative! */
/* return: light */
static float readshadowbuf_halo(ShadBuf *shb, ShadSampleBuf *shsample, int xs, int ys, int zs)
{
float temp;
int *rz, ofs;
int bias, zbias, zsamp;
char *ct, *cz;
/* negative! The other side is more important */
bias= -shb->bias;
/* simpleclip */
if(xs<0 || ys<0) return 0.0;
if(xs>=shb->size || ys>=shb->size) return 0.0;
/* calc z */
ofs= (ys>>4)*(shb->size>>4) + (xs>>4);
ct= shsample->cbuf+ofs;
rz= *( (int **)(shsample->zbuf+ofs) );
if(*ct==3) {
ct= ((char *)rz)+3*16*(ys & 15)+3*(xs & 15);
cz= (char *)&zsamp;
zsamp= 0;
cz[ACOMP]= ct[0];
cz[BCOMP]= ct[1];
cz[GCOMP]= ct[2];
}
else if(*ct==2) {
ct= ((char *)rz);
ct+= 4+2*16*(ys & 15)+2*(xs & 15);
zsamp= *rz;
cz= (char *)&zsamp;
cz[BCOMP]= ct[0];
cz[GCOMP]= ct[1];
}
else if(*ct==1) {
ct= ((char *)rz);
ct+= 4+16*(ys & 15)+(xs & 15);
zsamp= *rz;
cz= (char *)&zsamp;
cz[GCOMP]= ct[0];
}
else {
/* same as before */
/* still working code! (ton) */
zsamp= (int) rz;
}
/* NO schadow when sampled at 'eternal' distance */
if(zsamp >= 0x7FFFFE00) return 1.0;
if(zsamp > zs) return 1.0; /* absolute no shadww */
else {
/* bias is negative, so the (zs-bias) can be beyond 0x7fffffff */
zbias= 0x7fffffff - zs;
if(zbias > -bias) {
if( zsamp < zs-bias) return 0.0 ; /* absolute in shadow */
}
else return 0.0 ; /* absolute shadow */
}
/* soft area */
temp= ( (float)(zs- zsamp) )/(float)bias;
return 1.0 - temp*temp;
}
float shadow_halo(LampRen *lar, float *p1, float *p2)
{
/* p1 p2 already are rotated in spot-space */
ShadBuf *shb= lar->shb;
ShadSampleBuf *shsample;
float co[4], siz;
float labda, labdao, labdax, labday, ldx, ldy;
float zf, xf1, yf1, zf1, xf2, yf2, zf2;
float count, lightcount;
int x, y, z, xs1, ys1;
int dx = 0, dy = 0;
siz= 0.5*(float)shb->size;
co[0]= p1[0];
co[1]= p1[1];
co[2]= p1[2]/lar->sh_zfac;
co[3]= 1.0;
MTC_Mat4MulVec4fl(shb->winmat, co); /* rational hom co */
xf1= siz*(1.0+co[0]/co[3]);
yf1= siz*(1.0+co[1]/co[3]);
zf1= (co[2]/co[3]);
co[0]= p2[0];
co[1]= p2[1];
co[2]= p2[2]/lar->sh_zfac;
co[3]= 1.0;
MTC_Mat4MulVec4fl(shb->winmat, co); /* rational hom co */
xf2= siz*(1.0+co[0]/co[3]);
yf2= siz*(1.0+co[1]/co[3]);
zf2= (co[2]/co[3]);
/* the 2dda (a pixel line formula) */
xs1= (int)xf1;
ys1= (int)yf1;
if(xf1 != xf2) {
if(xf2-xf1 > 0.0) {
labdax= (xf1-xs1-1.0)/(xf1-xf2);
ldx= -shb->shadhalostep/(xf1-xf2);
dx= shb->shadhalostep;
}
else {
labdax= (xf1-xs1)/(xf1-xf2);
ldx= shb->shadhalostep/(xf1-xf2);
dx= -shb->shadhalostep;
}
}
else {
labdax= 1.0;
ldx= 0.0;
}
if(yf1 != yf2) {
if(yf2-yf1 > 0.0) {
labday= (yf1-ys1-1.0)/(yf1-yf2);
ldy= -shb->shadhalostep/(yf1-yf2);
dy= shb->shadhalostep;
}
else {
labday= (yf1-ys1)/(yf1-yf2);
ldy= shb->shadhalostep/(yf1-yf2);
dy= -shb->shadhalostep;
}
}
else {
labday= 1.0;
ldy= 0.0;
}
x= xs1;
y= ys1;
labda= count= lightcount= 0.0;
/* printf("start %x %x \n", (int)(0x7FFFFFFF*zf1), (int)(0x7FFFFFFF*zf2)); */
while(1) {
labdao= labda;
if(labdax==labday) {
labdax+= ldx;
x+= dx;
labday+= ldy;
y+= dy;
}
else {
if(labdax<labday) {
labdax+= ldx;
x+= dx;
} else {
labday+= ldy;
y+= dy;
}
}
labda= MIN2(labdax, labday);
if(labda==labdao || labda>=1.0) break;
zf= zf1 + labda*(zf2-zf1);
count+= (float)shb->totbuf;
if(zf<= -1.0) lightcount += 1.0; /* close to the spot */
else {
/* make sure, behind the clipend we extend halolines. */
if(zf>=1.0) z= 0x7FFFF000;
else z= (int)(0x7FFFF000*zf);
for(shsample= shb->buffers.first; shsample; shsample= shsample->next)
lightcount+= readshadowbuf_halo(shb, shsample, x, y, z);
}
}
if(count!=0.0) return (lightcount/count);
return 0.0;
}