* break out with glare node * Added OpenCL kernels compatible with AMD still need some testing.
215 lines
7.6 KiB
C++
215 lines
7.6 KiB
C++
/*
|
|
* Copyright 2011, Blender Foundation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* Contributor:
|
|
* Jeroen Bakker
|
|
* Monique Dewanchand
|
|
*/
|
|
|
|
#include "COM_FastGaussianBlurOperation.h"
|
|
#include "MEM_guardedalloc.h"
|
|
#include "BLI_utildefines.h"
|
|
|
|
FastGaussianBlurOperation::FastGaussianBlurOperation(): BlurBaseOperation()
|
|
{
|
|
this->iirgaus = NULL;
|
|
}
|
|
|
|
void FastGaussianBlurOperation::executePixel(float *color,int x, int y, MemoryBuffer *inputBuffers[], void *data)
|
|
{
|
|
MemoryBuffer *newData = (MemoryBuffer*)data;
|
|
newData->read(color, x, y);
|
|
}
|
|
|
|
bool FastGaussianBlurOperation::determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output)
|
|
{
|
|
rcti newInput;
|
|
rcti sizeInput;
|
|
sizeInput.xmin = 0;
|
|
sizeInput.ymin = 0;
|
|
sizeInput.xmax = 5;
|
|
sizeInput.ymax = 5;
|
|
|
|
NodeOperation * operation = this->getInputOperation(1);
|
|
if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
|
|
return true;
|
|
}
|
|
else {
|
|
if (this->iirgaus) {
|
|
return false;
|
|
}
|
|
else {
|
|
newInput.xmin = 0;
|
|
newInput.ymin = 0;
|
|
newInput.xmax = this->getWidth();
|
|
newInput.ymax = this->getHeight();
|
|
}
|
|
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
|
|
}
|
|
}
|
|
|
|
void FastGaussianBlurOperation::initExecution()
|
|
{
|
|
BlurBaseOperation::initExecution();
|
|
BlurBaseOperation::initMutex();
|
|
}
|
|
|
|
void FastGaussianBlurOperation::deinitExecution()
|
|
{
|
|
if (this->iirgaus) {
|
|
delete this->iirgaus;
|
|
this->iirgaus = NULL;
|
|
}
|
|
BlurBaseOperation::deinitMutex();
|
|
}
|
|
|
|
void *FastGaussianBlurOperation::initializeTileData(rcti *rect, MemoryBuffer **memoryBuffers)
|
|
{
|
|
lockMutex();
|
|
if (!iirgaus) {
|
|
MemoryBuffer *newBuf = (MemoryBuffer*)this->inputProgram->initializeTileData(rect, memoryBuffers);
|
|
MemoryBuffer *copy = newBuf->duplicate();
|
|
updateSize(memoryBuffers);
|
|
|
|
int c;
|
|
sx = data->sizex * this->size/2.0f;
|
|
sy = data->sizey * this->size/2.0f;
|
|
|
|
if ((sx == sy) && (sx > 0.f)) {
|
|
for (c=0; c<COM_NUMBER_OF_CHANNELS; ++c)
|
|
IIR_gauss(copy, sx, c, 3);
|
|
}
|
|
else {
|
|
if (sx > 0.f) {
|
|
for (c=0; c<COM_NUMBER_OF_CHANNELS; ++c)
|
|
IIR_gauss(copy, sx, c, 1);
|
|
}
|
|
if (sy > 0.f) {
|
|
for (c=0; c<COM_NUMBER_OF_CHANNELS; ++c)
|
|
IIR_gauss(copy, sy, c, 2);
|
|
}
|
|
}
|
|
this->iirgaus = copy;
|
|
}
|
|
unlockMutex();
|
|
return iirgaus;
|
|
}
|
|
|
|
void FastGaussianBlurOperation::IIR_gauss(MemoryBuffer *src, float sigma, int chan, int xy)
|
|
{
|
|
double q, q2, sc, cf[4], tsM[9], tsu[3], tsv[3];
|
|
double *X, *Y, *W;
|
|
int i, x, y, sz;
|
|
float *buffer = src->getBuffer();
|
|
|
|
// <0.5 not valid, though can have a possibly useful sort of sharpening effect
|
|
if (sigma < 0.5f) return;
|
|
|
|
if ((xy < 1) || (xy > 3)) xy = 3;
|
|
|
|
// XXX The YVV macro defined below explicitly expects sources of at least 3x3 pixels,
|
|
// so just skiping blur along faulty direction if src's def is below that limit!
|
|
if (src->getWidth() < 3) xy &= ~(int) 1;
|
|
if (src->getHeight() < 3) xy &= ~(int) 2;
|
|
if (xy < 1) return;
|
|
|
|
// see "Recursive Gabor Filtering" by Young/VanVliet
|
|
// all factors here in double.prec. Required, because for single.prec it seems to blow up if sigma > ~200
|
|
if (sigma >= 3.556f)
|
|
q = 0.9804f*(sigma - 3.556f) + 2.5091f;
|
|
else // sigma >= 0.5
|
|
q = (0.0561f*sigma + 0.5784f)*sigma - 0.2568f;
|
|
q2 = q*q;
|
|
sc = (1.1668 + q)*(3.203729649 + (2.21566 + q)*q);
|
|
// no gabor filtering here, so no complex multiplies, just the regular coefs.
|
|
// all negated here, so as not to have to recalc Triggs/Sdika matrix
|
|
cf[1] = q*(5.788961737 + (6.76492 + 3.0*q)*q)/ sc;
|
|
cf[2] = -q2*(3.38246 + 3.0*q)/sc;
|
|
// 0 & 3 unchanged
|
|
cf[3] = q2*q/sc;
|
|
cf[0] = 1.0 - cf[1] - cf[2] - cf[3];
|
|
|
|
// Triggs/Sdika border corrections,
|
|
// it seems to work, not entirely sure if it is actually totally correct,
|
|
// Besides J.M.Geusebroek's anigauss.c (see http://www.science.uva.nl/~mark),
|
|
// found one other implementation by Cristoph Lampert,
|
|
// but neither seem to be quite the same, result seems to be ok so far anyway.
|
|
// Extra scale factor here to not have to do it in filter,
|
|
// though maybe this had something to with the precision errors
|
|
sc = cf[0]/((1.0 + cf[1] - cf[2] + cf[3])*(1.0 - cf[1] - cf[2] - cf[3])*(1.0 + cf[2] + (cf[1] - cf[3])*cf[3]));
|
|
tsM[0] = sc*(-cf[3]*cf[1] + 1.0 - cf[3]*cf[3] - cf[2]);
|
|
tsM[1] = sc*((cf[3] + cf[1])*(cf[2] + cf[3]*cf[1]));
|
|
tsM[2] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
|
|
tsM[3] = sc*(cf[1] + cf[3]*cf[2]);
|
|
tsM[4] = sc*(-(cf[2] - 1.0)*(cf[2] + cf[3]*cf[1]));
|
|
tsM[5] = sc*(-(cf[3]*cf[1] + cf[3]*cf[3] + cf[2] - 1.0)*cf[3]);
|
|
tsM[6] = sc*(cf[3]*cf[1] + cf[2] + cf[1]*cf[1] - cf[2]*cf[2]);
|
|
tsM[7] = sc*(cf[1]*cf[2] + cf[3]*cf[2]*cf[2] - cf[1]*cf[3]*cf[3] - cf[3]*cf[3]*cf[3] - cf[3]*cf[2] + cf[3]);
|
|
tsM[8] = sc*(cf[3]*(cf[1] + cf[3]*cf[2]));
|
|
|
|
#define YVV(L) \
|
|
{ \
|
|
W[0] = cf[0]*X[0] + cf[1]*X[0] + cf[2]*X[0] + cf[3]*X[0]; \
|
|
W[1] = cf[0]*X[1] + cf[1]*W[0] + cf[2]*X[0] + cf[3]*X[0]; \
|
|
W[2] = cf[0]*X[2] + cf[1]*W[1] + cf[2]*W[0] + cf[3]*X[0]; \
|
|
for (i=3; i<L; i++) \
|
|
W[i] = cf[0]*X[i] + cf[1]*W[i-1] + cf[2]*W[i-2] + cf[3]*W[i-3]; \
|
|
tsu[0] = W[L-1] - X[L-1]; \
|
|
tsu[1] = W[L-2] - X[L-1]; \
|
|
tsu[2] = W[L-3] - X[L-1]; \
|
|
tsv[0] = tsM[0]*tsu[0] + tsM[1]*tsu[1] + tsM[2]*tsu[2] + X[L-1]; \
|
|
tsv[1] = tsM[3]*tsu[0] + tsM[4]*tsu[1] + tsM[5]*tsu[2] + X[L-1]; \
|
|
tsv[2] = tsM[6]*tsu[0] + tsM[7]*tsu[1] + tsM[8]*tsu[2] + X[L-1]; \
|
|
Y[L-1] = cf[0]*W[L-1] + cf[1]*tsv[0] + cf[2]*tsv[1] + cf[3]*tsv[2]; \
|
|
Y[L-2] = cf[0]*W[L-2] + cf[1]*Y[L-1] + cf[2]*tsv[0] + cf[3]*tsv[1]; \
|
|
Y[L-3] = cf[0]*W[L-3] + cf[1]*Y[L-2] + cf[2]*Y[L-1] + cf[3]*tsv[0]; \
|
|
for (i=L-4; i>=0; i--) \
|
|
Y[i] = cf[0]*W[i] + cf[1]*Y[i+1] + cf[2]*Y[i+2] + cf[3]*Y[i+3]; \
|
|
}
|
|
|
|
// intermediate buffers
|
|
sz = MAX2(src->getWidth(), src->getHeight());
|
|
X = (double*)MEM_callocN(sz*sizeof(double), "IIR_gauss X buf");
|
|
Y = (double*)MEM_callocN(sz*sizeof(double), "IIR_gauss Y buf");
|
|
W = (double*)MEM_callocN(sz*sizeof(double), "IIR_gauss W buf");
|
|
if (xy & 1) { // H
|
|
for (y=0; y<src->getHeight(); ++y) {
|
|
const int yx = y*src->getWidth();
|
|
for (x=0; x<src->getWidth(); ++x)
|
|
X[x] = buffer[(x + yx)*COM_NUMBER_OF_CHANNELS + chan];
|
|
YVV(src->getWidth());
|
|
for (x=0; x<src->getWidth(); ++x)
|
|
buffer[(x + yx)*COM_NUMBER_OF_CHANNELS + chan] = Y[x];
|
|
}
|
|
}
|
|
if (xy & 2) { // V
|
|
for (x=0; x<src->getWidth(); ++x) {
|
|
for (y=0; y<src->getHeight(); ++y)
|
|
X[y] = buffer[(x + y*src->getWidth())*COM_NUMBER_OF_CHANNELS + chan];
|
|
YVV(src->getHeight());
|
|
for (y=0; y<src->getHeight(); ++y)
|
|
buffer[(x + y*src->getWidth())*COM_NUMBER_OF_CHANNELS + chan] = Y[y];
|
|
}
|
|
}
|
|
|
|
MEM_freeN(X);
|
|
MEM_freeN(W);
|
|
MEM_freeN(Y);
|
|
#undef YVV
|
|
|
|
}
|