This repository has been archived on 2023-10-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
blender-archive/source/blender/compositor/operations/COM_GaussianXBlurOperation.cc
Manuel Castilla daa7c59e38 Compositor: Full frame Bokeh Blur and Blur nodes
Adds full frame implementation to these nodes operations.

When enabling "extend bounds" node option, tiled implementation
result is slightly different because it's using `TranslateOperation`
with bilinear sampling for centering.
Full frame always uses nearest to don't lose image quality.
It has the disadvantage of causing image jiggling on backdrop
when switching size values as it's not pixel perfect.
This is fixed by rounding to even.

No functional changes.

Part of T88150.

Reviewed By: jbakker

Differential Revision: https://developer.blender.org/D12167
2021-08-23 17:08:45 +02:00

209 lines
7.3 KiB
C++

/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright 2011, Blender Foundation.
*/
#include "COM_GaussianXBlurOperation.h"
#include "BLI_math.h"
#include "COM_OpenCLDevice.h"
#include "MEM_guardedalloc.h"
#include "RE_pipeline.h"
namespace blender::compositor {
GaussianXBlurOperation::GaussianXBlurOperation() : GaussianBlurBaseOperation(eDimension::X)
{
}
void *GaussianXBlurOperation::initializeTileData(rcti * /*rect*/)
{
lockMutex();
if (!this->m_sizeavailable) {
updateGauss();
}
void *buffer = getInputOperation(0)->initializeTileData(nullptr);
unlockMutex();
return buffer;
}
/* TODO(manzanilla): to be removed with tiled implementation. */
void GaussianXBlurOperation::initExecution()
{
GaussianBlurBaseOperation::initExecution();
initMutex();
if (this->m_sizeavailable && execution_model_ == eExecutionModel::Tiled) {
float rad = max_ff(m_size * m_data.sizex, 0.0f);
m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
/* TODO(sergey): De-duplicate with the case below and Y blur. */
this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
#ifdef BLI_HAVE_SSE2
this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
#endif
}
}
/* TODO(manzanilla): to be removed with tiled implementation. */
void GaussianXBlurOperation::updateGauss()
{
if (this->m_gausstab == nullptr) {
updateSize();
float rad = max_ff(m_size * m_data.sizex, 0.0f);
rad = min_ff(rad, MAX_GAUSSTAB_RADIUS);
m_filtersize = min_ii(ceil(rad), MAX_GAUSSTAB_RADIUS);
this->m_gausstab = BlurBaseOperation::make_gausstab(rad, m_filtersize);
#ifdef BLI_HAVE_SSE2
this->m_gausstab_sse = BlurBaseOperation::convert_gausstab_sse(this->m_gausstab, m_filtersize);
#endif
}
}
void GaussianXBlurOperation::executePixel(float output[4], int x, int y, void *data)
{
float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
float multiplier_accum = 0.0f;
MemoryBuffer *inputBuffer = (MemoryBuffer *)data;
const rcti &input_rect = inputBuffer->get_rect();
float *buffer = inputBuffer->getBuffer();
int bufferwidth = inputBuffer->getWidth();
int bufferstartx = input_rect.xmin;
int bufferstarty = input_rect.ymin;
int xmin = max_ii(x - m_filtersize, input_rect.xmin);
int xmax = min_ii(x + m_filtersize + 1, input_rect.xmax);
int ymin = max_ii(y, input_rect.ymin);
int step = getStep();
int offsetadd = getOffsetAdd();
int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth);
#ifdef BLI_HAVE_SSE2
__m128 accum_r = _mm_load_ps(color_accum);
for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax;
nx += step, index += step) {
__m128 reg_a = _mm_load_ps(&buffer[bufferindex]);
reg_a = _mm_mul_ps(reg_a, this->m_gausstab_sse[index]);
accum_r = _mm_add_ps(accum_r, reg_a);
multiplier_accum += this->m_gausstab[index];
bufferindex += offsetadd;
}
_mm_store_ps(color_accum, accum_r);
#else
for (int nx = xmin, index = (xmin - x) + this->m_filtersize; nx < xmax;
nx += step, index += step) {
const float multiplier = this->m_gausstab[index];
madd_v4_v4fl(color_accum, &buffer[bufferindex], multiplier);
multiplier_accum += multiplier;
bufferindex += offsetadd;
}
#endif
mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
}
void GaussianXBlurOperation::executeOpenCL(OpenCLDevice *device,
MemoryBuffer *outputMemoryBuffer,
cl_mem clOutputBuffer,
MemoryBuffer **inputMemoryBuffers,
std::list<cl_mem> *clMemToCleanUp,
std::list<cl_kernel> * /*clKernelsToCleanUp*/)
{
cl_kernel gaussianXBlurOperationKernel = device->COM_clCreateKernel(
"gaussianXBlurOperationKernel", nullptr);
cl_int filter_size = this->m_filtersize;
cl_mem gausstab = clCreateBuffer(device->getContext(),
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
sizeof(float) * (this->m_filtersize * 2 + 1),
this->m_gausstab,
nullptr);
device->COM_clAttachMemoryBufferToKernelParameter(gaussianXBlurOperationKernel,
0,
1,
clMemToCleanUp,
inputMemoryBuffers,
this->m_inputProgram);
device->COM_clAttachOutputMemoryBufferToKernelParameter(
gaussianXBlurOperationKernel, 2, clOutputBuffer);
device->COM_clAttachMemoryBufferOffsetToKernelParameter(
gaussianXBlurOperationKernel, 3, outputMemoryBuffer);
clSetKernelArg(gaussianXBlurOperationKernel, 4, sizeof(cl_int), &filter_size);
device->COM_clAttachSizeToKernelParameter(gaussianXBlurOperationKernel, 5, this);
clSetKernelArg(gaussianXBlurOperationKernel, 6, sizeof(cl_mem), &gausstab);
device->COM_clEnqueueRange(gaussianXBlurOperationKernel, outputMemoryBuffer, 7, this);
clReleaseMemObject(gausstab);
}
void GaussianXBlurOperation::deinitExecution()
{
GaussianBlurBaseOperation::deinitExecution();
if (this->m_gausstab) {
MEM_freeN(this->m_gausstab);
this->m_gausstab = nullptr;
}
#ifdef BLI_HAVE_SSE2
if (this->m_gausstab_sse) {
MEM_freeN(this->m_gausstab_sse);
this->m_gausstab_sse = nullptr;
}
#endif
deinitMutex();
}
bool GaussianXBlurOperation::determineDependingAreaOfInterest(rcti *input,
ReadBufferOperation *readOperation,
rcti *output)
{
rcti newInput;
if (!this->m_sizeavailable) {
rcti sizeInput;
sizeInput.xmin = 0;
sizeInput.ymin = 0;
sizeInput.xmax = 5;
sizeInput.ymax = 5;
NodeOperation *operation = this->getInputOperation(1);
if (operation->determineDependingAreaOfInterest(&sizeInput, readOperation, output)) {
return true;
}
}
{
if (this->m_sizeavailable && this->m_gausstab != nullptr) {
newInput.xmax = input->xmax + this->m_filtersize + 1;
newInput.xmin = input->xmin - this->m_filtersize - 1;
newInput.ymax = input->ymax;
newInput.ymin = input->ymin;
}
else {
newInput.xmax = this->getWidth();
newInput.xmin = 0;
newInput.ymax = this->getHeight();
newInput.ymin = 0;
}
return NodeOperation::determineDependingAreaOfInterest(&newInput, readOperation, output);
}
}
} // namespace blender::compositor