A fairly straightforward change which gives around a 30% speedup on my laptop and around a 2x speedup on a desktop in the BI (which uses a gts580). Tested with huge blurs (like 10% of blur), which were rather common during Caminandes. For now, OpenCL is limited to blur sizes of more than 100 pixels. This is still a bit experimental; feedback is welcome. Reviewers: jbakker, lukastoenne Subscribers: ton Differential Revision: https://developer.blender.org/D576
		
			
				
	
	
		
			67 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			67 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * Contributor:
 *		Jeroen Bakker
 *		Monique Dewanchand
 */
 | 
						|
 | 
						|
#ifndef _COM_GaussianXBlurOperation_h
 | 
						|
#define _COM_GaussianXBlurOperation_h
 | 
						|
#include "COM_NodeOperation.h"
 | 
						|
#include "COM_BlurBaseOperation.h"
 | 
						|
 | 
						|
class GaussianXBlurOperation : public BlurBaseOperation {
 | 
						|
private:
 | 
						|
	float *m_gausstab;
 | 
						|
#ifdef __SSE2__
 | 
						|
	__m128 *m_gausstab_sse;
 | 
						|
#endif
 | 
						|
	int m_filtersize;
 | 
						|
	void updateGauss();
 | 
						|
public:
 | 
						|
	GaussianXBlurOperation();
 | 
						|
 | 
						|
	/**
 | 
						|
	 * @brief the inner loop of this program
 | 
						|
	 */
 | 
						|
	void executePixel(float output[4], int x, int y, void *data);
 | 
						|
 | 
						|
	void executeOpenCL(OpenCLDevice *device,
 | 
						|
	                   MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
 | 
						|
	                   MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
 | 
						|
	                   list<cl_kernel> *clKernelsToCleanUp);
 | 
						|
 | 
						|
	/**
 | 
						|
	 * @brief initialize the execution
 | 
						|
	 */
 | 
						|
	void initExecution();
 | 
						|
 | 
						|
	/**
 | 
						|
	 * @brief Deinitialize the execution
 | 
						|
	 */
 | 
						|
	void deinitExecution();
 | 
						|
	
 | 
						|
	void *initializeTileData(rcti *rect);
 | 
						|
	bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
 | 
						|
 | 
						|
	void checkOpenCL() {
 | 
						|
		this->setOpenCL(m_data.sizex >= 128);
 | 
						|
	}
 | 
						|
};
 | 
						|
#endif
 |