This reverts commit c6bf5d4724. Related to D2264: When multi-process OpenCL kernel compilation is in place, single-program compiles slower than multi-program. c6bf5d4724 was created because single-program compiled faster, but this is not the case anymore, so let's revert this change. Production scenes like victor and barbershop even render quicker. Change in Cycles OpenCL compilation times > job | scene_name | compilation_time | render_time > Baseline | empty | 22.73 | 20.63 > T61514 | empty | 10.63 | 21.06 > Baseline | bmw | 56.44 | 191.00 > T61514 | bmw | 17.91 | 198.44 > Baseline | fishycat | 59.50 | 393.48 > T61514 | fishycat | 19.57 | 394.20 > Baseline | barbershop | 212.28 | 1623.53 > T61514 | barbershop | 54.10 | 1188.16 > Baseline | victor | 67.51 | 1459.80 > T61514 | victor | 22.06 | 1381.58 > Baseline | classroom | 51.46 | 341.23 > T61514 | classroom | 17.55 | 341.08 > Baseline | koro | 62.48 | 475.96 > T61514 | koro | 18.92 | 472.43 > Baseline | pavillion | 54.37 | 903.48 > T61514 | pavillion | 17.43 | 905.77 > Baseline | splash279 | 47.43 | 52.92 > T61514 | splash279 | 16.48 | 55.26 > Baseline | volume_emission | 145.22 | 62.38 > T61514 | volume_emission | 36.22 | 62.59 Reviewers: #cycles, brecht, sergey Reviewed By: #cycles, brecht Differential Revision: https://developer.blender.org/D4349
210 lines
5.7 KiB
C++
210 lines
5.7 KiB
C++
/*
|
|
* Copyright 2011-2016 Blender Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "util/util_debug.h"
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include "bvh/bvh_params.h"
|
|
|
|
#include "util/util_logging.h"
|
|
#include "util/util_string.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
/* Construct the CPU debug flags.
 *
 * Each member gets a placeholder value in the initializer list so that
 * nothing is left indeterminate; reset() then assigns the effective values.
 */
DebugFlags::CPU::CPU()
	: avx2(true), avx(true),
	  sse41(true), sse3(true), sse2(true),
	  bvh_layout(BVH_LAYOUT_DEFAULT),
	  split_kernel(false)
{
	reset();
}
|
|
|
|
void DebugFlags::CPU::reset()
|
|
{
|
|
#define STRINGIFY(x) #x
|
|
#define CHECK_CPU_FLAGS(flag, env) \
|
|
do { \
|
|
flag = (getenv(env) == NULL); \
|
|
if(!flag) { \
|
|
VLOG(1) << "Disabling " << STRINGIFY(flag) << " instruction set."; \
|
|
} \
|
|
} while(0)
|
|
|
|
CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2");
|
|
CHECK_CPU_FLAGS(avx, "CYCLES_CPU_NO_AVX");
|
|
CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41");
|
|
CHECK_CPU_FLAGS(sse3, "CYCLES_CPU_NO_SSE3");
|
|
CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2");
|
|
|
|
#undef STRINGIFY
|
|
#undef CHECK_CPU_FLAGS
|
|
|
|
if(getenv("CYCLES_BVH2") != NULL) {
|
|
bvh_layout = BVH_LAYOUT_BVH2;
|
|
}
|
|
else if(getenv("CYCLES_BVH4") != NULL) {
|
|
bvh_layout = BVH_LAYOUT_BVH4;
|
|
}
|
|
else if(getenv("CYCLES_BVH8") != NULL) {
|
|
bvh_layout = BVH_LAYOUT_BVH8;
|
|
}
|
|
else {
|
|
bvh_layout = BVH_LAYOUT_DEFAULT;
|
|
}
|
|
|
|
split_kernel = false;
|
|
}
|
|
|
|
/* Construct the CUDA debug flags.
 *
 * Both flags start out false, after which reset() is run to apply any
 * override coming from the environment.
 */
DebugFlags::CUDA::CUDA()
	: adaptive_compile(false), split_kernel(false)
{
	reset();
}
|
|
|
|
void DebugFlags::CUDA::reset()
|
|
{
|
|
if(getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
|
|
adaptive_compile = true;
|
|
|
|
split_kernel = false;
|
|
}
|
|
|
|
/* Construct the OpenCL debug flags.
 *
 * Members receive placeholder values first, then reset() applies the
 * settings found in the environment.
 */
DebugFlags::OpenCL::OpenCL()
	: device_type(DebugFlags::OpenCL::DEVICE_ALL),
	  kernel_type(DebugFlags::OpenCL::KERNEL_DEFAULT),
	  debug(false),
	  single_program(false)
{
	/* mem_limit is read when the flags are printed, yet neither this
	 * constructor nor reset() used to give it a value. It is assigned in the
	 * body rather than the initializer list so that the member declaration
	 * order does not matter.
	 * NOTE(review): 0 is assumed to mean "no limit", and the declaration is
	 * assumed to have no in-class initializer -- confirm against the header. */
	mem_limit = 0;

	reset();
}
|
|
|
|
void DebugFlags::OpenCL::reset()
|
|
{
|
|
/* Initialize device type from environment variables. */
|
|
device_type = DebugFlags::OpenCL::DEVICE_ALL;
|
|
char *device = getenv("CYCLES_OPENCL_TEST");
|
|
if(device) {
|
|
if(strcmp(device, "NONE") == 0) {
|
|
device_type = DebugFlags::OpenCL::DEVICE_NONE;
|
|
}
|
|
else if(strcmp(device, "ALL") == 0) {
|
|
device_type = DebugFlags::OpenCL::DEVICE_ALL;
|
|
}
|
|
else if(strcmp(device, "DEFAULT") == 0) {
|
|
device_type = DebugFlags::OpenCL::DEVICE_DEFAULT;
|
|
}
|
|
else if(strcmp(device, "CPU") == 0) {
|
|
device_type = DebugFlags::OpenCL::DEVICE_CPU;
|
|
}
|
|
else if(strcmp(device, "GPU") == 0) {
|
|
device_type = DebugFlags::OpenCL::DEVICE_GPU;
|
|
}
|
|
else if(strcmp(device, "ACCELERATOR") == 0) {
|
|
device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR;
|
|
}
|
|
}
|
|
/* Initialize kernel type from environment variables. */
|
|
kernel_type = DebugFlags::OpenCL::KERNEL_DEFAULT;
|
|
if(getenv("CYCLES_OPENCL_MEGA_KERNEL_TEST") != NULL) {
|
|
kernel_type = DebugFlags::OpenCL::KERNEL_MEGA;
|
|
}
|
|
else if(getenv("CYCLES_OPENCL_SPLIT_KERNEL_TEST") != NULL) {
|
|
kernel_type = DebugFlags::OpenCL::KERNEL_SPLIT;
|
|
}
|
|
/* Initialize other flags from environment variables. */
|
|
debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL);
|
|
single_program = (getenv("CYCLES_OPENCL_SINGLE_PROGRAM") != NULL);
|
|
}
|
|
|
|
/* Construct the top-level debug flags with the static viewport BVH switched
 * off. No further work is done in the constructor body.
 */
DebugFlags::DebugFlags() : viewport_static_bvh(false)
{
}
|
|
|
|
void DebugFlags::reset()
|
|
{
|
|
viewport_static_bvh = false;
|
|
cpu.reset();
|
|
cuda.reset();
|
|
opencl.reset();
|
|
}
|
|
|
|
std::ostream& operator <<(std::ostream &os,
|
|
DebugFlagsConstRef debug_flags)
|
|
{
|
|
os << "CPU flags:\n"
|
|
<< " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n"
|
|
<< " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n"
|
|
<< " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n"
|
|
<< " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n"
|
|
<< " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n"
|
|
<< " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n"
|
|
<< " Split : " << string_from_bool(debug_flags.cpu.split_kernel) << "\n";
|
|
|
|
os << "CUDA flags:\n"
|
|
<< " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
|
|
|
|
const char *opencl_device_type,
|
|
*opencl_kernel_type;
|
|
switch(debug_flags.opencl.device_type) {
|
|
case DebugFlags::OpenCL::DEVICE_NONE:
|
|
opencl_device_type = "NONE";
|
|
break;
|
|
case DebugFlags::OpenCL::DEVICE_ALL:
|
|
opencl_device_type = "ALL";
|
|
break;
|
|
case DebugFlags::OpenCL::DEVICE_DEFAULT:
|
|
opencl_device_type = "DEFAULT";
|
|
break;
|
|
case DebugFlags::OpenCL::DEVICE_CPU:
|
|
opencl_device_type = "CPU";
|
|
break;
|
|
case DebugFlags::OpenCL::DEVICE_GPU:
|
|
opencl_device_type = "GPU";
|
|
break;
|
|
case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
|
|
opencl_device_type = "ACCELERATOR";
|
|
break;
|
|
}
|
|
switch(debug_flags.opencl.kernel_type) {
|
|
case DebugFlags::OpenCL::KERNEL_DEFAULT:
|
|
opencl_kernel_type = "DEFAULT";
|
|
break;
|
|
case DebugFlags::OpenCL::KERNEL_MEGA:
|
|
opencl_kernel_type = "MEGA";
|
|
break;
|
|
case DebugFlags::OpenCL::KERNEL_SPLIT:
|
|
opencl_kernel_type = "SPLIT";
|
|
break;
|
|
}
|
|
os << "OpenCL flags:\n"
|
|
<< " Device type : " << opencl_device_type << "\n"
|
|
<< " Kernel type : " << opencl_kernel_type << "\n"
|
|
<< " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n"
|
|
<< " Single program : " << string_from_bool(debug_flags.opencl.single_program) << "\n"
|
|
<< " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n";
|
|
return os;
|
|
}
|
|
|
|
CCL_NAMESPACE_END
|