1
1
This repository has been archived on 2023-10-09. You can view files and clone it, but cannot push or open issues or pull requests.
Files
blender-archive/intern/cycles/util/debug.cpp
Michael Jones 654e1e901b Cycles: Use local atomics for faster shader sorting (enabled on Metal)
This patch adds two new kernels: SORT_BUCKET_PASS and SORT_WRITE_PASS. These replace PREFIX_SUM and SORTED_PATHS_ARRAY on supported devices (currently implemented on Metal, but will be trivial to enable on the other backends). The new kernels exploit sort partitioning (see D15331) by sorting each partition separately using local atomics. This can give an overall render speedup of 2-3% depending on architecture. As before, we fall back to the original non-partitioned sorting when the shader count is "too high".

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D16909
2023-02-06 11:18:26 +00:00

101 lines
1.6 KiB
C++

/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#include "util/debug.h"
#include <stdlib.h>
#include "bvh/params.h"
#include "util/log.h"
#include "util/string.h"
CCL_NAMESPACE_BEGIN
DebugFlags::CPU::CPU()
{
reset();
}
void DebugFlags::CPU::reset()
{
#define STRINGIFY(x) #x
#define CHECK_CPU_FLAGS(flag, env) \
do { \
flag = (getenv(env) == NULL); \
if (!flag) { \
VLOG_INFO << "Disabling " << STRINGIFY(flag) << " instruction set."; \
} \
} while (0)
CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2");
CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41");
CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2");
#undef STRINGIFY
#undef CHECK_CPU_FLAGS
bvh_layout = BVH_LAYOUT_AUTO;
}
DebugFlags::CUDA::CUDA()
{
reset();
}
DebugFlags::HIP::HIP()
{
reset();
}
DebugFlags::Metal::Metal()
{
reset();
}
void DebugFlags::CUDA::reset()
{
if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
adaptive_compile = true;
}
void DebugFlags::HIP::reset()
{
if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL)
adaptive_compile = true;
}
void DebugFlags::Metal::reset()
{
if (getenv("CYCLES_METAL_ADAPTIVE_COMPILE") != NULL)
adaptive_compile = true;
if (auto str = getenv("CYCLES_METAL_LOCAL_ATOMIC_SORT"))
use_local_atomic_sort = (atoi(str) != 0);
}
DebugFlags::OptiX::OptiX()
{
reset();
}
void DebugFlags::OptiX::reset()
{
use_debug = false;
}
DebugFlags::DebugFlags()
{
/* Nothing for now. */
}
void DebugFlags::reset()
{
cpu.reset();
cuda.reset();
optix.reset();
metal.reset();
}
CCL_NAMESPACE_END