Cycles: Remove Fermi texture code.
This should be the last Fermi removal commit, unless I missed something. It's been a pleasure, Fermi!
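For context, a minimal sketch (not part of this commit) of the bindless texture-object setup that the surviving sm_30+ path below relies on; the helper name and parameters are illustrative, and only the CUDA driver API descriptors and the cuTexObjectCreate() call mirror the code in the diff:

/* Hypothetical helper illustrating the bindless path: build resource and
 * texture descriptors for a pitched 2D image and create a CUtexObject,
 * whose handle can then be stored in a flat texture info table. */
#include <cuda.h>
#include <string.h>

static CUtexObject create_2d_texture_object(CUdeviceptr device_ptr,
                                            size_t width,
                                            size_t height,
                                            size_t pitch_in_bytes,
                                            CUarray_format format,
                                            unsigned int num_channels)
{
	CUDA_RESOURCE_DESC resDesc;
	memset(&resDesc, 0, sizeof(resDesc));
	resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
	resDesc.res.pitch2D.devPtr = device_ptr;
	resDesc.res.pitch2D.format = format;
	resDesc.res.pitch2D.numChannels = num_channels;
	resDesc.res.pitch2D.width = width;
	resDesc.res.pitch2D.height = height;
	resDesc.res.pitch2D.pitchInBytes = pitch_in_bytes;

	CUDA_TEXTURE_DESC texDesc;
	memset(&texDesc, 0, sizeof(texDesc));
	texDesc.addressMode[0] = CU_TR_ADDRESS_MODE_WRAP;
	texDesc.addressMode[1] = CU_TR_ADDRESS_MODE_WRAP;
	texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
	texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;

	CUtexObject texobject = 0;
	cuTexObjectCreate(&texobject, &resDesc, &texDesc, NULL);
	return texobject;
}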
@@ -359,7 +359,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
info.description = "Multi Device";
info.num = 0;

info.has_fermi_limits = false;
info.has_half_images = true;
info.has_volume_decoupled = true;
info.bvh_layout_mask = BVH_LAYOUT_ALL;
@@ -395,8 +394,6 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
}

/* Accumulate device info. */
info.has_fermi_limits = info.has_fermi_limits ||
device.has_fermi_limits;
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
info.bvh_layout_mask = device.bvh_layout_mask & info.bvh_layout_mask;

@@ -56,7 +56,6 @@ public:
int num;
bool display_device; /* GPU is used as a display device. */
bool advanced_shading; /* Supports full shading system. */
bool has_fermi_limits; /* Fixed number of textures limit. */
bool has_half_images; /* Support half-float textures. */
bool has_volume_decoupled; /* Decoupled volume shading. */
BVHLayoutMask bvh_layout_mask; /* Bitmask of supported BVH layouts. */
@@ -73,7 +72,6 @@ public:
cpu_threads = 0;
display_device = false;
advanced_shading = true;
has_fermi_limits = false;
has_half_images = false;
has_volume_decoupled = false;
bvh_layout_mask = BVH_LAYOUT_NONE;

@@ -309,9 +309,7 @@ public:

delete split_kernel;

if(!info.has_fermi_limits) {
texture_info.free();
}
texture_info.free();

cuda_assert(cuCtxDestroy(cuContext));
}
@@ -680,7 +678,7 @@ public:

void load_texture_info()
{
if(!info.has_fermi_limits && need_texture_info) {
if(need_texture_info) {
texture_info.copy_to_device();
need_texture_info = false;
}
@@ -1018,9 +1016,6 @@ public:
{
CUDAContextScope scope(this);

/* Check if we are on sm_30 or above, for bindless textures. */
bool has_fermi_limits = info.has_fermi_limits;

/* General variables for both architectures */
string bind_name = mem.name;
size_t dsize = datatype_size(mem.data_type);
@@ -1076,25 +1071,6 @@ public:
/* Image Texture Storage */
CUtexref texref = NULL;

if(has_fermi_limits) {
if(mem.data_depth > 1) {
/* Kernel uses different bind names for 2d and 3d float textures,
* so we have to adjust couple of things here.
*/
vector<string> tokens;
string_split(tokens, mem.name, "_");
bind_name = string_printf("__tex_image_%s_3d_%s",
tokens[2].c_str(),
tokens[3].c_str());
}

cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));

if(!texref) {
return;
}
}

CUarray_format_enum format;
switch(mem.data_type) {
case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
@@ -1187,97 +1163,68 @@ public:
cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
}

if(!has_fermi_limits) {
/* Kepler+, bindless textures. */
int flat_slot = 0;
if(string_startswith(mem.name, "__tex_image")) {
int pos = string(mem.name).rfind("_");
flat_slot = atoi(mem.name + pos + 1);
}
else {
assert(0);
}

CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));

if(array_3d) {
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
resDesc.res.array.hArray = array_3d;
resDesc.flags = 0;
}
else if(mem.data_height > 0) {
resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
resDesc.res.pitch2D.devPtr = mem.device_pointer;
resDesc.res.pitch2D.format = format;
resDesc.res.pitch2D.numChannels = mem.data_elements;
resDesc.res.pitch2D.height = mem.data_height;
resDesc.res.pitch2D.width = mem.data_width;
resDesc.res.pitch2D.pitchInBytes = dst_pitch;
}
else {
resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
resDesc.res.linear.devPtr = mem.device_pointer;
resDesc.res.linear.format = format;
resDesc.res.linear.numChannels = mem.data_elements;
resDesc.res.linear.sizeInBytes = mem.device_size;
}

CUDA_TEXTURE_DESC texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = address_mode;
texDesc.addressMode[1] = address_mode;
texDesc.addressMode[2] = address_mode;
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;

cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));

/* Resize once */
if(flat_slot >= texture_info.size()) {
/* Allocate some slots in advance, to reduce amount
* of re-allocations. */
texture_info.resize(flat_slot + 128);
}

/* Set Mapping and tag that we need to (re-)upload to device */
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)cmem->texobject;
info.cl_buffer = 0;
info.interpolation = mem.interpolation;
info.extension = mem.extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
need_texture_info = true;
/* Kepler+, bindless textures. */
int flat_slot = 0;
if(string_startswith(mem.name, "__tex_image")) {
int pos = string(mem.name).rfind("_");
flat_slot = atoi(mem.name + pos + 1);
}
else {
/* Fermi, fixed texture slots. */
if(array_3d) {
cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
}
else if(mem.data_height > 0) {
CUDA_ARRAY_DESCRIPTOR array_desc;
array_desc.Format = format;
array_desc.Height = mem.data_height;
array_desc.Width = mem.data_width;
array_desc.NumChannels = mem.data_elements;
cuda_assert(cuTexRefSetAddress2D_v3(texref, &array_desc, mem.device_pointer, dst_pitch));
}
else {
cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
}

/* Attach to texture reference. */
cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
if(mem.data_depth > 1) {
cuda_assert(cuTexRefSetAddressMode(texref, 2, address_mode));
}
assert(0);
}

CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));

if(array_3d) {
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
resDesc.res.array.hArray = array_3d;
resDesc.flags = 0;
}
else if(mem.data_height > 0) {
resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
resDesc.res.pitch2D.devPtr = mem.device_pointer;
resDesc.res.pitch2D.format = format;
resDesc.res.pitch2D.numChannels = mem.data_elements;
resDesc.res.pitch2D.height = mem.data_height;
resDesc.res.pitch2D.width = mem.data_width;
resDesc.res.pitch2D.pitchInBytes = dst_pitch;
}
else {
resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
resDesc.res.linear.devPtr = mem.device_pointer;
resDesc.res.linear.format = format;
resDesc.res.linear.numChannels = mem.data_elements;
resDesc.res.linear.sizeInBytes = mem.device_size;
}

CUDA_TEXTURE_DESC texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = address_mode;
texDesc.addressMode[1] = address_mode;
texDesc.addressMode[2] = address_mode;
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;

cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));

/* Resize once */
if(flat_slot >= texture_info.size()) {
/* Allocate some slots in advance, to reduce amount
* of re-allocations. */
texture_info.resize(flat_slot + 128);
}

/* Set Mapping and tag that we need to (re-)upload to device */
TextureInfo& info = texture_info[flat_slot];
info.data = (uint64_t)cmem->texobject;
info.cl_buffer = 0;
info.interpolation = mem.interpolation;
info.extension = mem.extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
need_texture_info = true;
}

void tex_free(device_memory& mem)
@@ -2545,7 +2492,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.num = num;

info.advanced_shading = (major >= 3);
info.has_fermi_limits = !(major >= 3);
info.has_half_images = (major >= 3);
info.has_volume_decoupled = false;
info.bvh_layout_mask = BVH_LAYOUT_BVH2;