Compare commits

...

1 Commit

Author SHA1 Message Date
063ce7f550 Fluid: Initial changes for OpenMP GPU support
Contains basic support for OpenMP GPU offloading.
That is, offloading of fluid KERNEL loops to the GPU.

This branch offloads the pressure and advection calls only - the two most
expensive operations per step. In theory, though, any function can be
offloaded.

For now, this branch needs to be built with a compiler that supports
Nvidia GPU offloading. The exact GPU models need to be specified via CMake.
2021-09-13 15:03:52 +02:00
65 changed files with 6158 additions and 8345 deletions
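
To make the commit message concrete: offloading a fluid KERNEL loop here means wrapping the loop body in an OpenMP "target teams distribute parallel for" region, the same pattern the patched GridDotProduct kernel below uses. The following is a minimal, self-contained sketch of that pattern with a flat std::vector standing in for a Mantaflow Grid<Real>; it is an illustration, not code from this patch.

// Hedged sketch of OpenMP GPU offloading of a grid reduction. The container,
// sizes and data are placeholders, not Mantaflow types.
#include <cstdio>
#include <vector>

double grid_dot_product(const std::vector<double> &a, const std::vector<double> &b)
{
  const long n = (long)a.size();
  const double *pa = a.data();
  const double *pb = b.data();
  double result = 0.0;
  // Map the raw arrays to the device and reduce on the GPU, mirroring the
  // "target teams distribute parallel for reduction(+:result)" pragma used
  // in the offloaded GridDotProduct kernel in this commit.
#pragma omp target teams distribute parallel for reduction(+ : result) \
    map(to : pa[0 : n], pb[0 : n])
  for (long i = 0; i < n; i++) {
    result += pa[i] * pb[i];
  }
  return result;
}

int main()
{
  std::vector<double> a(1 << 20, 0.5), b(1 << 20, 2.0);
  printf("dot = %f\n", grid_dot_product(a, b));
  return 0;
}

Without an offload-capable compiler and GPU the target region silently falls back to the host, which is why the CMake changes below force an offloading toolchain.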

View File

@@ -747,7 +747,7 @@ set_and_warn_dependency(WITH_TBB WITH_CYCLES OFF)
set_and_warn_dependency(WITH_TBB WITH_USD OFF) set_and_warn_dependency(WITH_TBB WITH_USD OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF) set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF) set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF)
set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF) #set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF)
# NanoVDB requires OpenVDB to convert the data structure # NanoVDB requires OpenVDB to convert the data structure
set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF) set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF)
@@ -1239,15 +1239,29 @@ endif()
if(WITH_OPENMP) if(WITH_OPENMP)
if(NOT OPENMP_CUSTOM) if(NOT OPENMP_CUSTOM)
find_package(OpenMP) find_package(OpenMP)
list(APPEND CMAKE_MODULE_PATH "${LLVM_LIBPATH}/cmake/openmp")
find_package(OpenMPTarget REQUIRED NVPTX)
endif() endif()
if(OPENMP_FOUND) if(OPENMP_FOUND)
if(NOT WITH_OPENMP_STATIC) if(NOT WITH_OPENMP_STATIC)
message(STATUS "============ No Static OpenMP")
message(${OpenMPTarget_NVPTX_FLAGS})
string(APPEND CMAKE_C_FLAGS " ${OpenMP_C_FLAGS}") string(APPEND CMAKE_C_FLAGS " ${OpenMP_C_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}") string(APPEND CMAKE_CXX_FLAGS " ${OpenMP_CXX_FLAGS}")
string(APPEND CMAKE_EXE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}") string(APPEND CMAKE_EXE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}")
string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}") string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${OpenMP_LINKER_FLAGS}")
string(APPEND CMAKE_C_FLAGS " -gline-tables-only -fopenmp-targets=nvptx64-nvidia-cuda")
string(APPEND CMAKE_CXX_FLAGS " -gline-tables-only -fopenmp-targets=nvptx64-nvidia-cuda")
string(APPEND CMAKE_EXE_LINKER_FLAGS " -fopenmp-targets=nvptx64-nvidia-cuda")
string(APPEND CMAKE_MODULE_LINKER_FLAGS " -fopenmp-targets=nvptx64-nvidia-cuda")
set(PLATFORM_LINKLIBS "${PLATFORM_LINKLIBS};-fopenmp=libomp;-fopenmp-targets=nvptx64-nvidia-cuda")
else() else()
message(STATUS "============= Static OpenMP")
# Typically avoid adding flags as defines but we can't # Typically avoid adding flags as defines but we can't
# pass OpenMP flags to the linker for static builds, meaning # pass OpenMP flags to the linker for static builds, meaning
# we can't add any OpenMP related flags to CFLAGS variables # we can't add any OpenMP related flags to CFLAGS variables
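
Since the appended -fopenmp-targets=nvptx64-nvidia-cuda flags turn every translation unit into an offload build, a quick standalone check that the resulting clang/libomptarget setup actually reaches a GPU might look like the sketch below (illustrative only; the file name and setup are hypothetical, the OpenMP runtime calls are standard).

// Hedged sketch for verifying that the offload toolchain required by the
// changed CMake flags really targets a GPU rather than falling back to host.
#include <cstdio>
#include <omp.h>

int main()
{
  // Number of offload devices libomptarget can see (0 means host fallback).
  printf("offload devices: %d\n", omp_get_num_devices());

  int on_device = 0;
  // Runs on the GPU when offloading works; falls back to the host otherwise.
#pragma omp target map(from : on_device)
  on_device = !omp_is_initial_device();

  printf("target region ran on %s\n", on_device ? "the GPU" : "the host");
  return 0;
}

Assuming a clang built with NVPTX offload support (as the LLVM dependency changes further down set up), this could be compiled with the same kind of flags the patch adds, e.g. clang++ -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda check.cpp.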

View File

@@ -30,6 +30,10 @@ if(APPLE)
set(BUILD_CLANG_TOOLS ON) set(BUILD_CLANG_TOOLS ON)
endif() endif()
if(UNIX AND NOT APPLE)
set(LLVM_BUILD_OPENMP ^^openmp)
set(LLVM_TARGETS ${LLVM_TARGETS} ^^NVPTX)
endif()
set(LLVM_EXTRA_ARGS set(LLVM_EXTRA_ARGS
-DLLVM_USE_CRT_RELEASE=MD -DLLVM_USE_CRT_RELEASE=MD
@@ -40,10 +44,18 @@ set(LLVM_EXTRA_ARGS
-DLLVM_ENABLE_TERMINFO=OFF -DLLVM_ENABLE_TERMINFO=OFF
-DLLVM_BUILD_LLVM_C_DYLIB=OFF -DLLVM_BUILD_LLVM_C_DYLIB=OFF
-DLLVM_ENABLE_UNWIND_TABLES=OFF -DLLVM_ENABLE_UNWIND_TABLES=OFF
-DLLVM_ENABLE_PROJECTS=clang${LLVM_BUILD_CLANG_TOOLS_EXTRA} -DLLVM_ENABLE_PROJECTS=clang${LLVM_BUILD_CLANG_TOOLS_EXTRA}${LLVM_BUILD_OPENMP}
${LLVM_XML2_ARGS} ${LLVM_XML2_ARGS}
) )
if(UNIX AND NOT APPLE)
list(APPEND LLVM_EXTRA_ARGS
-DCLANG_OPENMP_NVPTX_DEFAULT_ARCH=sm_61
-DLIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES=61
)
endif()
if(WIN32) if(WIN32)
set(LLVM_GENERATOR "Ninja") set(LLVM_GENERATOR "Ninja")
else() else()

View File

@@ -63,7 +63,8 @@ if(EXISTS ${LIBDIR})
# OpenMP usually can't be statically linked into shared libraries, # OpenMP usually can't be statically linked into shared libraries,
# due to not being compiled with position independent code. # due to not being compiled with position independent code.
if(NOT WITH_PYTHON_MODULE) if(NOT WITH_PYTHON_MODULE)
set(WITH_OPENMP_STATIC ON) #message(STATUS "============= ENABLING Static OpenMP")
#set(WITH_OPENMP_STATIC ON)
endif() endif()
set(Boost_NO_BOOST_CMAKE ON) set(Boost_NO_BOOST_CMAKE ON)
set(BOOST_ROOT ${LIBDIR}/boost) set(BOOST_ROOT ${LIBDIR}/boost)

View File

@@ -81,7 +81,7 @@ static double cos_vnvnvn(
normalize_vn_vnvn(dvec1, v1, v2, dims); normalize_vn_vnvn(dvec1, v1, v2, dims);
double d = dot_vnvn(dvec0, dvec1, dims); double d = dot_vnvn(dvec0, dvec1, dims);
/* sanity check */ /* sanity check */
d = max(-1.0, min(1.0, d)); d = maxV(-1.0, minV(1.0, d));
return d; return d;
} }

View File

@@ -29,9 +29,9 @@
* \ingroup curve_fit * \ingroup curve_fit
*/ */
#ifdef _MSC_VER //#ifdef _MSC_VER
# define _USE_MATH_DEFINES # define _USE_MATH_DEFINES
#endif //#endif
#include <math.h> #include <math.h>
#include <float.h> #include <float.h>
@@ -456,7 +456,7 @@ static double points_calc_circumference_factor(
const double len_tangent = dot < 0.0 ? len_vnvn(tan_l, tan_r, dims) : len_negated_vnvn(tan_l, tan_r, dims); const double len_tangent = dot < 0.0 ? len_vnvn(tan_l, tan_r, dims) : len_negated_vnvn(tan_l, tan_r, dims);
if (len_tangent > DBL_EPSILON) { if (len_tangent > DBL_EPSILON) {
/* only clamp to avoid precision error */ /* only clamp to avoid precision error */
double angle = acos(max(-fabs(dot), -1.0)); double angle = acos(maxV(-fabs(dot), -1.0));
/* Angle may be less than the length when the tangents define >180 degrees of the circle, /* Angle may be less than the length when the tangents define >180 degrees of the circle,
* (tangents that point away from each other). * (tangents that point away from each other).
* We could try support this but will likely cause extreme >1 scales which could cause other issues. */ * We could try support this but will likely cause extreme >1 scales which could cause other issues. */
@@ -607,7 +607,7 @@ static void cubic_from_points_offset_fallback(
for (uint k = 0; k < 2; k++) { for (uint k = 0; k < 2; k++) {
sub_vn_vnvn(tmp, p0, pt, dims); sub_vn_vnvn(tmp, p0, pt, dims);
project_vn_vnvn_normalized(tmp, tmp, a[k], dims); project_vn_vnvn_normalized(tmp, tmp, a[k], dims);
dists[k] = max(dists[k], dot_vnvn(tmp, a[k], dims)); dists[k] = maxV(dists[k], dot_vnvn(tmp, a[k], dims));
} }
} }
@@ -796,7 +796,7 @@ static void cubic_from_points(
dist_sq_test += sq((pt[j] - center[j]) * clamp_scale); dist_sq_test += sq((pt[j] - center[j]) * clamp_scale);
} }
#endif #endif
dist_sq_max = max(dist_sq_max, dist_sq_test); dist_sq_max = maxV(dist_sq_max, dist_sq_test);
} }
} }

View File

@@ -45,12 +45,12 @@ MINLINE double sq(const double d)
} }
#ifndef _MSC_VER #ifndef _MSC_VER
MINLINE double min(const double a, const double b) MINLINE double minV(const double a, const double b)
{ {
return b < a ? b : a; return b < a ? b : a;
} }
MINLINE double max(const double a, const double b) MINLINE double maxV(const double a, const double b)
{ {
return a < b ? b : a; return a < b ? b : a;
} }

View File

@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
// //
#include "draco/animation/keyframe_animation.h" #include "draco/animation/keyframe_animation.h"
#include <iostream>
namespace draco { namespace draco {

View File

@@ -55,26 +55,30 @@ if(NOT WITH_MANTA_DEPENDENCIES)
add_definitions(-DNO_CNPY=1) add_definitions(-DNO_CNPY=1)
endif() endif()
set(MANTA_HLP
helper
)
set(MANTA_PP
preprocessed
)
if(WITH_MANTA_DEPENDENCIES) if(WITH_MANTA_DEPENDENCIES)
set(MANTA_DEP set(MANTA_DEP
dependencies dependencies
) )
endif() endif()
if(WITH_TBB) if(WITH_OPENMP)
add_definitions(-DTBB=1) add_definitions(-DOPENMP=1)
if(WITH_OPENMP_STATIC)
list(APPEND LIB
${OpenMP_LIBRARIES}
)
endif()
endif()
if(WITH_OPENMP)
add_definitions(-DOPENMP_OFFLOAD=1)
endif() endif()
if(WITH_OPENVDB) if(WITH_OPENVDB)
add_definitions(-DOPENVDB=1) add_definitions(-DOPENVDB=1)
# OpenVDB headers use deprecated TBB headers, silence warning. # OpenVDB headers use deprecated TBB headers, silence warning.
add_definitions(-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1) #add_definitions(-DTBB_SUPPRESS_DEPRECATED_MESSAGES=1)
endif() endif()
if(WITH_OPENVDB_BLOSC) if(WITH_OPENVDB_BLOSC)
@@ -90,12 +94,12 @@ if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY)
endif() endif()
set(INC set(INC
${MANTA_PP} preprocessed
${MANTA_PP}/fileio preprocessed/fileio
${MANTA_PP}/python preprocessed/python
${MANTA_PP}/plugin preprocessed/plugin
${MANTA_HLP}/pwrapper helper/pwrapper
${MANTA_HLP}/util helper/util
) )
if(WITH_MANTA_DEPENDENCIES) if(WITH_MANTA_DEPENDENCIES)
@@ -115,15 +119,6 @@ if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY)
) )
endif() endif()
if(WITH_TBB)
list(APPEND INC_SYS
${TBB_INCLUDE_DIRS}
)
list(APPEND LIB
${TBB_LIBRARIES}
)
endif()
if(WITH_OPENVDB) if(WITH_OPENVDB)
list(APPEND INC_SYS list(APPEND INC_SYS
${OPENVDB_INCLUDE_DIRS} ${OPENVDB_INCLUDE_DIRS}
@@ -142,120 +137,120 @@ if(WITH_OPENVDB)
endif() endif()
set(SRC set(SRC
${MANTA_PP}/commonkernels.h preprocessed/commonkernels.h
${MANTA_PP}/commonkernels.h.reg.cpp preprocessed/commonkernels.h.reg.cpp
${MANTA_PP}/conjugategrad.cpp preprocessed/conjugategrad.cpp
${MANTA_PP}/conjugategrad.h preprocessed/conjugategrad.h
${MANTA_PP}/conjugategrad.h.reg.cpp preprocessed/conjugategrad.h.reg.cpp
${MANTA_PP}/edgecollapse.cpp preprocessed/edgecollapse.cpp
${MANTA_PP}/edgecollapse.h preprocessed/edgecollapse.h
${MANTA_PP}/edgecollapse.h.reg.cpp preprocessed/edgecollapse.h.reg.cpp
${MANTA_PP}/fastmarch.cpp preprocessed/fastmarch.cpp
${MANTA_PP}/fastmarch.h preprocessed/fastmarch.h
${MANTA_PP}/fastmarch.h.reg.cpp preprocessed/fastmarch.h.reg.cpp
${MANTA_PP}/fileio/iogrids.cpp preprocessed/fileio/iogrids.cpp
${MANTA_PP}/fileio/iomeshes.cpp preprocessed/fileio/iomeshes.cpp
${MANTA_PP}/fileio/ioparticles.cpp preprocessed/fileio/ioparticles.cpp
${MANTA_PP}/fileio/ioutil.cpp preprocessed/fileio/ioutil.cpp
${MANTA_PP}/fileio/iovdb.cpp preprocessed/fileio/iovdb.cpp
${MANTA_PP}/fileio/mantaio.cpp preprocessed/fileio/mantaio.cpp
${MANTA_PP}/fileio/mantaio.h preprocessed/fileio/mantaio.h
${MANTA_PP}/fileio/mantaio.h.reg.cpp preprocessed/fileio/mantaio.h.reg.cpp
${MANTA_PP}/fluidsolver.cpp preprocessed/fluidsolver.cpp
${MANTA_PP}/fluidsolver.h preprocessed/fluidsolver.h
${MANTA_PP}/fluidsolver.h.reg.cpp preprocessed/fluidsolver.h.reg.cpp
${MANTA_PP}/general.cpp preprocessed/general.cpp
${MANTA_PP}/general.h preprocessed/general.h
${MANTA_PP}/general.h.reg.cpp preprocessed/general.h.reg.cpp
${MANTA_PP}/gitinfo.h preprocessed/gitinfo.h
${MANTA_PP}/grid.cpp preprocessed/grid.cpp
${MANTA_PP}/grid.h preprocessed/grid.h
${MANTA_PP}/grid.h.reg.cpp preprocessed/grid.h.reg.cpp
${MANTA_PP}/grid4d.cpp preprocessed/grid4d.cpp
${MANTA_PP}/grid4d.h preprocessed/grid4d.h
${MANTA_PP}/grid4d.h.reg.cpp preprocessed/grid4d.h.reg.cpp
${MANTA_PP}/kernel.cpp preprocessed/kernel.cpp
${MANTA_PP}/kernel.h preprocessed/kernel.h
${MANTA_PP}/kernel.h.reg.cpp preprocessed/kernel.h.reg.cpp
${MANTA_PP}/levelset.cpp preprocessed/levelset.cpp
${MANTA_PP}/levelset.h preprocessed/levelset.h
${MANTA_PP}/levelset.h.reg.cpp preprocessed/levelset.h.reg.cpp
${MANTA_PP}/mesh.cpp preprocessed/mesh.cpp
${MANTA_PP}/mesh.h preprocessed/mesh.h
${MANTA_PP}/mesh.h.reg.cpp preprocessed/mesh.h.reg.cpp
${MANTA_PP}/movingobs.cpp preprocessed/movingobs.cpp
${MANTA_PP}/movingobs.h preprocessed/movingobs.h
${MANTA_PP}/movingobs.h.reg.cpp preprocessed/movingobs.h.reg.cpp
${MANTA_PP}/multigrid.cpp preprocessed/multigrid.cpp
${MANTA_PP}/multigrid.h preprocessed/multigrid.h
${MANTA_PP}/multigrid.h.reg.cpp preprocessed/multigrid.h.reg.cpp
${MANTA_PP}/noisefield.cpp preprocessed/noisefield.cpp
${MANTA_PP}/noisefield.h preprocessed/noisefield.h
${MANTA_PP}/noisefield.h.reg.cpp preprocessed/noisefield.h.reg.cpp
${MANTA_PP}/particle.cpp preprocessed/particle.cpp
${MANTA_PP}/particle.h preprocessed/particle.h
${MANTA_PP}/particle.h.reg.cpp preprocessed/particle.h.reg.cpp
${MANTA_PP}/plugin/advection.cpp preprocessed/plugin/advection.cpp
${MANTA_PP}/plugin/apic.cpp preprocessed/plugin/apic.cpp
${MANTA_PP}/plugin/extforces.cpp preprocessed/plugin/extforces.cpp
${MANTA_PP}/plugin/fire.cpp preprocessed/plugin/fire.cpp
${MANTA_PP}/plugin/flip.cpp preprocessed/plugin/flip.cpp
${MANTA_PP}/plugin/fluidguiding.cpp preprocessed/plugin/fluidguiding.cpp
${MANTA_PP}/plugin/initplugins.cpp preprocessed/plugin/initplugins.cpp
${MANTA_PP}/plugin/kepsilon.cpp preprocessed/plugin/kepsilon.cpp
${MANTA_PP}/plugin/meshplugins.cpp preprocessed/plugin/meshplugins.cpp
${MANTA_PP}/plugin/pressure.cpp preprocessed/plugin/pressure.cpp
${MANTA_PP}/plugin/ptsplugins.cpp preprocessed/plugin/ptsplugins.cpp
${MANTA_PP}/plugin/secondaryparticles.cpp preprocessed/plugin/secondaryparticles.cpp
${MANTA_PP}/plugin/surfaceturbulence.cpp preprocessed/plugin/surfaceturbulence.cpp
${MANTA_PP}/plugin/viscosity.cpp # preprocessed/plugin/viscosity.cpp
${MANTA_PP}/plugin/vortexplugins.cpp preprocessed/plugin/vortexplugins.cpp
${MANTA_PP}/plugin/waveletturbulence.cpp preprocessed/plugin/waveletturbulence.cpp
${MANTA_PP}/plugin/waves.cpp preprocessed/plugin/waves.cpp
${MANTA_PP}/python/defines.py preprocessed/python/defines.py
${MANTA_PP}/python/defines.py.reg.cpp preprocessed/python/defines.py.reg.cpp
${MANTA_PP}/registration.cpp preprocessed/registration.cpp
${MANTA_PP}/shapes.cpp preprocessed/shapes.cpp
${MANTA_PP}/shapes.h preprocessed/shapes.h
${MANTA_PP}/shapes.h.reg.cpp preprocessed/shapes.h.reg.cpp
${MANTA_PP}/test.cpp preprocessed/test.cpp
${MANTA_PP}/timing.cpp preprocessed/timing.cpp
${MANTA_PP}/timing.h preprocessed/timing.h
${MANTA_PP}/timing.h.reg.cpp preprocessed/timing.h.reg.cpp
${MANTA_PP}/turbulencepart.cpp preprocessed/turbulencepart.cpp
${MANTA_PP}/turbulencepart.h preprocessed/turbulencepart.h
${MANTA_PP}/turbulencepart.h.reg.cpp preprocessed/turbulencepart.h.reg.cpp
${MANTA_PP}/vortexpart.cpp preprocessed/vortexpart.cpp
${MANTA_PP}/vortexpart.h preprocessed/vortexpart.h
${MANTA_PP}/vortexpart.h.reg.cpp preprocessed/vortexpart.h.reg.cpp
${MANTA_PP}/vortexsheet.cpp preprocessed/vortexsheet.cpp
${MANTA_PP}/vortexsheet.h preprocessed/vortexsheet.h
${MANTA_PP}/vortexsheet.h.reg.cpp preprocessed/vortexsheet.h.reg.cpp
${MANTA_HLP}/pwrapper/manta.h helper/pwrapper/manta.h
${MANTA_HLP}/pwrapper/pclass.cpp helper/pwrapper/pclass.cpp
${MANTA_HLP}/pwrapper/pclass.h helper/pwrapper/pclass.h
${MANTA_HLP}/pwrapper/pconvert.cpp helper/pwrapper/pconvert.cpp
${MANTA_HLP}/pwrapper/pconvert.h helper/pwrapper/pconvert.h
${MANTA_HLP}/pwrapper/pvec3.cpp helper/pwrapper/pvec3.cpp
${MANTA_HLP}/pwrapper/pythonInclude.h helper/pwrapper/pythonInclude.h
${MANTA_HLP}/pwrapper/registry.cpp helper/pwrapper/registry.cpp
${MANTA_HLP}/pwrapper/registry.h helper/pwrapper/registry.h
${MANTA_HLP}/util/integrator.h helper/util/integrator.h
${MANTA_HLP}/util/interpol.h helper/util/interpol.h
${MANTA_HLP}/util/interpolHigh.h helper/util/interpolHigh.h
${MANTA_HLP}/util/matrixbase.h helper/util/matrixbase.h
${MANTA_HLP}/util/mcubes.h helper/util/mcubes.h
${MANTA_HLP}/util/quaternion.h helper/util/quaternion.h
${MANTA_HLP}/util/randomstream.h helper/util/randomstream.h
${MANTA_HLP}/util/rcmatrix.h helper/util/rcmatrix.h
${MANTA_HLP}/util/simpleimage.cpp helper/util/simpleimage.cpp
${MANTA_HLP}/util/simpleimage.h helper/util/simpleimage.h
${MANTA_HLP}/util/solvana.h helper/util/solvana.h
${MANTA_HLP}/util/vector4d.cpp helper/util/vector4d.cpp
${MANTA_HLP}/util/vector4d.h helper/util/vector4d.h
${MANTA_HLP}/util/vectorbase.cpp helper/util/vectorbase.cpp
${MANTA_HLP}/util/vectorbase.h helper/util/vectorbase.h
) )
if(WITH_MANTA_DEPENDENCIES) if(WITH_MANTA_DEPENDENCIES)
@@ -266,16 +261,34 @@ if(WITH_MANTA_DEPENDENCIES)
endif() endif()
if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY) if(WITH_MANTA_NUMPY AND WITH_PYTHON_NUMPY)
list(APPEND SRC list(APPEND SRC
${MANTA_PP}/plugin/numpyconvert.cpp preprocessed/plugin/numpyconvert.cpp
${MANTA_PP}/plugin/tfplugins.cpp preprocessed/plugin/tfplugins.cpp
${MANTA_HLP}/pwrapper/numpyWrap.cpp helper/pwrapper/numpyWrap.cpp
${MANTA_HLP}/pwrapper/numpyWrap.h helper/pwrapper/numpyWrap.h
) )
endif() endif()
set(LIB set(LIB
${PYTHON_LINKFLAGS} ${PYTHON_LINKFLAGS}
${PYTHON_LIBRARIES} ${PYTHON_LIBRARIES}
${OPENVDB_LIBRARIES}
) )
blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") #blender_add_lib(extern_mantaflow "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
add_library(extern_mantaflow SHARED "${SRC}")
include_directories(preprocessed)
include_directories(preprocessed/fileio)
include_directories(preprocessed/python)
include_directories(preprocessed/plugin)
include_directories(helper/pwrapper)
include_directories(helper/util)
include_directories(${PYTHON_INCLUDE_DIRS})
include_directories(${ZLIB_INCLUDE_DIRS})
include_directories(${OPENVDB_INCLUDE_DIRS})
#target_include_directories(extern_mantaflow PRIVATE "${INC}")
target_link_libraries(extern_mantaflow PRIVATE "${LIB}")
blender_source_group(extern_mantaflow "${SRC}")

View File

@@ -7,8 +7,11 @@
# ==================== 1) ENVIRONMENT SETUP ============================================= # ==================== 1) ENVIRONMENT SETUP =============================================
# YOUR INSTALLATION PATHS GO HERE: # YOUR INSTALLATION PATHS GO HERE:
MANTA_INSTALLATION=/Users/sebbas/Developer/Mantaflow/mantaflowDevelop MANTA_INSTALLATION=/home/sebbas/Developer/Mantaflow
BLENDER_INSTALLATION=/Users/sebbas/Developer/Blender BLENDER_INSTALLATION=/home/sebbas/Developer/Blender
CC=/home/sebbas/Developer/LLVM-Project/install/bin/clang
CXX=/home/sebbas/Developer/LLVM-Project/install/bin/clang++
# Try to check out Mantaflow repository before building? # Try to check out Mantaflow repository before building?
CLEAN_REPOSITORY=0 CLEAN_REPOSITORY=0
@@ -20,8 +23,13 @@ WITH_DEPENDENCIES=0
USE_NUMPY=0 USE_NUMPY=0
# Choose which multithreading platform to use for Mantaflow preprocessing # Choose which multithreading platform to use for Mantaflow preprocessing
USE_OMP=0 USE_OMP=1
USE_TBB=1 USE_TBB=0
# Use OpenMP offloading too?
if [[ "$USE_OMP" -eq "1" ]]; then
USE_OMP_OFFLOAD=1
fi
if [[ "$USE_OMP" -eq "1" && "$USE_TBB" -eq "1" ]]; then if [[ "$USE_OMP" -eq "1" && "$USE_TBB" -eq "1" ]]; then
echo "Cannot build Mantaflow for OpenMP and TBB at the same time" echo "Cannot build Mantaflow for OpenMP and TBB at the same time"
@@ -56,7 +64,7 @@ fi
MANTA_BUILD_PATH=$MANTA_INSTALLATION/build_blender/ MANTA_BUILD_PATH=$MANTA_INSTALLATION/build_blender/
mkdir -p $MANTA_BUILD_PATH mkdir -p $MANTA_BUILD_PATH
cd $MANTA_BUILD_PATH cd $MANTA_BUILD_PATH
cmake ../mantaflowgit -DGUI=0 -DOPENMP=$USE_OMP -DTBB=$USE_TBB -DBLENDER=1 -DPREPDEBUG=1 -DNUMPY=$USE_NUMPY && make -j8 cmake ../mantaflowgit -DGUI=0 -DOPENMP=$USE_OMP -DTBB=$USE_TBB -DOPENMP_OFFLOAD=$USE_OMP_OFFLOAD -DBLENDER=1 -DPREPDEBUG=1 -DNUMPY=$USE_NUMPY -DPYTHON_VERSION=3 -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX && make -j8
# ==================== 3) COPY MANTAFLOW FILES TO BLENDER ROOT =========================== # ==================== 3) COPY MANTAFLOW FILES TO BLENDER ROOT ===========================

View File

@@ -1035,7 +1035,7 @@ template<class N, class T> struct RCFixedMatrix {
typedef RCMatrix<int, Real> Matrix; typedef RCMatrix<int, Real> Matrix;
typedef RCFixedMatrix<int, Real> FixedMatrix; typedef RCFixedMatrix<int, Real> FixedMatrix;
} } // namespace Manta
#undef parallel_for #undef parallel_for
#undef parallel_end #undef parallel_end

File diff suppressed because it is too large

View File

@@ -18,6 +18,8 @@
#include "conjugategrad.h" #include "conjugategrad.h"
#include "commonkernels.h" #include "commonkernels.h"
#include <chrono>
using namespace std::chrono;
using namespace std; using namespace std;
namespace Manta { namespace Manta {
@@ -213,9 +215,9 @@ struct GridDotProduct : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, const Grid<Real> &a, const Grid<Real> &b, double &result) inline void op(int i, int j, int k, const Grid<Real> &a, const Grid<Real> &b, double &result)
{ {
result += (a[idx] * b[idx]); result += (a(i, j, k) * b(i, j, k));
} }
inline operator double() inline operator double()
{ {
@@ -235,28 +237,39 @@ struct GridDotProduct : public KernelBase {
return b; return b;
} }
typedef Grid<Real> type1; typedef Grid<Real> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel GridDotProduct ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, a, b, result);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const int _maxX = maxX;
} const int _maxY = maxY;
GridDotProduct(GridDotProduct &o, tbb::split) : KernelBase(o), a(o.a), b(o.b), result(0.0) if (maxZ > 1) {
{ const Grid<Real> &a = getArg0();
} const Grid<Real> &b = getArg1();
void join(const GridDotProduct &o) #pragma omp target teams distribute parallel for reduction(+:result) collapse(2) schedule(static,1)
{ {
result += o.result; for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, a, b, result);
}
{
this->result = result;
}
}
else {
const int k = 0;
const Grid<Real> &a = getArg0();
const Grid<Real> &b = getArg1();
#pragma omp target teams distribute parallel for reduction(+:result) collapse(1) schedule(static,1)
{
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, a, b, result);
}
{
this->result = result;
}
}
} }
const Grid<Real> &a; const Grid<Real> &a;
const Grid<Real> &b; const Grid<Real> &b;
@@ -315,29 +328,21 @@ struct InitSigma : public KernelBase {
return temp; return temp;
} }
typedef Grid<Real> type3; typedef Grid<Real> type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel InitSigma ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, flags, dst, rhs, temp, sigma);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
InitSigma(InitSigma &o, tbb::split) {
: KernelBase(o), flags(o.flags), dst(o.dst), rhs(o.rhs), temp(o.temp), sigma(0) double sigma = 0;
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const InitSigma &o) op(i, flags, dst, rhs, temp, sigma);
{ #pragma omp critical
sigma += o.sigma; {
this->sigma += sigma;
}
}
} }
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &dst; Grid<Real> &dst;
@@ -356,8 +361,9 @@ struct UpdateSearchVec : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<Real> &dst, Grid<Real> &src, Real factor) const inline void op(int i, int j, int k, Grid<Real> &dst, Grid<Real> &src, Real factor)
{ {
const IndexInt idx = dst.index(i, j, k);
dst[idx] = src[idx] + factor * dst[idx]; dst[idx] = src[idx] + factor * dst[idx];
} }
inline Grid<Real> &getArg0() inline Grid<Real> &getArg0()
@@ -375,21 +381,35 @@ struct UpdateSearchVec : public KernelBase {
return factor; return factor;
} }
typedef Real type2; typedef Real type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel UpdateSearchVec ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src, factor);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
Grid<Real> &dst = getArg0();
Grid<Real> &src = getArg1();
Real &factor = getArg2();
#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
{
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, dst, src, factor);
}
}
else {
const int k = 0;
Grid<Real> &dst = getArg0();
Grid<Real> &src = getArg1();
Real &factor = getArg2();
#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
{
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, dst, src, factor);
}
}
} }
Grid<Real> &dst; Grid<Real> &dst;
Grid<Real> &src; Grid<Real> &src;
@@ -406,8 +426,10 @@ GridCg<APPLYMAT>::GridCg(Grid<Real> &dst,
Grid<Real> &search, Grid<Real> &search,
const FlagGrid &flags, const FlagGrid &flags,
Grid<Real> &tmp, Grid<Real> &tmp,
std::vector<Grid<Real> *> matrixAVec, Grid<Real> *pA0,
std::vector<Grid<Real> *> rhsVec) Grid<Real> *pAi,
Grid<Real> *pAj,
Grid<Real> *pAk)
: GridCgInterface(), : GridCgInterface(),
mInited(false), mInited(false),
mIterations(0), mIterations(0),
@@ -417,8 +439,10 @@ GridCg<APPLYMAT>::GridCg(Grid<Real> &dst,
mSearch(search), mSearch(search),
mFlags(flags), mFlags(flags),
mTmp(tmp), mTmp(tmp),
mMatrixA(matrixAVec), mpA0(pA0),
mVecRhs(rhsVec), mpAi(pAi),
mpAj(pAj),
mpAk(pAk),
mPcMethod(PC_None), mPcMethod(PC_None),
mpPCA0(nullptr), mpPCA0(nullptr),
mpPCAi(nullptr), mpPCAi(nullptr),
@@ -436,54 +460,37 @@ template<class APPLYMAT> void GridCg<APPLYMAT>::doInit()
mInited = true; mInited = true;
mIterations = 0; mIterations = 0;
mDst.clear(); mDst.clear(1);
mResidual.copyFrom(mRhs); // p=0, residual = b mResidual.copyFrom(mRhs, true, 1); // p=0, residual = b
if (mPcMethod == PC_ICP) { if (mPcMethod == PC_ICP) {
assertMsg(mDst.is3D(), "ICP only supports 3D grids so far"); // assertMsg(mDst.is3D(), "ICP only supports 3D grids so far");
InitPreconditionIncompCholesky(mFlags, InitPreconditionIncompCholesky(
*mpPCA0, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk);
*mpPCAi, ApplyPreconditionIncompCholesky(
*mpPCAj, mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk);
*mpPCAk,
*mMatrixA[0],
*mMatrixA[1],
*mMatrixA[2],
*mMatrixA[3]);
ApplyPreconditionIncompCholesky(mTmp,
mResidual,
mFlags,
*mpPCA0,
*mpPCAi,
*mpPCAj,
*mpPCAk,
*mMatrixA[0],
*mMatrixA[1],
*mMatrixA[2],
*mMatrixA[3]);
} }
else if (mPcMethod == PC_mICP) { else if (mPcMethod == PC_mICP) {
assertMsg(mDst.is3D(), "mICP only supports 3D grids so far"); // assertMsg(mDst.is3D(), "mICP only supports 3D grids so far");
InitPreconditionModifiedIncompCholesky2( InitPreconditionModifiedIncompCholesky2(mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk);
mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]);
ApplyPreconditionModifiedIncompCholesky2( ApplyPreconditionModifiedIncompCholesky2(
mTmp, mResidual, mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]); mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk);
} }
else if (mPcMethod == PC_MGP) { else if (mPcMethod == PC_MGP) {
InitPreconditionMultigrid( InitPreconditionMultigrid(mMG, *mpA0, *mpAi, *mpAj, *mpAk, mAccuracy);
mMG, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3], mAccuracy);
ApplyPreconditionMultigrid(mMG, mTmp, mResidual); ApplyPreconditionMultigrid(mMG, mTmp, mResidual);
} }
else { else {
mTmp.copyFrom(mResidual); mTmp.copyFrom(mResidual, true, 1);
} }
mSearch.copyFrom(mTmp); mSearch.copyFrom(mTmp, true, 1);
mSigma = GridDotProduct(mTmp, mResidual); mSigma = GridDotProduct(mTmp, mResidual);
} }
template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate() template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate(Real &time)
{ {
auto start = high_resolution_clock::now();
if (!mInited) if (!mInited)
doInit(); doInit();
@@ -493,7 +500,14 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
// this could reinterpret the mpA pointers (not so clean right now) // this could reinterpret the mpA pointers (not so clean right now)
// tmp = applyMat(search) // tmp = applyMat(search)
APPLYMAT(mFlags, mTmp, mSearch, mMatrixA, mVecRhs); APPLYMAT(mFlags, mTmp, mSearch, *mpA0, *mpAi, *mpAj, *mpAk);
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
time += duration.count();
// std::cout << "APPLYMAT Time taken: " << duration.count() << std::endl;
start = high_resolution_clock::now();
// alpha = sigma/dot(tmp, search) // alpha = sigma/dot(tmp, search)
Real dp = GridDotProduct(mTmp, mSearch); Real dp = GridDotProduct(mTmp, mSearch);
@@ -501,35 +515,49 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
if (fabs(dp) > 0.) if (fabs(dp) > 0.)
alpha = mSigma / (Real)dp; alpha = mSigma / (Real)dp;
stop = high_resolution_clock::now();
duration = duration_cast<microseconds>(stop - start);
time += duration.count();
// std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl;
start = high_resolution_clock::now();
gridScaledAdd<Real, Real>(mDst, mSearch, alpha); // dst += search * alpha gridScaledAdd<Real, Real>(mDst, mSearch, alpha); // dst += search * alpha
gridScaledAdd<Real, Real>(mResidual, mTmp, -alpha); // residual += tmp * -alpha gridScaledAdd<Real, Real>(mResidual, mTmp, -alpha); // residual += tmp * -alpha
stop = high_resolution_clock::now();
duration = duration_cast<microseconds>(stop - start);
time += duration.count();
// std::cout << "gridScaledAdd Time taken: " << duration.count() << std::endl;
start = high_resolution_clock::now();
if (mPcMethod == PC_ICP) if (mPcMethod == PC_ICP)
ApplyPreconditionIncompCholesky(mTmp, ApplyPreconditionIncompCholesky(
mResidual, mTmp, mResidual, mFlags, *mpPCA0, *mpPCAi, *mpPCAj, *mpPCAk, *mpA0, *mpAi, *mpAj, *mpAk);
mFlags,
*mpPCA0,
*mpPCAi,
*mpPCAj,
*mpPCAk,
*mMatrixA[0],
*mMatrixA[1],
*mMatrixA[2],
*mMatrixA[3]);
else if (mPcMethod == PC_mICP) else if (mPcMethod == PC_mICP)
ApplyPreconditionModifiedIncompCholesky2( ApplyPreconditionModifiedIncompCholesky2(
mTmp, mResidual, mFlags, *mpPCA0, *mMatrixA[0], *mMatrixA[1], *mMatrixA[2], *mMatrixA[3]); mTmp, mResidual, mFlags, *mpPCA0, *mpA0, *mpAi, *mpAj, *mpAk);
else if (mPcMethod == PC_MGP) else if (mPcMethod == PC_MGP)
ApplyPreconditionMultigrid(mMG, mTmp, mResidual); ApplyPreconditionMultigrid(mMG, mTmp, mResidual);
else else
mTmp.copyFrom(mResidual); mTmp.copyFrom(mResidual, true, 1);
stop = high_resolution_clock::now();
duration = duration_cast<microseconds>(stop - start);
time += duration.count();
// std::cout << "copyFrom Time taken: " << duration.count() << std::endl;
start = high_resolution_clock::now();
// use the l2 norm of the residual for convergence check? (usually max norm is recommended // use the l2 norm of the residual for convergence check? (usually max norm is recommended
// instead) // instead)
if (this->mUseL2Norm) { if (this->mUseL2Norm) {
// std::cout << "USING L2" << std::endl;
mResNorm = GridSumSqr(mResidual).sum; mResNorm = GridSumSqr(mResidual).sum;
} }
else { else {
// std::cout << "NOT USING L2" << std::endl;
mResNorm = mResidual.getMaxAbs(); mResNorm = mResidual.getMaxAbs();
} }
@@ -539,27 +567,43 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
return false; return false;
} }
stop = high_resolution_clock::now();
duration = duration_cast<microseconds>(stop - start);
time += duration.count();
// std::cout << "GridSumSqr Time taken: " << duration.count() << std::endl;
start = high_resolution_clock::now();
Real sigmaNew = GridDotProduct(mTmp, mResidual); Real sigmaNew = GridDotProduct(mTmp, mResidual);
Real beta = sigmaNew / mSigma; Real beta = sigmaNew / mSigma;
stop = high_resolution_clock::now();
duration = duration_cast<microseconds>(stop - start);
time += duration.count();
// std::cout << "GridDotProduct Time taken: " << duration.count() << std::endl;
start = high_resolution_clock::now();
// search = tmp + beta * search // search = tmp + beta * search
UpdateSearchVec(mSearch, mTmp, beta); UpdateSearchVec(mSearch, mTmp, beta);
debMsg("GridCg::iterate i=" << mIterations << " sigmaNew=" << sigmaNew << " sigmaLast=" << mSigma stop = high_resolution_clock::now();
<< " alpha=" << alpha << " beta=" << beta << " ", duration = duration_cast<microseconds>(stop - start);
CG_DEBUGLEVEL); time += duration.count();
// std::cout << "UpdateSearchVec Time taken: " << duration.count() << std::endl;
// debMsg("GridCg::iterate i="<<mIterations<<" sigmaNew="<<sigmaNew<<" sigmaLast="<<mSigma<<"
// alpha="<<alpha<<" beta="<<beta<<" ", CG_DEBUGLEVEL);
mSigma = sigmaNew; mSigma = sigmaNew;
if (!(mResNorm < 1e35)) { if (!(mResNorm < 1e35)) {
if (mPcMethod == PC_MGP) { if (mPcMethod == PC_MGP) {
// diverging solves can be caused by the static multigrid mode, we cannot detect this here, // diverging solves can be caused by the static multigrid mode, we cannot detect this here,
// though only the pressure solve call "knows" whether the MG is static or dynamics... // though only the pressure solve call "knows" whether the MG is static or dynamics...
debMsg( // debMsg("GridCg::iterate: Warning - this diverging solve can be caused by the 'static' mode
"GridCg::iterate: Warning - this diverging solve can be caused by the 'static' mode of " // of the MG preconditioner. If the static mode is active, try switching to dynamic.", 1);
"the MG preconditioner. If the static mode is active, try switching to dynamic.",
1);
} }
errMsg("GridCg::iterate: The CG solver diverged, residual norm > 1e30, stopping."); // errMsg("GridCg::iterate: The CG solver diverged, residual norm > 1e30, stopping.");
} }
// debMsg("PB-CG-Norms::p"<<sqrt( GridOpNormNosqrt(mpDst, mpFlags).getValue() ) <<" // debMsg("PB-CG-Norms::p"<<sqrt( GridOpNormNosqrt(mpDst, mpFlags).getValue() ) <<"
@@ -571,8 +615,9 @@ template<class APPLYMAT> bool GridCg<APPLYMAT>::iterate()
template<class APPLYMAT> void GridCg<APPLYMAT>::solve(int maxIter) template<class APPLYMAT> void GridCg<APPLYMAT>::solve(int maxIter)
{ {
Real time = 0;
for (int iter = 0; iter < maxIter; iter++) { for (int iter = 0; iter < maxIter; iter++) {
if (!iterate()) if (!iterate(time))
iter = maxIter; iter = maxIter;
} }
return; return;
@@ -583,13 +628,13 @@ template<class APPLYMAT>
void GridCg<APPLYMAT>::setICPreconditioner( void GridCg<APPLYMAT>::setICPreconditioner(
PreconditionType method, Grid<Real> *A0, Grid<Real> *Ai, Grid<Real> *Aj, Grid<Real> *Ak) PreconditionType method, Grid<Real> *A0, Grid<Real> *Ai, Grid<Real> *Aj, Grid<Real> *Ak)
{ {
assertMsg(method == PC_ICP || method == PC_mICP, // assertMsg(method==PC_ICP || method==PC_mICP, "GridCg<APPLYMAT>::setICPreconditioner: Invalid
"GridCg<APPLYMAT>::setICPreconditioner: Invalid method specified."); // method specified.");
mPcMethod = method; mPcMethod = method;
if ((!A0->is3D())) { if ((!A0->is3D())) {
if (gPrint2dWarning) { if (gPrint2dWarning) {
debMsg("ICP/mICP pre-conditioning only supported in 3D for now, disabling it.", 1); // debMsg("ICP/mICP pre-conditioning only supported in 3D for now, disabling it.", 1);
gPrint2dWarning = false; gPrint2dWarning = false;
} }
mPcMethod = PC_None; mPcMethod = PC_None;
@@ -603,7 +648,7 @@ void GridCg<APPLYMAT>::setICPreconditioner(
template<class APPLYMAT> template<class APPLYMAT>
void GridCg<APPLYMAT>::setMGPreconditioner(PreconditionType method, GridMg *MG) void GridCg<APPLYMAT>::setMGPreconditioner(PreconditionType method, GridMg *MG)
{ {
assertMsg(method == PC_MGP, "GridCg<APPLYMAT>::setMGPreconditioner: Invalid method specified."); // assertMsg(method==PC_MGP, "GridCg<APPLYMAT>::setMGPreconditioner: Invalid method specified.");
mPcMethod = method; mPcMethod = method;
mMG = MG; mMG = MG;
@@ -612,9 +657,6 @@ void GridCg<APPLYMAT>::setMGPreconditioner(PreconditionType method, GridMg *MG)
// explicit instantiation // explicit instantiation
template class GridCg<ApplyMatrix>; template class GridCg<ApplyMatrix>;
template class GridCg<ApplyMatrix2D>; template class GridCg<ApplyMatrix2D>;
template class GridCg<ApplyMatrixViscosityU>;
template class GridCg<ApplyMatrixViscosityV>;
template class GridCg<ApplyMatrixViscosityW>;
//***************************************************************************** //*****************************************************************************
// diffusion for real and vec grids, e.g. for viscosity // diffusion for real and vec grids, e.g. for viscosity
@@ -655,44 +697,33 @@ void cgSolveDiffusion(const FlagGrid &flags,
} }
} }
GridCgInterface *gcg; GridCgInterface *gcg = nullptr;
// note , no preconditioning for now... // note , no preconditioning for now...
const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4); const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4);
if (grid.getType() & GridBase::TypeReal) { if (grid.getType() & GridBase::TypeReal) {
Grid<Real> &u = ((Grid<Real> &)grid); Grid<Real> &u = ((Grid<Real> &)grid);
rhs.copyFrom(u); rhs.copyFrom(u);
vector<Grid<Real> *> matA{&A0, &Ai, &Aj}; if (flags.is3D())
gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
if (flags.is3D()) { else
matA.push_back(&Ak); gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, matA);
}
else {
gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, matA);
}
gcg->setAccuracy(cgAccuracy); gcg->setAccuracy(cgAccuracy);
gcg->solve(maxIter); gcg->solve(maxIter);
debMsg("FluidSolver::solveDiffusion iterations:" << gcg->getIterations() // debMsg("FluidSolver::solveDiffusion iterations:"<<gcg->getIterations()<<",
<< ", res:" << gcg->getSigma(), // res:"<<gcg->getSigma(), CG_DEBUGLEVEL);
CG_DEBUGLEVEL);
} }
else if ((grid.getType() & GridBase::TypeVec3) || (grid.getType() & GridBase::TypeMAC)) { else if ((grid.getType() & GridBase::TypeVec3) || (grid.getType() & GridBase::TypeMAC)) {
Grid<Vec3> &vec = ((Grid<Vec3> &)grid); Grid<Vec3> &vec = ((Grid<Vec3> &)grid);
Grid<Real> u(parent); Grid<Real> u(parent);
vector<Grid<Real> *> matA{&A0, &Ai, &Aj};
// core solve is same as for a regular real grid // core solve is same as for a regular real grid
if (flags.is3D()) { if (flags.is3D())
matA.push_back(&Ak); gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
gcg = new GridCg<ApplyMatrix>(u, rhs, residual, search, flags, tmp, matA); else
} gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
else {
gcg = new GridCg<ApplyMatrix2D>(u, rhs, residual, search, flags, tmp, matA);
}
gcg->setAccuracy(cgAccuracy); gcg->setAccuracy(cgAccuracy);
// diffuse every component separately // diffuse every component separately
@@ -702,15 +733,14 @@ void cgSolveDiffusion(const FlagGrid &flags,
rhs.copyFrom(u); rhs.copyFrom(u);
gcg->solve(maxIter); gcg->solve(maxIter);
debMsg("FluidSolver::solveDiffusion vec3, iterations:" << gcg->getIterations() // debMsg("FluidSolver::solveDiffusion vec3, iterations:"<<gcg->getIterations()<<",
<< ", res:" << gcg->getSigma(), // res:"<<gcg->getSigma(), CG_DEBUGLEVEL);
CG_DEBUGLEVEL);
setComponent(u, vec, component); setComponent(u, vec, component);
} }
} }
else { else {
errMsg("cgSolveDiffusion: Grid Type is not supported (only Real, Vec3, MAC, or Levelset)"); // errMsg("cgSolveDiffusion: Grid Type is not supported (only Real, Vec3, MAC, or Levelset)");
} }
delete gcg; delete gcg;

View File

@@ -37,7 +37,7 @@ class GridCgInterface {
virtual ~GridCgInterface(){}; virtual ~GridCgInterface(){};
// solving functions // solving functions
virtual bool iterate() = 0; virtual bool iterate(Real &time) = 0;
virtual void solve(int maxIter) = 0; virtual void solve(int maxIter) = 0;
// precond // precond
@@ -78,12 +78,16 @@ template<class APPLYMAT> class GridCg : public GridCgInterface {
Grid<Real> &search, Grid<Real> &search,
const FlagGrid &flags, const FlagGrid &flags,
Grid<Real> &tmp, Grid<Real> &tmp,
std::vector<Grid<Real> *> matrixAVec, Grid<Real> *A0,
std::vector<Grid<Real> *> rhsVec = {}); Grid<Real> *pAi,
~GridCg(){}; Grid<Real> *pAj,
Grid<Real> *pAk);
~GridCg()
{
}
void doInit(); void doInit();
bool iterate(); bool iterate(Real &time);
void solve(int maxIter); void solve(int maxIter);
//! init pointers, and copy values from "normal" matrix //! init pointers, and copy values from "normal" matrix
void setICPreconditioner( void setICPreconditioner(
@@ -129,10 +133,7 @@ template<class APPLYMAT> class GridCg : public GridCgInterface {
const FlagGrid &mFlags; const FlagGrid &mFlags;
Grid<Real> &mTmp; Grid<Real> &mTmp;
//! shape of A matrix defined here (e.g. diagonal, positive neighbor cells, etc) Grid<Real> *mpA0, *mpAi, *mpAj, *mpAk;
std::vector<Grid<Real> *> mMatrixA;
//! shape of rhs vector defined here (e.g. 1 rhs for regular fluids solve, 3 rhs for viscosity)
std::vector<Grid<Real> *> mVecRhs;
PreconditionType mPcMethod; PreconditionType mPcMethod;
//! preconditioning grids //! preconditioning grids
@@ -153,33 +154,32 @@ struct ApplyMatrix : public KernelBase {
ApplyMatrix(const FlagGrid &flags, ApplyMatrix(const FlagGrid &flags,
Grid<Real> &dst, Grid<Real> &dst,
const Grid<Real> &src, const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA, Grid<Real> &A0,
const std::vector<Grid<Real> *> vecRhs) Grid<Real> &Ai,
: KernelBase(&flags, 0), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs) Grid<Real> &Aj,
Grid<Real> &Ak)
: KernelBase(&flags, 0), flags(flags), dst(dst), src(src), A0(A0), Ai(Ai), Aj(Aj), Ak(Ak)
{ {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, inline void op(int i,
int j,
int k,
const FlagGrid &flags, const FlagGrid &flags,
Grid<Real> &dst, Grid<Real> &dst,
const Grid<Real> &src, const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA, Grid<Real> &A0,
const std::vector<Grid<Real> *> vecRhs) const Grid<Real> &Ai,
Grid<Real> &Aj,
Grid<Real> &Ak)
{ {
unusedParameter(vecRhs); // Not needed in this matrix application const IndexInt idx = dst.index(i, j, k);
if (matrixA.size() != 4)
errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
Grid<Real> &A0 = *matrixA[0];
Grid<Real> &Ai = *matrixA[1];
Grid<Real> &Aj = *matrixA[2];
Grid<Real> &Ak = *matrixA[3];
if (!flags.isFluid(idx)) { if (!flags.isFluid(idx)) {
dst[idx] = src[idx]; dst[idx] = src[idx];
return; return;
} }
const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] + dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] +
src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx] + src[idx - Z] * Ak[idx - Z] + src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx] + src[idx - Z] * Ak[idx - Z] +
@@ -200,37 +200,71 @@ struct ApplyMatrix : public KernelBase {
return src; return src;
} }
typedef Grid<Real> type2; typedef Grid<Real> type2;
inline const std::vector<Grid<Real> *> &getArg3() inline Grid<Real> &getArg3()
{ {
return matrixA; return A0;
} }
typedef std::vector<Grid<Real> *> type3; typedef Grid<Real> type3;
inline const std::vector<Grid<Real> *> &getArg4() inline Grid<Real> &getArg4()
{ {
return vecRhs; return Ai;
} }
typedef std::vector<Grid<Real> *> type4; typedef Grid<Real> type4;
void runMessage() inline Grid<Real> &getArg5()
{ {
debMsg("Executing kernel ApplyMatrix ", 3); return Aj;
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, flags, dst, src, matrixA, vecRhs);
} }
typedef Grid<Real> type5;
inline Grid<Real> &getArg6()
{
return Ak;
}
typedef Grid<Real> type6;
void runMessage(){};
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
const FlagGrid &flags = getArg0();
Grid<Real> &dst = getArg1();
const Grid<Real> &src = getArg2();
Grid<Real> &A0 = getArg3();
Grid<Real> &Ai = getArg4();
Grid<Real> &Aj = getArg5();
Grid<Real> &Ak = getArg6();
#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
{
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
}
}
else {
const int k = 0;
const FlagGrid &flags = getArg0();
Grid<Real> &dst = getArg1();
const Grid<Real> &src = getArg2();
Grid<Real> &A0 = getArg3();
Grid<Real> &Ai = getArg4();
Grid<Real> &Aj = getArg5();
Grid<Real> &Ak = getArg6();
#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
{
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
}
}
} }
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &dst; Grid<Real> &dst;
const Grid<Real> &src; const Grid<Real> &src;
const std::vector<Grid<Real> *> matrixA; Grid<Real> &A0;
const std::vector<Grid<Real> *> vecRhs; Grid<Real> &Ai;
Grid<Real> &Aj;
Grid<Real> &Ak;
}; };
//! Kernel: Apply symmetric stored Matrix. 2D version //! Kernel: Apply symmetric stored Matrix. 2D version
@@ -239,32 +273,34 @@ struct ApplyMatrix2D : public KernelBase {
ApplyMatrix2D(const FlagGrid &flags, ApplyMatrix2D(const FlagGrid &flags,
Grid<Real> &dst, Grid<Real> &dst,
const Grid<Real> &src, const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA, Grid<Real> &A0,
const std::vector<Grid<Real> *> vecRhs) Grid<Real> &Ai,
: KernelBase(&flags, 0), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs) Grid<Real> &Aj,
Grid<Real> &Ak)
: KernelBase(&flags, 0), flags(flags), dst(dst), src(src), A0(A0), Ai(Ai), Aj(Aj), Ak(Ak)
{ {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, inline void op(int i,
int j,
int k,
const FlagGrid &flags, const FlagGrid &flags,
Grid<Real> &dst, Grid<Real> &dst,
const Grid<Real> &src, const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA, Grid<Real> &A0,
const std::vector<Grid<Real> *> vecRhs) const Grid<Real> &Ai,
Grid<Real> &Aj,
Grid<Real> &Ak)
{ {
unusedParameter(vecRhs); // Not needed in this matrix application unusedParameter(Ak); // only there for parameter compatibility with ApplyMatrix
if (matrixA.size() != 3)
errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
Grid<Real> &A0 = *matrixA[0];
Grid<Real> &Ai = *matrixA[1];
Grid<Real> &Aj = *matrixA[2];
const IndexInt idx = dst.index(i, j, k);
if (!flags.isFluid(idx)) { if (!flags.isFluid(idx)) {
dst[idx] = src[idx]; dst[idx] = src[idx];
return; return;
} }
const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] + dst[idx] = src[idx] * A0[idx] + src[idx - X] * Ai[idx - X] + src[idx + X] * Ai[idx] +
src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx]; src[idx - Y] * Aj[idx - Y] + src[idx + Y] * Aj[idx];
@@ -284,387 +320,73 @@ struct ApplyMatrix2D : public KernelBase {
return src; return src;
} }
typedef Grid<Real> type2; typedef Grid<Real> type2;
inline const std::vector<Grid<Real> *> &getArg3() inline Grid<Real> &getArg3()
{ {
return matrixA; return A0;
} }
typedef std::vector<Grid<Real> *> type3; typedef Grid<Real> type3;
inline const std::vector<Grid<Real> *> &getArg4() inline Grid<Real> &getArg4()
{ {
return vecRhs; return Ai;
} }
typedef std::vector<Grid<Real> *> type4; typedef Grid<Real> type4;
void runMessage() inline Grid<Real> &getArg5()
{ {
debMsg("Executing kernel ApplyMatrix2D ", 3); return Aj;
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, flags, dst, src, matrixA, vecRhs);
} }
typedef Grid<Real> type5;
inline Grid<Real> &getArg6()
{
return Ak;
}
typedef Grid<Real> type6;
void runMessage(){};
void run() void run()
{
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
}
const FlagGrid &flags;
Grid<Real> &dst;
const Grid<Real> &src;
const std::vector<Grid<Real> *> matrixA;
const std::vector<Grid<Real> *> vecRhs;
};
struct ApplyMatrixViscosityU : public KernelBase {
ApplyMatrixViscosityU(const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA,
const std::vector<Grid<Real> *> vecRhs)
: KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
{
runMessage();
run();
}
inline void op(int i,
int j,
int k,
const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA,
const std::vector<Grid<Real> *> vecRhs) const
{
if (matrixA.size() != 15)
errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
Grid<Real> &A0 = *matrixA[0];
Grid<Real> &Aplusi = *matrixA[1];
Grid<Real> &Aplusj = *matrixA[2];
Grid<Real> &Aplusk = *matrixA[3];
Grid<Real> &Aminusi = *matrixA[4];
Grid<Real> &Aminusj = *matrixA[5];
Grid<Real> &Aminusk = *matrixA[6];
if (vecRhs.size() != 2)
errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step");
Grid<Real> &srcV = *vecRhs[0];
Grid<Real> &srcW = *vecRhs[1];
dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) +
src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) +
src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) +
src(i, j, k - 1) * Aminusk(i, j, k);
dst(i, j, k) += srcV(i, j + 1, k) * (*matrixA[7])(i, j, k) +
srcV(i - 1, j + 1, k) * (*matrixA[8])(i, j, k) +
srcV(i, j, k) * (*matrixA[9])(i, j, k) +
srcV(i - 1, j, k) * (*matrixA[10])(i, j, k) +
srcW(i, j, k + 1) * (*matrixA[11])(i, j, k) +
srcW(i - 1, j, k + 1) * (*matrixA[12])(i, j, k) +
srcW(i, j, k) * (*matrixA[13])(i, j, k) +
srcW(i - 1, j, k) * (*matrixA[14])(i, j, k);
}
inline const FlagGrid &getArg0()
{
return flags;
}
typedef FlagGrid type0;
inline Grid<Real> &getArg1()
{
return dst;
}
typedef Grid<Real> type1;
inline const Grid<Real> &getArg2()
{
return src;
}
typedef Grid<Real> type2;
inline const std::vector<Grid<Real> *> &getArg3()
{
return matrixA;
}
typedef std::vector<Grid<Real> *> type3;
inline const std::vector<Grid<Real> *> &getArg4()
{
return vecRhs;
}
typedef std::vector<Grid<Real> *> type4;
void runMessage()
{
debMsg("Executing kernel ApplyMatrixViscosityU ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++) const FlagGrid &flags = getArg0();
for (int j = 1; j < _maxY; j++) Grid<Real> &dst = getArg1();
for (int i = 1; i < _maxX; i++) const Grid<Real> &src = getArg2();
op(i, j, k, flags, dst, src, matrixA, vecRhs); Grid<Real> &A0 = getArg3();
Grid<Real> &Ai = getArg4();
Grid<Real> &Aj = getArg5();
Grid<Real> &Ak = getArg6();
#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
{
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) const FlagGrid &flags = getArg0();
for (int i = 1; i < _maxX; i++) Grid<Real> &dst = getArg1();
op(i, j, k, flags, dst, src, matrixA, vecRhs); const Grid<Real> &src = getArg2();
Grid<Real> &A0 = getArg3();
Grid<Real> &Ai = getArg4();
Grid<Real> &Aj = getArg5();
Grid<Real> &Ak = getArg6();
#pragma omp target teams distribute parallel for collapse(1) schedule(static, 1)
{
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dst, src, A0, Ai, Aj, Ak);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &dst; Grid<Real> &dst;
const Grid<Real> &src; const Grid<Real> &src;
const std::vector<Grid<Real> *> matrixA; Grid<Real> &A0;
const std::vector<Grid<Real> *> vecRhs; Grid<Real> &Ai;
Grid<Real> &Aj;
Grid<Real> &Ak;
}; };
struct ApplyMatrixViscosityV : public KernelBase {
ApplyMatrixViscosityV(const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA,
const std::vector<Grid<Real> *> vecRhs)
: KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
{
runMessage();
run();
}
inline void op(int i,
int j,
int k,
const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA,
const std::vector<Grid<Real> *> vecRhs) const
{
if (matrixA.size() != 15)
errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
Grid<Real> &A0 = *matrixA[0];
Grid<Real> &Aplusi = *matrixA[1];
Grid<Real> &Aplusj = *matrixA[2];
Grid<Real> &Aplusk = *matrixA[3];
Grid<Real> &Aminusi = *matrixA[4];
Grid<Real> &Aminusj = *matrixA[5];
Grid<Real> &Aminusk = *matrixA[6];
if (vecRhs.size() != 2)
errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step");
Grid<Real> &srcU = *vecRhs[0];
Grid<Real> &srcW = *vecRhs[1];
dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) +
src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) +
src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) +
src(i, j, k - 1) * Aminusk(i, j, k);
dst(i, j, k) += srcU(i + 1, j, k) * (*matrixA[7])(i, j, k) +
srcU(i + 1, j - 1, k) * (*matrixA[8])(i, j, k) +
srcU(i, j, k) * (*matrixA[9])(i, j, k) +
srcU(i, j - 1, k) * (*matrixA[10])(i, j, k) +
srcW(i, j, k + 1) * (*matrixA[11])(i, j, k) +
srcW(i, j - 1, k + 1) * (*matrixA[12])(i, j, k) +
srcW(i, j, k) * (*matrixA[13])(i, j, k) +
srcW(i, j - 1, k) * (*matrixA[14])(i, j, k);
}
inline const FlagGrid &getArg0()
{
return flags;
}
typedef FlagGrid type0;
inline Grid<Real> &getArg1()
{
return dst;
}
typedef Grid<Real> type1;
inline const Grid<Real> &getArg2()
{
return src;
}
typedef Grid<Real> type2;
inline const std::vector<Grid<Real> *> &getArg3()
{
return matrixA;
}
typedef std::vector<Grid<Real> *> type3;
inline const std::vector<Grid<Real> *> &getArg4()
{
return vecRhs;
}
typedef std::vector<Grid<Real> *> type4;
void runMessage()
{
debMsg("Executing kernel ApplyMatrixViscosityV ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, dst, src, matrixA, vecRhs);
}
else {
const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, dst, src, matrixA, vecRhs);
}
}
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags;
Grid<Real> &dst;
const Grid<Real> &src;
const std::vector<Grid<Real> *> matrixA;
const std::vector<Grid<Real> *> vecRhs;
};
struct ApplyMatrixViscosityW : public KernelBase {
ApplyMatrixViscosityW(const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA,
const std::vector<Grid<Real> *> vecRhs)
: KernelBase(&flags, 1), flags(flags), dst(dst), src(src), matrixA(matrixA), vecRhs(vecRhs)
{
runMessage();
run();
}
inline void op(int i,
int j,
int k,
const FlagGrid &flags,
Grid<Real> &dst,
const Grid<Real> &src,
const std::vector<Grid<Real> *> matrixA,
const std::vector<Grid<Real> *> vecRhs) const
{
if (matrixA.size() != 15)
errMsg("ConjugateGrad: Invalid A matrix in apply matrix step");
Grid<Real> &A0 = *matrixA[0];
Grid<Real> &Aplusi = *matrixA[1];
Grid<Real> &Aplusj = *matrixA[2];
Grid<Real> &Aplusk = *matrixA[3];
Grid<Real> &Aminusi = *matrixA[4];
Grid<Real> &Aminusj = *matrixA[5];
Grid<Real> &Aminusk = *matrixA[6];
if (vecRhs.size() != 2)
errMsg("ConjugateGrad: Invalid rhs vector in apply matrix step");
Grid<Real> &srcU = *vecRhs[0];
Grid<Real> &srcV = *vecRhs[1];
dst(i, j, k) = src(i, j, k) * A0(i, j, k) + src(i + 1, j, k) * Aplusi(i, j, k) +
src(i, j + 1, k) * Aplusj(i, j, k) + src(i, j, k + 1) * Aplusk(i, j, k) +
src(i - 1, j, k) * Aminusi(i, j, k) + src(i, j - 1, k) * Aminusj(i, j, k) +
src(i, j, k - 1) * Aminusk(i, j, k);
dst(i, j, k) += srcU(i + 1, j, k) * (*matrixA[7])(i, j, k) +
srcU(i + 1, j, k - 1) * (*matrixA[8])(i, j, k) +
srcU(i, j, k) * (*matrixA[9])(i, j, k) +
srcU(i, j, k - 1) * (*matrixA[10])(i, j, k) +
srcV(i, j + 1, k) * (*matrixA[11])(i, j, k) +
srcV(i, j + 1, k - 1) * (*matrixA[12])(i, j, k) +
srcV(i, j, k) * (*matrixA[13])(i, j, k) +
srcV(i, j, k - 1) * (*matrixA[14])(i, j, k);
}
inline const FlagGrid &getArg0()
{
return flags;
}
typedef FlagGrid type0;
inline Grid<Real> &getArg1()
{
return dst;
}
typedef Grid<Real> type1;
inline const Grid<Real> &getArg2()
{
return src;
}
typedef Grid<Real> type2;
inline const std::vector<Grid<Real> *> &getArg3()
{
return matrixA;
}
typedef std::vector<Grid<Real> *> type3;
inline const std::vector<Grid<Real> *> &getArg4()
{
return vecRhs;
}
typedef std::vector<Grid<Real> *> type4;
void runMessage()
{
debMsg("Executing kernel ApplyMatrixViscosityW ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, dst, src, matrixA, vecRhs);
}
else {
const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, dst, src, matrixA, vecRhs);
}
}
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags;
Grid<Real> &dst;
const Grid<Real> &src;
const std::vector<Grid<Real> *> matrixA;
const std::vector<Grid<Real> *> vecRhs;
};
/* NOTE: Use this template for new matrix application kernels
//! Template for matrix application kernels
KERNEL()
void ApplyMatrixTemplate (const FlagGrid& flags, Grid<Real>& dst, const Grid<Real>& src,
const std::vector<Grid<Real> *> matrixA, const std::vector<Grid<Real> *> vecRhs)
{
// The kernel must define how to use the grids from the matrixA and vecRhs lists
}
*/
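For illustration only (this block is not part of the diff): a minimal, hypothetical instance of the template above, assuming the common 7-entry matrixA layout (A0, Aplus i/j/k, Aminus i/j/k) and an unused vecRhs; the kernel name ApplyMatrixExample is made up for this sketch and kept commented out like the template itself.
/* Illustrative sketch only -- not part of this branch
KERNEL()
void ApplyMatrixExample(const FlagGrid &flags, Grid<Real> &dst, const Grid<Real> &src,
    const std::vector<Grid<Real> *> matrixA, const std::vector<Grid<Real> *> vecRhs)
{
  // flags and vecRhs are accepted for interface compatibility but not read in this variant
  if (matrixA.size() != 7)
    errMsg("ApplyMatrixExample: Invalid A matrix in apply matrix step");
  // standard 7-point stencil: centre entry plus the six axis neighbours
  dst(i, j, k) = src(i, j, k) * (*matrixA[0])(i, j, k) +
                 src(i + 1, j, k) * (*matrixA[1])(i, j, k) +
                 src(i, j + 1, k) * (*matrixA[2])(i, j, k) +
                 src(i, j, k + 1) * (*matrixA[3])(i, j, k) +
                 src(i - 1, j, k) * (*matrixA[4])(i, j, k) +
                 src(i, j - 1, k) * (*matrixA[5])(i, j, k) +
                 src(i, j, k - 1) * (*matrixA[6])(i, j, k);
}
*/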
//! Kernel: Construct the matrix for the poisson equation //! Kernel: Construct the matrix for the poisson equation
struct MakeLaplaceMatrix : public KernelBase { struct MakeLaplaceMatrix : public KernelBase {
@@ -687,7 +409,7 @@ struct MakeLaplaceMatrix : public KernelBase {
Grid<Real> &Ai, Grid<Real> &Ai,
Grid<Real> &Aj, Grid<Real> &Aj,
Grid<Real> &Ak, Grid<Real> &Ak,
const MACGrid *fractions = 0) const const MACGrid *fractions = 0)
{ {
if (!flags.isFluid(i, j, k)) if (!flags.isFluid(i, j, k))
return; return;
@@ -765,37 +487,42 @@ struct MakeLaplaceMatrix : public KernelBase {
return fractions; return fractions;
} }
typedef MACGrid type5; typedef MACGrid type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel MakeLaplaceMatrix ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++) const FlagGrid &flags = getArg0();
for (int j = 1; j < _maxY; j++) Grid<Real> &A0 = getArg1();
for (int i = 1; i < _maxX; i++) Grid<Real> &Ai = getArg2();
op(i, j, k, flags, A0, Ai, Aj, Ak, fractions); Grid<Real> &Aj = getArg3();
Grid<Real> &Ak = getArg4();
const MACGrid *fractions = getArg5();
#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
{
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, A0, Ai, Aj, Ak, fractions);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) const FlagGrid &flags = getArg0();
for (int i = 1; i < _maxX; i++) Grid<Real> &A0 = getArg1();
op(i, j, k, flags, A0, Ai, Aj, Ak, fractions); Grid<Real> &Ai = getArg2();
Grid<Real> &Aj = getArg3();
Grid<Real> &Ak = getArg4();
const MACGrid *fractions = getArg5();
#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
{
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, A0, Ai, Aj, Ak, fractions);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &A0; Grid<Real> &A0;
Grid<Real> &Ai; Grid<Real> &Ai;

View File

@@ -236,13 +236,7 @@ struct SetLevelsetBoundaries : public KernelBase {
return phi; return phi;
} }
typedef Grid<Real> type0; typedef Grid<Real> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel SetLevelsetBoundaries ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void run() void run()
{ {
const int _maxX = maxX; const int _maxX = maxX;
@@ -297,14 +291,8 @@ struct knExtrapolateMACSimple : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, inline void op(
int j, int i, int j, int k, MACGrid &vel, int distance, Grid<int> &tmp, const int d, const int c)
int k,
MACGrid &vel,
int distance,
Grid<int> &tmp,
const int d,
const int c) const
{ {
static const Vec3i nb[6] = {Vec3i(1, 0, 0), static const Vec3i nb[6] = {Vec3i(1, 0, 0),
Vec3i(-1, 0, 0), Vec3i(-1, 0, 0),
@@ -359,37 +347,35 @@ struct knExtrapolateMACSimple : public KernelBase {
return c; return c;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knExtrapolateMACSimple ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, distance, tmp, d, c);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, distance, tmp, d, c);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, distance, tmp, d, c);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, distance, tmp, d, c);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
MACGrid &vel; MACGrid &vel;
int distance; int distance;
Grid<int> &tmp; Grid<int> &tmp;
@@ -405,7 +391,7 @@ struct knExtrapolateIntoBnd : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, const MACGrid &velTmp) const inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, const MACGrid &velTmp)
{ {
int c = 0; int c = 0;
Vec3 v(0, 0, 0); Vec3 v(0, 0, 0);
@@ -467,37 +453,35 @@ struct knExtrapolateIntoBnd : public KernelBase {
return velTmp; return velTmp;
} }
typedef MACGrid type2; typedef MACGrid type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knExtrapolateIntoBnd ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, velTmp);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, velTmp);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, velTmp);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, velTmp);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
FlagGrid &flags; FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
const MACGrid &velTmp; const MACGrid &velTmp;
@@ -538,8 +522,7 @@ struct knUnprojectNormalComp : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op( inline void op(int i, int j, int k, FlagGrid &flags, MACGrid &vel, Grid<Real> &phi, Real maxDist)
int i, int j, int k, FlagGrid &flags, MACGrid &vel, Grid<Real> &phi, Real maxDist) const
{ {
// apply inside, within range near obstacle surface // apply inside, within range near obstacle surface
if (phi(i, j, k) > 0. || phi(i, j, k) < -maxDist) if (phi(i, j, k) > 0. || phi(i, j, k) < -maxDist)
@@ -573,37 +556,35 @@ struct knUnprojectNormalComp : public KernelBase {
return maxDist; return maxDist;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knUnprojectNormalComp ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, phi, maxDist);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, phi, maxDist);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, phi, maxDist);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, phi, maxDist);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
FlagGrid &flags; FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
Grid<Real> &phi; Grid<Real> &phi;
@@ -712,7 +693,7 @@ struct knExtrapolateMACFromWeight : public KernelBase {
Grid<Vec3> &weight, Grid<Vec3> &weight,
int distance, int distance,
const int d, const int d,
const int c) const const int c)
{ {
static const Vec3i nb[6] = {Vec3i(1, 0, 0), static const Vec3i nb[6] = {Vec3i(1, 0, 0),
Vec3i(-1, 0, 0), Vec3i(-1, 0, 0),
@@ -766,37 +747,35 @@ struct knExtrapolateMACFromWeight : public KernelBase {
return c; return c;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knExtrapolateMACFromWeight ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, weight, distance, d, c);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, weight, distance, d, c);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, weight, distance, d, c);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, weight, distance, d, c);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
MACGrid &vel; MACGrid &vel;
Grid<Vec3> &weight; Grid<Vec3> &weight;
int distance; int distance;
@@ -1011,14 +990,8 @@ template<class S> struct knExtrapolateLsSimple : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, inline void op(
int j, int i, int j, int k, Grid<S> &val, int distance, Grid<int> &tmp, const int d, S direction)
int k,
Grid<S> &val,
int distance,
Grid<int> &tmp,
const int d,
S direction) const
{ {
const int dim = (val.is3D() ? 3 : 2); const int dim = (val.is3D() ? 3 : 2);
if (tmp(i, j, k) != 0) if (tmp(i, j, k) != 0)
@@ -1065,37 +1038,35 @@ template<class S> struct knExtrapolateLsSimple : public KernelBase {
return direction; return direction;
} }
typedef S type4; typedef S type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knExtrapolateLsSimple ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, val, distance, tmp, d, direction);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, val, distance, tmp, d, direction);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, val, distance, tmp, d, direction);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, val, distance, tmp, d, direction);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
Grid<S> &val; Grid<S> &val;
int distance; int distance;
Grid<int> &tmp; Grid<int> &tmp;
@@ -1110,7 +1081,7 @@ template<class S> struct knSetRemaining : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, Grid<S> &phi, Grid<int> &tmp, S distance) const inline void op(int i, int j, int k, Grid<S> &phi, Grid<int> &tmp, S distance)
{ {
if (tmp(i, j, k) != 0) if (tmp(i, j, k) != 0)
return; return;
@@ -1131,37 +1102,35 @@ template<class S> struct knSetRemaining : public KernelBase {
return distance; return distance;
} }
typedef S type2; typedef S type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSetRemaining ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, phi, tmp, distance);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, phi, tmp, distance);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, phi, tmp, distance);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, phi, tmp, distance);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
Grid<S> &phi; Grid<S> &phi;
Grid<int> &tmp; Grid<int> &tmp;
S distance; S distance;

View File

@@ -1256,7 +1256,7 @@ struct knQuantize : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<Real> &grid, Real step) const inline void op(IndexInt idx, Grid<Real> &grid, Real step)
{ {
quantizeReal(grid(idx), step); quantizeReal(grid(idx), step);
} }
@@ -1270,21 +1270,17 @@ struct knQuantize : public KernelBase {
return step; return step;
} }
typedef Real type1; typedef Real type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knQuantize ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, grid, step);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, grid, step);
}
} }
Grid<Real> &grid; Grid<Real> &grid;
Real step; Real step;
@@ -1331,7 +1327,7 @@ struct knQuantizeVec3 : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<Vec3> &grid, Real step) const inline void op(IndexInt idx, Grid<Vec3> &grid, Real step)
{ {
for (int c = 0; c < 3; ++c) for (int c = 0; c < 3; ++c)
quantizeReal(grid(idx)[c], step); quantizeReal(grid(idx)[c], step);
@@ -1346,21 +1342,17 @@ struct knQuantizeVec3 : public KernelBase {
return step; return step;
} }
typedef Real type1; typedef Real type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knQuantizeVec3 ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, grid, step);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, grid, step);
}
} }
Grid<Vec3> &grid; Grid<Vec3> &grid;
Real step; Real step;

View File

@@ -66,7 +66,7 @@ void *safeGzopen(const char *filename, const char *mode)
#endif // NO_ZLIB != 1 #endif // NO_ZLIB != 1
} }
#if defined(OPENVDB) #if OPENVDB == 1
// Convert from OpenVDB value to Manta value. // Convert from OpenVDB value to Manta value.
template<class S, class T> void convertFrom(S &in, T *out) template<class S, class T> void convertFrom(S &in, T *out)
{ {

View File

@@ -29,10 +29,10 @@
#if OPENVDB == 1 #if OPENVDB == 1
# include "openvdb/openvdb.h" # include "openvdb/openvdb.h"
# include "openvdb/points/PointConversion.h" # include <openvdb/points/PointConversion.h>
# include "openvdb/points/PointCount.h" # include <openvdb/points/PointCount.h>
# include "openvdb/tools/Clip.h" # include <openvdb/tools/Clip.h>
# include "openvdb/tools/Dense.h" # include <openvdb/tools/Dense.h>
#endif #endif
#define POSITION_NAME "P" #define POSITION_NAME "P"
@@ -433,9 +433,9 @@ int writeObjectsVDB(const string &filename,
debMsg("Writing int grid '" << mantaGrid->getName() << "' to vdb file " << filename, 1); debMsg("Writing int grid '" << mantaGrid->getName() << "' to vdb file " << filename, 1);
Grid<int> *mantaIntGrid = (Grid<int> *)mantaGrid; Grid<int> *mantaIntGrid = (Grid<int> *)mantaGrid;
if (clipGrid && mantaIntGrid->saveSparse()) { if (clipGrid && mantaIntGrid->saveSparse()) {
assertMsg(clipGrid->getSize() == mantaGrid->getSize(), // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and
"writeObjectsVDB: Clip grid and exported grid must have the same size " // exported grid must have the same size " << clipGrid->getSize() << " vs " <<
<< clipGrid->getSize() << " vs " << mantaGrid->getSize()); // mantaGrid->getSize());
} }
vdbGrid = exportVDB<int, openvdb::Int32Grid>(mantaIntGrid, clip, vdbClipGrid); vdbGrid = exportVDB<int, openvdb::Int32Grid>(mantaIntGrid, clip, vdbClipGrid);
gridsVDB.push_back(vdbGrid); gridsVDB.push_back(vdbGrid);
@@ -448,9 +448,9 @@ int writeObjectsVDB(const string &filename,
// Only supply clip grid if real grid is not equal to the clip grid // Only supply clip grid if real grid is not equal to the clip grid
openvdb::FloatGrid::Ptr tmpClipGrid = (mantaRealGrid == clipGrid) ? nullptr : vdbClipGrid; openvdb::FloatGrid::Ptr tmpClipGrid = (mantaRealGrid == clipGrid) ? nullptr : vdbClipGrid;
if (clipGrid && mantaRealGrid->saveSparse()) { if (clipGrid && mantaRealGrid->saveSparse()) {
assertMsg(clipGrid->getSize() == mantaGrid->getSize(), // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and
"writeObjectsVDB: Clip grid and exported grid must have the same size " // exported grid must have the same size " << clipGrid->getSize() << " vs " <<
<< clipGrid->getSize() << " vs " << mantaGrid->getSize()); // mantaGrid->getSize());
} }
vdbGrid = exportVDB<Real, openvdb::FloatGrid>(mantaRealGrid, clip, tmpClipGrid); vdbGrid = exportVDB<Real, openvdb::FloatGrid>(mantaRealGrid, clip, tmpClipGrid);
gridsVDB.push_back(vdbGrid); gridsVDB.push_back(vdbGrid);
@@ -461,9 +461,9 @@ int writeObjectsVDB(const string &filename,
openvdb::GRID_UNKNOWN; openvdb::GRID_UNKNOWN;
Grid<Vec3> *mantaVec3Grid = (Grid<Vec3> *)mantaGrid; Grid<Vec3> *mantaVec3Grid = (Grid<Vec3> *)mantaGrid;
if (clipGrid && mantaVec3Grid->saveSparse()) { if (clipGrid && mantaVec3Grid->saveSparse()) {
assertMsg(clipGrid->getSize() == mantaGrid->getSize(), // assertMsg(clipGrid->getSize() == mantaGrid->getSize(), "writeObjectsVDB: Clip grid and
"writeObjectsVDB: Clip grid and exported grid must have the same size " // exported grid must have the same size " << clipGrid->getSize() << " vs " <<
<< clipGrid->getSize() << " vs " << mantaGrid->getSize()); // mantaGrid->getSize());
} }
vdbGrid = exportVDB<Vec3, openvdb::Vec3SGrid>(mantaVec3Grid, clip, vdbClipGrid); vdbGrid = exportVDB<Vec3, openvdb::Vec3SGrid>(mantaVec3Grid, clip, vdbClipGrid);
gridsVDB.push_back(vdbGrid); gridsVDB.push_back(vdbGrid);
@@ -519,7 +519,7 @@ int writeObjectsVDB(const string &filename,
} }
} }
// Write only if there is at least one grid, optionally write with compression. // Write only if there is at least one grid, optionally write with compression.
if (gridsVDB.size()) { if (gridsVDB.size()) {
int vdb_flags = openvdb::io::COMPRESS_ACTIVE_MASK; int vdb_flags = openvdb::io::COMPRESS_ACTIVE_MASK;
switch (compression) { switch (compression) {
@@ -534,8 +534,7 @@ int writeObjectsVDB(const string &filename,
} }
case COMPRESSION_BLOSC: { case COMPRESSION_BLOSC: {
# if OPENVDB_BLOSC == 1 # if OPENVDB_BLOSC == 1
// Cannot use |= here, causes segfault with blosc 1.5.0 (== recommended version) vdb_flags |= openvdb::io::COMPRESS_BLOSC;
vdb_flags = openvdb::io::COMPRESS_BLOSC;
# else # else
debMsg("OpenVDB was built without Blosc support, using Zip compression instead", 1); debMsg("OpenVDB was built without Blosc support, using Zip compression instead", 1);
vdb_flags |= openvdb::io::COMPRESS_ZIP; vdb_flags |= openvdb::io::COMPRESS_ZIP;
@@ -696,36 +695,28 @@ int readObjectsVDB(const string &filename, std::vector<PbClass *> *objects, floa
// Compare metadata with allocated grid setup. This prevents invalid index access. // Compare metadata with allocated grid setup. This prevents invalid index access.
if (notZero(metaRes) && metaRes != origRes) { if (notZero(metaRes) && metaRes != origRes) {
debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
<< "' has not been read. Meta grid res " << metaRes // Meta grid res " << metaRes << " vs " << origRes << " current grid size", 1);
<< " vs " << origRes << " current grid size",
1);
readFailure++; readFailure++;
break; break;
} }
if (notZero(metaVoxelSize) && metaVoxelSize != voxelSize) { if (notZero(metaVoxelSize) && metaVoxelSize != voxelSize) {
debMsg("readObjectsVDB Warning: Grid '" // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
<< vdbGrid->getName() << "' has not been read. Meta voxel size " // Meta voxel size " << metaVoxelSize << " vs " << voxelSize << " current voxel size", 1);
<< metaVoxelSize << " vs " << voxelSize << " current voxel size",
1);
readFailure++; readFailure++;
break; break;
} }
if (metaBBoxMax.x > origRes.x || metaBBoxMax.y > origRes.y || metaBBoxMax.z > origRes.z) { if (metaBBoxMax.x > origRes.x || metaBBoxMax.y > origRes.y || metaBBoxMax.z > origRes.z) {
debMsg("readObjectsVDB Warning: Grid '" // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
<< vdbGrid->getName() << "' has not been read. Vdb bbox max " << metaBBoxMax // Vdb bbox max " << metaBBoxMax << " vs " << origRes << " current grid size", 1);
<< " vs " << origRes << " current grid size",
1);
readFailure++; readFailure++;
break; break;
} }
const Vec3i origOrigin(0); const Vec3i origOrigin(0);
if (metaBBoxMin.x < origOrigin.x || metaBBoxMin.y < origOrigin.y || if (metaBBoxMin.x < origOrigin.x || metaBBoxMin.y < origOrigin.y ||
metaBBoxMin.z < origOrigin.z) { metaBBoxMin.z < origOrigin.z) {
debMsg("readObjectsVDB Warning: Grid '" // debMsg("readObjectsVDB Warning: Grid '" << vdbGrid->getName() << "' has not been read.
<< vdbGrid->getName() << "' has not been read. Vdb bbox min " << metaBBoxMin // Vdb bbox min " << metaBBoxMin << " vs " << origOrigin << " current grid origin", 1);
<< " vs " << origOrigin << " current grid origin",
1);
readFailure++; readFailure++;
break; break;
} }

View File

@@ -384,7 +384,6 @@ class FluidSolver : public PbClass {
GridStorage<Real> mGrids4dReal; GridStorage<Real> mGrids4dReal;
GridStorage<Vec3> mGrids4dVec; GridStorage<Vec3> mGrids4dVec;
GridStorage<Vec4> mGrids4dVec4; GridStorage<Vec4> mGrids4dVec4;
public: public:
PbArgs _args; PbArgs _args;
} }

View File

@@ -242,39 +242,6 @@ inline bool c_isnan(float c)
return d != d; return d != d;
} }
//! Swap so that a<b
template<class T> inline void sort(T &a, T &b)
{
if (a > b)
std::swap(a, b);
}
//! Swap so that a<b<c
template<class T> inline void sort(T &a, T &b, T &c)
{
if (a > b)
std::swap(a, b);
if (a > c)
std::swap(a, c);
if (b > c)
std::swap(b, c);
}
//! Swap so that a<b<c<d
template<class T> inline void sort(T &a, T &b, T &c, T &d)
{
if (a > b)
std::swap(a, b);
if (c > d)
std::swap(c, d);
if (a > c)
std::swap(a, c);
if (b > d)
std::swap(b, d);
if (b > c)
std::swap(b, c);
}
} // namespace Manta } // namespace Manta
#endif #endif

View File

@@ -1,3 +1,3 @@
#define MANTA_GIT_VERSION "commit d5d9a6c28daa8f21426d7a285f48639c0d8fd13f" #define MANTA_GIT_VERSION "commit 39b7a415721ecbf6643612a24e8eadd221aeb934"

File diff suppressed because it is too large

View File

@@ -383,13 +383,15 @@ class GridBase : public PbClass {
} }
} }
protected: // TODO (sebbas): Moved attributes to public for now
GridType mType; GridType mType;
Vec3i mSize; Vec3i mSize;
Real mDx; Real mDx;
bool m3D; // precomputed Z shift: to ensure 2D compatibility, always use this instead of sx*sy ! bool m3D;
// precomputed Z shift: to ensure 2D compatibility, always use this instead of sx*sy !
IndexInt mStrideZ; IndexInt mStrideZ;
protected:
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -401,7 +403,7 @@ class GridBase : public PbClass {
template<class T> class Grid : public GridBase { template<class T> class Grid : public GridBase {
public: public:
//! init new grid, values are set to zero //! init new grid, values are set to zero
Grid(FluidSolver *parent, bool show = true, bool sparse = false); Grid(FluidSolver *parent, bool show = true, bool sparse = false, bool offload = false);
static int _W_10(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static int _W_10(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
PbClass *obj = Pb::objFromPy(_self); PbClass *obj = Pb::objFromPy(_self);
@@ -416,7 +418,8 @@ template<class T> class Grid : public GridBase {
FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock); FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock);
bool show = _args.getOpt<bool>("show", 1, true, &_lock); bool show = _args.getOpt<bool>("show", 1, true, &_lock);
bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock); bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock);
obj = new Grid(parent, show, sparse); bool offload = _args.getOpt<bool>("offload", 3, false, &_lock);
obj = new Grid(parent, show, sparse, offload);
obj->registerObject(_self, &_args); obj->registerObject(_self, &_args);
_args.check(); _args.check();
} }
@@ -490,7 +493,7 @@ template<class T> class Grid : public GridBase {
} }
//! set all cells to zero //! set all cells to zero
void clear(); void clear(bool isOmp = false);
static PyObject *_W_13(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_13(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
@@ -501,9 +504,10 @@ template<class T> class Grid : public GridBase {
PyObject *_retval = nullptr; PyObject *_retval = nullptr;
{ {
ArgLocker _lock; ArgLocker _lock;
bool isOmp = _args.getOpt<bool>("isOmp", 0, false, &_lock);
pbo->_args.copy(_args); pbo->_args.copy(_args);
_retval = getPyNone(); _retval = getPyNone();
pbo->clear(); pbo->clear(isOmp);
pbo->_args.check(); pbo->_args.check();
} }
pbFinalizePlugin(pbo->getParent(), "Grid::clear", !noTiming); pbFinalizePlugin(pbo->getParent(), "Grid::clear", !noTiming);
@@ -591,6 +595,11 @@ template<class T> class Grid : public GridBase {
{ {
return mSaveSparse; return mSaveSparse;
} }
//! query if this grid should also be allocated on an OpenMP offload device
inline bool isOffload()
{
return mOffload;
}
//! set data //! set data
inline void set(int i, int j, int k, T &val) inline void set(int i, int j, int k, T &val)
@@ -616,9 +625,8 @@ template<class T> class Grid : public GridBase {
return interpol<T>(mData, mSize, mStrideZ, pos); return interpol<T>(mData, mSize, mStrideZ, pos);
case 2: case 2:
return interpolCubic<T>(mData, mSize, mStrideZ, pos); return interpolCubic<T>(mData, mSize, mStrideZ, pos);
default:
assertMsg(false, "Unknown interpolation order " << order);
} }
// default: assertMsg(false, "Unknown interpolation order "<<order); }
return T(0.); // should never be reached, just to prevent compiler warnings return T(0.); // should never be reached, just to prevent compiler warnings
} }
@@ -628,7 +636,7 @@ template<class T> class Grid : public GridBase {
//! content... //! content...
// Grid<T>& operator=(const Grid<T>& a); // Grid<T>& operator=(const Grid<T>& a);
//! copy content from other grid (use this one instead of operator= !) //! copy content from other grid (use this one instead of operator= !)
Grid<T> &copyFrom(const Grid<T> &a, bool copyType = true); Grid<T> &copyFrom(const Grid<T> &a, bool copyType = true, bool isOmp = false);
static PyObject *_W_14(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_14(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
@@ -641,8 +649,9 @@ template<class T> class Grid : public GridBase {
ArgLocker _lock; ArgLocker _lock;
const Grid<T> &a = *_args.getPtr<Grid<T>>("a", 0, &_lock); const Grid<T> &a = *_args.getPtr<Grid<T>>("a", 0, &_lock);
bool copyType = _args.getOpt<bool>("copyType", 1, true, &_lock); bool copyType = _args.getOpt<bool>("copyType", 1, true, &_lock);
bool isOmp = _args.getOpt<bool>("isOmp", 2, false, &_lock);
pbo->_args.copy(_args); pbo->_args.copy(_args);
_retval = toPy(pbo->copyFrom(a, copyType)); _retval = toPy(pbo->copyFrom(a, copyType, isOmp));
pbo->_args.check(); pbo->_args.check();
} }
pbFinalizePlugin(pbo->getParent(), "Grid::copyFrom", !noTiming); pbFinalizePlugin(pbo->getParent(), "Grid::copyFrom", !noTiming);
@@ -1276,6 +1285,157 @@ template<class T> class Grid : public GridBase {
} }
} }
//! OpenMP data mapping from / to target device
void mapToOmp();
static PyObject *_W_38(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
pbPreparePlugin(pbo->getParent(), "Grid::mapToOmp", !noTiming);
PyObject *_retval = nullptr;
{
ArgLocker _lock;
pbo->_args.copy(_args);
_retval = getPyNone();
pbo->mapToOmp();
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::mapToOmp", !noTiming);
return _retval;
}
catch (std::exception &e) {
pbSetError("Grid::mapToOmp", e.what());
return 0;
}
}
void mapFromOmp();
static PyObject *_W_39(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
pbPreparePlugin(pbo->getParent(), "Grid::mapFromOmp", !noTiming);
PyObject *_retval = nullptr;
{
ArgLocker _lock;
pbo->_args.copy(_args);
_retval = getPyNone();
pbo->mapFromOmp();
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::mapFromOmp", !noTiming);
return _retval;
}
catch (std::exception &e) {
pbSetError("Grid::mapFromOmp", e.what());
return 0;
}
}
void mapAllocOmp();
static PyObject *_W_40(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
pbPreparePlugin(pbo->getParent(), "Grid::mapAllocOmp", !noTiming);
PyObject *_retval = nullptr;
{
ArgLocker _lock;
pbo->_args.copy(_args);
_retval = getPyNone();
pbo->mapAllocOmp();
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::mapAllocOmp", !noTiming);
return _retval;
}
catch (std::exception &e) {
pbSetError("Grid::mapAllocOmp", e.what());
return 0;
}
}
void mapDeleteOmp();
static PyObject *_W_41(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
pbPreparePlugin(pbo->getParent(), "Grid::mapDeleteOmp", !noTiming);
PyObject *_retval = nullptr;
{
ArgLocker _lock;
pbo->_args.copy(_args);
_retval = getPyNone();
pbo->mapDeleteOmp();
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::mapDeleteOmp", !noTiming);
return _retval;
}
catch (std::exception &e) {
pbSetError("Grid::mapDeleteOmp", e.what());
return 0;
}
}
void updateToOmp();
static PyObject *_W_42(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
pbPreparePlugin(pbo->getParent(), "Grid::updateToOmp", !noTiming);
PyObject *_retval = nullptr;
{
ArgLocker _lock;
pbo->_args.copy(_args);
_retval = getPyNone();
pbo->updateToOmp();
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::updateToOmp", !noTiming);
return _retval;
}
catch (std::exception &e) {
pbSetError("Grid::updateToOmp", e.what());
return 0;
}
}
void updateFromOmp();
static PyObject *_W_43(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
Grid *pbo = dynamic_cast<Grid *>(Pb::objFromPy(_self));
bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
pbPreparePlugin(pbo->getParent(), "Grid::updateFromOmp", !noTiming);
PyObject *_retval = nullptr;
{
ArgLocker _lock;
pbo->_args.copy(_args);
_retval = getPyNone();
pbo->updateFromOmp();
pbo->_args.check();
}
pbFinalizePlugin(pbo->getParent(), "Grid::updateFromOmp", !noTiming);
return _retval;
}
catch (std::exception &e) {
pbSetError("Grid::updateFromOmp", e.what());
return 0;
}
}
// c++ only operators // c++ only operators
template<class S> Grid<T> &operator+=(const Grid<S> &a); template<class S> Grid<T> &operator+=(const Grid<S> &a);
template<class S> Grid<T> &operator+=(const S &a); template<class S> Grid<T> &operator+=(const S &a);
@@ -1299,10 +1459,12 @@ template<class T> class Grid : public GridBase {
return mData[index(i, j, k)]; return mData[index(i, j, k)];
} }
protected: // TODO (sebbas): Moved attributes to public for now
T *mData; T *mData;
protected:
bool mExternalData; // True if mData is managed outside of the Fluidsolver bool mExternalData; // True if mData is managed outside of the Fluidsolver
bool mSaveSparse; // True if this grid may be cached in a sparse structure bool mSaveSparse; // True if this grid may be cached in a sparse structure
bool mOffload; // True if this grid shall be allocated on an OpenMP offload device too
public: public:
PbArgs _args; PbArgs _args;
} }
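A hedged sketch (not taken from this branch) of how the mapToOmp / mapAllocOmp / updateFromOmp family declared above could be implemented: map the flat grid buffer mData as an OpenMP array section sized from mSize. Only the member names mData and mSize come from the class above; the bodies themselves are assumptions about the suppressed grid.cpp diff.
template<class T> void Grid<T>::mapAllocOmp()
{
  // reserve device storage for the grid buffer without copying host data
#pragma omp target enter data map(alloc : mData[0 : mSize.x * mSize.y * mSize.z])
}
template<class T> void Grid<T>::mapToOmp()
{
  // allocate on the device and copy host -> device
#pragma omp target enter data map(to : mData[0 : mSize.x * mSize.y * mSize.z])
}
template<class T> void Grid<T>::updateFromOmp()
{
  // refresh the host copy from an already-mapped device buffer
#pragma omp target update from(mData[0 : mSize.x * mSize.y * mSize.z])
}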
@@ -1314,12 +1476,12 @@ template<class T> class Grid : public GridBase {
//! Special function for staggered grids //! Special function for staggered grids
class MACGrid : public Grid<Vec3> { class MACGrid : public Grid<Vec3> {
public: public:
MACGrid(FluidSolver *parent, bool show = true, bool sparse = false) MACGrid(FluidSolver *parent, bool show = true, bool sparse = false, bool offload = false)
: Grid<Vec3>(parent, show, sparse) : Grid<Vec3>(parent, show, sparse, offload)
{ {
mType = (GridType)(TypeMAC | TypeVec3); mType = (GridType)(TypeMAC | TypeVec3);
} }
static int _W_38(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static int _W_44(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
PbClass *obj = Pb::objFromPy(_self); PbClass *obj = Pb::objFromPy(_self);
if (obj) if (obj)
@@ -1333,7 +1495,8 @@ class MACGrid : public Grid<Vec3> {
FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock); FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock);
bool show = _args.getOpt<bool>("show", 1, true, &_lock); bool show = _args.getOpt<bool>("show", 1, true, &_lock);
bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock); bool sparse = _args.getOpt<bool>("sparse", 2, false, &_lock);
obj = new MACGrid(parent, show, sparse); bool offload = _args.getOpt<bool>("offload", 3, false, &_lock);
obj = new MACGrid(parent, show, sparse, offload);
obj->registerObject(_self, &_args); obj->registerObject(_self, &_args);
_args.check(); _args.check();
} }
@@ -1392,17 +1555,16 @@ class MACGrid : public Grid<Vec3> {
case 1: case 1:
return interpolComponent<comp>(mData, mSize, mStrideZ, pos); return interpolComponent<comp>(mData, mSize, mStrideZ, pos);
case 2: case 2:
return interpolCubicMAC(mData, mSize, mStrideZ, pos)[comp]; // warning - not yet optimized return interpolCubicMAC(mData, mSize, mStrideZ, pos)[comp];
default: } // warning - not yet optimized
assertMsg(false, "Unknown interpolation order " << order); // default: assertMsg(false, "Unknown interpolation order "<<order); }
}
return 0.; // should never be reached, just to prevent compiler warnings return 0.; // should never be reached, just to prevent compiler warnings
} }
//! set all boundary cells of a MAC grid to certain value (Dirichlet). Respects staggered grid //! set all boundary cells of a MAC grid to certain value (Dirichlet). Respects staggered grid
//! locations optionally, only set normal components //! locations optionally, only set normal components
void setBoundMAC(Vec3 value, int boundaryWidth, bool normalOnly = false); void setBoundMAC(Vec3 value, int boundaryWidth, bool normalOnly = false);
static PyObject *_W_39(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_45(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
PbArgs _args(_linargs, _kwds); PbArgs _args(_linargs, _kwds);
@@ -1439,12 +1601,16 @@ class MACGrid : public Grid<Vec3> {
//! Special functions for FlagGrid //! Special functions for FlagGrid
class FlagGrid : public Grid<int> { class FlagGrid : public Grid<int> {
public: public:
FlagGrid(FluidSolver *parent, int dim = 3, bool show = true, bool sparse = false) FlagGrid(FluidSolver *parent,
: Grid<int>(parent, show, sparse) int dim = 3,
bool show = true,
bool sparse = false,
bool offload = false)
: Grid<int>(parent, show, sparse, offload)
{ {
mType = (GridType)(TypeFlags | TypeInt); mType = (GridType)(TypeFlags | TypeInt);
} }
static int _W_40(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static int _W_46(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
PbClass *obj = Pb::objFromPy(_self); PbClass *obj = Pb::objFromPy(_self);
if (obj) if (obj)
@@ -1459,7 +1625,8 @@ class FlagGrid : public Grid<int> {
int dim = _args.getOpt<int>("dim", 1, 3, &_lock); int dim = _args.getOpt<int>("dim", 1, 3, &_lock);
bool show = _args.getOpt<bool>("show", 2, true, &_lock); bool show = _args.getOpt<bool>("show", 2, true, &_lock);
bool sparse = _args.getOpt<bool>("sparse", 3, false, &_lock); bool sparse = _args.getOpt<bool>("sparse", 3, false, &_lock);
obj = new FlagGrid(parent, dim, show, sparse); bool offload = _args.getOpt<bool>("offload", 4, false, &_lock);
obj = new FlagGrid(parent, dim, show, sparse, offload);
obj->registerObject(_self, &_args); obj->registerObject(_self, &_args);
_args.check(); _args.check();
} }
@@ -1625,7 +1792,7 @@ class FlagGrid : public Grid<int> {
const std::string &inflow = " ", const std::string &inflow = " ",
const std::string &outflow = " ", const std::string &outflow = " ",
Grid<Real> *phiWalls = 0x00); Grid<Real> *phiWalls = 0x00);
static PyObject *_W_41(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_47(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
PbArgs _args(_linargs, _kwds); PbArgs _args(_linargs, _kwds);
@@ -1659,7 +1826,7 @@ class FlagGrid : public Grid<int> {
//! set fluid flags inside levelset (liquids) //! set fluid flags inside levelset (liquids)
void updateFromLevelset(LevelsetGrid &levelset); void updateFromLevelset(LevelsetGrid &levelset);
static PyObject *_W_42(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_48(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
PbArgs _args(_linargs, _kwds); PbArgs _args(_linargs, _kwds);
@@ -1686,7 +1853,7 @@ class FlagGrid : public Grid<int> {
//! set all cells (except obs/in/outflow) to type (fluid by default) //! set all cells (except obs/in/outflow) to type (fluid by default)
void fillGrid(int type = TypeFluid); void fillGrid(int type = TypeFluid);
static PyObject *_W_43(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_49(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
PbArgs _args(_linargs, _kwds); PbArgs _args(_linargs, _kwds);
@@ -1715,7 +1882,7 @@ class FlagGrid : public Grid<int> {
//! warning for large grids! only regular int returned (due to python interface) //! warning for large grids! only regular int returned (due to python interface)
//! optionally creates mask in RealGrid (1 where flag matches, 0 otherwise) //! optionally creates mask in RealGrid (1 where flag matches, 0 otherwise)
int countCells(int flag, int bnd = 0, Grid<Real> *mask = nullptr); int countCells(int flag, int bnd = 0, Grid<Real> *mask = nullptr);
static PyObject *_W_44(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_50(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
PbArgs _args(_linargs, _kwds); PbArgs _args(_linargs, _kwds);
@@ -1868,7 +2035,7 @@ template<class T, class S> struct gridAdd : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{ {
me[idx] += other[idx]; me[idx] += other[idx];
} }
@@ -1882,21 +2049,17 @@ template<class T, class S> struct gridAdd : public KernelBase {
return other; return other;
} }
typedef Grid<S> type1; typedef Grid<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridAdd ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid<T> &me; Grid<T> &me;
const Grid<S> &other; const Grid<S> &other;
@@ -1907,7 +2070,7 @@ template<class T, class S> struct gridSub : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{ {
me[idx] -= other[idx]; me[idx] -= other[idx];
} }
@@ -1921,21 +2084,17 @@ template<class T, class S> struct gridSub : public KernelBase {
return other; return other;
} }
typedef Grid<S> type1; typedef Grid<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridSub ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid<T> &me; Grid<T> &me;
const Grid<S> &other; const Grid<S> &other;
@@ -1946,7 +2105,7 @@ template<class T, class S> struct gridMult : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{ {
me[idx] *= other[idx]; me[idx] *= other[idx];
} }
@@ -1960,21 +2119,17 @@ template<class T, class S> struct gridMult : public KernelBase {
return other; return other;
} }
typedef Grid<S> type1; typedef Grid<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridMult ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid<T> &me; Grid<T> &me;
const Grid<S> &other; const Grid<S> &other;
@@ -1985,7 +2140,7 @@ template<class T, class S> struct gridDiv : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other) const inline void op(IndexInt idx, Grid<T> &me, const Grid<S> &other)
{ {
me[idx] /= other[idx]; me[idx] /= other[idx];
} }
@@ -1999,21 +2154,17 @@ template<class T, class S> struct gridDiv : public KernelBase {
return other; return other;
} }
typedef Grid<S> type1; typedef Grid<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridDiv ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid<T> &me; Grid<T> &me;
const Grid<S> &other; const Grid<S> &other;
@@ -2024,7 +2175,7 @@ template<class T, class S> struct gridAddScalar : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &me, const S &other) const inline void op(IndexInt idx, Grid<T> &me, const S &other)
{ {
me[idx] += other; me[idx] += other;
} }
@@ -2038,21 +2189,17 @@ template<class T, class S> struct gridAddScalar : public KernelBase {
return other; return other;
} }
typedef S type1; typedef S type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridAddScalar ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid<T> &me; Grid<T> &me;
const S &other; const S &other;
@@ -2063,7 +2210,7 @@ template<class T, class S> struct gridMultScalar : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &me, const S &other) const inline void op(IndexInt idx, Grid<T> &me, const S &other)
{ {
me[idx] *= other; me[idx] *= other;
} }
@@ -2077,25 +2224,22 @@ template<class T, class S> struct gridMultScalar : public KernelBase {
return other; return other;
} }
typedef S type1; typedef S type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridMultScalar ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid<T> &me; Grid<T> &me;
const S &other; const S &other;
}; };
template<class T, class S> struct gridScaledAdd : public KernelBase { template<class T, class S> struct gridScaledAdd : public KernelBase {
gridScaledAdd(Grid<T> &me, const Grid<T> &other, const S &factor) gridScaledAdd(Grid<T> &me, const Grid<T> &other, const S &factor)
: KernelBase(&me, 0), me(me), other(other), factor(factor) : KernelBase(&me, 0), me(me), other(other), factor(factor)
@@ -2103,8 +2247,9 @@ template<class T, class S> struct gridScaledAdd : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &me, const Grid<T> &other, const S &factor) const inline void op(int i, int j, int k, Grid<T> &me, const Grid<T> &other, const S &factor)
{ {
const IndexInt idx = me.index(i, j, k);
me[idx] += factor * other[idx]; me[idx] += factor * other[idx];
} }
inline Grid<T> &getArg0() inline Grid<T> &getArg0()
@@ -2122,21 +2267,35 @@ template<class T, class S> struct gridScaledAdd : public KernelBase {
return factor; return factor;
} }
typedef S type2; typedef S type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridScaledAdd ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other, factor);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const int _maxX = maxX;
const int _maxY = maxY;
if (maxZ > 1) {
Grid<T> &me = getArg0();
const Grid<T> &other = getArg1();
const S &factor = getArg2();
#pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
{
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, me, other, factor);
}
}
else {
const int k = 0;
Grid<T> &me = getArg0();
const Grid<T> &other = getArg1();
const S &factor = getArg2();
#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
{
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, me, other, factor);
}
}
} }
Grid<T> &me; Grid<T> &me;
const Grid<T> &other; const Grid<T> &other;
@@ -2149,7 +2308,7 @@ template<class T> struct gridSetConst : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<T> &grid, T value) const inline void op(IndexInt idx, Grid<T> &grid, T value)
{ {
grid[idx] = value; grid[idx] = value;
} }
@@ -2163,21 +2322,17 @@ template<class T> struct gridSetConst : public KernelBase {
return value; return value;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel gridSetConst ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, grid, value);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, grid, value);
}
} }
Grid<T> &grid; Grid<T> &grid;
T value; T value;
@@ -2279,7 +2434,7 @@ template<class S> struct knInterpolateGridTempl : public KernelBase {
const Grid<S> &source, const Grid<S> &source,
const Vec3 &sourceFactor, const Vec3 &sourceFactor,
Vec3 offset, Vec3 offset,
int orderSpace = 1) const int orderSpace = 1)
{ {
Vec3 pos = Vec3(i, j, k) * sourceFactor + offset; Vec3 pos = Vec3(i, j, k) * sourceFactor + offset;
if (!source.is3D()) if (!source.is3D())
@@ -2311,37 +2466,35 @@ template<class S> struct knInterpolateGridTempl : public KernelBase {
return orderSpace; return orderSpace;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knInterpolateGridTempl ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, target, source, sourceFactor, offset, orderSpace);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, target, source, sourceFactor, offset, orderSpace);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, target, source, sourceFactor, offset, orderSpace);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, target, source, sourceFactor, offset, orderSpace);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<S> &target; Grid<S> &target;
const Grid<S> &source; const Grid<S> &source;
const Vec3 &sourceFactor; const Vec3 &sourceFactor;

View File

@@ -8,11 +8,11 @@ namespace Manta {
#ifdef _C_FlagGrid #ifdef _C_FlagGrid
static const Pb::Register _R_26("FlagGrid", "FlagGrid", "Grid<int>"); static const Pb::Register _R_26("FlagGrid", "FlagGrid", "Grid<int>");
template<> const char *Namify<FlagGrid>::S = "FlagGrid"; template<> const char *Namify<FlagGrid>::S = "FlagGrid";
static const Pb::Register _R_27("FlagGrid", "FlagGrid", FlagGrid::_W_40); static const Pb::Register _R_27("FlagGrid", "FlagGrid", FlagGrid::_W_46);
static const Pb::Register _R_28("FlagGrid", "initDomain", FlagGrid::_W_41); static const Pb::Register _R_28("FlagGrid", "initDomain", FlagGrid::_W_47);
static const Pb::Register _R_29("FlagGrid", "updateFromLevelset", FlagGrid::_W_42); static const Pb::Register _R_29("FlagGrid", "updateFromLevelset", FlagGrid::_W_48);
static const Pb::Register _R_30("FlagGrid", "fillGrid", FlagGrid::_W_43); static const Pb::Register _R_30("FlagGrid", "fillGrid", FlagGrid::_W_49);
static const Pb::Register _R_31("FlagGrid", "countCells", FlagGrid::_W_44); static const Pb::Register _R_31("FlagGrid", "countCells", FlagGrid::_W_50);
#endif #endif
#ifdef _C_Grid #ifdef _C_Grid
static const Pb::Register _R_32("Grid<int>", "Grid<int>", "GridBase"); static const Pb::Register _R_32("Grid<int>", "Grid<int>", "GridBase");
@@ -45,86 +45,104 @@ static const Pb::Register _R_57("Grid<int>", "setBound", Grid<int>::_W_34);
static const Pb::Register _R_58("Grid<int>", "setBoundNeumann", Grid<int>::_W_35); static const Pb::Register _R_58("Grid<int>", "setBoundNeumann", Grid<int>::_W_35);
static const Pb::Register _R_59("Grid<int>", "getDataPointer", Grid<int>::_W_36); static const Pb::Register _R_59("Grid<int>", "getDataPointer", Grid<int>::_W_36);
static const Pb::Register _R_60("Grid<int>", "printGrid", Grid<int>::_W_37); static const Pb::Register _R_60("Grid<int>", "printGrid", Grid<int>::_W_37);
static const Pb::Register _R_61("Grid<Real>", "Grid<Real>", "GridBase"); static const Pb::Register _R_61("Grid<int>", "mapToOmp", Grid<int>::_W_38);
static const Pb::Register _R_62("Grid<int>", "mapFromOmp", Grid<int>::_W_39);
static const Pb::Register _R_63("Grid<int>", "mapAllocOmp", Grid<int>::_W_40);
static const Pb::Register _R_64("Grid<int>", "mapDeleteOmp", Grid<int>::_W_41);
static const Pb::Register _R_65("Grid<int>", "updateToOmp", Grid<int>::_W_42);
static const Pb::Register _R_66("Grid<int>", "updateFromOmp", Grid<int>::_W_43);
static const Pb::Register _R_67("Grid<Real>", "Grid<Real>", "GridBase");
template<> const char *Namify<Grid<Real>>::S = "Grid<Real>"; template<> const char *Namify<Grid<Real>>::S = "Grid<Real>";
static const Pb::Register _R_62("Grid<Real>", "Grid", Grid<Real>::_W_10); static const Pb::Register _R_68("Grid<Real>", "Grid", Grid<Real>::_W_10);
static const Pb::Register _R_63("Grid<Real>", "save", Grid<Real>::_W_11); static const Pb::Register _R_69("Grid<Real>", "save", Grid<Real>::_W_11);
static const Pb::Register _R_64("Grid<Real>", "load", Grid<Real>::_W_12); static const Pb::Register _R_70("Grid<Real>", "load", Grid<Real>::_W_12);
static const Pb::Register _R_65("Grid<Real>", "clear", Grid<Real>::_W_13); static const Pb::Register _R_71("Grid<Real>", "clear", Grid<Real>::_W_13);
static const Pb::Register _R_66("Grid<Real>", "copyFrom", Grid<Real>::_W_14); static const Pb::Register _R_72("Grid<Real>", "copyFrom", Grid<Real>::_W_14);
static const Pb::Register _R_67("Grid<Real>", "getGridType", Grid<Real>::_W_15); static const Pb::Register _R_73("Grid<Real>", "getGridType", Grid<Real>::_W_15);
static const Pb::Register _R_68("Grid<Real>", "add", Grid<Real>::_W_16); static const Pb::Register _R_74("Grid<Real>", "add", Grid<Real>::_W_16);
static const Pb::Register _R_69("Grid<Real>", "sub", Grid<Real>::_W_17); static const Pb::Register _R_75("Grid<Real>", "sub", Grid<Real>::_W_17);
static const Pb::Register _R_70("Grid<Real>", "setConst", Grid<Real>::_W_18); static const Pb::Register _R_76("Grid<Real>", "setConst", Grid<Real>::_W_18);
static const Pb::Register _R_71("Grid<Real>", "addConst", Grid<Real>::_W_19); static const Pb::Register _R_77("Grid<Real>", "addConst", Grid<Real>::_W_19);
static const Pb::Register _R_72("Grid<Real>", "addScaled", Grid<Real>::_W_20); static const Pb::Register _R_78("Grid<Real>", "addScaled", Grid<Real>::_W_20);
static const Pb::Register _R_73("Grid<Real>", "mult", Grid<Real>::_W_21); static const Pb::Register _R_79("Grid<Real>", "mult", Grid<Real>::_W_21);
static const Pb::Register _R_74("Grid<Real>", "multConst", Grid<Real>::_W_22); static const Pb::Register _R_80("Grid<Real>", "multConst", Grid<Real>::_W_22);
static const Pb::Register _R_75("Grid<Real>", "safeDivide", Grid<Real>::_W_23); static const Pb::Register _R_81("Grid<Real>", "safeDivide", Grid<Real>::_W_23);
static const Pb::Register _R_76("Grid<Real>", "clamp", Grid<Real>::_W_24); static const Pb::Register _R_82("Grid<Real>", "clamp", Grid<Real>::_W_24);
static const Pb::Register _R_77("Grid<Real>", "stomp", Grid<Real>::_W_25); static const Pb::Register _R_83("Grid<Real>", "stomp", Grid<Real>::_W_25);
static const Pb::Register _R_78("Grid<Real>", "permuteAxes", Grid<Real>::_W_26); static const Pb::Register _R_84("Grid<Real>", "permuteAxes", Grid<Real>::_W_26);
static const Pb::Register _R_79("Grid<Real>", "permuteAxesCopyToGrid", Grid<Real>::_W_27); static const Pb::Register _R_85("Grid<Real>", "permuteAxesCopyToGrid", Grid<Real>::_W_27);
static const Pb::Register _R_80("Grid<Real>", "join", Grid<Real>::_W_28); static const Pb::Register _R_86("Grid<Real>", "join", Grid<Real>::_W_28);
static const Pb::Register _R_81("Grid<Real>", "getMaxAbs", Grid<Real>::_W_29); static const Pb::Register _R_87("Grid<Real>", "getMaxAbs", Grid<Real>::_W_29);
static const Pb::Register _R_82("Grid<Real>", "getMax", Grid<Real>::_W_30); static const Pb::Register _R_88("Grid<Real>", "getMax", Grid<Real>::_W_30);
static const Pb::Register _R_83("Grid<Real>", "getMin", Grid<Real>::_W_31); static const Pb::Register _R_89("Grid<Real>", "getMin", Grid<Real>::_W_31);
static const Pb::Register _R_84("Grid<Real>", "getL1", Grid<Real>::_W_32); static const Pb::Register _R_90("Grid<Real>", "getL1", Grid<Real>::_W_32);
static const Pb::Register _R_85("Grid<Real>", "getL2", Grid<Real>::_W_33); static const Pb::Register _R_91("Grid<Real>", "getL2", Grid<Real>::_W_33);
static const Pb::Register _R_86("Grid<Real>", "setBound", Grid<Real>::_W_34); static const Pb::Register _R_92("Grid<Real>", "setBound", Grid<Real>::_W_34);
static const Pb::Register _R_87("Grid<Real>", "setBoundNeumann", Grid<Real>::_W_35); static const Pb::Register _R_93("Grid<Real>", "setBoundNeumann", Grid<Real>::_W_35);
static const Pb::Register _R_88("Grid<Real>", "getDataPointer", Grid<Real>::_W_36); static const Pb::Register _R_94("Grid<Real>", "getDataPointer", Grid<Real>::_W_36);
static const Pb::Register _R_89("Grid<Real>", "printGrid", Grid<Real>::_W_37); static const Pb::Register _R_95("Grid<Real>", "printGrid", Grid<Real>::_W_37);
static const Pb::Register _R_90("Grid<Vec3>", "Grid<Vec3>", "GridBase"); static const Pb::Register _R_96("Grid<Real>", "mapToOmp", Grid<Real>::_W_38);
static const Pb::Register _R_97("Grid<Real>", "mapFromOmp", Grid<Real>::_W_39);
static const Pb::Register _R_98("Grid<Real>", "mapAllocOmp", Grid<Real>::_W_40);
static const Pb::Register _R_99("Grid<Real>", "mapDeleteOmp", Grid<Real>::_W_41);
static const Pb::Register _R_100("Grid<Real>", "updateToOmp", Grid<Real>::_W_42);
static const Pb::Register _R_101("Grid<Real>", "updateFromOmp", Grid<Real>::_W_43);
static const Pb::Register _R_102("Grid<Vec3>", "Grid<Vec3>", "GridBase");
template<> const char *Namify<Grid<Vec3>>::S = "Grid<Vec3>"; template<> const char *Namify<Grid<Vec3>>::S = "Grid<Vec3>";
static const Pb::Register _R_91("Grid<Vec3>", "Grid", Grid<Vec3>::_W_10); static const Pb::Register _R_103("Grid<Vec3>", "Grid", Grid<Vec3>::_W_10);
static const Pb::Register _R_92("Grid<Vec3>", "save", Grid<Vec3>::_W_11); static const Pb::Register _R_104("Grid<Vec3>", "save", Grid<Vec3>::_W_11);
static const Pb::Register _R_93("Grid<Vec3>", "load", Grid<Vec3>::_W_12); static const Pb::Register _R_105("Grid<Vec3>", "load", Grid<Vec3>::_W_12);
static const Pb::Register _R_94("Grid<Vec3>", "clear", Grid<Vec3>::_W_13); static const Pb::Register _R_106("Grid<Vec3>", "clear", Grid<Vec3>::_W_13);
static const Pb::Register _R_95("Grid<Vec3>", "copyFrom", Grid<Vec3>::_W_14); static const Pb::Register _R_107("Grid<Vec3>", "copyFrom", Grid<Vec3>::_W_14);
static const Pb::Register _R_96("Grid<Vec3>", "getGridType", Grid<Vec3>::_W_15); static const Pb::Register _R_108("Grid<Vec3>", "getGridType", Grid<Vec3>::_W_15);
static const Pb::Register _R_97("Grid<Vec3>", "add", Grid<Vec3>::_W_16); static const Pb::Register _R_109("Grid<Vec3>", "add", Grid<Vec3>::_W_16);
static const Pb::Register _R_98("Grid<Vec3>", "sub", Grid<Vec3>::_W_17); static const Pb::Register _R_110("Grid<Vec3>", "sub", Grid<Vec3>::_W_17);
static const Pb::Register _R_99("Grid<Vec3>", "setConst", Grid<Vec3>::_W_18); static const Pb::Register _R_111("Grid<Vec3>", "setConst", Grid<Vec3>::_W_18);
static const Pb::Register _R_100("Grid<Vec3>", "addConst", Grid<Vec3>::_W_19); static const Pb::Register _R_112("Grid<Vec3>", "addConst", Grid<Vec3>::_W_19);
static const Pb::Register _R_101("Grid<Vec3>", "addScaled", Grid<Vec3>::_W_20); static const Pb::Register _R_113("Grid<Vec3>", "addScaled", Grid<Vec3>::_W_20);
static const Pb::Register _R_102("Grid<Vec3>", "mult", Grid<Vec3>::_W_21); static const Pb::Register _R_114("Grid<Vec3>", "mult", Grid<Vec3>::_W_21);
static const Pb::Register _R_103("Grid<Vec3>", "multConst", Grid<Vec3>::_W_22); static const Pb::Register _R_115("Grid<Vec3>", "multConst", Grid<Vec3>::_W_22);
static const Pb::Register _R_104("Grid<Vec3>", "safeDivide", Grid<Vec3>::_W_23); static const Pb::Register _R_116("Grid<Vec3>", "safeDivide", Grid<Vec3>::_W_23);
static const Pb::Register _R_105("Grid<Vec3>", "clamp", Grid<Vec3>::_W_24); static const Pb::Register _R_117("Grid<Vec3>", "clamp", Grid<Vec3>::_W_24);
static const Pb::Register _R_106("Grid<Vec3>", "stomp", Grid<Vec3>::_W_25); static const Pb::Register _R_118("Grid<Vec3>", "stomp", Grid<Vec3>::_W_25);
static const Pb::Register _R_107("Grid<Vec3>", "permuteAxes", Grid<Vec3>::_W_26); static const Pb::Register _R_119("Grid<Vec3>", "permuteAxes", Grid<Vec3>::_W_26);
static const Pb::Register _R_108("Grid<Vec3>", "permuteAxesCopyToGrid", Grid<Vec3>::_W_27); static const Pb::Register _R_120("Grid<Vec3>", "permuteAxesCopyToGrid", Grid<Vec3>::_W_27);
static const Pb::Register _R_109("Grid<Vec3>", "join", Grid<Vec3>::_W_28); static const Pb::Register _R_121("Grid<Vec3>", "join", Grid<Vec3>::_W_28);
static const Pb::Register _R_110("Grid<Vec3>", "getMaxAbs", Grid<Vec3>::_W_29); static const Pb::Register _R_122("Grid<Vec3>", "getMaxAbs", Grid<Vec3>::_W_29);
static const Pb::Register _R_111("Grid<Vec3>", "getMax", Grid<Vec3>::_W_30); static const Pb::Register _R_123("Grid<Vec3>", "getMax", Grid<Vec3>::_W_30);
static const Pb::Register _R_112("Grid<Vec3>", "getMin", Grid<Vec3>::_W_31); static const Pb::Register _R_124("Grid<Vec3>", "getMin", Grid<Vec3>::_W_31);
static const Pb::Register _R_113("Grid<Vec3>", "getL1", Grid<Vec3>::_W_32); static const Pb::Register _R_125("Grid<Vec3>", "getL1", Grid<Vec3>::_W_32);
static const Pb::Register _R_114("Grid<Vec3>", "getL2", Grid<Vec3>::_W_33); static const Pb::Register _R_126("Grid<Vec3>", "getL2", Grid<Vec3>::_W_33);
static const Pb::Register _R_115("Grid<Vec3>", "setBound", Grid<Vec3>::_W_34); static const Pb::Register _R_127("Grid<Vec3>", "setBound", Grid<Vec3>::_W_34);
static const Pb::Register _R_116("Grid<Vec3>", "setBoundNeumann", Grid<Vec3>::_W_35); static const Pb::Register _R_128("Grid<Vec3>", "setBoundNeumann", Grid<Vec3>::_W_35);
static const Pb::Register _R_117("Grid<Vec3>", "getDataPointer", Grid<Vec3>::_W_36); static const Pb::Register _R_129("Grid<Vec3>", "getDataPointer", Grid<Vec3>::_W_36);
static const Pb::Register _R_118("Grid<Vec3>", "printGrid", Grid<Vec3>::_W_37); static const Pb::Register _R_130("Grid<Vec3>", "printGrid", Grid<Vec3>::_W_37);
static const Pb::Register _R_131("Grid<Vec3>", "mapToOmp", Grid<Vec3>::_W_38);
static const Pb::Register _R_132("Grid<Vec3>", "mapFromOmp", Grid<Vec3>::_W_39);
static const Pb::Register _R_133("Grid<Vec3>", "mapAllocOmp", Grid<Vec3>::_W_40);
static const Pb::Register _R_134("Grid<Vec3>", "mapDeleteOmp", Grid<Vec3>::_W_41);
static const Pb::Register _R_135("Grid<Vec3>", "updateToOmp", Grid<Vec3>::_W_42);
static const Pb::Register _R_136("Grid<Vec3>", "updateFromOmp", Grid<Vec3>::_W_43);
#endif #endif
#ifdef _C_GridBase #ifdef _C_GridBase
static const Pb::Register _R_119("GridBase", "GridBase", "PbClass"); static const Pb::Register _R_137("GridBase", "GridBase", "PbClass");
template<> const char *Namify<GridBase>::S = "GridBase"; template<> const char *Namify<GridBase>::S = "GridBase";
static const Pb::Register _R_120("GridBase", "GridBase", GridBase::_W_0); static const Pb::Register _R_138("GridBase", "GridBase", GridBase::_W_0);
static const Pb::Register _R_121("GridBase", "getSizeX", GridBase::_W_1); static const Pb::Register _R_139("GridBase", "getSizeX", GridBase::_W_1);
static const Pb::Register _R_122("GridBase", "getSizeY", GridBase::_W_2); static const Pb::Register _R_140("GridBase", "getSizeY", GridBase::_W_2);
static const Pb::Register _R_123("GridBase", "getSizeZ", GridBase::_W_3); static const Pb::Register _R_141("GridBase", "getSizeZ", GridBase::_W_3);
static const Pb::Register _R_124("GridBase", "getSize", GridBase::_W_4); static const Pb::Register _R_142("GridBase", "getSize", GridBase::_W_4);
static const Pb::Register _R_125("GridBase", "is3D", GridBase::_W_5); static const Pb::Register _R_143("GridBase", "is3D", GridBase::_W_5);
static const Pb::Register _R_126("GridBase", "is4D", GridBase::_W_6); static const Pb::Register _R_144("GridBase", "is4D", GridBase::_W_6);
static const Pb::Register _R_127("GridBase", "getSizeT", GridBase::_W_7); static const Pb::Register _R_145("GridBase", "getSizeT", GridBase::_W_7);
static const Pb::Register _R_128("GridBase", "getStrideT", GridBase::_W_8); static const Pb::Register _R_146("GridBase", "getStrideT", GridBase::_W_8);
static const Pb::Register _R_129("GridBase", "setName", GridBase::_W_9); static const Pb::Register _R_147("GridBase", "setName", GridBase::_W_9);
#endif #endif
#ifdef _C_MACGrid #ifdef _C_MACGrid
static const Pb::Register _R_130("MACGrid", "MACGrid", "Grid<Vec3>"); static const Pb::Register _R_148("MACGrid", "MACGrid", "Grid<Vec3>");
template<> const char *Namify<MACGrid>::S = "MACGrid"; template<> const char *Namify<MACGrid>::S = "MACGrid";
static const Pb::Register _R_131("MACGrid", "MACGrid", MACGrid::_W_38); static const Pb::Register _R_149("MACGrid", "MACGrid", MACGrid::_W_44);
static const Pb::Register _R_132("MACGrid", "setBoundMAC", MACGrid::_W_39); static const Pb::Register _R_150("MACGrid", "setBoundMAC", MACGrid::_W_45);
#endif #endif
static const Pb::Register _R_7("GridType_TypeNone", 0); static const Pb::Register _R_7("GridType_TypeNone", 0);
static const Pb::Register _R_8("GridType_TypeReal", 1); static const Pb::Register _R_8("GridType_TypeReal", 1);
@@ -255,6 +273,24 @@ void PbRegister_file_7()
KEEP_UNUSED(_R_130); KEEP_UNUSED(_R_130);
KEEP_UNUSED(_R_131); KEEP_UNUSED(_R_131);
KEEP_UNUSED(_R_132); KEEP_UNUSED(_R_132);
KEEP_UNUSED(_R_133);
KEEP_UNUSED(_R_134);
KEEP_UNUSED(_R_135);
KEEP_UNUSED(_R_136);
KEEP_UNUSED(_R_137);
KEEP_UNUSED(_R_138);
KEEP_UNUSED(_R_139);
KEEP_UNUSED(_R_140);
KEEP_UNUSED(_R_141);
KEEP_UNUSED(_R_142);
KEEP_UNUSED(_R_143);
KEEP_UNUSED(_R_144);
KEEP_UNUSED(_R_145);
KEEP_UNUSED(_R_146);
KEEP_UNUSED(_R_147);
KEEP_UNUSED(_R_148);
KEEP_UNUSED(_R_149);
KEEP_UNUSED(_R_150);
} }
} }
} // namespace Manta } // namespace Manta

@@ -179,29 +179,21 @@ struct kn4dMinReal : public KernelBase {
return val; return val;
} }
typedef Grid4d<Real> type0; typedef Grid4d<Real> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dMinReal ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, minVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
kn4dMinReal(kn4dMinReal &o, tbb::split) {
: KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) Real minVal = std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const kn4dMinReal &o) op(i, val, minVal);
{ #pragma omp critical
minVal = min(minVal, o.minVal); {
this->minVal = min(minVal, this->minVal);
}
}
} }
Grid4d<Real> &val; Grid4d<Real> &val;
Real minVal; Real minVal;
@@ -234,29 +226,21 @@ struct kn4dMaxReal : public KernelBase {
return val; return val;
} }
typedef Grid4d<Real> type0; typedef Grid4d<Real> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dMaxReal ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, maxVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
kn4dMaxReal(kn4dMaxReal &o, tbb::split) {
: KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) Real maxVal = -std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const kn4dMaxReal &o) op(i, val, maxVal);
{ #pragma omp critical
maxVal = max(maxVal, o.maxVal); {
this->maxVal = max(maxVal, this->maxVal);
}
}
} }
Grid4d<Real> &val; Grid4d<Real> &val;
Real maxVal; Real maxVal;
@@ -289,29 +273,21 @@ struct kn4dMinInt : public KernelBase {
return val; return val;
} }
typedef Grid4d<int> type0; typedef Grid4d<int> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dMinInt ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, minVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
kn4dMinInt(kn4dMinInt &o, tbb::split) {
: KernelBase(o), val(o.val), minVal(std::numeric_limits<int>::max()) int minVal = std::numeric_limits<int>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const kn4dMinInt &o) op(i, val, minVal);
{ #pragma omp critical
minVal = min(minVal, o.minVal); {
this->minVal = min(minVal, this->minVal);
}
}
} }
Grid4d<int> &val; Grid4d<int> &val;
int minVal; int minVal;
@@ -344,29 +320,21 @@ struct kn4dMaxInt : public KernelBase {
return val; return val;
} }
typedef Grid4d<int> type0; typedef Grid4d<int> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dMaxInt ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, maxVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
kn4dMaxInt(kn4dMaxInt &o, tbb::split) {
: KernelBase(o), val(o.val), maxVal(std::numeric_limits<int>::min()) int maxVal = std::numeric_limits<int>::min();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const kn4dMaxInt &o) op(i, val, maxVal);
{ #pragma omp critical
maxVal = max(maxVal, o.maxVal); {
this->maxVal = max(maxVal, this->maxVal);
}
}
} }
Grid4d<int> &val; Grid4d<int> &val;
int maxVal; int maxVal;
@@ -400,29 +368,21 @@ template<class VEC> struct kn4dMinVec : public KernelBase {
return val; return val;
} }
typedef Grid4d<VEC> type0; typedef Grid4d<VEC> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dMinVec ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, minVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
kn4dMinVec(kn4dMinVec &o, tbb::split) {
: KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) Real minVal = std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const kn4dMinVec &o) op(i, val, minVal);
{ #pragma omp critical
minVal = min(minVal, o.minVal); {
this->minVal = min(minVal, this->minVal);
}
}
} }
Grid4d<VEC> &val; Grid4d<VEC> &val;
Real minVal; Real minVal;
@@ -456,29 +416,21 @@ template<class VEC> struct kn4dMaxVec : public KernelBase {
return val; return val;
} }
typedef Grid4d<VEC> type0; typedef Grid4d<VEC> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dMaxVec ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, maxVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
kn4dMaxVec(kn4dMaxVec &o, tbb::split) {
: KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) Real maxVal = -std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const kn4dMaxVec &o) op(i, val, maxVal);
{ #pragma omp critical
maxVal = max(maxVal, o.maxVal); {
this->maxVal = max(maxVal, this->maxVal);
}
}
} }
Grid4d<VEC> &val; Grid4d<VEC> &val;
Real maxVal; Real maxVal;
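
Note: for the min/max kernels above, the TBB parallel_reduce machinery (splitting constructor plus join()) is replaced by a manual OpenMP reduction. Each thread folds into a private local value, and the locals are merged under a critical section, mirroring what join() did. A standalone sketch of the same idiom over a plain array; an OpenMP reduction(min: ...) clause would be an alternative where supported:

#include <algorithm>
#include <limits>
#include <vector>

// Sketch of the per-thread local + critical merge used by the kernels above.
double minOfOmp(const std::vector<double> &vals)
{
  double minVal = std::numeric_limits<double>::max();
  const long long n = (long long)vals.size();
#pragma omp parallel
  {
    double localMin = std::numeric_limits<double>::max();
#pragma omp for nowait
    for (long long i = 0; i < n; i++)
      localMin = std::min(localMin, vals[i]);
#pragma omp critical
    {
      minVal = std::min(minVal, localMin);
    }
  }
  return minVal;
}
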
@@ -507,7 +459,7 @@ template<class T> struct kn4dSetConstReal : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, T val) const inline void op(IndexInt idx, Grid4d<T> &me, T val)
{ {
me[idx] = val; me[idx] = val;
} }
@@ -521,21 +473,17 @@ template<class T> struct kn4dSetConstReal : public KernelBase {
return val; return val;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dSetConstReal ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, val);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, val);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
T val; T val;
@@ -546,7 +494,7 @@ template<class T> struct kn4dAddConstReal : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, T val) const inline void op(IndexInt idx, Grid4d<T> &me, T val)
{ {
me[idx] += val; me[idx] += val;
} }
@@ -560,21 +508,17 @@ template<class T> struct kn4dAddConstReal : public KernelBase {
return val; return val;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dAddConstReal ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, val);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, val);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
T val; T val;
@@ -585,7 +529,7 @@ template<class T> struct kn4dMultConst : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, T val) const inline void op(IndexInt idx, Grid4d<T> &me, T val)
{ {
me[idx] *= val; me[idx] *= val;
} }
@@ -599,21 +543,17 @@ template<class T> struct kn4dMultConst : public KernelBase {
return val; return val;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dMultConst ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, val);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, val);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
T val; T val;
@@ -624,7 +564,7 @@ template<class T> struct kn4dClamp : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, T min, T max) const inline void op(IndexInt idx, Grid4d<T> &me, T min, T max)
{ {
me[idx] = clamp(me[idx], min, max); me[idx] = clamp(me[idx], min, max);
} }
@@ -643,21 +583,17 @@ template<class T> struct kn4dClamp : public KernelBase {
return max; return max;
} }
typedef T type2; typedef T type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel kn4dClamp ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, min, max);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, min, max);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
T min; T min;
@@ -788,7 +724,7 @@ struct knGetComp4d : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, const Grid4d<Vec4> &src, Grid4d<Real> &dst, int c) const inline void op(IndexInt idx, const Grid4d<Vec4> &src, Grid4d<Real> &dst, int c)
{ {
dst[idx] = src[idx][c]; dst[idx] = src[idx][c];
} }
@@ -807,21 +743,17 @@ struct knGetComp4d : public KernelBase {
return c; return c;
} }
typedef int type2; typedef int type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knGetComp4d ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, src, dst, c);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, src, dst, c);
}
} }
const Grid4d<Vec4> &src; const Grid4d<Vec4> &src;
Grid4d<Real> &dst; Grid4d<Real> &dst;
@@ -835,7 +767,7 @@ struct knSetComp4d : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, const Grid4d<Real> &src, Grid4d<Vec4> &dst, int c) const inline void op(IndexInt idx, const Grid4d<Real> &src, Grid4d<Vec4> &dst, int c)
{ {
dst[idx][c] = src[idx]; dst[idx][c] = src[idx];
} }
@@ -854,21 +786,17 @@ struct knSetComp4d : public KernelBase {
return c; return c;
} }
typedef int type2; typedef int type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSetComp4d ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, src, dst, c);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, src, dst, c);
}
} }
const Grid4d<Real> &src; const Grid4d<Real> &src;
Grid4d<Vec4> &dst; Grid4d<Vec4> &dst;
@@ -959,7 +887,7 @@ template<class T> struct knSetBnd4d : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, int t, Grid4d<T> &grid, T value, int w) const inline void op(int i, int j, int k, int t, Grid4d<T> &grid, T value, int w)
{ {
bool bnd = (i <= w || i >= grid.getSizeX() - 1 - w || j <= w || j >= grid.getSizeY() - 1 - w || bool bnd = (i <= w || i >= grid.getSizeX() - 1 - w || j <= w || j >= grid.getSizeY() - 1 - w ||
k <= w || k >= grid.getSizeZ() - 1 - w || t <= w || t >= grid.getSizeT() - 1 - w); k <= w || k >= grid.getSizeZ() - 1 - w || t <= w || t >= grid.getSizeT() - 1 - w);
@@ -981,50 +909,47 @@ template<class T> struct knSetBnd4d : public KernelBase {
return w; return w;
} }
typedef int type2; typedef int type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSetBnd4d ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
<< " "
" t "
<< minT << " - " << maxT,
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX;
const int _maxY = maxY;
if (maxT > 1) { if (maxT > 1) {
for (int t = __r.begin(); t != (int)__r.end(); t++) const int _maxZ = maxZ;
for (int k = 0; k < maxZ; k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, grid, value, w); #pragma omp for
for (int t = 0; t < maxT; t++)
for (int k = 0; k < _maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, grid, value, w);
}
} }
else if (maxZ > 1) { else if (maxZ > 1) {
const int t = 0; const int t = 0;
for (int k = __r.begin(); k != (int)__r.end(); k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, grid, value, w); #pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, grid, value, w);
}
} }
else { else {
const int t = 0; const int t = 0;
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < maxX; i++) {
op(i, j, k, t, grid, value, w);
} #pragma omp for
} for (int j = 0; j < _maxY; j++)
void run() for (int i = 0; i < _maxX; i++)
{ op(i, j, k, t, grid, value, w);
if (maxT > 1) { }
tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
}
else if (maxZ > 1) {
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
}
else {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
} }
} }
Grid4d<T> &grid; Grid4d<T> &grid;
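
Note: all of the loops in these hunks stay on the host with #pragma omp parallel / #pragma omp for; per the branch description, only selected kernels (pressure and advection) are actually offloaded to the GPU. In OpenMP terms, an offloaded loop over data previously mapped with the new mapToOmp/updateToOmp helpers would look roughly like the sketch below (scaleOnDevice is illustrative only; the real offloaded kernels are not part of the hunks shown here):

#include <cstddef>

// Hedged sketch of a GPU-offloaded loop using an OpenMP combined target construct.
void scaleOnDevice(float *data, std::size_t n, float factor)
{
#pragma omp target teams distribute parallel for map(tofrom : data[0:n])
  for (long long i = 0; i < (long long)n; i++)
    data[i] *= factor;
}
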
@@ -1043,7 +968,7 @@ template<class T> struct knSetBnd4dNeumann : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, int t, Grid4d<T> &grid, int w) const inline void op(int i, int j, int k, int t, Grid4d<T> &grid, int w)
{ {
bool set = false; bool set = false;
int si = i, sj = j, sk = k, st = t; int si = i, sj = j, sk = k, st = t;
@@ -1092,50 +1017,47 @@ template<class T> struct knSetBnd4dNeumann : public KernelBase {
return w; return w;
} }
typedef int type1; typedef int type1;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSetBnd4dNeumann ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
<< " "
" t "
<< minT << " - " << maxT,
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX;
const int _maxY = maxY;
if (maxT > 1) { if (maxT > 1) {
for (int t = __r.begin(); t != (int)__r.end(); t++) const int _maxZ = maxZ;
for (int k = 0; k < maxZ; k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, grid, w); #pragma omp for
for (int t = 0; t < maxT; t++)
for (int k = 0; k < _maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, grid, w);
}
} }
else if (maxZ > 1) { else if (maxZ > 1) {
const int t = 0; const int t = 0;
for (int k = __r.begin(); k != (int)__r.end(); k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, grid, w); #pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, grid, w);
}
} }
else { else {
const int t = 0; const int t = 0;
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < maxX; i++) {
op(i, j, k, t, grid, w);
} #pragma omp for
} for (int j = 0; j < _maxY; j++)
void run() for (int i = 0; i < _maxX; i++)
{ op(i, j, k, t, grid, w);
if (maxT > 1) { }
tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
}
else if (maxZ > 1) {
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
}
else {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
} }
} }
Grid4d<T> &grid; Grid4d<T> &grid;
@@ -1329,7 +1251,7 @@ template<class S> struct knSetRegion4d : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, int t, Grid4d<S> &dst, Vec4 start, Vec4 end, S value) const inline void op(int i, int j, int k, int t, Grid4d<S> &dst, Vec4 start, Vec4 end, S value)
{ {
Vec4 p(i, j, k, t); Vec4 p(i, j, k, t);
for (int c = 0; c < 4; ++c) for (int c = 0; c < 4; ++c)
@@ -1357,50 +1279,47 @@ template<class S> struct knSetRegion4d : public KernelBase {
return value; return value;
} }
typedef S type3; typedef S type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSetRegion4d ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
<< " "
" t "
<< minT << " - " << maxT,
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX;
const int _maxY = maxY;
if (maxT > 1) { if (maxT > 1) {
for (int t = __r.begin(); t != (int)__r.end(); t++) const int _maxZ = maxZ;
for (int k = 0; k < maxZ; k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, dst, start, end, value); #pragma omp for
for (int t = 0; t < maxT; t++)
for (int k = 0; k < _maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, dst, start, end, value);
}
} }
else if (maxZ > 1) { else if (maxZ > 1) {
const int t = 0; const int t = 0;
for (int k = __r.begin(); k != (int)__r.end(); k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, dst, start, end, value); #pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, dst, start, end, value);
}
} }
else { else {
const int t = 0; const int t = 0;
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < maxX; i++) {
op(i, j, k, t, dst, start, end, value);
} #pragma omp for
} for (int j = 0; j < _maxY; j++)
void run() for (int i = 0; i < _maxX; i++)
{ op(i, j, k, t, dst, start, end, value);
if (maxT > 1) { }
tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
}
else if (maxZ > 1) {
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
}
else {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
} }
} }
Grid4d<S> &dst; Grid4d<S> &dst;
@@ -1619,7 +1538,7 @@ template<class S> struct knInterpol4d : public KernelBase {
Grid4d<S> &target, Grid4d<S> &target,
Grid4d<S> &source, Grid4d<S> &source,
const Vec4 &srcFac, const Vec4 &srcFac,
const Vec4 &offset) const const Vec4 &offset)
{ {
Vec4 pos = Vec4(i, j, k, t) * srcFac + offset; Vec4 pos = Vec4(i, j, k, t) * srcFac + offset;
target(i, j, k, t) = source.getInterpolated(pos); target(i, j, k, t) = source.getInterpolated(pos);
@@ -1644,50 +1563,47 @@ template<class S> struct knInterpol4d : public KernelBase {
return offset; return offset;
} }
typedef Vec4 type3; typedef Vec4 type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knInterpol4d ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
<< " "
" t "
<< minT << " - " << maxT,
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX;
const int _maxY = maxY;
if (maxT > 1) { if (maxT > 1) {
for (int t = __r.begin(); t != (int)__r.end(); t++) const int _maxZ = maxZ;
for (int k = 0; k < maxZ; k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, target, source, srcFac, offset); #pragma omp for
for (int t = 0; t < maxT; t++)
for (int k = 0; k < _maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, target, source, srcFac, offset);
}
} }
else if (maxZ > 1) { else if (maxZ > 1) {
const int t = 0; const int t = 0;
for (int k = __r.begin(); k != (int)__r.end(); k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, target, source, srcFac, offset); #pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, target, source, srcFac, offset);
}
} }
else { else {
const int t = 0; const int t = 0;
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < maxX; i++) {
op(i, j, k, t, target, source, srcFac, offset);
} #pragma omp for
} for (int j = 0; j < _maxY; j++)
void run() for (int i = 0; i < _maxX; i++)
{ op(i, j, k, t, target, source, srcFac, offset);
if (maxT > 1) { }
tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
}
else if (maxZ > 1) {
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
}
else {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
} }
} }
Grid4d<S> &target; Grid4d<S> &target;

@@ -326,7 +326,6 @@ class Grid4dBase : public PbClass {
// precomputed Z,T shift: to ensure 2D compatibility, always use this instead of sx*sy ! // precomputed Z,T shift: to ensure 2D compatibility, always use this instead of sx*sy !
IndexInt mStrideZ; IndexInt mStrideZ;
IndexInt mStrideT; IndexInt mStrideT;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -951,7 +950,6 @@ template<class T> class Grid4d : public Grid4dBase {
protected: protected:
T *mData; T *mData;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -1027,7 +1025,7 @@ template<class T, class S> struct Grid4dAdd : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{ {
me[idx] += other[idx]; me[idx] += other[idx];
} }
@@ -1041,21 +1039,17 @@ template<class T, class S> struct Grid4dAdd : public KernelBase {
return other; return other;
} }
typedef Grid4d<S> type1; typedef Grid4d<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dAdd ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const Grid4d<S> &other; const Grid4d<S> &other;
@@ -1066,7 +1060,7 @@ template<class T, class S> struct Grid4dSub : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{ {
me[idx] -= other[idx]; me[idx] -= other[idx];
} }
@@ -1080,21 +1074,17 @@ template<class T, class S> struct Grid4dSub : public KernelBase {
return other; return other;
} }
typedef Grid4d<S> type1; typedef Grid4d<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dSub ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const Grid4d<S> &other; const Grid4d<S> &other;
@@ -1105,7 +1095,7 @@ template<class T, class S> struct Grid4dMult : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{ {
me[idx] *= other[idx]; me[idx] *= other[idx];
} }
@@ -1119,21 +1109,17 @@ template<class T, class S> struct Grid4dMult : public KernelBase {
return other; return other;
} }
typedef Grid4d<S> type1; typedef Grid4d<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dMult ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const Grid4d<S> &other; const Grid4d<S> &other;
@@ -1144,7 +1130,7 @@ template<class T, class S> struct Grid4dDiv : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other) const inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<S> &other)
{ {
me[idx] /= other[idx]; me[idx] /= other[idx];
} }
@@ -1158,21 +1144,17 @@ template<class T, class S> struct Grid4dDiv : public KernelBase {
return other; return other;
} }
typedef Grid4d<S> type1; typedef Grid4d<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dDiv ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const Grid4d<S> &other; const Grid4d<S> &other;
@@ -1183,7 +1165,7 @@ template<class T, class S> struct Grid4dAddScalar : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const S &other) const inline void op(IndexInt idx, Grid4d<T> &me, const S &other)
{ {
me[idx] += other; me[idx] += other;
} }
@@ -1197,21 +1179,17 @@ template<class T, class S> struct Grid4dAddScalar : public KernelBase {
return other; return other;
} }
typedef S type1; typedef S type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dAddScalar ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const S &other; const S &other;
@@ -1222,7 +1200,7 @@ template<class T, class S> struct Grid4dMultScalar : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const S &other) const inline void op(IndexInt idx, Grid4d<T> &me, const S &other)
{ {
me[idx] *= other; me[idx] *= other;
} }
@@ -1236,21 +1214,17 @@ template<class T, class S> struct Grid4dMultScalar : public KernelBase {
return other; return other;
} }
typedef S type1; typedef S type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dMultScalar ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const S &other; const S &other;
@@ -1262,7 +1236,7 @@ template<class T, class S> struct Grid4dScaledAdd : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other, const S &factor) const inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other, const S &factor)
{ {
me[idx] += factor * other[idx]; me[idx] += factor * other[idx];
} }
@@ -1281,21 +1255,17 @@ template<class T, class S> struct Grid4dScaledAdd : public KernelBase {
return factor; return factor;
} }
typedef S type2; typedef S type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dScaledAdd ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other, factor);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other, factor);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const Grid4d<T> &other; const Grid4d<T> &other;
@@ -1308,7 +1278,7 @@ template<class T> struct Grid4dSafeDiv : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other) const inline void op(IndexInt idx, Grid4d<T> &me, const Grid4d<T> &other)
{ {
me[idx] = safeDivide(me[idx], other[idx]); me[idx] = safeDivide(me[idx], other[idx]);
} }
@@ -1322,21 +1292,17 @@ template<class T> struct Grid4dSafeDiv : public KernelBase {
return other; return other;
} }
typedef Grid4d<T> type1; typedef Grid4d<T> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dSafeDiv ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
const Grid4d<T> &other; const Grid4d<T> &other;
@@ -1347,7 +1313,7 @@ template<class T> struct Grid4dSetConst : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid4d<T> &me, T value) const inline void op(IndexInt idx, Grid4d<T> &me, T value)
{ {
me[idx] = value; me[idx] = value;
} }
@@ -1361,21 +1327,17 @@ template<class T> struct Grid4dSetConst : public KernelBase {
return value; return value;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel Grid4dSetConst ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, value);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, value);
}
} }
Grid4d<T> &me; Grid4d<T> &me;
T value; T value;
@@ -1473,7 +1435,7 @@ template<class S> struct KnInterpolateGrid4dTempl : public KernelBase {
Grid4d<S> &target, Grid4d<S> &target,
Grid4d<S> &source, Grid4d<S> &source,
const Vec4 &sourceFactor, const Vec4 &sourceFactor,
Vec4 offset) const Vec4 offset)
{ {
Vec4 pos = Vec4(i, j, k, t) * sourceFactor + offset; Vec4 pos = Vec4(i, j, k, t) * sourceFactor + offset;
if (!source.is3D()) if (!source.is3D())
@@ -1502,50 +1464,47 @@ template<class S> struct KnInterpolateGrid4dTempl : public KernelBase {
return offset; return offset;
} }
typedef Vec4 type3; typedef Vec4 type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnInterpolateGrid4dTempl ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ
<< " "
" t "
<< minT << " - " << maxT,
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX;
const int _maxY = maxY;
if (maxT > 1) { if (maxT > 1) {
for (int t = __r.begin(); t != (int)__r.end(); t++) const int _maxZ = maxZ;
for (int k = 0; k < maxZ; k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, target, source, sourceFactor, offset); #pragma omp for
for (int t = 0; t < maxT; t++)
for (int k = 0; k < _maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, target, source, sourceFactor, offset);
}
} }
else if (maxZ > 1) { else if (maxZ > 1) {
const int t = 0; const int t = 0;
for (int k = __r.begin(); k != (int)__r.end(); k++) #pragma omp parallel
for (int j = 0; j < maxY; j++) {
for (int i = 0; i < maxX; i++)
op(i, j, k, t, target, source, sourceFactor, offset); #pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, t, target, source, sourceFactor, offset);
}
} }
else { else {
const int t = 0; const int t = 0;
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < maxX; i++) {
op(i, j, k, t, target, source, sourceFactor, offset);
} #pragma omp for
} for (int j = 0; j < _maxY; j++)
void run() for (int i = 0; i < _maxX; i++)
{ op(i, j, k, t, target, source, sourceFactor, offset);
if (maxT > 1) { }
tbb::parallel_for(tbb::blocked_range<IndexInt>(minT, maxT), *this);
}
else if (maxZ > 1) {
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
}
else {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
} }
} }
Grid4d<S> &target; Grid4d<S> &target;

@@ -71,19 +71,6 @@ class ParticleBase;
for (int j = bnd; j < (grid).getSizeY() - bnd; ++j) \ for (int j = bnd; j < (grid).getSizeY() - bnd; ++j) \
for (int i = bnd; i < (grid).getSizeX() - bnd; ++i) for (int i = bnd; i < (grid).getSizeX() - bnd; ++i)
#define FOR_NEIGHBORS_BND(grid, radius, bnd) \
for (int zj = ((grid).is3D() ? std::max(bnd, k - radius) : 0); \
zj <= ((grid).is3D() ? std::min(k + radius, (grid).getSizeZ() - 1 - bnd) : 0); \
zj++) \
for (int yj = std::max(bnd, j - radius); \
yj <= std::min(j + radius, (grid).getSizeY() - 1 - bnd); \
yj++) \
for (int xj = std::max(bnd, i - radius); \
xj <= std::min(i + radius, (grid).getSizeX() - 1 - bnd); \
xj++)
#define FOR_NEIGHBORS(grid, radius) FOR_NEIGHBORS_BND(grid, radius, 0)
//! Basic data structure for kernel data, initialized based on kernel type (e.g. single, idx, etc). //! Basic data structure for kernel data, initialized based on kernel type (e.g. single, idx, etc).
struct KernelBase { struct KernelBase {
int maxX, maxY, maxZ, minZ, maxT, minT; int maxX, maxY, maxZ, minZ, maxT, minT;

@@ -62,7 +62,7 @@ struct InitFmIn : public KernelBase {
Grid<int> &fmFlags, Grid<int> &fmFlags,
Grid<Real> &phi, Grid<Real> &phi,
bool ignoreWalls, bool ignoreWalls,
int obstacleType) const int obstacleType)
{ {
const IndexInt idx = flags.index(i, j, k); const IndexInt idx = flags.index(i, j, k);
const Real v = phi[idx]; const Real v = phi[idx];
@@ -104,37 +104,35 @@ struct InitFmIn : public KernelBase {
return obstacleType; return obstacleType;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel InitFmIn ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<int> &fmFlags; Grid<int> &fmFlags;
Grid<Real> &phi; Grid<Real> &phi;
@@ -165,7 +163,7 @@ struct InitFmOut : public KernelBase {
Grid<int> &fmFlags, Grid<int> &fmFlags,
Grid<Real> &phi, Grid<Real> &phi,
bool ignoreWalls, bool ignoreWalls,
int obstacleType) const int obstacleType)
{ {
const IndexInt idx = flags.index(i, j, k); const IndexInt idx = flags.index(i, j, k);
const Real v = phi[idx]; const Real v = phi[idx];
@@ -205,37 +203,35 @@ struct InitFmOut : public KernelBase {
return obstacleType; return obstacleType;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel InitFmOut ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, fmFlags, phi, ignoreWalls, obstacleType);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<int> &fmFlags; Grid<int> &fmFlags;
Grid<Real> &phi; Grid<Real> &phi;
@@ -269,7 +265,7 @@ struct SetUninitialized : public KernelBase {
Grid<Real> &phi, Grid<Real> &phi,
const Real val, const Real val,
int ignoreWalls, int ignoreWalls,
int obstacleType) const int obstacleType)
{ {
if (ignoreWalls) { if (ignoreWalls) {
if ((fmFlags(i, j, k) != FlagInited) && ((flags(i, j, k) & obstacleType) == 0)) { if ((fmFlags(i, j, k) != FlagInited) && ((flags(i, j, k) & obstacleType) == 0)) {
@@ -311,37 +307,35 @@ struct SetUninitialized : public KernelBase {
return obstacleType; return obstacleType;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel SetUninitialized ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, fmFlags, phi, val, ignoreWalls, obstacleType);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const Grid<int> &flags; const Grid<int> &flags;
Grid<int> &fmFlags; Grid<int> &fmFlags;
Grid<Real> &phi; Grid<Real> &phi;
@@ -371,7 +365,8 @@ inline bool isAtInterface(const Grid<int> &fmFlags, Grid<Real> &phi, const Vec3i
//************************************************************************ //************************************************************************
// Levelset class def // Levelset class def
LevelsetGrid::LevelsetGrid(FluidSolver *parent, bool show) : Grid<Real>(parent, show) LevelsetGrid::LevelsetGrid(FluidSolver *parent, bool show, bool offload)
: Grid<Real>(parent, show, false /*sparse*/, offload)
{ {
mType = (GridType)(TypeLevelset | TypeReal); mType = (GridType)(TypeLevelset | TypeReal);
} }
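Besides the kernel rewrites, this hunk threads a new `offload` flag through the LevelsetGrid constructor and forwards it to the Grid<Real> base, whose constructor (judging from the call above) takes parent, show, sparse and offload. A reduced sketch of that forwarding pattern, with skeleton classes invented for illustration:

#include <cstdio>

struct FluidSolver {};  // stand-in for the real solver class

struct GridBase {
  GridBase(FluidSolver *parent, bool show, bool sparse, bool offload)
      : mParent(parent), mShow(show), mSparse(sparse), mOffload(offload)
  {
  }
  FluidSolver *mParent;
  bool mShow, mSparse, mOffload;
};

struct LevelsetGridSketch : GridBase {
  // 'offload' defaults to false so existing call sites keep compiling.
  LevelsetGridSketch(FluidSolver *parent, bool show = true, bool offload = false)
      : GridBase(parent, show, /*sparse=*/false, offload)
  {
  }
};

int main()
{
  FluidSolver solver;
  LevelsetGridSketch phi(&solver, /*show=*/true, /*offload=*/true);
  std::printf("offload=%d\n", (int)phi.mOffload);
  return 0;
}

The wrapper hunk in the header further down reads the same flag as an optional third argument, so on the script side it is presumably exposed as an offload=True keyword next to the existing show one.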
@@ -394,7 +389,7 @@ struct KnJoin : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b) const inline void op(IndexInt idx, Grid<Real> &a, const Grid<Real> &b)
{ {
a[idx] = min(a[idx], b[idx]); a[idx] = min(a[idx], b[idx]);
} }
@@ -408,21 +403,17 @@ struct KnJoin : public KernelBase {
return b; return b;
} }
typedef Grid<Real> type1; typedef Grid<Real> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnJoin ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, a, b);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, a, b);
}
} }
Grid<Real> &a; Grid<Real> &a;
const Grid<Real> &b; const Grid<Real> &b;
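For the idx-based kernels (KnJoin above, KnSubtract and the multigrid and particle kernels further down), the replacement is even simpler: the blocked_range loop plus tbb::parallel_for becomes one OpenMP-parallel flat loop. A stand-alone sketch shaped like the KnJoin hunk (a[idx] = min(a[idx], b[idx])); the container type and IndexInt-equivalent are simplified here:

#include <algorithm>
#include <vector>

// Pointwise join of two equally sized fields.
static void knJoinSketch(std::vector<float> &a, const std::vector<float> &b)
{
  const long long _sz = (long long)a.size();
#pragma omp parallel
  {
#pragma omp for
    for (long long i = 0; i < _sz; i++)
      a[i] = std::min(a[i], b[i]);
  }
}

Keeping `#pragma omp parallel` and `#pragma omp for` as two separate directives, rather than the combined `parallel for`, mirrors the generated code; for these kernels the two forms behave the same.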
@@ -440,11 +431,8 @@ struct KnSubtract : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, inline void op(
Grid<Real> &a, IndexInt idx, Grid<Real> &a, const Grid<Real> &b, const FlagGrid *flags, int subtractType)
const Grid<Real> &b,
const FlagGrid *flags,
int subtractType) const
{ {
if (flags && ((*flags)(idx)&subtractType) == 0) if (flags && ((*flags)(idx)&subtractType) == 0)
return; return;
@@ -471,21 +459,17 @@ struct KnSubtract : public KernelBase {
return subtractType; return subtractType;
} }
typedef int type3; typedef int type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnSubtract ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, a, b, flags, subtractType);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, a, b, flags, subtractType);
}
} }
Grid<Real> &a; Grid<Real> &a;
const Grid<Real> &b; const Grid<Real> &b;
@@ -668,7 +652,7 @@ struct KnFillApply : public KernelBase {
Grid<int> &visited, Grid<int> &visited,
const Real value, const Real value,
const int boundaryWidth, const int boundaryWidth,
const bool outside) const const bool outside)
{ {
if (visited(i, j, k) == ID_VISITED) if (visited(i, j, k) == ID_VISITED)
@@ -706,37 +690,35 @@ struct KnFillApply : public KernelBase {
return outside; return outside;
} }
typedef bool type4; typedef bool type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnFillApply ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = boundaryWidth; j < _maxY; j++) #pragma omp parallel
for (int i = boundaryWidth; i < _maxX; i++) {
op(i, j, k, target, visited, value, boundaryWidth, outside);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = boundaryWidth; j < _maxY; j++)
for (int i = boundaryWidth; i < _maxX; i++)
op(i, j, k, target, visited, value, boundaryWidth, outside);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = boundaryWidth; i < _maxX; i++) {
op(i, j, k, target, visited, value, boundaryWidth, outside);
#pragma omp for
for (int j = boundaryWidth; j < _maxY; j++)
for (int i = boundaryWidth; i < _maxX; i++)
op(i, j, k, target, visited, value, boundaryWidth, outside);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(boundaryWidth, maxY), *this);
}
Grid<Real> &target; Grid<Real> &target;
Grid<int> &visited; Grid<int> &visited;
const Real value; const Real value;

View File

@@ -27,7 +27,7 @@ class Mesh;
//! Special function for levelsets //! Special function for levelsets
class LevelsetGrid : public Grid<Real> { class LevelsetGrid : public Grid<Real> {
public: public:
LevelsetGrid(FluidSolver *parent, bool show = true); LevelsetGrid(FluidSolver *parent, bool show = true, bool offload = false);
static int _W_0(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static int _W_0(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
PbClass *obj = Pb::objFromPy(_self); PbClass *obj = Pb::objFromPy(_self);
@@ -41,7 +41,8 @@ class LevelsetGrid : public Grid<Real> {
ArgLocker _lock; ArgLocker _lock;
FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock); FluidSolver *parent = _args.getPtr<FluidSolver>("parent", 0, &_lock);
bool show = _args.getOpt<bool>("show", 1, true, &_lock); bool show = _args.getOpt<bool>("show", 1, true, &_lock);
obj = new LevelsetGrid(parent, show); bool offload = _args.getOpt<bool>("offload", 2, false, &_lock);
obj = new LevelsetGrid(parent, show, offload);
obj->registerObject(_self, &_args); obj->registerObject(_self, &_args);
_args.check(); _args.check();
} }
@@ -266,7 +267,6 @@ class LevelsetGrid : public Grid<Real> {
} }
static Real invalidTimeValue(); static Real invalidTimeValue();
public: public:
PbArgs _args; PbArgs _args;
} }

File diff suppressed because it is too large

View File

@@ -796,7 +796,6 @@ class Mesh : public PbClass {
std::vector<MeshDataImpl<int> *> std::vector<MeshDataImpl<int> *>
mMdataInt; //! indicate that mdata of this mesh is copied, and needs to be freed mMdataInt; //! indicate that mdata of this mesh is copied, and needs to be freed
bool mFreeMdata; bool mFreeMdata;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -882,7 +881,6 @@ class MeshDataBase : public PbClass {
protected: protected:
Mesh *mMesh; Mesh *mMesh;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -1647,7 +1645,6 @@ template<class T> class MeshDataImpl : public MeshDataBase {
//! optionally , we might have an associated grid from which to grab new data //! optionally , we might have an associated grid from which to grab new data
Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3 Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3
bool mGridSourceMAC; bool mGridSourceMAC;
public: public:
PbArgs _args; PbArgs _args;
} }

View File

@@ -154,7 +154,6 @@ class MovingObstacle : public PbClass {
int mEmptyType; int mEmptyType;
int mID; int mID;
static int sIDcnt; static int sIDcnt;
public: public:
PbArgs _args; PbArgs _args;
} }

View File

@@ -428,7 +428,7 @@ struct knCopyA : public KernelBase {
const Grid<Real> *pA0, const Grid<Real> *pA0,
const Grid<Real> *pAi, const Grid<Real> *pAi,
const Grid<Real> *pAj, const Grid<Real> *pAj,
const Grid<Real> *pAk) const const Grid<Real> *pAk)
{ {
A0[idx * stencilSize0 + 0] = (*pA0)[idx]; A0[idx * stencilSize0 + 0] = (*pA0)[idx];
A0[idx * stencilSize0 + 1] = (*pAi)[idx]; A0[idx * stencilSize0 + 1] = (*pAi)[idx];
@@ -476,21 +476,17 @@ struct knCopyA : public KernelBase {
return pAk; return pAk;
} }
typedef Grid<Real> type7; typedef Grid<Real> type7;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knCopyA ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, sizeRef, A0, stencilSize0, is3D, pA0, pAi, pAj, pAk);
}
} }
std::vector<Real> &sizeRef; std::vector<Real> &sizeRef;
std::vector<Real> &A0; std::vector<Real> &A0;
@@ -523,7 +519,7 @@ struct knActivateVertices : public KernelBase {
std::vector<Real> &A0, std::vector<Real> &A0,
bool &nonZeroStencilSumFound, bool &nonZeroStencilSumFound,
bool &trivialEquationsFound, bool &trivialEquationsFound,
const GridMg &mg) const const GridMg &mg)
{ {
// active vertices on level 0 are vertices with non-zero diagonal entry in A // active vertices on level 0 are vertices with non-zero diagonal entry in A
type_0[idx] = GridMg::vtInactive; type_0[idx] = GridMg::vtInactive;
@@ -572,21 +568,17 @@ struct knActivateVertices : public KernelBase {
return mg; return mg;
} }
typedef GridMg type4; typedef GridMg type4;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knActivateVertices ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, type_0, A0, nonZeroStencilSumFound, trivialEquationsFound, mg);
}
} }
std::vector<GridMg::VertexType> &type_0; std::vector<GridMg::VertexType> &type_0;
std::vector<Real> &A0; std::vector<Real> &A0;
@@ -642,7 +634,7 @@ struct knSetRhs : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg) const inline void op(IndexInt idx, std::vector<Real> &b, const Grid<Real> &rhs, const GridMg &mg)
{ {
b[idx] = rhs[idx]; b[idx] = rhs[idx];
@@ -666,21 +658,17 @@ struct knSetRhs : public KernelBase {
return mg; return mg;
} }
typedef GridMg type2; typedef GridMg type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSetRhs ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, b, rhs, mg);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, b, rhs, mg);
}
} }
std::vector<Real> &b; std::vector<Real> &b;
const Grid<Real> &rhs; const Grid<Real> &rhs;
@@ -702,7 +690,7 @@ template<class T> struct knSet : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, std::vector<T> &data, T value) const inline void op(IndexInt idx, std::vector<T> &data, T value)
{ {
data[idx] = value; data[idx] = value;
} }
@@ -716,21 +704,17 @@ template<class T> struct knSet : public KernelBase {
return value; return value;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSet ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, data, value);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, data, value);
}
} }
std::vector<T> &data; std::vector<T> &data;
T value; T value;
@@ -743,7 +727,7 @@ template<class T> struct knCopyToVector : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src) const inline void op(IndexInt idx, std::vector<T> &dst, const Grid<T> &src)
{ {
dst[idx] = src[idx]; dst[idx] = src[idx];
} }
@@ -757,21 +741,17 @@ template<class T> struct knCopyToVector : public KernelBase {
return src; return src;
} }
typedef Grid<T> type1; typedef Grid<T> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knCopyToVector ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, dst, src);
}
} }
std::vector<T> &dst; std::vector<T> &dst;
const Grid<T> &src; const Grid<T> &src;
@@ -784,7 +764,7 @@ template<class T> struct knCopyToGrid : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst) const inline void op(IndexInt idx, const std::vector<T> &src, Grid<T> &dst)
{ {
dst[idx] = src[idx]; dst[idx] = src[idx];
} }
@@ -798,21 +778,17 @@ template<class T> struct knCopyToGrid : public KernelBase {
return dst; return dst;
} }
typedef Grid<T> type1; typedef Grid<T> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knCopyToGrid ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, src, dst);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, src, dst);
}
} }
const std::vector<T> &src; const std::vector<T> &src;
Grid<T> &dst; Grid<T> &dst;
@@ -825,7 +801,7 @@ template<class T> struct knAddAssign : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src) const inline void op(IndexInt idx, std::vector<T> &dst, const std::vector<T> &src)
{ {
dst[idx] += src[idx]; dst[idx] += src[idx];
} }
@@ -839,21 +815,17 @@ template<class T> struct knAddAssign : public KernelBase {
return src; return src;
} }
typedef std::vector<T> type1; typedef std::vector<T> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knAddAssign ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, dst, src);
}
} }
std::vector<T> &dst; std::vector<T> &dst;
const std::vector<T> &src; const std::vector<T> &src;
@@ -930,7 +902,7 @@ struct knActivateCoarseVertices : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused) const inline void op(IndexInt idx, std::vector<GridMg::VertexType> &type, int unused)
{ {
// set all remaining 'free' vertices to 'removed', // set all remaining 'free' vertices to 'removed',
if (type[idx] == GridMg::vtFree) if (type[idx] == GridMg::vtFree)
@@ -952,21 +924,17 @@ struct knActivateCoarseVertices : public KernelBase {
return unused; return unused;
} }
typedef int type1; typedef int type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knActivateCoarseVertices ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, type, unused);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, type, unused);
}
} }
std::vector<GridMg::VertexType> &type; std::vector<GridMg::VertexType> &type;
int unused; int unused;
@@ -1052,11 +1020,8 @@ struct knGenCoarseGridOperator : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, inline void op(
std::vector<Real> &sizeRef, IndexInt idx, std::vector<Real> &sizeRef, std::vector<Real> &A, int l, const GridMg &mg)
std::vector<Real> &A,
int l,
const GridMg &mg) const
{ {
if (mg.mType[l][idx] == GridMg::vtInactive) if (mg.mType[l][idx] == GridMg::vtInactive)
return; return;
@@ -1178,21 +1143,17 @@ struct knGenCoarseGridOperator : public KernelBase {
return mg; return mg;
} }
typedef GridMg type3; typedef GridMg type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knGenCoarseGridOperator ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, sizeRef, A, l, mg);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for schedule(static, 1)
for (IndexInt i = 0; i < _sz; i++)
op(i, sizeRef, A, l, mg);
}
} }
std::vector<Real> &sizeRef; std::vector<Real> &sizeRef;
std::vector<Real> &A; std::vector<Real> &A;
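Note that the multigrid kernels in this file (knGenCoarseGridOperator above, knSmoothColor, knCalcResidual, knRestrict and knInterpolate below) use `#pragma omp for schedule(static, 1)` rather than the default schedule, presumably because the per-vertex work is uneven and a round-robin assignment of iterations balances it better; the plain copy kernels keep the default. Illustrated on its own, with the work function invented for the sketch:

#include <vector>

// schedule(static, 1) hands iterations out one at a time, round-robin,
// so no thread ends up with a contiguous block of the heavy indices.
static void smoothSketch(std::vector<double> &x, const std::vector<int> &stencilSize)
{
  const long long _sz = (long long)x.size();
#pragma omp parallel
  {
#pragma omp for schedule(static, 1)
    for (long long i = 0; i < _sz; i++) {
      double acc = 0.0;
      for (int s = 0; s < stencilSize[i]; s++)  // cost varies per vertex
        acc += 0.5 * (double)s;
      x[i] = acc;
    }
  }
}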
@@ -1232,7 +1193,7 @@ struct knSmoothColor : public KernelBase {
const Vec3i &blockSize, const Vec3i &blockSize,
const std::vector<Vec3i> &colorOffs, const std::vector<Vec3i> &colorOffs,
int l, int l,
const GridMg &mg) const const GridMg &mg)
{ {
Vec3i blockOff(int(idx) % blockSize.x, Vec3i blockOff(int(idx) % blockSize.x,
(int(idx) % (blockSize.x * blockSize.y)) / blockSize.x, (int(idx) % (blockSize.x * blockSize.y)) / blockSize.x,
@@ -1318,21 +1279,17 @@ struct knSmoothColor : public KernelBase {
return mg; return mg;
} }
typedef GridMg type5; typedef GridMg type5;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSmoothColor ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, numBlocks, x, blockSize, colorOffs, l, mg);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for schedule(static, 1)
for (IndexInt i = 0; i < _sz; i++)
op(i, numBlocks, x, blockSize, colorOffs, l, mg);
}
} }
ThreadSize &numBlocks; ThreadSize &numBlocks;
std::vector<Real> &x; std::vector<Real> &x;
@@ -1386,7 +1343,7 @@ struct knCalcResidual : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg) const inline void op(IndexInt idx, std::vector<Real> &r, int l, const GridMg &mg)
{ {
if (mg.mType[l][idx] == GridMg::vtInactive) if (mg.mType[l][idx] == GridMg::vtInactive)
return; return;
@@ -1443,21 +1400,17 @@ struct knCalcResidual : public KernelBase {
return mg; return mg;
} }
typedef GridMg type2; typedef GridMg type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knCalcResidual ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, r, l, mg);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for schedule(static, 1)
for (IndexInt i = 0; i < _sz; i++)
op(i, r, l, mg);
}
} }
std::vector<Real> &r; std::vector<Real> &r;
int l; int l;
@@ -1506,29 +1459,21 @@ struct knResidualNormSumSqr : public KernelBase {
return mg; return mg;
} }
typedef GridMg type2; typedef GridMg type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knResidualNormSumSqr ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, r, l, mg, result);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
knResidualNormSumSqr(knResidualNormSumSqr &o, tbb::split) {
: KernelBase(o), r(o.r), l(o.l), mg(o.mg), result(Real(0)) Real result = Real(0);
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const knResidualNormSumSqr &o) op(i, r, l, mg, result);
{ #pragma omp critical
result += o.result; {
this->result += result;
}
}
} }
const vector<Real> &r; const vector<Real> &r;
int l; int l;
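The reduction kernels (knResidualNormSumSqr here, and the KnPtsSum* and CompPdata_* kernels in the particle file further down) can no longer rely on tbb::parallel_reduce with its split constructor and join(); instead each thread accumulates into a local variable and merges it under a critical section. A stand-alone sketch of that pattern, followed by the more compact reduction clause it is equivalent to for a plain sum (all names invented here):

#include <vector>

// Sum of squared residuals, shaped like the knResidualNormSumSqr hunk:
// per-thread partial sum, 'nowait' on the loop, one critical merge at the end.
static double residualNormSumSqrSketch(const std::vector<double> &r)
{
  const long long _sz = (long long)r.size();
  double total = 0.0;
#pragma omp parallel
  {
    double result = 0.0;  // thread-local partial sum
#pragma omp for nowait
    for (long long i = 0; i < _sz; i++)
      result += r[i] * r[i];
#pragma omp critical
    {
      total += result;
    }
  }
  return total;
}

// For a simple sum the same thing can be written with a reduction clause; the
// generated kernels keep the explicit critical form, presumably so sums, min/max
// and vector-valued accumulators all fit one code-generation template.
static double residualNormSumSqrReduction(const std::vector<double> &r)
{
  const long long _sz = (long long)r.size();
  double total = 0.0;
#pragma omp parallel for reduction(+ : total)
  for (long long i = 0; i < _sz; i++)
    total += r[i] * r[i];
  return total;
}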
@@ -1700,7 +1645,7 @@ struct knRestrict : public KernelBase {
std::vector<Real> &dst, std::vector<Real> &dst,
const std::vector<Real> &src, const std::vector<Real> &src,
int l_dst, int l_dst,
const GridMg &mg) const const GridMg &mg)
{ {
if (mg.mType[l_dst][idx] == GridMg::vtInactive) if (mg.mType[l_dst][idx] == GridMg::vtInactive)
return; return;
@@ -1746,21 +1691,17 @@ struct knRestrict : public KernelBase {
return mg; return mg;
} }
typedef GridMg type3; typedef GridMg type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knRestrict ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src, l_dst, mg);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for schedule(static, 1)
for (IndexInt i = 0; i < _sz; i++)
op(i, dst, src, l_dst, mg);
}
} }
std::vector<Real> &dst; std::vector<Real> &dst;
const std::vector<Real> &src; const std::vector<Real> &src;
@@ -1784,7 +1725,7 @@ struct knInterpolate : public KernelBase {
std::vector<Real> &dst, std::vector<Real> &dst,
const std::vector<Real> &src, const std::vector<Real> &src,
int l_dst, int l_dst,
const GridMg &mg) const const GridMg &mg)
{ {
if (mg.mType[l_dst][idx] == GridMg::vtInactive) if (mg.mType[l_dst][idx] == GridMg::vtInactive)
return; return;
@@ -1827,21 +1768,17 @@ struct knInterpolate : public KernelBase {
return mg; return mg;
} }
typedef GridMg type3; typedef GridMg type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knInterpolate ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, dst, src, l_dst, mg);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for schedule(static, 1)
for (IndexInt i = 0; i < _sz; i++)
op(i, dst, src, l_dst, mg);
}
} }
std::vector<Real> &dst; std::vector<Real> &dst;
const std::vector<Real> &src; const std::vector<Real> &src;

View File

@@ -236,7 +236,6 @@ class WaveletNoiseField : public PbClass {
static int randomSeed; static int randomSeed;
// global reference count for noise tile // global reference count for noise tile
static std::atomic<int> mNoiseReferenceCount; static std::atomic<int> mNoiseReferenceCount;
public: public:
PbArgs _args; PbArgs _args;
} }

View File

@@ -455,7 +455,7 @@ template<class T, class S> struct knPdataAdd : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{ {
me[idx] += other[idx]; me[idx] += other[idx];
} }
@@ -469,21 +469,17 @@ template<class T, class S> struct knPdataAdd : public KernelBase {
return other; return other;
} }
typedef ParticleDataImpl<S> type1; typedef ParticleDataImpl<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataAdd ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other; const ParticleDataImpl<S> &other;
@@ -495,7 +491,7 @@ template<class T, class S> struct knPdataSub : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{ {
me[idx] -= other[idx]; me[idx] -= other[idx];
} }
@@ -509,21 +505,17 @@ template<class T, class S> struct knPdataSub : public KernelBase {
return other; return other;
} }
typedef ParticleDataImpl<S> type1; typedef ParticleDataImpl<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataSub ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other; const ParticleDataImpl<S> &other;
@@ -535,7 +527,7 @@ template<class T, class S> struct knPdataMult : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{ {
me[idx] *= other[idx]; me[idx] *= other[idx];
} }
@@ -549,21 +541,17 @@ template<class T, class S> struct knPdataMult : public KernelBase {
return other; return other;
} }
typedef ParticleDataImpl<S> type1; typedef ParticleDataImpl<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataMult ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other; const ParticleDataImpl<S> &other;
@@ -575,7 +563,7 @@ template<class T, class S> struct knPdataDiv : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<S> &other)
{ {
me[idx] /= other[idx]; me[idx] /= other[idx];
} }
@@ -589,21 +577,17 @@ template<class T, class S> struct knPdataDiv : public KernelBase {
return other; return other;
} }
typedef ParticleDataImpl<S> type1; typedef ParticleDataImpl<S> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataDiv ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const ParticleDataImpl<S> &other; const ParticleDataImpl<S> &other;
@@ -615,7 +599,7 @@ template<class T> struct knPdataSafeDiv : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<T> &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const ParticleDataImpl<T> &other)
{ {
me[idx] = safeDivide(me[idx], other[idx]); me[idx] = safeDivide(me[idx], other[idx]);
} }
@@ -629,21 +613,17 @@ template<class T> struct knPdataSafeDiv : public KernelBase {
return other; return other;
} }
typedef ParticleDataImpl<T> type1; typedef ParticleDataImpl<T> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataSafeDiv ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const ParticleDataImpl<T> &other; const ParticleDataImpl<T> &other;
@@ -656,7 +636,7 @@ template<class T, class S> struct knPdataSetScalar : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other)
{ {
me[idx] = other; me[idx] = other;
} }
@@ -670,21 +650,17 @@ template<class T, class S> struct knPdataSetScalar : public KernelBase {
return other; return other;
} }
typedef S type1; typedef S type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataSetScalar ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const S &other; const S &other;
@@ -696,7 +672,7 @@ template<class T, class S> struct knPdataAddScalar : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other)
{ {
me[idx] += other; me[idx] += other;
} }
@@ -710,21 +686,17 @@ template<class T, class S> struct knPdataAddScalar : public KernelBase {
return other; return other;
} }
typedef S type1; typedef S type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataAddScalar ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const S &other; const S &other;
@@ -736,7 +708,7 @@ template<class T, class S> struct knPdataMultScalar : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const S &other)
{ {
me[idx] *= other; me[idx] *= other;
} }
@@ -750,21 +722,17 @@ template<class T, class S> struct knPdataMultScalar : public KernelBase {
return other; return other;
} }
typedef S type1; typedef S type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataMultScalar ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const S &other; const S &other;
@@ -779,7 +747,7 @@ template<class T, class S> struct knPdataScaledAdd : public KernelBase {
inline void op(IndexInt idx, inline void op(IndexInt idx,
ParticleDataImpl<T> &me, ParticleDataImpl<T> &me,
const ParticleDataImpl<T> &other, const ParticleDataImpl<T> &other,
const S &factor) const const S &factor)
{ {
me[idx] += factor * other[idx]; me[idx] += factor * other[idx];
} }
@@ -798,21 +766,17 @@ template<class T, class S> struct knPdataScaledAdd : public KernelBase {
return factor; return factor;
} }
typedef S type2; typedef S type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataScaledAdd ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other, factor);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other, factor);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const ParticleDataImpl<T> &other; const ParticleDataImpl<T> &other;
@@ -826,7 +790,7 @@ template<class T> struct knPdataClamp : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin, const T vmax) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin, const T vmax)
{ {
me[idx] = clamp(me[idx], vmin, vmax); me[idx] = clamp(me[idx], vmin, vmax);
} }
@@ -845,21 +809,17 @@ template<class T> struct knPdataClamp : public KernelBase {
return vmax; return vmax;
} }
typedef T type2; typedef T type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataClamp ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, vmin, vmax);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, vmin, vmax);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const T vmin; const T vmin;
@@ -872,7 +832,7 @@ template<class T> struct knPdataClampMin : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmin)
{ {
me[idx] = std::max(vmin, me[idx]); me[idx] = std::max(vmin, me[idx]);
} }
@@ -886,21 +846,17 @@ template<class T> struct knPdataClampMin : public KernelBase {
return vmin; return vmin;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataClampMin ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, vmin);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, vmin);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const T vmin; const T vmin;
@@ -912,7 +868,7 @@ template<class T> struct knPdataClampMax : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmax) const inline void op(IndexInt idx, ParticleDataImpl<T> &me, const T vmax)
{ {
me[idx] = std::min(vmax, me[idx]); me[idx] = std::min(vmax, me[idx]);
} }
@@ -926,21 +882,17 @@ template<class T> struct knPdataClampMax : public KernelBase {
return vmax; return vmax;
} }
typedef T type1; typedef T type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataClampMax ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, vmax);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, vmax);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const T vmax; const T vmax;
@@ -953,7 +905,7 @@ struct knPdataClampMinVec3 : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmin) const inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmin)
{ {
me[idx].x = std::max(vmin, me[idx].x); me[idx].x = std::max(vmin, me[idx].x);
me[idx].y = std::max(vmin, me[idx].y); me[idx].y = std::max(vmin, me[idx].y);
@@ -969,21 +921,17 @@ struct knPdataClampMinVec3 : public KernelBase {
return vmin; return vmin;
} }
typedef Real type1; typedef Real type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataClampMinVec3 ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, vmin);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, vmin);
}
} }
ParticleDataImpl<Vec3> &me; ParticleDataImpl<Vec3> &me;
const Real vmin; const Real vmin;
@@ -996,7 +944,7 @@ struct knPdataClampMaxVec3 : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmax) const inline void op(IndexInt idx, ParticleDataImpl<Vec3> &me, const Real vmax)
{ {
me[idx].x = std::min(vmax, me[idx].x); me[idx].x = std::min(vmax, me[idx].x);
me[idx].y = std::min(vmax, me[idx].y); me[idx].y = std::min(vmax, me[idx].y);
@@ -1012,21 +960,17 @@ struct knPdataClampMaxVec3 : public KernelBase {
return vmax; return vmax;
} }
typedef Real type1; typedef Real type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataClampMaxVec3 ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, vmax);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, vmax);
}
} }
ParticleDataImpl<Vec3> &me; ParticleDataImpl<Vec3> &me;
const Real vmax; const Real vmax;
@@ -1071,7 +1015,7 @@ template<class T, class S> struct knPdataSetScalarIntFlag : public KernelBase {
ParticleDataImpl<T> &me, ParticleDataImpl<T> &me,
const S &other, const S &other,
const ParticleDataImpl<int> &t, const ParticleDataImpl<int> &t,
const int itype) const const int itype)
{ {
if (t[idx] & itype) if (t[idx] & itype)
me[idx] = other; me[idx] = other;
@@ -1096,21 +1040,17 @@ template<class T, class S> struct knPdataSetScalarIntFlag : public KernelBase {
return itype; return itype;
} }
typedef int type3; typedef int type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPdataSetScalarIntFlag ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other, t, itype);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other, t, itype);
}
} }
ParticleDataImpl<T> &me; ParticleDataImpl<T> &me;
const S &other; const S &other;
@@ -1223,29 +1163,21 @@ template<typename T> struct KnPtsSum : public KernelBase {
return itype; return itype;
} }
typedef int type2; typedef int type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnPtsSum ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, t, itype, result);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
KnPtsSum(KnPtsSum &o, tbb::split) {
: KernelBase(o), val(o.val), t(o.t), itype(o.itype), result(T(0.)) T result = T(0.);
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const KnPtsSum &o) op(i, val, t, itype, result);
{ #pragma omp critical
result += o.result; {
this->result += result;
}
}
} }
const ParticleDataImpl<T> &val; const ParticleDataImpl<T> &val;
const ParticleDataImpl<int> *t; const ParticleDataImpl<int> *t;
@@ -1275,28 +1207,21 @@ template<typename T> struct KnPtsSumSquare : public KernelBase {
return val; return val;
} }
typedef ParticleDataImpl<T> type0; typedef ParticleDataImpl<T> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnPtsSumSquare ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, result);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
KnPtsSumSquare(KnPtsSumSquare &o, tbb::split) : KernelBase(o), val(o.val), result(0.) {
{ Real result = 0.;
} #pragma omp for nowait
void join(const KnPtsSumSquare &o) for (IndexInt i = 0; i < _sz; i++)
{ op(i, val, result);
result += o.result; #pragma omp critical
{
this->result += result;
}
}
} }
const ParticleDataImpl<T> &val; const ParticleDataImpl<T> &val;
Real result; Real result;
@@ -1324,28 +1249,21 @@ template<typename T> struct KnPtsSumMagnitude : public KernelBase {
return val; return val;
} }
typedef ParticleDataImpl<T> type0; typedef ParticleDataImpl<T> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnPtsSumMagnitude ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, result);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
KnPtsSumMagnitude(KnPtsSumMagnitude &o, tbb::split) : KernelBase(o), val(o.val), result(0.) {
{ Real result = 0.;
} #pragma omp for nowait
void join(const KnPtsSumMagnitude &o) for (IndexInt i = 0; i < _sz; i++)
{ op(i, val, result);
result += o.result; #pragma omp critical
{
this->result += result;
}
}
} }
const ParticleDataImpl<T> &val; const ParticleDataImpl<T> &val;
Real result; Real result;
@@ -1392,29 +1310,21 @@ struct CompPdata_Min : public KernelBase {
return val; return val;
} }
typedef ParticleDataImpl<T> type0; typedef ParticleDataImpl<T> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel CompPdata_Min ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, minVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
CompPdata_Min(CompPdata_Min &o, tbb::split) {
: KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) Real minVal = std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const CompPdata_Min &o) op(i, val, minVal);
{ #pragma omp critical
minVal = min(minVal, o.minVal); {
this->minVal = min(minVal, this->minVal);
}
}
} }
const ParticleDataImpl<T> &val; const ParticleDataImpl<T> &val;
Real minVal; Real minVal;
@@ -1447,29 +1357,21 @@ struct CompPdata_Max : public KernelBase {
return val; return val;
} }
typedef ParticleDataImpl<T> type0; typedef ParticleDataImpl<T> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel CompPdata_Max ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, maxVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
CompPdata_Max(CompPdata_Max &o, tbb::split) {
: KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) Real maxVal = -std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const CompPdata_Max &o) op(i, val, maxVal);
{ #pragma omp critical
maxVal = max(maxVal, o.maxVal); {
this->maxVal = max(maxVal, this->maxVal);
}
}
} }
const ParticleDataImpl<T> &val; const ParticleDataImpl<T> &val;
Real maxVal; Real maxVal;
@@ -1545,29 +1447,21 @@ struct CompPdata_MinVec3 : public KernelBase {
return val; return val;
} }
typedef ParticleDataImpl<Vec3> type0; typedef ParticleDataImpl<Vec3> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel CompPdata_MinVec3 ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, minVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
CompPdata_MinVec3(CompPdata_MinVec3 &o, tbb::split) {
: KernelBase(o), val(o.val), minVal(std::numeric_limits<Real>::max()) Real minVal = std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const CompPdata_MinVec3 &o) op(i, val, minVal);
{ #pragma omp critical
minVal = min(minVal, o.minVal); {
this->minVal = min(minVal, this->minVal);
}
}
} }
const ParticleDataImpl<Vec3> &val; const ParticleDataImpl<Vec3> &val;
Real minVal; Real minVal;
@@ -1599,29 +1493,21 @@ struct CompPdata_MaxVec3 : public KernelBase {
return val; return val;
} }
typedef ParticleDataImpl<Vec3> type0; typedef ParticleDataImpl<Vec3> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel CompPdata_MaxVec3 ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, val, maxVal);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
CompPdata_MaxVec3(CompPdata_MaxVec3 &o, tbb::split) {
: KernelBase(o), val(o.val), maxVal(-std::numeric_limits<Real>::max()) Real maxVal = -std::numeric_limits<Real>::max();
{ #pragma omp for nowait
} for (IndexInt i = 0; i < _sz; i++)
void join(const CompPdata_MaxVec3 &o) op(i, val, maxVal);
{ #pragma omp critical
maxVal = max(maxVal, o.maxVal); {
this->maxVal = max(maxVal, this->maxVal);
}
}
} }
const ParticleDataImpl<Vec3> &val; const ParticleDataImpl<Vec3> &val;
Real maxVal; Real maxVal;

View File

@@ -205,7 +205,6 @@ class ParticleBase : public PbClass {
//! custom seed for particle systems, used by plugins //! custom seed for particle systems, used by plugins
int mSeed; //! fix global random seed storage, used mainly by functions in this class int mSeed; //! fix global random seed storage, used mainly by functions in this class
static int globalSeed; static int globalSeed;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -629,7 +628,6 @@ template<class S> class ParticleSystem : public ParticleBase {
std::vector<S> mData; std::vector<S> mData;
//! reduce storage , called by doCompress //! reduce storage , called by doCompress
virtual void compress(); virtual void compress();
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -920,7 +918,6 @@ class ParticleIndexSystem : public ParticleSystem<ParticleIndexData> {
return -1; return -1;
} }
}; };
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -985,7 +982,6 @@ template<class DATA, class CON> class ConnectedParticleSystem : public ParticleS
protected: protected:
std::vector<CON> mSegments; std::vector<CON> mSegments;
virtual void compress(); virtual void compress();
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -1075,7 +1071,6 @@ class ParticleDataBase : public PbClass {
protected: protected:
ParticleBase *mpParticleSys; ParticleBase *mpParticleSys;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -1848,7 +1843,6 @@ template<class T> class ParticleDataImpl : public ParticleDataBase {
//! optionally , we might have an associated grid from which to grab new data //! optionally , we might have an associated grid from which to grab new data
Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3 Grid<T> *mpGridSource; //! unfortunately , we need to distinguish mac vs regular vec3
bool mGridSourceMAC; bool mGridSourceMAC;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -1912,19 +1906,17 @@ template<class S> void ParticleSystem<S>::transformPositions(Vec3i dimOld, Vec3i
// check for deletion/invalid position, otherwise return velocity // check for deletion/invalid position, otherwise return velocity
template<class S> struct _GridAdvectKernel : public KernelBase { template<class S> struct GridAdvectKernel : public KernelBase {
_GridAdvectKernel(const KernelBase &base, GridAdvectKernel(std::vector<S> &p,
std::vector<S> &p, const MACGrid &vel,
const MACGrid &vel, const FlagGrid &flags,
const FlagGrid &flags, const Real dt,
const Real dt, const bool deleteInObstacle,
const bool deleteInObstacle, const bool stopInObstacle,
const bool stopInObstacle, const bool skipNew,
const bool skipNew, const ParticleDataImpl<int> *ptype,
const ParticleDataImpl<int> *ptype, const int exclude)
const int exclude, : KernelBase(p.size()),
std::vector<Vec3> &u)
: KernelBase(base),
p(p), p(p),
vel(vel), vel(vel),
flags(flags), flags(flags),
@@ -1934,8 +1926,10 @@ template<class S> struct _GridAdvectKernel : public KernelBase {
skipNew(skipNew), skipNew(skipNew),
ptype(ptype), ptype(ptype),
exclude(exclude), exclude(exclude),
u(u) u((size))
{ {
runMessage();
run();
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
std::vector<S> &p, std::vector<S> &p,
@@ -1947,7 +1941,7 @@ template<class S> struct _GridAdvectKernel : public KernelBase {
const bool skipNew, const bool skipNew,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude, const int exclude,
std::vector<Vec3> &u) const std::vector<Vec3> &u)
{ {
if ((p[idx].flag & ParticleBase::PDELETE) || (ptype && ((*ptype)[idx] & exclude)) || if ((p[idx].flag & ParticleBase::PDELETE) || (ptype && ((*ptype)[idx] & exclude)) ||
(skipNew && (p[idx].flag & ParticleBase::PNEW))) { (skipNew && (p[idx].flag & ParticleBase::PNEW))) {
@@ -1968,66 +1962,6 @@ template<class S> struct _GridAdvectKernel : public KernelBase {
} }
u[idx] = vel.getInterpolated(p[idx].pos) * dt; u[idx] = vel.getInterpolated(p[idx].pos) * dt;
} }
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, vel, flags, dt, deleteInObstacle, stopInObstacle, skipNew, ptype, exclude, u);
}
void run()
{
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
}
std::vector<S> &p;
const MACGrid &vel;
const FlagGrid &flags;
const Real dt;
const bool deleteInObstacle;
const bool stopInObstacle;
const bool skipNew;
const ParticleDataImpl<int> *ptype;
const int exclude;
std::vector<Vec3> &u;
};
template<class S> struct GridAdvectKernel : public KernelBase {
GridAdvectKernel(std::vector<S> &p,
const MACGrid &vel,
const FlagGrid &flags,
const Real dt,
const bool deleteInObstacle,
const bool stopInObstacle,
const bool skipNew,
const ParticleDataImpl<int> *ptype,
const int exclude)
: KernelBase(p.size()),
_inner(KernelBase(p.size()),
p,
vel,
flags,
dt,
deleteInObstacle,
stopInObstacle,
skipNew,
ptype,
exclude,
u),
p(p),
vel(vel),
flags(flags),
dt(dt),
deleteInObstacle(deleteInObstacle),
stopInObstacle(stopInObstacle),
skipNew(skipNew),
ptype(ptype),
exclude(exclude),
u((size))
{
runMessage();
run();
}
void run()
{
_inner.run();
}
inline operator std::vector<Vec3>() inline operator std::vector<Vec3>()
{ {
return u; return u;
@@ -2081,14 +2015,18 @@ template<class S> struct GridAdvectKernel : public KernelBase {
return exclude; return exclude;
} }
typedef int type8; typedef int type8;
void runMessage() void runMessage(){};
void run()
{ {
debMsg("Executing kernel GridAdvectKernel ", 3); const IndexInt _sz = size;
debMsg("Kernel range" #pragma omp parallel
<< " size " << size << " ", {
4);
}; #pragma omp for
_GridAdvectKernel<S> _inner; for (IndexInt i = 0; i < _sz; i++)
op(i, p, vel, flags, dt, deleteInObstacle, stopInObstacle, skipNew, ptype, exclude, u);
}
}
std::vector<S> &p; std::vector<S> &p;
const MACGrid &vel; const MACGrid &vel;
const FlagGrid &flags; const FlagGrid &flags;
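This hunk also undoes a TBB-specific split: the old code kept a private _GridAdvectKernel that did the work plus a thin GridAdvectKernel wrapper that owned the per-particle displacement vector u, whereas with the OpenMP loop living directly in run() the two fold back into one struct whose conversion operator still hands the result vector to the caller. A reduced sketch of that shape, with particle and velocity types as stand-ins:

#include <vector>

struct Vec3f {
  float x, y, z;
};

// Construct, run in parallel, then convert to the per-particle result vector
// (mirrors the kernel's 'inline operator std::vector<Vec3>()').
struct AdvectSketch {
  AdvectSketch(const std::vector<Vec3f> &pos, float dt) : pos(pos), dt(dt), u(pos.size())
  {
    run();
  }
  void run()
  {
    const long long _sz = (long long)pos.size();
#pragma omp parallel
    {
#pragma omp for
      for (long long i = 0; i < _sz; i++) {
        // The real kernel samples the MAC velocity grid here; a constant stands in.
        u[i] = Vec3f{1.0f * dt, 0.0f, 0.0f};
      }
    }
  }
  operator std::vector<Vec3f>() const
  {
    return u;
  }
  const std::vector<Vec3f> &pos;
  float dt;
  std::vector<Vec3f> u;
};

Preserving the conversion operator is presumably what lets call sites that read the advection result through the implicit conversion keep working unchanged.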
@@ -2112,7 +2050,7 @@ template<class S> struct KnDeleteInObstacle : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, std::vector<S> &p, const FlagGrid &flags) const inline void op(IndexInt idx, std::vector<S> &p, const FlagGrid &flags)
{ {
if (p[idx].flag & ParticleBase::PDELETE) if (p[idx].flag & ParticleBase::PDELETE)
return; return;
@@ -2130,21 +2068,17 @@ template<class S> struct KnDeleteInObstacle : public KernelBase {
return flags; return flags;
} }
typedef FlagGrid type1; typedef FlagGrid type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnDeleteInObstacle ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, flags);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, flags);
}
} }
std::vector<S> &p; std::vector<S> &p;
const FlagGrid &flags; const FlagGrid &flags;
@@ -2189,7 +2123,7 @@ template<class S> struct KnClampPositions : public KernelBase {
ParticleDataImpl<Vec3> *posOld = nullptr, ParticleDataImpl<Vec3> *posOld = nullptr,
bool stopInObstacle = true, bool stopInObstacle = true,
const ParticleDataImpl<int> *ptype = nullptr, const ParticleDataImpl<int> *ptype = nullptr,
const int exclude = 0) const const int exclude = 0)
{ {
if (p[idx].flag & ParticleBase::PDELETE) if (p[idx].flag & ParticleBase::PDELETE)
return; return;
@@ -2235,21 +2169,17 @@ template<class S> struct KnClampPositions : public KernelBase {
return exclude; return exclude;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnClampPositions ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, flags, posOld, stopInObstacle, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, flags, posOld, stopInObstacle, ptype, exclude);
}
} }
std::vector<S> &p; std::vector<S> &p;
const FlagGrid &flags; const FlagGrid &flags;
@@ -2341,13 +2271,7 @@ template<class S> struct KnProjectParticles : public KernelBase {
return rand; return rand;
} }
typedef RandomStream type2; typedef RandomStream type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnProjectParticles ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void run() void run()
{ {
const IndexInt _sz = size; const IndexInt _sz = size;
@@ -2389,7 +2313,7 @@ template<class S> struct KnProjectOutOfBnd : public KernelBase {
const Real bnd, const Real bnd,
const bool *axis, const bool *axis,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
if (!part.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) if (!part.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return; return;
@@ -2438,21 +2362,17 @@ template<class S> struct KnProjectOutOfBnd : public KernelBase {
return exclude; return exclude;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnProjectOutOfBnd ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, part, flags, bnd, axis, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, part, flags, bnd, axis, ptype, exclude);
}
} }
ParticleSystem<S> &part; ParticleSystem<S> &part;
const FlagGrid &flags; const FlagGrid &flags;

View File

@@ -59,7 +59,7 @@ template<class T> struct SemiLagrange : public KernelBase {
Real dt, Real dt,
bool isLevelset, bool isLevelset,
int orderSpace, int orderSpace,
int orderTrace) const int orderTrace)
{ {
if (orderTrace == 1) { if (orderTrace == 1) {
// traceback position // traceback position
@@ -117,37 +117,35 @@ template<class T> struct SemiLagrange : public KernelBase {
return orderTrace; return orderTrace;
} }
typedef int type7; typedef int type7;
-  void runMessage()
-  {
-    debMsg("Executing kernel SemiLagrange ", 3);
-    debMsg("Kernel range"
-               << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
-           4);
-  };
-  void operator()(const tbb::blocked_range<IndexInt> &__r) const
+  void runMessage(){};
+  void run()
   {
     const int _maxX = maxX;
     const int _maxY = maxY;
     if (maxZ > 1) {
-      for (int k = __r.begin(); k != (int)__r.end(); k++)
-        for (int j = 1; j < _maxY; j++)
-          for (int i = 1; i < _maxX; i++)
-            op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+#pragma omp parallel
+      {
+#pragma omp for
+        for (int k = minZ; k < maxZ; k++)
+          for (int j = 1; j < _maxY; j++)
+            for (int i = 1; i < _maxX; i++)
+              op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+      }
     }
     else {
       const int k = 0;
-      for (int j = __r.begin(); j != (int)__r.end(); j++)
-        for (int i = 1; i < _maxX; i++)
-          op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+#pragma omp parallel
+      {
+#pragma omp for
+        for (int j = 1; j < _maxY; j++)
+          for (int i = 1; i < _maxX; i++)
+            op(i, j, k, flags, vel, dst, src, dt, isLevelset, orderSpace, orderTrace);
+      }
     }
   }
-  void run()
-  {
-    if (maxZ > 1)
-      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
-    else
-      tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
-  }
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &vel; const MACGrid &vel;
Grid<T> &dst; Grid<T> &dst;
@@ -189,7 +187,7 @@ struct SemiLagrangeMAC : public KernelBase {
const MACGrid &src, const MACGrid &src,
Real dt, Real dt,
int orderSpace, int orderSpace,
int orderTrace) const int orderTrace)
{ {
if (orderTrace == 1) { if (orderTrace == 1) {
// get currect velocity at MAC position // get currect velocity at MAC position
@@ -259,37 +257,35 @@ struct SemiLagrangeMAC : public KernelBase {
return orderTrace; return orderTrace;
} }
typedef int type6; typedef int type6;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel SemiLagrangeMAC ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, dst, src, dt, orderSpace, orderTrace);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &vel; const MACGrid &vel;
MACGrid &dst; MACGrid &dst;
@@ -331,7 +327,7 @@ template<class T> struct MacCormackCorrect : public KernelBase {
const Grid<T> &bwd, const Grid<T> &bwd,
Real strength, Real strength,
bool isLevelSet, bool isLevelSet,
bool isMAC = false) const bool isMAC = false)
{ {
dst[idx] = fwd[idx]; dst[idx] = fwd[idx];
@@ -380,21 +376,17 @@ template<class T> struct MacCormackCorrect : public KernelBase {
return isMAC; return isMAC;
} }
typedef bool type7; typedef bool type7;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel MacCormackCorrect ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
}
} }
const FlagGrid &flags; const FlagGrid &flags;
Grid<T> &dst; Grid<T> &dst;
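For context on what `MacCormackCorrect` computes from the `old`, `fwd` and `bwd` grids: the correction follows the usual MacCormack/BFECC form, advancing the forward estimate by a scaled fraction of the round-trip error. A one-line sketch of the assumed per-cell formula (the real kernel additionally respects the flag grid and the level-set/MAC switches):

```cpp
// Assumed MacCormack correction applied by these advection kernels:
//   corrected = fwd + 0.5 * strength * (old - bwd)
// `old` is the value before advection, `fwd` the forward-advected value,
// `bwd` the value advected back again; `strength` scales how much of the
// error estimate is applied (1 = full correction).
inline float macCormackCorrect(float old_, float fwd, float bwd, float strength)
{
  return fwd + 0.5f * strength * (old_ - bwd);
}
```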
@@ -440,7 +432,7 @@ template<class T> struct MacCormackCorrectMAC : public KernelBase {
const Grid<T> &bwd, const Grid<T> &bwd,
Real strength, Real strength,
bool isLevelSet, bool isLevelSet,
bool isMAC = false) const bool isMAC = false)
{ {
bool skip[3] = {false, false, false}; bool skip[3] = {false, false, false};
@@ -505,37 +497,35 @@ template<class T> struct MacCormackCorrectMAC : public KernelBase {
return isMAC; return isMAC;
} }
typedef bool type7; typedef bool type7;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel MacCormackCorrectMAC ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dst, old, fwd, bwd, strength, isLevelSet, isMAC);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<T> &dst; Grid<T> &dst;
const Grid<T> &old; const Grid<T> &old;
@@ -762,7 +752,7 @@ template<class T> struct MacCormackClamp : public KernelBase {
const Grid<T> &orig, const Grid<T> &orig,
const Grid<T> &fwd, const Grid<T> &fwd,
Real dt, Real dt,
const int clampMode) const const int clampMode)
{ {
T dval = dst(i, j, k); T dval = dst(i, j, k);
Vec3i gridUpper = flags.getSize() - 1; Vec3i gridUpper = flags.getSize() - 1;
@@ -830,37 +820,35 @@ template<class T> struct MacCormackClamp : public KernelBase {
return clampMode; return clampMode;
} }
typedef int type6; typedef int type6;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel MacCormackClamp ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &vel; const MACGrid &vel;
Grid<T> &dst; Grid<T> &dst;
@@ -901,7 +889,7 @@ struct MacCormackClampMAC : public KernelBase {
const MACGrid &orig, const MACGrid &orig,
const MACGrid &fwd, const MACGrid &fwd,
Real dt, Real dt,
const int clampMode) const const int clampMode)
{ {
Vec3 pos(i, j, k); Vec3 pos(i, j, k);
Vec3 dval = dst(i, j, k); Vec3 dval = dst(i, j, k);
@@ -957,37 +945,35 @@ struct MacCormackClampMAC : public KernelBase {
return clampMode; return clampMode;
} }
typedef int type6; typedef int type6;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel MacCormackClampMAC ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, dst, orig, fwd, dt, clampMode);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &vel; const MACGrid &vel;
MACGrid &dst; MACGrid &dst;
@@ -1016,27 +1002,39 @@ void fnAdvectSemiLagrange(FluidSolver *parent,
bool levelset = orig.getType() & GridBase::TypeLevelset; bool levelset = orig.getType() & GridBase::TypeLevelset;
// forward step // forward step
GridType fwd(parent); GridType *fwd = new GridType(parent, true, false, false);
SemiLagrange<T>(flags, vel, fwd, orig, dt, levelset, orderSpace, orderTrace); SemiLagrange<T>(flags, vel, *fwd, orig, dt, levelset, orderSpace, orderTrace);
if (order == 1) { if (order == 1) {
orig.swap(fwd); #if OPENMP && OPENMP_OFFLOAD
orig.copyFrom(*fwd, true, false);
#else
orig.swap(*fwd);
#endif
} }
else if (order == 2) { // MacCormack else if (order == 2) { // MacCormack
GridType bwd(parent); GridType bwd(parent);
GridType newGrid(parent); GridType *newGrid = new GridType(parent, true, false, false);
// bwd <- backwards step // bwd <- backwards step
SemiLagrange<T>(flags, vel, bwd, fwd, -dt, levelset, orderSpace, orderTrace); SemiLagrange<T>(flags, vel, bwd, *fwd, -dt, levelset, orderSpace, orderTrace);
// newGrid <- compute correction // newGrid <- compute correction
MacCormackCorrect<T>(flags, newGrid, orig, fwd, bwd, strength, levelset); MacCormackCorrect<T>(flags, *newGrid, orig, *fwd, bwd, strength, levelset);
// clamp values // clamp values
MacCormackClamp<T>(flags, vel, newGrid, orig, fwd, dt, clampMode); MacCormackClamp<T>(flags, vel, *newGrid, orig, *fwd, dt, clampMode);
orig.swap(newGrid); #if OPENMP && OPENMP_OFFLOAD
orig.copyFrom(*newGrid, true, false);
#else
orig.swap(*newGrid);
#endif
if (newGrid)
delete newGrid;
} }
if (fwd)
delete fwd;
} }
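The `OPENMP && OPENMP_OFFLOAD` guard above trades the cheap `swap()` of the temporary grid for a full `copyFrom()`. A plausible reason, hinted at by the offload setting, is that a grid mapped to the GPU keeps a fixed association between its host buffer and its device buffer; swapping host pointers would leave that association attached to the wrong data, while an element-wise copy keeps both sides consistent. A standalone sketch of the idea with plain arrays (the `map` clauses and names are illustrative, not the patch's Grid API):

```cpp
#include <vector>

// `result` is assumed to stay mapped to one device buffer for the whole run,
// so advected values are copied back into it instead of swapping host
// pointers (a swap would not move any device-side data).
void copyBackAdvected(std::vector<float> &result, const std::vector<float> &advected)
{
  float *dst = result.data();
  const float *src = advected.data();
  const long long n = (long long)result.size();
#pragma omp target teams distribute parallel for map(to : src[0 : n]) map(tofrom : dst[0 : n])
  for (long long i = 0; i < n; i++)
    dst[i] = src[i];
}
```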
// outflow functions // outflow functions
@@ -1087,7 +1085,7 @@ struct extrapolateVelConvectiveBC : public KernelBase {
const MACGrid &vel, const MACGrid &vel,
MACGrid &velDst, MACGrid &velDst,
const MACGrid &velPrev, const MACGrid &velPrev,
Real timeStep) const Real timeStep)
{ {
if (flags.isOutflow(i, j, k)) { if (flags.isOutflow(i, j, k)) {
const Vec3 bulkVel = getBulkVel(flags, vel, i, j, k); const Vec3 bulkVel = getBulkVel(flags, vel, i, j, k);
@@ -1154,37 +1152,35 @@ struct extrapolateVelConvectiveBC : public KernelBase {
return timeStep; return timeStep;
} }
typedef Real type4; typedef Real type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel extrapolateVelConvectiveBC ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, velDst, velPrev, timeStep);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, velDst, velPrev, timeStep);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, velDst, velPrev, timeStep);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, velDst, velPrev, timeStep);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &vel; const MACGrid &vel;
MACGrid &velDst; MACGrid &velDst;
@@ -1200,8 +1196,7 @@ struct copyChangedVels : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op( inline void op(int i, int j, int k, const FlagGrid &flags, const MACGrid &velDst, MACGrid &vel)
int i, int j, int k, const FlagGrid &flags, const MACGrid &velDst, MACGrid &vel) const
{ {
if (flags.isOutflow(i, j, k)) if (flags.isOutflow(i, j, k))
vel(i, j, k) = velDst(i, j, k); vel(i, j, k) = velDst(i, j, k);
@@ -1221,37 +1216,35 @@ struct copyChangedVels : public KernelBase {
return vel; return vel;
} }
typedef MACGrid type2; typedef MACGrid type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel copyChangedVels ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, velDst, vel);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, velDst, vel);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, velDst, vel);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, velDst, vel);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &velDst; const MACGrid &velDst;
MACGrid &vel; MACGrid &vel;
@@ -1275,7 +1268,7 @@ struct knResetPhiInObs : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const FlagGrid &flags, Grid<Real> &sdf) const inline void op(int i, int j, int k, const FlagGrid &flags, Grid<Real> &sdf)
{ {
if (flags.isObstacle(i, j, k) && (sdf(i, j, k) < 0.)) { if (flags.isObstacle(i, j, k) && (sdf(i, j, k) < 0.)) {
sdf(i, j, k) = 0.1; sdf(i, j, k) = 0.1;
@@ -1291,37 +1284,35 @@ struct knResetPhiInObs : public KernelBase {
return sdf; return sdf;
} }
typedef Grid<Real> type1; typedef Grid<Real> type1;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knResetPhiInObs ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, sdf);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, sdf);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, sdf);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, sdf);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &sdf; Grid<Real> &sdf;
}; };
@@ -1378,33 +1369,45 @@ void fnAdvectSemiLagrange<MACGrid>(FluidSolver *parent,
Real dt = parent->getDt(); Real dt = parent->getDt();
// forward step // forward step
MACGrid fwd(parent); MACGrid *fwd = new MACGrid(parent, true, false, false);
SemiLagrangeMAC(flags, vel, fwd, orig, dt, orderSpace, orderTrace); SemiLagrangeMAC(flags, vel, *fwd, orig, dt, orderSpace, orderTrace);
if (orderSpace != 1) { if (orderSpace != 1) {
debMsg("Warning higher order for MAC grids not yet implemented...", 1); debMsg("Warning higher order for MAC grids not yet implemented...", 1);
} }
if (order == 1) { if (order == 1) {
applyOutflowBC(flags, fwd, orig, dt); applyOutflowBC(flags, *fwd, orig, dt);
orig.swap(fwd); #if OPENMP && OPENMP_OFFLOAD
orig.copyFrom(*fwd, true, false);
#else
orig.swap(*fwd);
#endif
} }
else if (order == 2) { // MacCormack else if (order == 2) { // MacCormack
MACGrid bwd(parent); MACGrid bwd(parent);
MACGrid newGrid(parent); MACGrid *newGrid = new MACGrid(parent, true, false, false);
// bwd <- backwards step // bwd <- backwards step
SemiLagrangeMAC(flags, vel, bwd, fwd, -dt, orderSpace, orderTrace); SemiLagrangeMAC(flags, vel, bwd, *fwd, -dt, orderSpace, orderTrace);
// newGrid <- compute correction // newGrid <- compute correction
MacCormackCorrectMAC<Vec3>(flags, newGrid, orig, fwd, bwd, strength, false, true); MacCormackCorrectMAC<Vec3>(flags, *newGrid, orig, *fwd, bwd, strength, false, true);
// clamp values // clamp values
MacCormackClampMAC(flags, vel, newGrid, orig, fwd, dt, clampMode); MacCormackClampMAC(flags, vel, *newGrid, orig, *fwd, dt, clampMode);
applyOutflowBC(flags, newGrid, orig, dt); applyOutflowBC(flags, *newGrid, orig, dt);
orig.swap(newGrid); #if OPENMP && OPENMP_OFFLOAD
orig.copyFrom(*newGrid, true, false);
#else
orig.swap(*newGrid);
#endif
if (newGrid)
delete newGrid;
} }
if (fwd)
delete fwd;
} }
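In the hunks shown here the kernels themselves still use host-side `#pragma omp parallel` / `#pragma omp for`; the `OPENMP_OFFLOAD` guards only change how the temporary grids are copied back. For completeness, a loop nest of this shape could be pushed to the device with OpenMP target directives along the following lines (an illustration only, not code from this patch):

```cpp
// Offload sketch for an (i, j, k) grid kernel: one combined construct maps
// the buffer and distributes the collapsed loop nest over GPU teams/threads.
void scaleInterior(float *grid, int nx, int ny, int nz, float factor)
{
  const long long n = (long long)nx * ny * nz;
#pragma omp target teams distribute parallel for collapse(3) map(tofrom : grid[0 : n])
  for (int k = 1; k < nz - 1; k++)
    for (int j = 1; j < ny - 1; j++)
      for (int i = 1; i < nx - 1; i++)
        grid[((long long)k * ny + j) * nx + i] *= factor;  // stand-in for op(i, j, k, ...)
}
```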
//! Perform semi-lagrangian advection of target Real- or Vec3 grid //! Perform semi-lagrangian advection of target Real- or Vec3 grid

View File

@@ -239,13 +239,7 @@ struct knApicMapLinearVec3ToMACGrid : public KernelBase {
return boundaryWidth; return boundaryWidth;
} }
typedef int type9; typedef int type9;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knApicMapLinearVec3ToMACGrid ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void run() void run()
{ {
const IndexInt _sz = size; const IndexInt _sz = size;
@@ -370,7 +364,7 @@ struct knApicMapLinearMACGridToVec3 : public KernelBase {
const FlagGrid &flags, const FlagGrid &flags,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude, const int exclude,
const int boundaryWidth) const const int boundaryWidth)
{ {
if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return; return;
@@ -509,21 +503,17 @@ struct knApicMapLinearMACGridToVec3 : public KernelBase {
return boundaryWidth; return boundaryWidth;
} }
typedef int type9; typedef int type9;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knApicMapLinearMACGridToVec3 ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, vp, cpx, cpy, cpz, p, vg, flags, ptype, exclude, boundaryWidth);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, vp, cpx, cpy, cpz, p, vg, flags, ptype, exclude, boundaryWidth);
}
} }
ParticleDataImpl<Vec3> &vp; ParticleDataImpl<Vec3> &vp;
ParticleDataImpl<Vec3> &cpx; ParticleDataImpl<Vec3> &cpx;

View File

@@ -52,7 +52,7 @@ struct KnApplyForceField : public KernelBase {
const Grid<Vec3> &force, const Grid<Vec3> &force,
const Grid<Real> *include, const Grid<Real> *include,
bool additive, bool additive,
bool isMAC) const bool isMAC)
{ {
bool curFluid = flags.isFluid(i, j, k); bool curFluid = flags.isFluid(i, j, k);
bool curEmpty = flags.isEmpty(i, j, k); bool curEmpty = flags.isEmpty(i, j, k);
@@ -105,37 +105,35 @@ struct KnApplyForceField : public KernelBase {
return isMAC; return isMAC;
} }
typedef bool type5; typedef bool type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnApplyForceField ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, force, include, additive, isMAC);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, force, include, additive, isMAC);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, force, include, additive, isMAC);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, force, include, additive, isMAC);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
const Grid<Vec3> &force; const Grid<Vec3> &force;
@@ -165,7 +163,7 @@ struct KnApplyForce : public KernelBase {
MACGrid &vel, MACGrid &vel,
Vec3 force, Vec3 force,
const Grid<Real> *exclude, const Grid<Real> *exclude,
bool additive) const bool additive)
{ {
bool curFluid = flags.isFluid(i, j, k); bool curFluid = flags.isFluid(i, j, k);
bool curEmpty = flags.isEmpty(i, j, k); bool curEmpty = flags.isEmpty(i, j, k);
@@ -206,37 +204,35 @@ struct KnApplyForce : public KernelBase {
return additive; return additive;
} }
typedef bool type4; typedef bool type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnApplyForce ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, force, exclude, additive);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, force, exclude, additive);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, force, exclude, additive);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, force, exclude, additive);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
Vec3 force; Vec3 force;
@@ -346,7 +342,7 @@ struct KnAddBuoyancy : public KernelBase {
const FlagGrid &flags, const FlagGrid &flags,
const Grid<Real> &factor, const Grid<Real> &factor,
MACGrid &vel, MACGrid &vel,
Vec3 strength) const Vec3 strength)
{ {
if (!flags.isFluid(i, j, k)) if (!flags.isFluid(i, j, k))
return; return;
@@ -377,37 +373,35 @@ struct KnAddBuoyancy : public KernelBase {
return strength; return strength;
} }
typedef Vec3 type3; typedef Vec3 type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnAddBuoyancy ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, factor, vel, strength);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, factor, vel, strength);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, factor, vel, strength);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, factor, vel, strength);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const Grid<Real> &factor; const Grid<Real> &factor;
MACGrid &vel; MACGrid &vel;
@@ -662,7 +656,7 @@ struct KnSetInflow : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, MACGrid &vel, int dim, int p0, const Vec3 &val) const inline void op(int i, int j, int k, MACGrid &vel, int dim, int p0, const Vec3 &val)
{ {
Vec3i p(i, j, k); Vec3i p(i, j, k);
if (p[dim] == p0 || p[dim] == p0 + 1) if (p[dim] == p0 || p[dim] == p0 + 1)
@@ -688,37 +682,35 @@ struct KnSetInflow : public KernelBase {
return val; return val;
} }
typedef Vec3 type3; typedef Vec3 type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnSetInflow ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, vel, dim, p0, val);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, vel, dim, p0, val);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, vel, dim, p0, val);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, vel, dim, p0, val);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
MACGrid &vel; MACGrid &vel;
int dim; int dim;
int p0; int p0;
@@ -784,8 +776,7 @@ struct KnSetWallBcs : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op( inline void op(int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const MACGrid *obvel)
int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const MACGrid *obvel) const
{ {
bool curFluid = flags.isFluid(i, j, k); bool curFluid = flags.isFluid(i, j, k);
@@ -848,37 +839,35 @@ struct KnSetWallBcs : public KernelBase {
return obvel; return obvel;
} }
typedef MACGrid type2; typedef MACGrid type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnSetWallBcs ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, obvel);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, obvel);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, obvel);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, obvel);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
const MACGrid *obvel; const MACGrid *obvel;
@@ -912,7 +901,7 @@ struct KnSetWallBcsFrac : public KernelBase {
MACGrid &velTarget, MACGrid &velTarget,
const MACGrid *obvel, const MACGrid *obvel,
const Grid<Real> *phiObs, const Grid<Real> *phiObs,
const int &boundaryWidth = 0) const const int &boundaryWidth = 0)
{ {
bool curFluid = flags.isFluid(i, j, k); bool curFluid = flags.isFluid(i, j, k);
bool curObs = flags.isObstacle(i, j, k); bool curObs = flags.isObstacle(i, j, k);
@@ -1025,37 +1014,35 @@ struct KnSetWallBcsFrac : public KernelBase {
return boundaryWidth; return boundaryWidth;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnSetWallBcsFrac ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, velTarget, obvel, phiObs, boundaryWidth);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &vel; const MACGrid &vel;
MACGrid &velTarget; MACGrid &velTarget;
@@ -1127,8 +1114,7 @@ struct KnAddForceIfLower : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op( inline void op(int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Vec3> &force)
int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Vec3> &force) const
{ {
bool curFluid = flags.isFluid(i, j, k); bool curFluid = flags.isFluid(i, j, k);
bool curEmpty = flags.isEmpty(i, j, k); bool curEmpty = flags.isEmpty(i, j, k);
@@ -1173,37 +1159,35 @@ struct KnAddForceIfLower : public KernelBase {
return force; return force;
} }
typedef Grid<Vec3> type2; typedef Grid<Vec3> type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnAddForceIfLower ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, force);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, force);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, vel, force);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, force);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
const Grid<Vec3> &force; const Grid<Vec3> &force;
@@ -1266,7 +1250,7 @@ struct KnConfForce : public KernelBase {
const Grid<Real> &grid, const Grid<Real> &grid,
const Grid<Vec3> &curl, const Grid<Vec3> &curl,
Real str, Real str,
const Grid<Real> *strGrid) const const Grid<Real> *strGrid)
{ {
Vec3 grad = 0.5 * Vec3(grid(i + 1, j, k) - grid(i - 1, j, k), Vec3 grad = 0.5 * Vec3(grid(i + 1, j, k) - grid(i - 1, j, k),
grid(i, j + 1, k) - grid(i, j - 1, k), grid(i, j + 1, k) - grid(i, j - 1, k),
@@ -1303,37 +1287,35 @@ struct KnConfForce : public KernelBase {
return strGrid; return strGrid;
} }
typedef Grid<Real> type4; typedef Grid<Real> type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnConfForce ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, force, grid, curl, str, strGrid);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, force, grid, curl, str, strGrid);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, force, grid, curl, str, strGrid);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, force, grid, curl, str, strGrid);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
Grid<Vec3> &force; Grid<Vec3> &force;
const Grid<Real> &grid; const Grid<Real> &grid;
const Grid<Vec3> &curl; const Grid<Vec3> &curl;
@@ -1514,7 +1496,7 @@ struct KnDissolveSmoke : public KernelBase {
int speed, int speed,
bool logFalloff, bool logFalloff,
float dydx, float dydx,
float fac) const float fac)
{ {
bool curFluid = flags.isFluid(i, j, k); bool curFluid = flags.isFluid(i, j, k);
@@ -1602,37 +1584,35 @@ struct KnDissolveSmoke : public KernelBase {
return fac; return fac;
} }
typedef float type9; typedef float type9;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnDissolveSmoke ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, heat, red, green, blue, speed, logFalloff, dydx, fac);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &density; Grid<Real> &density;
Grid<Real> *heat; Grid<Real> *heat;

View File

@@ -71,7 +71,7 @@ struct KnProcessBurn : public KernelBase {
Real ignitionTemp, Real ignitionTemp,
Real maxTemp, Real maxTemp,
Real dt, Real dt,
Vec3 flameSmokeColor) const Vec3 flameSmokeColor)
{ {
// Save initial values // Save initial values
Real origFuel = fuel(i, j, k); Real origFuel = fuel(i, j, k);
@@ -179,19 +179,44 @@ struct KnProcessBurn : public KernelBase {
return flameSmokeColor; return flameSmokeColor;
} }
typedef Vec3 type12; typedef Vec3 type12;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnProcessBurn ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
#pragma omp parallel
{
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i,
j,
k,
fuel,
density,
react,
red,
green,
blue,
heat,
burningRate,
flameSmoke,
ignitionTemp,
maxTemp,
dt,
flameSmokeColor);
}
}
else {
const int k = 0;
#pragma omp parallel
{
#pragma omp for
for (int j = 1; j < _maxY; j++) for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++) for (int i = 1; i < _maxX; i++)
op(i, op(i,
@@ -210,35 +235,8 @@ struct KnProcessBurn : public KernelBase {
maxTemp, maxTemp,
dt, dt,
flameSmokeColor); flameSmokeColor);
}
} }
else {
const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++)
for (int i = 1; i < _maxX; i++)
op(i,
j,
k,
fuel,
density,
react,
red,
green,
blue,
heat,
burningRate,
flameSmoke,
ignitionTemp,
maxTemp,
dt,
flameSmokeColor);
}
}
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
} }
Grid<Real> &fuel; Grid<Real> &fuel;
Grid<Real> &density; Grid<Real> &density;
@@ -344,7 +342,7 @@ struct KnUpdateFlame : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const Grid<Real> &react, Grid<Real> &flame) const inline void op(int i, int j, int k, const Grid<Real> &react, Grid<Real> &flame)
{ {
if (react(i, j, k) > 0.0f) if (react(i, j, k) > 0.0f)
flame(i, j, k) = pow(react(i, j, k), 0.5f); flame(i, j, k) = pow(react(i, j, k), 0.5f);
@@ -361,37 +359,35 @@ struct KnUpdateFlame : public KernelBase {
return flame; return flame;
} }
typedef Grid<Real> type1; typedef Grid<Real> type1;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnUpdateFlame ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, react, flame);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, react, flame);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, react, flame);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, react, flame);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const Grid<Real> &react; const Grid<Real> &react;
Grid<Real> &flame; Grid<Real> &flame;
}; };

View File

@@ -283,7 +283,7 @@ struct knClearFluidFlags : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, FlagGrid &flags, int dummy = 0) const inline void op(int i, int j, int k, FlagGrid &flags, int dummy = 0)
{ {
if (flags.isFluid(i, j, k)) { if (flags.isFluid(i, j, k)) {
flags(i, j, k) = (flags(i, j, k) | FlagGrid::TypeEmpty) & ~FlagGrid::TypeFluid; flags(i, j, k) = (flags(i, j, k) | FlagGrid::TypeEmpty) & ~FlagGrid::TypeFluid;
@@ -299,37 +299,35 @@ struct knClearFluidFlags : public KernelBase {
return dummy; return dummy;
} }
typedef int type1; typedef int type1;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knClearFluidFlags ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, dummy);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dummy);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, dummy);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, dummy);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
FlagGrid &flags; FlagGrid &flags;
int dummy; int dummy;
}; };
@@ -342,7 +340,7 @@ struct knSetNbObstacle : public KernelBase {
run(); run();
} }
inline void op( inline void op(
int i, int j, int k, FlagGrid &nflags, const FlagGrid &flags, const Grid<Real> &phiObs) const int i, int j, int k, FlagGrid &nflags, const FlagGrid &flags, const Grid<Real> &phiObs)
{ {
if (phiObs(i, j, k) > 0.) if (phiObs(i, j, k) > 0.)
return; return;
@@ -381,37 +379,35 @@ struct knSetNbObstacle : public KernelBase {
return phiObs; return phiObs;
} }
typedef Grid<Real> type2; typedef Grid<Real> type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSetNbObstacle ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, nflags, flags, phiObs);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, nflags, flags, phiObs);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, nflags, flags, phiObs);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, nflags, flags, phiObs);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
FlagGrid &nflags; FlagGrid &nflags;
const FlagGrid &flags; const FlagGrid &flags;
const Grid<Real> &phiObs; const Grid<Real> &phiObs;
@@ -438,7 +434,11 @@ void markFluidCells(const BasicParticleSystem &parts,
if (phiObs) { if (phiObs) {
FlagGrid tmp(flags); FlagGrid tmp(flags);
knSetNbObstacle(tmp, flags, *phiObs); knSetNbObstacle(tmp, flags, *phiObs);
#if OPENMP && OPENMP_OFFLOAD
flags.copyFrom(tmp, true, false);
#else
flags.swap(tmp); flags.swap(tmp);
#endif
} }
} }
static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
@@ -818,33 +818,37 @@ struct ComputeUnionLevelsetPindex : public KernelBase {
LevelsetGrid &phi, LevelsetGrid &phi,
const Real radius, const Real radius,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell
Real phiv = radius * 1.0; // outside Real phiv = radius * 1.0; // outside
const int r = int(radius) + 1;
FOR_NEIGHBORS(phi, r) int r = int(radius) + 1;
{ int rZ = phi.is3D() ? r : 0;
for (int zj = k - rZ; zj <= k + rZ; zj++)
for (int yj = j - r; yj <= j + r; yj++)
for (int xj = i - r; xj <= i + r; xj++) {
if (!phi.isInBounds(Vec3i(xj, yj, zj)))
continue;
// note, for the particle indices in indexSys the access is periodic (ie, dont skip for eg // note, for the particle indices in indexSys the access is periodic (ie, dont skip for
// inBounds(sx,10,10) // eg inBounds(sx,10,10)
IndexInt isysIdxS = index.index(xj, yj, zj); IndexInt isysIdxS = index.index(xj, yj, zj);
IndexInt pStart = index(isysIdxS), pEnd = 0; IndexInt pStart = index(isysIdxS), pEnd = 0;
if (phi.isInBounds(isysIdxS + 1)) if (phi.isInBounds(isysIdxS + 1))
pEnd = index(isysIdxS + 1); pEnd = index(isysIdxS + 1);
else else
pEnd = indexSys.size(); pEnd = indexSys.size();
// now loop over particles in cell // now loop over particles in cell
for (IndexInt p = pStart; p < pEnd; ++p) { for (IndexInt p = pStart; p < pEnd; ++p) {
const int psrc = indexSys[p].sourceIndex; const int psrc = indexSys[p].sourceIndex;
if (ptype && ((*ptype)[psrc] & exclude)) if (ptype && ((*ptype)[psrc] & exclude))
continue; continue;
const Vec3 pos = parts[psrc].pos; const Vec3 pos = parts[psrc].pos;
phiv = std::min(phiv, fabs(norm(gridPos - pos)) - radius); phiv = std::min(phiv, fabs(norm(gridPos - pos)) - radius);
} }
} }
phi(i, j, k) = phiv; phi(i, j, k) = phiv;
} }
inline const Grid<int> &getArg0() inline const Grid<int> &getArg0()
@@ -882,37 +886,35 @@ struct ComputeUnionLevelsetPindex : public KernelBase {
return exclude; return exclude;
} }
typedef int type6; typedef int type6;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel ComputeUnionLevelsetPindex ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, index, parts, indexSys, phi, radius, ptype, exclude);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const Grid<int> &index; const Grid<int> &index;
const BasicParticleSystem &parts; const BasicParticleSystem &parts;
const ParticleIndexSystem &indexSys; const ParticleIndexSystem &indexSys;
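The rewritten `op()` above replaces the `FOR_NEIGHBORS` macro with explicit, bounds-checked `xj`/`yj`/`zj` loops over the `(2r+1)`-cell window around `(i, j, k)` (`rZ = 0` collapses the z loop on 2D grids), and inside that window it evaluates a union-of-spheres level set over the indexed particles. A brute-force sketch of the quantity being computed, without the particle-index acceleration structure (names here are illustrative):

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

struct P {
  float x, y, z;
};

// Union-of-spheres particle level set at a cell centre:
//   phi = min over particles p of (|x - x_p| - radius)
// Initialised to `radius` ("outside"), as in the kernel; the kernel only
// visits particles indexed into the stencil cells instead of all particles.
float unionLevelset(const std::vector<P> &parts, float cx, float cy, float cz, float radius)
{
  float phi = radius;
  for (const P &p : parts) {
    const float d = std::sqrt((cx - p.x) * (cx - p.x) + (cy - p.y) * (cy - p.y) +
                              (cz - p.z) * (cz - p.z));
    phi = std::min(phi, d - radius);
  }
  return phi;
}
```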
@@ -1015,42 +1017,46 @@ struct ComputeAveragedLevelsetWeight : public KernelBase {
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude, const int exclude,
Grid<Vec3> *save_pAcc = nullptr, Grid<Vec3> *save_pAcc = nullptr,
Grid<Real> *save_rAcc = nullptr) const Grid<Real> *save_rAcc = nullptr)
{ {
const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell const Vec3 gridPos = Vec3(i, j, k) + Vec3(0.5); // shifted by half cell
Real phiv = radius * 1.0; // outside Real phiv = radius * 1.0; // outside
// loop over neighborhood, similar to ComputeUnionLevelsetPindex // loop over neighborhood, similar to ComputeUnionLevelsetPindex
const Real sradiusInv = 1. / (4. * radius * radius); const Real sradiusInv = 1. / (4. * radius * radius);
const int r = int(radius) + 1; int r = int(1. * radius) + 1;
int rZ = phi.is3D() ? r : 0;
// accumulators // accumulators
Real wacc = 0.; Real wacc = 0.;
Vec3 pacc = Vec3(0.); Vec3 pacc = Vec3(0.);
Real racc = 0.; Real racc = 0.;
FOR_NEIGHBORS(phi, r) for (int zj = k - rZ; zj <= k + rZ; zj++)
{ for (int yj = j - r; yj <= j + r; yj++)
for (int xj = i - r; xj <= i + r; xj++) {
if (!phi.isInBounds(Vec3i(xj, yj, zj)))
continue;
IndexInt isysIdxS = index.index(xj, yj, zj); IndexInt isysIdxS = index.index(xj, yj, zj);
IndexInt pStart = index(isysIdxS), pEnd = 0; IndexInt pStart = index(isysIdxS), pEnd = 0;
if (phi.isInBounds(isysIdxS + 1)) if (phi.isInBounds(isysIdxS + 1))
pEnd = index(isysIdxS + 1); pEnd = index(isysIdxS + 1);
else else
pEnd = indexSys.size(); pEnd = indexSys.size();
for (IndexInt p = pStart; p < pEnd; ++p) { for (IndexInt p = pStart; p < pEnd; ++p) {
IndexInt psrc = indexSys[p].sourceIndex; IndexInt psrc = indexSys[p].sourceIndex;
if (ptype && ((*ptype)[psrc] & exclude)) if (ptype && ((*ptype)[psrc] & exclude))
continue; continue;
Vec3 pos = parts[psrc].pos; Vec3 pos = parts[psrc].pos;
Real s = normSquare(gridPos - pos) * sradiusInv; Real s = normSquare(gridPos - pos) * sradiusInv;
// Real w = std::max(0., cubed(1.-s) ); // Real w = std::max(0., cubed(1.-s) );
Real w = std::max(0., (1. - s)); // a bit smoother Real w = std::max(0., (1. - s)); // a bit smoother
wacc += w; wacc += w;
racc += radius * w; racc += radius * w;
pacc += pos * w; pacc += pos * w;
} }
} }
if (wacc > VECTOR_EPSILON) { if (wacc > VECTOR_EPSILON) {
racc /= wacc; racc /= wacc;
@@ -1109,37 +1115,46 @@ struct ComputeAveragedLevelsetWeight : public KernelBase {
return save_rAcc; return save_rAcc;
} }
typedef Grid<Real> type8; typedef Grid<Real> type8;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel ComputeAveragedLevelsetWeight ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i,
j,
k,
parts,
index,
indexSys,
phi,
radius,
ptype,
exclude,
save_pAcc,
save_rAcc);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, parts, index, indexSys, phi, radius, ptype, exclude, save_pAcc, save_rAcc);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const BasicParticleSystem &parts; const BasicParticleSystem &parts;
const Grid<int> &index; const Grid<int> &index;
const ParticleIndexSystem &indexSys; const ParticleIndexSystem &indexSys;
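Editor's note: the hunk above is representative of how every grid kernel in this patch is converted. The TBB operator()(const tbb::blocked_range<IndexInt>&) plus tbb::parallel_for pair is replaced by a single run() that walks the full index range directly under "#pragma omp parallel" / "#pragma omp for", with only the outermost loop (z in 3D, y in 2D) shared among threads. A minimal, self-contained sketch of that host-side pattern follows; runGridKernel3D and its bounds are illustrative names, not part of the patch.

// Sketch of the host-side OpenMP pattern used for the grid kernels above.
// Only the outermost loop is workshared, mirroring the old TBB blocked_range over k.
template<class Op>
void runGridKernel3D(int minZ, int maxZ, int maxY, int maxX, Op op)
{
#pragma omp parallel
  {
#pragma omp for
    for (int k = minZ; k < maxZ; k++)
      for (int j = 0; j < maxY; j++)
        for (int i = 0; i < maxX; i++)
          op(i, j, k);  // per-cell body, e.g. ComputeAveragedLevelsetWeight::op
  }
}

Kernels whose per-cell stencil needs a one-cell border (knSmoothGrid, correctLevelset and similar below) keep their original start index of 1 instead of 0 when converted.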
@@ -1165,7 +1180,7 @@ template<class T> struct knSmoothGrid : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) const inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor)
{ {
T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k); T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k);
if (me.is3D()) { if (me.is3D()) {
@@ -1188,37 +1203,35 @@ template<class T> struct knSmoothGrid : public KernelBase {
return factor; return factor;
} }
typedef Real type2; typedef Real type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSmoothGrid ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, me, tmp, factor);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, me, tmp, factor);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, me, tmp, factor);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, me, tmp, factor);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const Grid<T> &me; const Grid<T> &me;
Grid<T> &tmp; Grid<T> &tmp;
Real factor; Real factor;
@@ -1231,7 +1244,7 @@ template<class T> struct knSmoothGridNeg : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor) const inline void op(int i, int j, int k, const Grid<T> &me, Grid<T> &tmp, Real factor)
{ {
T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k); T val = me(i, j, k) + me(i + 1, j, k) + me(i - 1, j, k) + me(i, j + 1, k) + me(i, j - 1, k);
if (me.is3D()) { if (me.is3D()) {
@@ -1258,37 +1271,35 @@ template<class T> struct knSmoothGridNeg : public KernelBase {
return factor; return factor;
} }
typedef Real type2; typedef Real type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSmoothGridNeg ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, me, tmp, factor);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, me, tmp, factor);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, me, tmp, factor);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, me, tmp, factor);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const Grid<T> &me; const Grid<T> &me;
Grid<T> &tmp; Grid<T> &tmp;
Real factor; Real factor;
@@ -1396,7 +1407,7 @@ struct correctLevelset : public KernelBase {
const Grid<Real> &rAcc, const Grid<Real> &rAcc,
const Real radius, const Real radius,
const Real t_low, const Real t_low,
const Real t_high) const const Real t_high)
{ {
if (rAcc(i, j, k) <= VECTOR_EPSILON) if (rAcc(i, j, k) <= VECTOR_EPSILON)
return; // outside nothing happens return; // outside nothing happens
@@ -1463,37 +1474,35 @@ struct correctLevelset : public KernelBase {
return t_high; return t_high;
} }
typedef Real type5; typedef Real type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel correctLevelset ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, phi, pAcc, rAcc, radius, t_low, t_high);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
LevelsetGrid &phi; LevelsetGrid &phi;
const Grid<Vec3> &pAcc; const Grid<Vec3> &pAcc;
const Grid<Real> &rAcc; const Grid<Real> &rAcc;
@@ -1624,7 +1633,7 @@ struct knPushOutofObs : public KernelBase {
const Real shift, const Real shift,
const Real thresh, const Real thresh,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
if (!parts.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) if (!parts.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return; return;
@@ -1675,21 +1684,17 @@ struct knPushOutofObs : public KernelBase {
return exclude; return exclude;
} }
typedef int type6; typedef int type6;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knPushOutofObs ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, parts, flags, phiObs, shift, thresh, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, parts, flags, phiObs, shift, thresh, ptype, exclude);
}
} }
BasicParticleSystem &parts; BasicParticleSystem &parts;
const FlagGrid &flags; const FlagGrid &flags;
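Editor's note: particle-index kernels such as knPushOutofObs get the same treatment in one dimension; the tbb::blocked_range<IndexInt>(0, size) loop becomes a plain counted loop over a local copy of size. A standalone sketch with an illustrative helper name, using long long as a stand-in for IndexInt:

// Sketch of the flat per-particle loop pattern used above.
template<class Op>
void runParticleKernel(long long size, Op op)
{
  const long long sz = size;  // local copy, as in the patch's `_sz`
#pragma omp parallel
  {
#pragma omp for
    for (long long i = 0; i < sz; i++)
      op(i);  // per-particle body, e.g. knPushOutofObs::op
  }
}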
@@ -1759,10 +1764,7 @@ template<class T> struct knSafeDivReal : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, inline void op(IndexInt idx, Grid<T> &me, const Grid<Real> &other, Real cutoff = VECTOR_EPSILON)
Grid<T> &me,
const Grid<Real> &other,
Real cutoff = VECTOR_EPSILON) const
{ {
if (other[idx] < cutoff) { if (other[idx] < cutoff) {
me[idx] = 0.; me[idx] = 0.;
@@ -1787,21 +1789,17 @@ template<class T> struct knSafeDivReal : public KernelBase {
return cutoff; return cutoff;
} }
typedef Real type2; typedef Real type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSafeDivReal ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, me, other, cutoff);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, me, other, cutoff);
}
} }
Grid<T> &me; Grid<T> &me;
const Grid<Real> &other; const Grid<Real> &other;
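Editor's note: knSafeDivReal guards a per-cell division with a cutoff (VECTOR_EPSILON by default), which is the shape of the weight-normalization step that typically follows particle-to-grid mapping: cells that received (almost) no weight are zeroed instead of producing a division blow-up. A hedged, array-level sketch of that rule with illustrative names:

// Illustrative normalization pass matching knSafeDivReal's per-cell op.
#include <cstddef>
#include <vector>

void normalizeMapped(std::vector<double> &vel, const std::vector<double> &weight,
                     double cutoff = 1e-12)
{
  for (std::size_t idx = 0; idx < vel.size(); ++idx)
    vel[idx] = (weight[idx] < cutoff) ? 0.0 : vel[idx] / weight[idx];
}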
@@ -1879,13 +1877,7 @@ struct knMapLinearVec3ToMACGrid : public KernelBase {
return exclude; return exclude;
} }
typedef int type6; typedef int type6;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knMapLinearVec3ToMACGrid ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void run() void run()
{ {
const IndexInt _sz = size; const IndexInt _sz = size;
@@ -2022,13 +2014,7 @@ template<class T> struct knMapLinear : public KernelBase {
return psource; return psource;
} }
typedef ParticleDataImpl<T> type4; typedef ParticleDataImpl<T> type4;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knMapLinear ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void run() void run()
{ {
const IndexInt _sz = size; const IndexInt _sz = size;
@@ -2153,7 +2139,7 @@ template<class T> struct knMapFromGrid : public KernelBase {
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystem &p, const BasicParticleSystem &p,
const Grid<T> &gsrc, const Grid<T> &gsrc,
ParticleDataImpl<T> &target) const ParticleDataImpl<T> &target)
{ {
if (!p.isActive(idx)) if (!p.isActive(idx))
return; return;
@@ -2174,21 +2160,17 @@ template<class T> struct knMapFromGrid : public KernelBase {
return target; return target;
} }
typedef ParticleDataImpl<T> type2; typedef ParticleDataImpl<T> type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knMapFromGrid ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, gsrc, target);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, gsrc, target);
}
} }
const BasicParticleSystem &p; const BasicParticleSystem &p;
const Grid<T> &gsrc; const Grid<T> &gsrc;
@@ -2298,7 +2280,7 @@ struct knMapLinearMACGridToVec3_PIC : public KernelBase {
const MACGrid &vel, const MACGrid &vel,
ParticleDataImpl<Vec3> &pvel, ParticleDataImpl<Vec3> &pvel,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return; return;
@@ -2335,21 +2317,17 @@ struct knMapLinearMACGridToVec3_PIC : public KernelBase {
return exclude; return exclude;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knMapLinearMACGridToVec3_PIC ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, flags, vel, pvel, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, flags, vel, pvel, ptype, exclude);
}
} }
const BasicParticleSystem &p; const BasicParticleSystem &p;
const FlagGrid &flags; const FlagGrid &flags;
@@ -2438,7 +2416,7 @@ struct knMapLinearMACGridToVec3_FLIP : public KernelBase {
ParticleDataImpl<Vec3> &pvel, ParticleDataImpl<Vec3> &pvel,
const Real flipRatio, const Real flipRatio,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude))) if (!p.isActive(idx) || (ptype && ((*ptype)[idx] & exclude)))
return; return;
@@ -2486,21 +2464,17 @@ struct knMapLinearMACGridToVec3_FLIP : public KernelBase {
return exclude; return exclude;
} }
typedef int type7; typedef int type7;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knMapLinearMACGridToVec3_FLIP ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, flags, vel, oldVel, pvel, flipRatio, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, flags, vel, oldVel, pvel, flipRatio, ptype, exclude);
}
} }
const BasicParticleSystem &p; const BasicParticleSystem &p;
const FlagGrid &flags; const FlagGrid &flags;
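Editor's note: for context, knMapLinearMACGridToVec3_PIC and _FLIP are the grid-to-particle transfers. PIC simply resamples the new grid velocity, while FLIP adds the grid velocity change to the particle's carried velocity and blends the two with flipRatio. A hedged sketch of the textbook blend; the actual kernel bodies lie outside these hunks and may differ in clamping and component handling, and getInterpolated is assumed here as the MAC interpolation at the particle position.

// Textbook PIC/FLIP blend that this kernel family implements (sketch only).
Vec3 flipBlend(const Vec3 &pos, const Vec3 &pvel,
               const MACGrid &vel, const MACGrid &oldVel, Real flipRatio)
{
  const Vec3 v  = vel.getInterpolated(pos);         // pure PIC resample
  const Vec3 dv = v - oldVel.getInterpolated(pos);  // grid velocity change
  return flipRatio * (pvel + dv) + (Real(1.) - flipRatio) * v;
}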
@@ -2592,7 +2566,7 @@ struct knCombineVels : public KernelBase {
MACGrid &combineVel, MACGrid &combineVel,
const LevelsetGrid *phi, const LevelsetGrid *phi,
Real narrowBand, Real narrowBand,
Real thresh) const Real thresh)
{ {
int idx = vel.index(i, j, k); int idx = vel.index(i, j, k);
@@ -2648,37 +2622,35 @@ struct knCombineVels : public KernelBase {
return thresh; return thresh;
} }
typedef Real type5; typedef Real type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knCombineVels ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, vel, w, combineVel, phi, narrowBand, thresh);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
MACGrid &vel; MACGrid &vel;
const Grid<Vec3> &w; const Grid<Vec3> &w;
MACGrid &combineVel; MACGrid &combineVel;



@@ -62,7 +62,7 @@ struct apply1DKernelDirX : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel)
{ {
int nx = in.getSizeX(); int nx = in.getSizeX();
int kn = kernel.n; int kn = kernel.n;
@@ -91,37 +91,35 @@ struct apply1DKernelDirX : public KernelBase {
return kernel; return kernel;
} }
typedef Matrix type2; typedef Matrix type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel apply1DKernelDirX ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, in, out, kernel);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, in, out, kernel);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, in, out, kernel);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, in, out, kernel);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const MACGrid &in; const MACGrid &in;
MACGrid &out; MACGrid &out;
const Matrix &kernel; const Matrix &kernel;
@@ -136,7 +134,7 @@ struct apply1DKernelDirY : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel)
{ {
int ny = in.getSizeY(); int ny = in.getSizeY();
int kn = kernel.n; int kn = kernel.n;
@@ -165,37 +163,35 @@ struct apply1DKernelDirY : public KernelBase {
return kernel; return kernel;
} }
typedef Matrix type2; typedef Matrix type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel apply1DKernelDirY ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, in, out, kernel);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, in, out, kernel);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, in, out, kernel);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, in, out, kernel);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const MACGrid &in; const MACGrid &in;
MACGrid &out; MACGrid &out;
const Matrix &kernel; const Matrix &kernel;
@@ -210,7 +206,7 @@ struct apply1DKernelDirZ : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel) const inline void op(int i, int j, int k, const MACGrid &in, MACGrid &out, const Matrix &kernel)
{ {
int nz = in.getSizeZ(); int nz = in.getSizeZ();
int kn = kernel.n; int kn = kernel.n;
@@ -239,37 +235,35 @@ struct apply1DKernelDirZ : public KernelBase {
return kernel; return kernel;
} }
typedef Matrix type2; typedef Matrix type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel apply1DKernelDirZ ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, in, out, kernel);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, in, out, kernel);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, in, out, kernel);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, in, out, kernel);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const MACGrid &in; const MACGrid &in;
MACGrid &out; MACGrid &out;
const Matrix &kernel; const Matrix &kernel;
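Editor's note: apply1DKernelDirX/Y/Z are the three passes of a separable blur; each convolves the MAC grid with the same 1D kernel along one axis, so a full 3D smoothing is obtained by chaining the passes through a scratch grid. A hedged sketch of that chaining; the ping-pong between grids is illustrative, the actual call sites in this file may arrange it differently, and the Z pass would be skipped for 2D solvers.

// Separable blur: three 1D passes using the kernels above (sketch).
void blurMACSeparable(MACGrid &v, MACGrid &tmp, const Matrix &kernel1D)
{
  apply1DKernelDirX(v, tmp, kernel1D);  // x pass: v -> tmp
  apply1DKernelDirY(tmp, v, kernel1D);  // y pass: tmp -> v
  apply1DKernelDirZ(v, tmp, kernel1D);  // z pass: v -> tmp
  v.copyFrom(tmp);                      // result back in v
}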
@@ -569,197 +563,88 @@ void prox_f(MACGrid &v,
// ***************************************************************************** // *****************************************************************************
// re-uses main pressure solve from pressure.cpp // TODO (sebbas): Disabled for now
void solvePressure(MACGrid &vel, // // re-uses main pressure solve from pressure.cpp
Grid<Real> &pressure, // void solvePressure(
const FlagGrid &flags, // MACGrid& vel, Grid<Real>& pressure, const FlagGrid& flags, Real cgAccuracy = 1e-3,
Real cgAccuracy = 1e-3, // const Grid<Real>* phi = nullptr,
const Grid<Real> *phi = nullptr, // const Grid<Real>* perCellCorr = nullptr,
const Grid<Real> *perCellCorr = nullptr, // const MACGrid* fractions = nullptr,
const MACGrid *fractions = nullptr, // const MACGrid* obvel = nullptr,
const MACGrid *obvel = nullptr, // Real gfClamp = 1e-04,
Real gfClamp = 1e-04, // Real cgMaxIterFac = 1.5,
Real cgMaxIterFac = 1.5, // bool precondition = true,
bool precondition = true, // int preconditioner = 1,
int preconditioner = 1, // bool enforceCompatibility = false,
bool enforceCompatibility = false, // bool useL2Norm = false,
bool useL2Norm = false, // bool zeroPressureFixing = false,
bool zeroPressureFixing = false, // const Grid<Real> *curv = nullptr,
const Grid<Real> *curv = nullptr, // const Real surfTens = 0.0,
const Real surfTens = 0.0, // Grid<Real>* retRhs = nullptr );
Grid<Real> *retRhs = nullptr);
//! Main function for fluid guiding , includes "regular" pressure solve // //! Main function for fluid guiding , includes "regular" pressure solve
// PYTHON() void PD_fluid_guiding(MACGrid& vel, MACGrid& velT,
// Grid<Real>& pressure, FlagGrid& flags, Grid<Real>& weight, int blurRadius = 5,
// Real theta = 1.0, Real tau = 1.0, Real sigma = 1.0,
// Real epsRel = 1e-3, Real epsAbs = 1e-3, int maxIters = 200,
// // duplicated for pressure solve
// Grid<Real>* phi = nullptr, Grid<Real>* perCellCorr = nullptr, MACGrid* fractions = nullptr,
// MACGrid* obvel = nullptr, Real gfClamp = 1e-04, Real cgMaxIterFac = 1.5, Real cgAccuracy = 1e-3,
// int preconditioner = 1, bool zeroPressureFixing = false, const Grid<Real> *curv = nullptr,
// const Real surfTens = 0.)
// {
// FluidSolver* parent = vel.getParent();
void PD_fluid_guiding(MACGrid &vel, // // initialize dual/slack variables
MACGrid &velT, // MACGrid velC = MACGrid(parent); velC.copyFrom(vel);
Grid<Real> &pressure, // MACGrid x = MACGrid(parent);
FlagGrid &flags, // MACGrid y = MACGrid(parent);
Grid<Real> &weight, // MACGrid z = MACGrid(parent);
int blurRadius = 5, // MACGrid x0 = MACGrid(parent);
Real theta = 1.0, // MACGrid z0 = MACGrid(parent);
Real tau = 1.0,
Real sigma = 1.0,
Real epsRel = 1e-3,
Real epsAbs = 1e-3,
int maxIters = 200,
Grid<Real> *phi = nullptr,
Grid<Real> *perCellCorr = nullptr,
MACGrid *fractions = nullptr,
MACGrid *obvel = nullptr,
Real gfClamp = 1e-04,
Real cgMaxIterFac = 1.5,
Real cgAccuracy = 1e-3,
int preconditioner = 1,
bool zeroPressureFixing = false,
const Grid<Real> *curv = nullptr,
const Real surfTens = 0.)
{
FluidSolver *parent = vel.getParent();
// initialize dual/slack variables // // precomputation
MACGrid velC = MACGrid(parent); // ADMM_precompute_Separable(blurRadius);
velC.copyFrom(vel); // MACGrid Q = MACGrid(parent);
MACGrid x = MACGrid(parent); // precomputeQ(Q, flags, velT, velC, gBlurKernel, sigma);
MACGrid y = MACGrid(parent); // MACGrid invA = MACGrid(parent);
MACGrid z = MACGrid(parent); // precomputeInvA(invA, weight, sigma);
MACGrid x0 = MACGrid(parent);
MACGrid z0 = MACGrid(parent);
// precomputation // // loop
ADMM_precompute_Separable(blurRadius); // int iter = 0;
MACGrid Q = MACGrid(parent); // for (iter = 0; iter < maxIters; iter++) {
precomputeQ(Q, flags, velT, velC, gBlurKernel, sigma); // // x-update
MACGrid invA = MACGrid(parent); // x0.copyFrom(x);
precomputeInvA(invA, weight, sigma); // x.multConst(1.0 / sigma);
// x.add(y);
// prox_f(x, flags, Q, velC, sigma, invA);
// x.multConst(-sigma); x.addScaled(y, sigma); x.add(x0);
// loop // // z-update
int iter = 0; // z0.copyFrom(z);
for (iter = 0; iter < maxIters; iter++) { // z.addScaled(x, -tau);
// x-update // Real cgAccuracyAdaptive = cgAccuracy;
x0.copyFrom(x);
x.multConst(1.0 / sigma);
x.add(y);
prox_f(x, flags, Q, velC, sigma, invA);
x.multConst(-sigma);
x.addScaled(y, sigma);
x.add(x0);
// z-update // solvePressure (z, pressure, flags, cgAccuracyAdaptive, phi, perCellCorr, fractions, obvel,
z0.copyFrom(z); // gfClamp, cgMaxIterFac, true, preconditioner, false, false, zeroPressureFixing, curv, surfTens );
z.addScaled(x, -tau);
Real cgAccuracyAdaptive = cgAccuracy;
solvePressure(z, // // y-update
pressure, // y.copyFrom(z);
flags, // y.sub(z0);
cgAccuracyAdaptive, // y.multConst(theta);
phi, // y.add(z);
perCellCorr,
fractions,
obvel,
gfClamp,
cgMaxIterFac,
true,
preconditioner,
false,
false,
zeroPressureFixing,
curv,
surfTens);
// y-update // // stopping criterion
y.copyFrom(z); // bool stop = (iter > 0 && getRNorm(z, z0) < getEpsDual(epsAbs, epsRel, z));
y.sub(z0);
y.multConst(theta);
y.add(z);
// stopping criterion // if (stop || (iter == maxIters - 1)) break;
bool stop = (iter > 0 && getRNorm(z, z0) < getEpsDual(epsAbs, epsRel, z)); // }
if (stop || (iter == maxIters - 1)) // // vel_new = z
break; // vel.copyFrom(z);
}
// vel_new = z // debMsg("PD_fluid_guiding iterations:" << iter, 1);
vel.copyFrom(z); // }
debMsg("PD_fluid_guiding iterations:" << iter, 1);
}
static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{
try {
PbArgs _args(_linargs, _kwds);
FluidSolver *parent = _args.obtainParent();
bool noTiming = _args.getOpt<bool>("notiming", -1, 0);
pbPreparePlugin(parent, "PD_fluid_guiding", !noTiming);
PyObject *_retval = nullptr;
{
ArgLocker _lock;
MACGrid &vel = *_args.getPtr<MACGrid>("vel", 0, &_lock);
MACGrid &velT = *_args.getPtr<MACGrid>("velT", 1, &_lock);
Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 2, &_lock);
FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 3, &_lock);
Grid<Real> &weight = *_args.getPtr<Grid<Real>>("weight", 4, &_lock);
int blurRadius = _args.getOpt<int>("blurRadius", 5, 5, &_lock);
Real theta = _args.getOpt<Real>("theta", 6, 1.0, &_lock);
Real tau = _args.getOpt<Real>("tau", 7, 1.0, &_lock);
Real sigma = _args.getOpt<Real>("sigma", 8, 1.0, &_lock);
Real epsRel = _args.getOpt<Real>("epsRel", 9, 1e-3, &_lock);
Real epsAbs = _args.getOpt<Real>("epsAbs", 10, 1e-3, &_lock);
int maxIters = _args.getOpt<int>("maxIters", 11, 200, &_lock);
Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 12, nullptr, &_lock);
Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>("perCellCorr", 13, nullptr, &_lock);
MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 14, nullptr, &_lock);
MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 15, nullptr, &_lock);
Real gfClamp = _args.getOpt<Real>("gfClamp", 16, 1e-04, &_lock);
Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 17, 1.5, &_lock);
Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 18, 1e-3, &_lock);
int preconditioner = _args.getOpt<int>("preconditioner", 19, 1, &_lock);
bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 20, false, &_lock);
const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 21, nullptr, &_lock);
const Real surfTens = _args.getOpt<Real>("surfTens", 22, 0., &_lock);
_retval = getPyNone();
PD_fluid_guiding(vel,
velT,
pressure,
flags,
weight,
blurRadius,
theta,
tau,
sigma,
epsRel,
epsAbs,
maxIters,
phi,
perCellCorr,
fractions,
obvel,
gfClamp,
cgMaxIterFac,
cgAccuracy,
preconditioner,
zeroPressureFixing,
curv,
surfTens);
_args.check();
}
pbFinalizePlugin(parent, "PD_fluid_guiding", !noTiming);
return _retval;
}
catch (std::exception &e) {
pbSetError("PD_fluid_guiding", e.what());
return 0;
}
}
static const Pb::Register _RP_PD_fluid_guiding("", "PD_fluid_guiding", _W_2);
extern "C" {
void PbRegister_PD_fluid_guiding()
{
KEEP_UNUSED(_RP_PD_fluid_guiding);
}
}
//! reset precomputation //! reset precomputation
void releaseBlurPrecomp() void releaseBlurPrecomp()
@@ -768,7 +653,7 @@ void releaseBlurPrecomp()
gBlurKernelRadius = -1; gBlurKernelRadius = -1;
gBlurKernel = 0.f; gBlurKernel = 0.f;
} }
static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
try { try {
PbArgs _args(_linargs, _kwds); PbArgs _args(_linargs, _kwds);
@@ -790,7 +675,7 @@ static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
return 0; return 0;
} }
} }
static const Pb::Register _RP_releaseBlurPrecomp("", "releaseBlurPrecomp", _W_3); static const Pb::Register _RP_releaseBlurPrecomp("", "releaseBlurPrecomp", _W_2);
extern "C" { extern "C" {
void PbRegister_releaseBlurPrecomp() void PbRegister_releaseBlurPrecomp()
{ {


@@ -56,7 +56,7 @@ struct KnApplyNoiseInfl : public KernelBase {
const WaveletNoiseField &noise, const WaveletNoiseField &noise,
const Grid<Real> &sdf, const Grid<Real> &sdf,
Real scale, Real scale,
Real sigma) const Real sigma)
{ {
if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma) if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma)
return; return;
@@ -96,37 +96,35 @@ struct KnApplyNoiseInfl : public KernelBase {
return sigma; return sigma;
} }
typedef Real type5; typedef Real type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnApplyNoiseInfl ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, noise, sdf, scale, sigma);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, noise, sdf, scale, sigma);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, noise, sdf, scale, sigma);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, noise, sdf, scale, sigma);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &density; Grid<Real> &density;
const WaveletNoiseField &noise; const WaveletNoiseField &noise;
@@ -202,7 +200,7 @@ struct KnAddNoise : public KernelBase {
Grid<Real> &density, Grid<Real> &density,
const WaveletNoiseField &noise, const WaveletNoiseField &noise,
const Grid<Real> *sdf, const Grid<Real> *sdf,
Real scale) const Real scale)
{ {
if (!flags.isFluid(i, j, k) || (sdf && (*sdf)(i, j, k) > 0.)) if (!flags.isFluid(i, j, k) || (sdf && (*sdf)(i, j, k) > 0.))
return; return;
@@ -233,37 +231,35 @@ struct KnAddNoise : public KernelBase {
return scale; return scale;
} }
typedef Real type4; typedef Real type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnAddNoise ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, noise, sdf, scale);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, noise, sdf, scale);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, noise, sdf, scale);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, noise, sdf, scale);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &density; Grid<Real> &density;
const WaveletNoiseField &noise; const WaveletNoiseField &noise;
@@ -329,7 +325,7 @@ template<class T> struct knSetPdataNoise : public KernelBase {
const BasicParticleSystem &parts, const BasicParticleSystem &parts,
ParticleDataImpl<T> &pdata, ParticleDataImpl<T> &pdata,
const WaveletNoiseField &noise, const WaveletNoiseField &noise,
Real scale) const Real scale)
{ {
pdata[idx] = noise.evaluate(parts.getPos(idx)) * scale; pdata[idx] = noise.evaluate(parts.getPos(idx)) * scale;
} }
@@ -353,21 +349,17 @@ template<class T> struct knSetPdataNoise : public KernelBase {
return scale; return scale;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSetPdataNoise ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, parts, pdata, noise, scale);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, parts, pdata, noise, scale);
}
} }
const BasicParticleSystem &parts; const BasicParticleSystem &parts;
ParticleDataImpl<T> &pdata; ParticleDataImpl<T> &pdata;
@@ -389,7 +381,7 @@ template<class T> struct knSetPdataNoiseVec : public KernelBase {
const BasicParticleSystem &parts, const BasicParticleSystem &parts,
ParticleDataImpl<T> &pdata, ParticleDataImpl<T> &pdata,
const WaveletNoiseField &noise, const WaveletNoiseField &noise,
Real scale) const Real scale)
{ {
pdata[idx] = noise.evaluateVec(parts.getPos(idx)) * scale; pdata[idx] = noise.evaluateVec(parts.getPos(idx)) * scale;
} }
@@ -413,21 +405,17 @@ template<class T> struct knSetPdataNoiseVec : public KernelBase {
return scale; return scale;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSetPdataNoiseVec ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, parts, pdata, noise, scale);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, parts, pdata, noise, scale);
}
} }
const BasicParticleSystem &parts; const BasicParticleSystem &parts;
ParticleDataImpl<T> &pdata; ParticleDataImpl<T> &pdata;
@@ -688,7 +676,7 @@ struct KnApplyEmission : public KernelBase {
const Grid<Real> &source, const Grid<Real> &source,
const Grid<Real> *emissionTexture, const Grid<Real> *emissionTexture,
bool isAbsolute, bool isAbsolute,
int type) const int type)
{ {
// if type is given, only apply emission when celltype matches type from flaggrid // if type is given, only apply emission when celltype matches type from flaggrid
// and if emission texture is given, only apply emission when some emission is present at cell // and if emission texture is given, only apply emission when some emission is present at cell
@@ -733,37 +721,35 @@ struct KnApplyEmission : public KernelBase {
return type; return type;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnApplyEmission ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, target, source, emissionTexture, isAbsolute, type);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &target; Grid<Real> &target;
const Grid<Real> &source; const Grid<Real> &source;
@@ -837,7 +823,7 @@ struct KnApplyDensity : public KernelBase {
Grid<Real> &density, Grid<Real> &density,
const Grid<Real> &sdf, const Grid<Real> &sdf,
Real value, Real value,
Real sigma) const Real sigma)
{ {
if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma) if (!flags.isFluid(i, j, k) || sdf(i, j, k) > sigma)
return; return;
@@ -868,37 +854,35 @@ struct KnApplyDensity : public KernelBase {
return sigma; return sigma;
} }
typedef Real type4; typedef Real type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnApplyDensity ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, sdf, value, sigma);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, sdf, value, sigma);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, density, sdf, value, sigma);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, density, sdf, value, sigma);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &density; Grid<Real> &density;
const Grid<Real> &sdf; const Grid<Real> &sdf;
@@ -1041,7 +1025,7 @@ struct KnResetInObstacle : public KernelBase {
Grid<Real> *red, Grid<Real> *red,
Grid<Real> *green, Grid<Real> *green,
Grid<Real> *blue, Grid<Real> *blue,
Real resetValue) const Real resetValue)
{ {
if (!flags.isObstacle(i, j, k)) if (!flags.isObstacle(i, j, k))
return; return;
@@ -1115,37 +1099,35 @@ struct KnResetInObstacle : public KernelBase {
return resetValue; return resetValue;
} }
typedef Real type9; typedef Real type9;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnResetInObstacle ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, density, heat, fuel, flame, red, green, blue, resetValue);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
FlagGrid &flags; FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
Grid<Real> *density; Grid<Real> *density;
@@ -1692,7 +1674,7 @@ struct KnUpdateFractions : public KernelBase {
const Grid<Real> &phiObs, const Grid<Real> &phiObs,
MACGrid &fractions, MACGrid &fractions,
const int &boundaryWidth, const int &boundaryWidth,
const Real fracThreshold) const const Real fracThreshold)
{ {
// walls at domain bounds and inner objects // walls at domain bounds and inner objects
@@ -1787,37 +1769,35 @@ struct KnUpdateFractions : public KernelBase {
return fracThreshold; return fracThreshold;
} }
typedef Real type4; typedef Real type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnUpdateFractions ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, phiObs, fractions, boundaryWidth, fracThreshold);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const Grid<Real> &phiObs; const Grid<Real> &phiObs;
MACGrid &fractions; MACGrid &fractions;
@@ -1896,7 +1876,7 @@ struct KnUpdateFlagsObs : public KernelBase {
const Grid<Real> &phiObs, const Grid<Real> &phiObs,
const Grid<Real> *phiOut, const Grid<Real> *phiOut,
const Grid<Real> *phiIn, const Grid<Real> *phiIn,
int boundaryWidth) const int boundaryWidth)
{ {
bool isObs = false; bool isObs = false;
@@ -1964,37 +1944,35 @@ struct KnUpdateFlagsObs : public KernelBase {
return boundaryWidth; return boundaryWidth;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnUpdateFlagsObs ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = boundaryWidth; j < _maxY; j++) #pragma omp parallel
for (int i = boundaryWidth; i < _maxX; i++) {
op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = boundaryWidth; j < _maxY; j++)
for (int i = boundaryWidth; i < _maxX; i++)
op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = boundaryWidth; i < _maxX; i++) {
op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
#pragma omp for
for (int j = boundaryWidth; j < _maxY; j++)
for (int i = boundaryWidth; i < _maxX; i++)
op(i, j, k, flags, fractions, phiObs, phiOut, phiIn, boundaryWidth);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(boundaryWidth, maxY), *this);
}
FlagGrid &flags; FlagGrid &flags;
const MACGrid *fractions; const MACGrid *fractions;
const Grid<Real> &phiObs; const Grid<Real> &phiObs;
@@ -2067,7 +2045,7 @@ struct kninitVortexVelocity : public KernelBase {
const Grid<Real> &phiObs, const Grid<Real> &phiObs,
MACGrid &vel, MACGrid &vel,
const Vec3 &center, const Vec3 &center,
const Real &radius) const const Real &radius)
{ {
if (phiObs(i, j, k) >= -1.) { if (phiObs(i, j, k) >= -1.) {
@@ -2115,37 +2093,35 @@ struct kninitVortexVelocity : public KernelBase {
return radius; return radius;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel kninitVortexVelocity ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phiObs, vel, center, radius);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phiObs, vel, center, radius);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phiObs, vel, center, radius);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phiObs, vel, center, radius);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const Grid<Real> &phiObs; const Grid<Real> &phiObs;
MACGrid &vel; MACGrid &vel;
const Vec3 &center; const Vec3 &center;
@@ -2318,7 +2294,7 @@ template<class T> struct knBlurGrid : public KernelBase {
Grid<T> &originGrid, Grid<T> &originGrid,
Grid<T> &targetGrid, Grid<T> &targetGrid,
GaussianKernelCreator &gkSigma, GaussianKernelCreator &gkSigma,
int cdir) const int cdir)
{ {
targetGrid(i, j, k) = convolveGrid<T>(originGrid, gkSigma, Vec3(i, j, k), cdir); targetGrid(i, j, k) = convolveGrid<T>(originGrid, gkSigma, Vec3(i, j, k), cdir);
} }
@@ -2342,37 +2318,35 @@ template<class T> struct knBlurGrid : public KernelBase {
return cdir; return cdir;
} }
typedef int type3; typedef int type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knBlurGrid ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, originGrid, targetGrid, gkSigma, cdir);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<T> &originGrid; Grid<T> &originGrid;
Grid<T> &targetGrid; Grid<T> &targetGrid;
GaussianKernelCreator &gkSigma; GaussianKernelCreator &gkSigma;
@@ -2412,7 +2386,7 @@ struct KnBlurMACGridGauss : public KernelBase {
MACGrid &originGrid, MACGrid &originGrid,
MACGrid &target, MACGrid &target,
GaussianKernelCreator &gkSigma, GaussianKernelCreator &gkSigma,
int cdir) const int cdir)
{ {
Vec3 pos(i, j, k); Vec3 pos(i, j, k);
Vec3 step(1.0, 0.0, 0.0); Vec3 step(1.0, 0.0, 0.0);
@@ -2462,37 +2436,35 @@ struct KnBlurMACGridGauss : public KernelBase {
return cdir; return cdir;
} }
typedef int type3; typedef int type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnBlurMACGridGauss ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, originGrid, target, gkSigma, cdir);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, originGrid, target, gkSigma, cdir);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, originGrid, target, gkSigma, cdir);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, originGrid, target, gkSigma, cdir);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
MACGrid &originGrid; MACGrid &originGrid;
MACGrid &target; MACGrid &target;
GaussianKernelCreator &gkSigma; GaussianKernelCreator &gkSigma;


@@ -61,7 +61,7 @@ struct KnTurbulenceClamp : public KernelBase {
Real minK, Real minK,
Real maxK, Real maxK,
Real minNu, Real minNu,
Real maxNu) const Real maxNu)
{ {
Real eps = egrid[idx]; Real eps = egrid[idx];
Real ke = clamp(kgrid[idx], minK, maxK); Real ke = clamp(kgrid[idx], minK, maxK);
@@ -104,21 +104,17 @@ struct KnTurbulenceClamp : public KernelBase {
return maxNu; return maxNu;
} }
typedef Real type5; typedef Real type5;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnTurbulenceClamp ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, kgrid, egrid, minK, maxK, minNu, maxNu);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, kgrid, egrid, minK, maxK, minNu, maxNu);
}
} }
Grid<Real> &kgrid; Grid<Real> &kgrid;
Grid<Real> &egrid; Grid<Real> &egrid;
@@ -163,7 +159,7 @@ struct KnComputeProduction : public KernelBase {
Grid<Real> &prod, Grid<Real> &prod,
Grid<Real> &nuT, Grid<Real> &nuT,
Grid<Real> *strain, Grid<Real> *strain,
Real pscale = 1.0f) const Real pscale = 1.0f)
{ {
Real curEps = eps(i, j, k); Real curEps = eps(i, j, k);
if (curEps > 0) { if (curEps > 0) {
@@ -234,37 +230,35 @@ struct KnComputeProduction : public KernelBase {
return pscale; return pscale;
} }
typedef Real type7; typedef Real type7;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnComputeProduction ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, velCenter, ke, eps, prod, nuT, strain, pscale);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const MACGrid &vel; const MACGrid &vel;
const Grid<Vec3> &velCenter; const Grid<Vec3> &velCenter;
const Grid<Real> &ke; const Grid<Real> &ke;
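Editor's note: KnComputeProduction and the clamp/source kernels around it belong to the k-epsilon turbulence model these files implement. For reference, the textbook relations the model is built on are given below; the exact constants and the strain-rate evaluation used in the code are not visible in these hunks, so this is orientation only.

\nu_t = C_\mu \, \frac{k^2}{\varepsilon}, \qquad
P_k = \nu_t \, \lVert S \rVert^2, \qquad
C_\mu \approx 0.09

\partial_t k = P_k - \varepsilon, \qquad
\partial_t \varepsilon = \frac{\varepsilon}{k}\left(C_{\varepsilon 1} P_k - C_{\varepsilon 2}\,\varepsilon\right)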
@@ -345,7 +339,7 @@ struct KnAddTurbulenceSource : public KernelBase {
run(); run();
} }
inline void op( inline void op(
IndexInt idx, Grid<Real> &kgrid, Grid<Real> &egrid, const Grid<Real> &pgrid, Real dt) const IndexInt idx, Grid<Real> &kgrid, Grid<Real> &egrid, const Grid<Real> &pgrid, Real dt)
{ {
Real eps = egrid[idx], prod = pgrid[idx], ke = kgrid[idx]; Real eps = egrid[idx], prod = pgrid[idx], ke = kgrid[idx];
if (ke <= 0) if (ke <= 0)
@@ -379,21 +373,17 @@ struct KnAddTurbulenceSource : public KernelBase {
return dt; return dt;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnAddTurbulenceSource ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, kgrid, egrid, pgrid, dt);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, kgrid, egrid, pgrid, dt);
}
} }
Grid<Real> &kgrid; Grid<Real> &kgrid;
Grid<Real> &egrid; Grid<Real> &egrid;


@@ -138,9 +138,10 @@ struct MakeRhs : public KernelBase {
} }
} }
// TODO (sebbas): Disabled for now
// per cell divergence correction (optional) // per cell divergence correction (optional)
if (perCellCorr) // if(perCellCorr)
set += perCellCorr->get(i, j, k); // set += perCellCorr->get(i,j,k);
// obtain sum, cell count // obtain sum, cell count
sum += set; sum += set;
@@ -198,19 +199,61 @@ struct MakeRhs : public KernelBase {
return gfClamp; return gfClamp;
} }
typedef Real type9; typedef Real type9;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel MakeRhs ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++) const FlagGrid &flags = getArg0();
Grid<Real> &rhs = getArg1();
const MACGrid &vel = getArg2();
const Grid<Real> *perCellCorr = getArg3();
const MACGrid *fractions = getArg4();
const MACGrid *obvel = getArg5();
const Grid<Real> *phi = getArg6();
const Grid<Real> *curv = getArg7();
const Real &surfTens = getArg8();
const Real &gfClamp = getArg9();
#pragma omp target teams distribute parallel for reduction(+:cnt, sum) collapse(3) schedule(static,1)
{
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i,
j,
k,
flags,
rhs,
vel,
perCellCorr,
fractions,
obvel,
phi,
curv,
surfTens,
gfClamp,
cnt,
sum);
}
{
this->sum = sum;
}
}
else {
const int k = 0;
const FlagGrid &flags = getArg0();
Grid<Real> &rhs = getArg1();
const MACGrid &vel = getArg2();
const Grid<Real> *perCellCorr = getArg3();
const MACGrid *fractions = getArg4();
const MACGrid *obvel = getArg5();
const Grid<Real> *phi = getArg6();
const Grid<Real> *curv = getArg7();
const Real &surfTens = getArg8();
const Real &gfClamp = getArg9();
#pragma omp target teams distribute parallel for reduction(+:cnt, sum) collapse(2) schedule(static,1)
{
for (int j = 1; j < _maxY; j++) for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++) for (int i = 1; i < _maxX; i++)
op(i, op(i,
@@ -228,55 +271,11 @@ struct MakeRhs : public KernelBase {
gfClamp, gfClamp,
cnt, cnt,
sum); sum);
}
{
this->sum = sum;
}
} }
else {
const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++)
for (int i = 1; i < _maxX; i++)
op(i,
j,
k,
flags,
rhs,
vel,
perCellCorr,
fractions,
obvel,
phi,
curv,
surfTens,
gfClamp,
cnt,
sum);
}
}
void run()
{
if (maxZ > 1)
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
MakeRhs(MakeRhs &o, tbb::split)
: KernelBase(o),
flags(o.flags),
rhs(o.rhs),
vel(o.vel),
perCellCorr(o.perCellCorr),
fractions(o.fractions),
obvel(o.obvel),
phi(o.phi),
curv(o.curv),
surfTens(o.surfTens),
gfClamp(o.gfClamp),
cnt(0),
sum(0)
{
}
void join(const MakeRhs &o)
{
cnt += o.cnt;
sum += o.sum;
} }
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &rhs; Grid<Real> &rhs;
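
MakeRhs is one of the kernels the branch actually offloads: the whole triple loop becomes a single "target teams distribute parallel for" with collapse(3), schedule(static,1) and a sum/count reduction. Two caveats worth keeping in mind: OpenMP reductions have to name plain variables rather than class members, and the grid buffers must be visible on the device (the sketch below uses explicit map clauses; the branch appears to rely on the grids being allocated device-accessible instead). A reduced, self-contained version of the pattern, with plain arrays and an invented per-cell divergence:

// GPU offload of a divergence-style reduction loop (sketch).
// Build with an offloading compiler, e.g. clang++ -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda
#include <omp.h>
#include <vector>
#include <cstdio>

int main()
{
  const int maxX = 64, maxY = 64, minZ = 1, maxZ = 64;
  const size_t n = (size_t)maxX * maxY * maxZ;
  std::vector<double> vel(n, 0.25), rhs(n, 0.0);
  double *velp = vel.data(), *rhsp = rhs.data();

  double sum = 0.0;   // reductions must target plain variables, not class members
  long long cnt = 0;

#pragma omp target teams distribute parallel for collapse(3) reduction(+ : sum, cnt) \
    map(to : velp[0 : n]) map(tofrom : rhsp[0 : n])
  for (int k = minZ; k < maxZ; k++)
    for (int j = 1; j < maxY; j++)
      for (int i = 1; i < maxX; i++) {
        const size_t idx = ((size_t)k * maxY + j) * maxX + i;
        const double set = -velp[idx];  // stand-in for the per-cell divergence
        rhsp[idx] = set;
        sum += set;
        cnt++;
      }

  std::printf("cells=%lld mean rhs=%f\n", cnt, cnt ? sum / cnt : 0.0);
  return 0;
}
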
@@ -302,7 +301,7 @@ struct knCorrectVelocity : public KernelBase {
run(); run();
} }
inline void op( inline void op(
int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Real> &pressure) const int i, int j, int k, const FlagGrid &flags, MACGrid &vel, const Grid<Real> &pressure)
{ {
const IndexInt idx = flags.index(i, j, k); const IndexInt idx = flags.index(i, j, k);
if (flags.isFluid(idx)) { if (flags.isFluid(idx)) {
@@ -353,37 +352,36 @@ struct knCorrectVelocity : public KernelBase {
return pressure; return pressure;
} }
typedef Grid<Real> type2; typedef Grid<Real> type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knCorrectVelocity ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++) const FlagGrid &flags = getArg0();
for (int j = 1; j < _maxY; j++) MACGrid &vel = getArg1();
for (int i = 1; i < _maxX; i++) const Grid<Real> &pressure = getArg2();
op(i, j, k, flags, vel, pressure); #pragma omp target teams distribute parallel for collapse(3) schedule(static, 1)
{
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, pressure);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) const FlagGrid &flags = getArg0();
for (int i = 1; i < _maxX; i++) MACGrid &vel = getArg1();
op(i, j, k, flags, vel, pressure); const Grid<Real> &pressure = getArg2();
#pragma omp target teams distribute parallel for collapse(2) schedule(static, 1)
{
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, vel, pressure);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
MACGrid &vel; MACGrid &vel;
const Grid<Real> &pressure; const Grid<Real> &pressure;
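
knCorrectVelocity is the simpler offload case: no reduction, just a collapsed triple loop. The open question in such kernels is data movement; the diff adds no map clauses, so the grid storage presumably has to be unified or otherwise device-accessible. A portable sketch that instead keeps the fields resident on the device with an explicit target data region (plain arrays and a made-up one-sided stencil stand in for the MACGrid/FlagGrid logic):

// Pressure-gradient correction offload (sketch) with an explicit device data region.
#include <omp.h>
#include <vector>
#include <cstdio>

int main()
{
  const int maxX = 64, maxY = 64, maxZ = 64;
  const size_t n = (size_t)maxX * maxY * maxZ;
  std::vector<float> vel(n, 1.0f), pressure(n, 0.5f);
  float *velp = vel.data(), *prsp = pressure.data();

  // Keep both fields resident on the device across repeated launches,
  // instead of re-mapping them for every kernel (what unified memory would hide).
#pragma omp target data map(tofrom : velp[0 : n]) map(to : prsp[0 : n])
  {
#pragma omp target teams distribute parallel for collapse(3)
    for (int k = 1; k < maxZ; k++)
      for (int j = 1; j < maxY; j++)
        for (int i = 1; i < maxX; i++) {
          const size_t idx = ((size_t)k * maxY + j) * maxX + i;
          const size_t im1 = idx - 1;            // x-neighbour, stand-in for flags/stride logic
          velp[idx] -= (prsp[idx] - prsp[im1]);  // subtract the pressure gradient
        }
  }
  std::printf("vel sample = %f\n", vel[n / 2]);
  return 0;
}
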
@@ -441,7 +439,7 @@ struct ApplyGhostFluidDiagonal : public KernelBase {
Grid<Real> &A0, Grid<Real> &A0,
const FlagGrid &flags, const FlagGrid &flags,
const Grid<Real> &phi, const Grid<Real> &phi,
const Real gfClamp) const const Real gfClamp)
{ {
const int X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); const int X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
const IndexInt idx = flags.index(i, j, k); const IndexInt idx = flags.index(i, j, k);
@@ -483,37 +481,35 @@ struct ApplyGhostFluidDiagonal : public KernelBase {
return gfClamp; return gfClamp;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel ApplyGhostFluidDiagonal ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, A0, flags, phi, gfClamp);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, A0, flags, phi, gfClamp);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, A0, flags, phi, gfClamp);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, A0, flags, phi, gfClamp);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
Grid<Real> &A0; Grid<Real> &A0;
const FlagGrid &flags; const FlagGrid &flags;
const Grid<Real> &phi; const Grid<Real> &phi;
@@ -551,7 +547,7 @@ struct knCorrectVelocityGhostFluid : public KernelBase {
const Grid<Real> &phi, const Grid<Real> &phi,
Real gfClamp, Real gfClamp,
const Grid<Real> *curv, const Grid<Real> *curv,
const Real surfTens) const const Real surfTens)
{ {
const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
const IndexInt idx = flags.index(i, j, k); const IndexInt idx = flags.index(i, j, k);
@@ -640,37 +636,35 @@ struct knCorrectVelocityGhostFluid : public KernelBase {
return surfTens; return surfTens;
} }
typedef Real type6; typedef Real type6;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knCorrectVelocityGhostFluid ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, flags, pressure, phi, gfClamp, curv, surfTens);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
MACGrid &vel; MACGrid &vel;
const FlagGrid &flags; const FlagGrid &flags;
const Grid<Real> &pressure; const Grid<Real> &pressure;
@@ -710,7 +704,7 @@ struct knReplaceClampedGhostFluidVels : public KernelBase {
const FlagGrid &flags, const FlagGrid &flags,
const Grid<Real> &pressure, const Grid<Real> &pressure,
const Grid<Real> &phi, const Grid<Real> &phi,
Real gfClamp) const Real gfClamp)
{ {
const IndexInt idx = flags.index(i, j, k); const IndexInt idx = flags.index(i, j, k);
const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ(); const IndexInt X = flags.getStrideX(), Y = flags.getStrideY(), Z = flags.getStrideZ();
@@ -758,37 +752,35 @@ struct knReplaceClampedGhostFluidVels : public KernelBase {
return gfClamp; return gfClamp;
} }
typedef Real type4; typedef Real type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knReplaceClampedGhostFluidVels ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, flags, pressure, phi, gfClamp);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, flags, pressure, phi, gfClamp);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, flags, pressure, phi, gfClamp);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, flags, pressure, phi, gfClamp);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
MACGrid &vel; MACGrid &vel;
const FlagGrid &flags; const FlagGrid &flags;
const Grid<Real> &pressure; const Grid<Real> &pressure;
@@ -822,28 +814,21 @@ struct CountEmptyCells : public KernelBase {
return flags; return flags;
} }
typedef FlagGrid type0; typedef FlagGrid type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel CountEmptyCells ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, flags, numEmpty);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
CountEmptyCells(CountEmptyCells &o, tbb::split) : KernelBase(o), flags(o.flags), numEmpty(0) {
{ int numEmpty = 0;
} #pragma omp for nowait
void join(const CountEmptyCells &o) for (IndexInt i = 0; i < _sz; i++)
{ op(i, flags, numEmpty);
numEmpty += o.numEmpty; #pragma omp critical
{
this->numEmpty += numEmpty;
}
}
} }
const FlagGrid &flags; const FlagGrid &flags;
int numEmpty; int numEmpty;
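
CountEmptyCells is the reduction case on the host: the tbb::parallel_reduce split/join machinery is replaced by a thread-private counter that each thread merges under a critical section (the nowait on the worksharing loop lets threads merge as soon as they finish their share). A stand-alone sketch of the same idea, with an invented flag encoding:

// Host reduction in the style of the converted CountEmptyCells kernel.
#include <omp.h>
#include <vector>
#include <cstdio>

int main()
{
  const long long size = 1 << 22;
  std::vector<int> flags(size);
  for (long long i = 0; i < size; i++)
    flags[i] = (i % 7 == 0) ? 4 : 1;  // 4 stands in for TypeEmpty

  int numEmpty = 0;  // the accumulator the kernel exposes as a member

#pragma omp parallel
  {
    int localEmpty = 0;
#pragma omp for nowait
    for (long long i = 0; i < size; i++)
      if (flags[i] == 4)
        localEmpty++;
#pragma omp critical
    {
      numEmpty += localEmpty;
    }
  }
  // An equivalent, shorter form would be: #pragma omp parallel for reduction(+ : numEmpty)
  std::printf("empty cells: %d\n", numEmpty);
  return 0;
}
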
@@ -964,11 +949,19 @@ void computePressureRhs(Grid<Real> &rhs,
const Real surfTens = 0.) const Real surfTens = 0.)
{ {
// compute divergence and init right hand side // compute divergence and init right hand side
MakeRhs kernMakeRhs( // auto kernMakeRhs = new MakeRhs(flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv,
flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv, surfTens, gfClamp); // surfTens, gfClamp );
printf("pressure = %p, flags = %p, rhs = %p, vel = %p\n",
pressure.mData,
flags.mData,
rhs.mData,
vel.mData);
MakeRhs(flags, rhs, vel, perCellCorr, fractions, obvel, phi, curv, surfTens, gfClamp);
if (enforceCompatibility) // TODO (sebbas): Disabled for now
rhs += (Real)(-kernMakeRhs.sum / (Real)kernMakeRhs.cnt); // if(enforceCompatibility)
// rhs += (Real)(-kernMakeRhs->sum / (Real)kernMakeRhs->cnt);
// delete kernMakeRhs;
} }
static PyObject *_W_1(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_1(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
@@ -1050,6 +1043,13 @@ void solvePressureSystem(Grid<Real> &rhs,
MACGrid &vel, MACGrid &vel,
Grid<Real> &pressure, Grid<Real> &pressure,
const FlagGrid &flags, const FlagGrid &flags,
Grid<Real> *residual = nullptr,
Grid<Real> *search = nullptr,
Grid<Real> *A0 = nullptr,
Grid<Real> *Ai = nullptr,
Grid<Real> *Aj = nullptr,
Grid<Real> *Ak = nullptr,
Grid<Real> *tmp = nullptr,
Real cgAccuracy = 1e-3, Real cgAccuracy = 1e-3,
const Grid<Real> *phi = nullptr, const Grid<Real> *phi = nullptr,
const Grid<Real> *perCellCorr = nullptr, const Grid<Real> *perCellCorr = nullptr,
@@ -1069,19 +1069,37 @@ void solvePressureSystem(Grid<Real> &rhs,
// reserve temp grids // reserve temp grids
FluidSolver *parent = flags.getParent(); FluidSolver *parent = flags.getParent();
Grid<Real> residual(parent);
Grid<Real> search(parent); bool cleanUp = false;
Grid<Real> A0(parent); if (!residual) {
Grid<Real> Ai(parent); residual = new Grid<Real>(parent, true, false, true);
Grid<Real> Aj(parent); search = new Grid<Real>(parent, true, false, true);
Grid<Real> Ak(parent); A0 = new Grid<Real>(parent, true, false, true);
Grid<Real> tmp(parent); Ai = new Grid<Real>(parent, true, false, true);
Aj = new Grid<Real>(parent, true, false, true);
Ak = new Grid<Real>(parent, true, false, true);
tmp = new Grid<Real>(parent, true, false, true);
cleanUp = true;
}
else {
residual->clear(true);
search->clear(true);
A0->clear(true);
Ai->clear(true);
Aj->clear(true);
Ak->clear(true);
tmp->clear(true);
}
std::cout << "HERE 5" << std::endl;
// setup matrix and boundaries // setup matrix and boundaries
MakeLaplaceMatrix(flags, A0, Ai, Aj, Ak, fractions); MakeLaplaceMatrix(flags, *A0, *Ai, *Aj, *Ak, fractions);
// MakeLaplaceMatrix(flags, A0, Ai, Aj, Ak, fractions);
// TODO (sebbas): Disabled for now
if (phi) { if (phi) {
ApplyGhostFluidDiagonal(A0, flags, *phi, gfClamp); ApplyGhostFluidDiagonal(*A0, flags, *phi, gfClamp);
} }
// check whether we need to fix some pressure value... // check whether we need to fix some pressure value...
@@ -1125,7 +1143,8 @@ void solvePressureSystem(Grid<Real> &rhs,
// debMsg("No empty cells! Fixing pressure of cell "<<fixPidx<<" to zero",1); // debMsg("No empty cells! Fixing pressure of cell "<<fixPidx<<" to zero",1);
} }
if (fixPidx >= 0) { if (fixPidx >= 0) {
fixPressure(fixPidx, Real(0), rhs, A0, Ai, Aj, Ak); fixPressure(fixPidx, Real(0), rhs, *A0, *Ai, *Aj, *Ak);
// fixPressure(fixPidx, Real(0), rhs, A0, Ai, Aj, Ak);
static bool msgOnce = false; static bool msgOnce = false;
if (!msgOnce) { if (!msgOnce) {
debMsg("Pinning pressure of cell " << fixPidx << " to zero", 2); debMsg("Pinning pressure of cell " << fixPidx << " to zero", 2);
@@ -1133,20 +1152,21 @@ void solvePressureSystem(Grid<Real> &rhs,
} }
} }
} }
std::cout << "HERE 6" << std::endl;
// CG setup // CG setup
// note: the last factor increases the max iterations for 2d, which right now can't use a // note: the last factor increases the max iterations for 2d, which right now can't use a
// preconditioner // preconditioner
GridCgInterface *gcg; GridCgInterface *gcg;
vector<Grid<Real> *> matA{&A0, &Ai, &Aj}; if (vel.is3D())
gcg = new GridCg<ApplyMatrix>(pressure, rhs, *residual, *search, flags, *tmp, A0, Ai, Aj, Ak);
if (vel.is3D()) { // gcg = new GridCg<ApplyMatrix> (pressure, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj,
matA.push_back(&Ak); // &Ak);
gcg = new GridCg<ApplyMatrix>(pressure, rhs, residual, search, flags, tmp, matA); else
} gcg = new GridCg<ApplyMatrix2D>(
else { pressure, rhs, *residual, *search, flags, *tmp, A0, Ai, Aj, Ak);
gcg = new GridCg<ApplyMatrix2D>(pressure, rhs, residual, search, flags, tmp, matA); // gcg = new GridCg<ApplyMatrix2D>(pressure, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj,
} // &Ak);
gcg->setAccuracy(cgAccuracy); gcg->setAccuracy(cgAccuracy);
gcg->setUseL2Norm(useL2Norm); gcg->setUseL2Norm(useL2Norm);
@@ -1155,6 +1175,7 @@ void solvePressureSystem(Grid<Real> &rhs,
Grid<Real> *pca0 = nullptr, *pca1 = nullptr, *pca2 = nullptr, *pca3 = nullptr; Grid<Real> *pca0 = nullptr, *pca1 = nullptr, *pca2 = nullptr, *pca3 = nullptr;
GridMg *pmg = nullptr; GridMg *pmg = nullptr;
std::cout << "HERE 7" << std::endl;
// optional preconditioning // optional preconditioning
if (preconditioner == PcMIC) { if (preconditioner == PcMIC) {
@@ -1180,10 +1201,12 @@ void solvePressureSystem(Grid<Real> &rhs,
gcg->setMGPreconditioner(GridCgInterface::PC_MGP, pmg); gcg->setMGPreconditioner(GridCgInterface::PC_MGP, pmg);
} }
std::cout << "HERE 8" << std::endl;
// CG solve // CG solve
Real time = 0;
for (int iter = 0; iter < maxIter; iter++) { for (int iter = 0; iter < maxIter; iter++) {
if (!gcg->iterate()) if (!gcg->iterate(time))
iter = maxIter; iter = maxIter;
if (iter < maxIter) if (iter < maxIter)
debMsg("FluidSolver::solvePressure iteration " << iter debMsg("FluidSolver::solvePressure iteration " << iter
@@ -1193,8 +1216,26 @@ void solvePressureSystem(Grid<Real> &rhs,
debMsg("FluidSolver::solvePressure done. Iterations:" << gcg->getIterations() debMsg("FluidSolver::solvePressure done. Iterations:" << gcg->getIterations()
<< ", residual:" << gcg->getResNorm(), << ", residual:" << gcg->getResNorm(),
2); 2);
// std::cout << "TIME: " << time << std::endl;
// Cleanup // Cleanup
if (cleanUp) {
if (residual)
delete residual;
if (search)
delete search;
if (A0)
delete A0;
if (Ai)
delete Ai;
if (Aj)
delete Aj;
if (Ak)
delete Ak;
if (tmp)
delete tmp;
}
if (gcg) if (gcg)
delete gcg; delete gcg;
if (pca0) if (pca0)
@@ -1225,26 +1266,40 @@ static PyObject *_W_2(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
MACGrid &vel = *_args.getPtr<MACGrid>("vel", 1, &_lock); MACGrid &vel = *_args.getPtr<MACGrid>("vel", 1, &_lock);
Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 2, &_lock); Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 2, &_lock);
const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 3, &_lock); const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 3, &_lock);
Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 4, 1e-3, &_lock); Grid<Real> *residual = _args.getPtrOpt<Grid<Real>>("residual", 4, nullptr, &_lock);
const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 5, nullptr, &_lock); Grid<Real> *search = _args.getPtrOpt<Grid<Real>>("search", 5, nullptr, &_lock);
Grid<Real> *A0 = _args.getPtrOpt<Grid<Real>>("A0", 6, nullptr, &_lock);
Grid<Real> *Ai = _args.getPtrOpt<Grid<Real>>("Ai", 7, nullptr, &_lock);
Grid<Real> *Aj = _args.getPtrOpt<Grid<Real>>("Aj", 8, nullptr, &_lock);
Grid<Real> *Ak = _args.getPtrOpt<Grid<Real>>("Ak", 9, nullptr, &_lock);
Grid<Real> *tmp = _args.getPtrOpt<Grid<Real>>("tmp", 10, nullptr, &_lock);
Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 11, 1e-3, &_lock);
const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 12, nullptr, &_lock);
const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>( const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>(
"perCellCorr", 6, nullptr, &_lock); "perCellCorr", 13, nullptr, &_lock);
const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 7, nullptr, &_lock); const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 14, nullptr, &_lock);
Real gfClamp = _args.getOpt<Real>("gfClamp", 8, 1e-04, &_lock); Real gfClamp = _args.getOpt<Real>("gfClamp", 15, 1e-04, &_lock);
Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 9, 1.5, &_lock); Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 16, 1.5, &_lock);
bool precondition = _args.getOpt<bool>("precondition", 10, true, &_lock); bool precondition = _args.getOpt<bool>("precondition", 17, true, &_lock);
int preconditioner = _args.getOpt<int>("preconditioner", 11, PcMIC, &_lock); int preconditioner = _args.getOpt<int>("preconditioner", 18, PcMIC, &_lock);
const bool enforceCompatibility = _args.getOpt<bool>( const bool enforceCompatibility = _args.getOpt<bool>(
"enforceCompatibility", 12, false, &_lock); "enforceCompatibility", 19, false, &_lock);
const bool useL2Norm = _args.getOpt<bool>("useL2Norm", 13, false, &_lock); const bool useL2Norm = _args.getOpt<bool>("useL2Norm", 20, false, &_lock);
const bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 14, false, &_lock); const bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 21, false, &_lock);
const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 15, nullptr, &_lock); const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 22, nullptr, &_lock);
const Real surfTens = _args.getOpt<Real>("surfTens", 16, 0., &_lock); const Real surfTens = _args.getOpt<Real>("surfTens", 23, 0., &_lock);
_retval = getPyNone(); _retval = getPyNone();
solvePressureSystem(rhs, solvePressureSystem(rhs,
vel, vel,
pressure, pressure,
flags, flags,
residual,
search,
A0,
Ai,
Aj,
Ak,
tmp,
cgAccuracy, cgAccuracy,
phi, phi,
perCellCorr, perCellCorr,
@@ -1370,6 +1425,13 @@ void PbRegister_correctVelocity()
void solvePressure(MACGrid &vel, void solvePressure(MACGrid &vel,
Grid<Real> &pressure, Grid<Real> &pressure,
const FlagGrid &flags, const FlagGrid &flags,
Grid<Real> *residual = nullptr,
Grid<Real> *search = nullptr,
Grid<Real> *A0 = nullptr,
Grid<Real> *Ai = nullptr,
Grid<Real> *Aj = nullptr,
Grid<Real> *Ak = nullptr,
Grid<Real> *tmp = nullptr,
Real cgAccuracy = 1e-3, Real cgAccuracy = 1e-3,
const Grid<Real> *phi = nullptr, const Grid<Real> *phi = nullptr,
const Grid<Real> *perCellCorr = nullptr, const Grid<Real> *perCellCorr = nullptr,
@@ -1411,6 +1473,13 @@ void solvePressure(MACGrid &vel,
vel, vel,
pressure, pressure,
flags, flags,
residual,
search,
A0,
Ai,
Aj,
Ak,
tmp,

cgAccuracy, cgAccuracy,
phi, phi,
perCellCorr, perCellCorr,
@@ -1442,10 +1511,11 @@ void solvePressure(MACGrid &vel,
curv, curv,
surfTens); surfTens);
// TODO (sebbas): Disabled for now
// optionally , return RHS // optionally , return RHS
if (retRhs) { // if(retRhs) {
retRhs->copyFrom(rhs); // retRhs->copyFrom(rhs);
} // }
} }
static PyObject *_W_4(PyObject *_self, PyObject *_linargs, PyObject *_kwds) static PyObject *_W_4(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
{ {
@@ -1460,26 +1530,40 @@ static PyObject *_W_4(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
MACGrid &vel = *_args.getPtr<MACGrid>("vel", 0, &_lock); MACGrid &vel = *_args.getPtr<MACGrid>("vel", 0, &_lock);
Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 1, &_lock); Grid<Real> &pressure = *_args.getPtr<Grid<Real>>("pressure", 1, &_lock);
const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 2, &_lock); const FlagGrid &flags = *_args.getPtr<FlagGrid>("flags", 2, &_lock);
Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 3, 1e-3, &_lock); Grid<Real> *residual = _args.getPtrOpt<Grid<Real>>("residual", 3, nullptr, &_lock);
const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 4, nullptr, &_lock); Grid<Real> *search = _args.getPtrOpt<Grid<Real>>("search", 4, nullptr, &_lock);
Grid<Real> *A0 = _args.getPtrOpt<Grid<Real>>("A0", 5, nullptr, &_lock);
Grid<Real> *Ai = _args.getPtrOpt<Grid<Real>>("Ai", 6, nullptr, &_lock);
Grid<Real> *Aj = _args.getPtrOpt<Grid<Real>>("Aj", 7, nullptr, &_lock);
Grid<Real> *Ak = _args.getPtrOpt<Grid<Real>>("Ak", 8, nullptr, &_lock);
Grid<Real> *tmp = _args.getPtrOpt<Grid<Real>>("tmp", 9, nullptr, &_lock);
Real cgAccuracy = _args.getOpt<Real>("cgAccuracy", 10, 1e-3, &_lock);
const Grid<Real> *phi = _args.getPtrOpt<Grid<Real>>("phi", 11, nullptr, &_lock);
const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>( const Grid<Real> *perCellCorr = _args.getPtrOpt<Grid<Real>>(
"perCellCorr", 5, nullptr, &_lock); "perCellCorr", 12, nullptr, &_lock);
const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 6, nullptr, &_lock); const MACGrid *fractions = _args.getPtrOpt<MACGrid>("fractions", 13, nullptr, &_lock);
const MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 7, nullptr, &_lock); const MACGrid *obvel = _args.getPtrOpt<MACGrid>("obvel", 14, nullptr, &_lock);
Real gfClamp = _args.getOpt<Real>("gfClamp", 8, 1e-04, &_lock); Real gfClamp = _args.getOpt<Real>("gfClamp", 15, 1e-04, &_lock);
Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 9, 1.5, &_lock); Real cgMaxIterFac = _args.getOpt<Real>("cgMaxIterFac", 16, 1.5, &_lock);
bool precondition = _args.getOpt<bool>("precondition", 10, true, &_lock); bool precondition = _args.getOpt<bool>("precondition", 17, true, &_lock);
int preconditioner = _args.getOpt<int>("preconditioner", 11, PcMIC, &_lock); int preconditioner = _args.getOpt<int>("preconditioner", 18, PcMIC, &_lock);
bool enforceCompatibility = _args.getOpt<bool>("enforceCompatibility", 12, false, &_lock); bool enforceCompatibility = _args.getOpt<bool>("enforceCompatibility", 19, false, &_lock);
bool useL2Norm = _args.getOpt<bool>("useL2Norm", 13, false, &_lock); bool useL2Norm = _args.getOpt<bool>("useL2Norm", 20, false, &_lock);
bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 14, false, &_lock); bool zeroPressureFixing = _args.getOpt<bool>("zeroPressureFixing", 21, false, &_lock);
const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 15, nullptr, &_lock); const Grid<Real> *curv = _args.getPtrOpt<Grid<Real>>("curv", 22, nullptr, &_lock);
const Real surfTens = _args.getOpt<Real>("surfTens", 16, 0., &_lock); const Real surfTens = _args.getOpt<Real>("surfTens", 23, 0., &_lock);
Grid<Real> *retRhs = _args.getPtrOpt<Grid<Real>>("retRhs", 17, nullptr, &_lock); Grid<Real> *retRhs = _args.getPtrOpt<Grid<Real>>("retRhs", 24, nullptr, &_lock);
_retval = getPyNone(); _retval = getPyNone();
solvePressure(vel, solvePressure(vel,
pressure, pressure,
flags, flags,
residual,
search,
A0,
Ai,
Aj,
Ak,
tmp,
cgAccuracy, cgAccuracy,
phi, phi,
perCellCorr, perCellCorr,

View File

@@ -34,7 +34,7 @@ struct KnAddForcePvel : public KernelBase {
ParticleDataImpl<Vec3> &v, ParticleDataImpl<Vec3> &v,
const Vec3 &da, const Vec3 &da,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
if (ptype && ((*ptype)[idx] & exclude)) if (ptype && ((*ptype)[idx] & exclude))
return; return;
@@ -60,21 +60,17 @@ struct KnAddForcePvel : public KernelBase {
return exclude; return exclude;
} }
typedef int type3; typedef int type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnAddForcePvel ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, v, da, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, v, da, ptype, exclude);
}
} }
ParticleDataImpl<Vec3> &v; ParticleDataImpl<Vec3> &v;
const Vec3 &da; const Vec3 &da;
@@ -150,7 +146,7 @@ struct KnUpdateVelocityFromDeltaPos : public KernelBase {
const ParticleDataImpl<Vec3> &x_prev, const ParticleDataImpl<Vec3> &x_prev,
const Real over_dt, const Real over_dt,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
if (ptype && ((*ptype)[idx] & exclude)) if (ptype && ((*ptype)[idx] & exclude))
return; return;
@@ -186,21 +182,17 @@ struct KnUpdateVelocityFromDeltaPos : public KernelBase {
return exclude; return exclude;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnUpdateVelocityFromDeltaPos ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, v, x_prev, over_dt, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, v, x_prev, over_dt, ptype, exclude);
}
} }
const BasicParticleSystem &p; const BasicParticleSystem &p;
ParticleDataImpl<Vec3> &v; ParticleDataImpl<Vec3> &v;
@@ -273,7 +265,7 @@ struct KnStepEuler : public KernelBase {
const ParticleDataImpl<Vec3> &v, const ParticleDataImpl<Vec3> &v,
const Real dt, const Real dt,
const ParticleDataImpl<int> *ptype, const ParticleDataImpl<int> *ptype,
const int exclude) const const int exclude)
{ {
if (ptype && ((*ptype)[idx] & exclude)) if (ptype && ((*ptype)[idx] & exclude))
return; return;
@@ -304,21 +296,17 @@ struct KnStepEuler : public KernelBase {
return exclude; return exclude;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnStepEuler ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, v, dt, ptype, exclude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, v, dt, ptype, exclude);
}
} }
BasicParticleSystem &p; BasicParticleSystem &p;
const ParticleDataImpl<Vec3> &v; const ParticleDataImpl<Vec3> &v;
@@ -393,7 +381,7 @@ struct KnSetPartType : public KernelBase {
const int mark, const int mark,
const int stype, const int stype,
const FlagGrid &flags, const FlagGrid &flags,
const int cflag) const const int cflag)
{ {
if (flags.isInBounds(part.getPos(idx), 0) && (flags.getAt(part.getPos(idx)) & cflag) && if (flags.isInBounds(part.getPos(idx), 0) && (flags.getAt(part.getPos(idx)) & cflag) &&
(ptype[idx] & stype)) (ptype[idx] & stype))
@@ -429,21 +417,17 @@ struct KnSetPartType : public KernelBase {
return cflag; return cflag;
} }
typedef int type5; typedef int type5;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnSetPartType ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, ptype, part, mark, stype, flags, cflag);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, ptype, part, mark, stype, flags, cflag);
}
} }
ParticleDataImpl<int> &ptype; ParticleDataImpl<int> &ptype;
const BasicParticleSystem &part; const BasicParticleSystem &part;
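
The particle kernels in this file add one more wrinkle, a ptype/exclude filter at the top of op(); under the OpenMP form the early return simply skips that iteration. A small sketch with invented flag values:

// Per-particle kernel with a type filter, host OpenMP.
#include <omp.h>
#include <vector>
#include <cstdio>

struct Vec3 { double x, y, z; };

int main()
{
  const long long size = 1 << 20;
  std::vector<Vec3> v(size, {0, 0, 0});
  std::vector<int> ptype(size);
  for (long long i = 0; i < size; i++)
    ptype[i] = (i % 3 == 0) ? 2 : 1;            // 2 stands in for the excluded flag

  const int exclude = 2;
  const Vec3 da = {0.0, -9.81 * 0.05, 0.0};     // dt * gravity, made up for the sketch

#pragma omp parallel
  {
#pragma omp for
    for (long long idx = 0; idx < size; idx++) {
      if (ptype[idx] & exclude)
        continue;                               // the kernel's early "return" becomes a skip
      v[idx].x += da.x;
      v[idx].y += da.y;
      v[idx].z += da.z;
    }
  }
  std::printf("v[1].y=%f v[0].y=%f\n", v[1].y, v[0].y);
  return 0;
}
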

View File

@@ -99,7 +99,7 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase {
const Real scaleFromManta, const Real scaleFromManta,
const int itype = FlagGrid::TypeFluid, const int itype = FlagGrid::TypeFluid,
const int jtype = FlagGrid::TypeObstacle | FlagGrid::TypeOutflow | const int jtype = FlagGrid::TypeObstacle | FlagGrid::TypeOutflow |
FlagGrid::TypeInflow) const FlagGrid::TypeInflow)
{ {
if (!(flags(i, j, k) & itype)) if (!(flags(i, j, k) & itype))
@@ -253,19 +253,48 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase {
return jtype; return jtype;
} }
typedef int type16; typedef int type16;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knFlipComputeSecondaryParticlePotentials ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
#pragma omp parallel
{
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = radius; j < _maxY; j++)
for (int i = radius; i < _maxX; i++)
op(i,
j,
k,
potTA,
potWC,
potKE,
neighborRatio,
flags,
v,
normal,
radius,
tauMinTA,
tauMaxTA,
tauMinWC,
tauMaxWC,
tauMinKE,
tauMaxKE,
scaleFromManta,
itype,
jtype);
}
}
else {
const int k = 0;
#pragma omp parallel
{
#pragma omp for
for (int j = radius; j < _maxY; j++) for (int j = radius; j < _maxY; j++)
for (int i = radius; i < _maxX; i++) for (int i = radius; i < _maxX; i++)
op(i, op(i,
@@ -288,39 +317,8 @@ struct knFlipComputeSecondaryParticlePotentials : public KernelBase {
scaleFromManta, scaleFromManta,
itype, itype,
jtype); jtype);
}
} }
else {
const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++)
for (int i = radius; i < _maxX; i++)
op(i,
j,
k,
potTA,
potWC,
potKE,
neighborRatio,
flags,
v,
normal,
radius,
tauMinTA,
tauMaxTA,
tauMinWC,
tauMaxWC,
tauMinKE,
tauMaxKE,
scaleFromManta,
itype,
jtype);
}
}
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(radius, maxY), *this);
} }
Grid<Real> &potTA; Grid<Real> &potTA;
Grid<Real> &potWC; Grid<Real> &potWC;
@@ -670,13 +668,7 @@ struct knFlipSampleSecondaryParticlesMoreCylinders : public KernelBase {
return rand; return rand;
} }
typedef RandomStream type17; typedef RandomStream type17;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knFlipSampleSecondaryParticlesMoreCylinders ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void run() void run()
{ {
const int _maxX = maxX; const int _maxX = maxX;
@@ -930,13 +922,7 @@ struct knFlipSampleSecondaryParticles : public KernelBase {
return rand; return rand;
} }
typedef RandomStream type17; typedef RandomStream type17;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knFlipSampleSecondaryParticles ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void run() void run()
{ {
const int _maxX = maxX; const int _maxX = maxX;
@@ -1192,7 +1178,7 @@ struct knFlipUpdateSecondaryParticlesLinear : public KernelBase {
const Real c_b, const Real c_b,
const Real dt, const Real dt,
const int exclude, const int exclude,
const int antitunneling) const const int antitunneling)
{ {
if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude) if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude)
@@ -1342,36 +1328,32 @@ struct knFlipUpdateSecondaryParticlesLinear : public KernelBase {
return antitunneling; return antitunneling;
} }
typedef int type14; typedef int type14;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knFlipUpdateSecondaryParticlesLinear ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx,
pts_sec,
v_sec,
l_sec,
f_sec,
flags,
v,
neighborRatio,
gravity,
k_b,
k_d,
c_s,
c_b,
dt,
exclude,
antitunneling);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i,
pts_sec,
v_sec,
l_sec,
f_sec,
flags,
v,
neighborRatio,
gravity,
k_b,
k_d,
c_s,
c_b,
dt,
exclude,
antitunneling);
}
} }
BasicParticleSystem &pts_sec; BasicParticleSystem &pts_sec;
ParticleDataImpl<Vec3> &v_sec; ParticleDataImpl<Vec3> &v_sec;
@@ -1449,7 +1431,7 @@ struct knFlipUpdateSecondaryParticlesCubic : public KernelBase {
const Real dt, const Real dt,
const int exclude, const int exclude,
const int antitunneling, const int antitunneling,
const int itype) const const int itype)
{ {
if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude) if (!pts_sec.isActive(idx) || pts_sec[idx].flag & exclude)
@@ -1655,38 +1637,34 @@ struct knFlipUpdateSecondaryParticlesCubic : public KernelBase {
return itype; return itype;
} }
typedef int type16; typedef int type16;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knFlipUpdateSecondaryParticlesCubic ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx,
pts_sec,
v_sec,
l_sec,
f_sec,
flags,
v,
neighborRatio,
radius,
gravity,
k_b,
k_d,
c_s,
c_b,
dt,
exclude,
antitunneling,
itype);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i,
pts_sec,
v_sec,
l_sec,
f_sec,
flags,
v,
neighborRatio,
radius,
gravity,
k_b,
k_d,
c_s,
c_b,
dt,
exclude,
antitunneling,
itype);
}
} }
BasicParticleSystem &pts_sec; BasicParticleSystem &pts_sec;
ParticleDataImpl<Vec3> &v_sec; ParticleDataImpl<Vec3> &v_sec;
@@ -1856,7 +1834,7 @@ struct knFlipDeleteParticlesInObstacle : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, BasicParticleSystem &pts, const FlagGrid &flags) const inline void op(IndexInt idx, BasicParticleSystem &pts, const FlagGrid &flags)
{ {
if (!pts.isActive(idx)) if (!pts.isActive(idx))
@@ -1885,21 +1863,17 @@ struct knFlipDeleteParticlesInObstacle : public KernelBase {
return flags; return flags;
} }
typedef FlagGrid type1; typedef FlagGrid type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knFlipDeleteParticlesInObstacle ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, pts, flags);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, pts, flags);
}
} }
BasicParticleSystem &pts; BasicParticleSystem &pts;
const FlagGrid &flags; const FlagGrid &flags;
@@ -2031,7 +2005,7 @@ struct knSetFlagsFromLevelset : public KernelBase {
FlagGrid &flags, FlagGrid &flags,
const Grid<Real> &phi, const Grid<Real> &phi,
const int exclude = FlagGrid::TypeObstacle, const int exclude = FlagGrid::TypeObstacle,
const int itype = FlagGrid::TypeFluid) const const int itype = FlagGrid::TypeFluid)
{ {
if (phi(idx) < 0 && !(flags(idx) & exclude)) if (phi(idx) < 0 && !(flags(idx) & exclude))
flags(idx) = itype; flags(idx) = itype;
@@ -2056,21 +2030,17 @@ struct knSetFlagsFromLevelset : public KernelBase {
return itype; return itype;
} }
typedef int type3; typedef int type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knSetFlagsFromLevelset ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, flags, phi, exclude, itype);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, flags, phi, exclude, itype);
}
} }
FlagGrid &flags; FlagGrid &flags;
const Grid<Real> &phi; const Grid<Real> &phi;
@@ -2126,7 +2096,7 @@ struct knSetMACFromLevelset : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, MACGrid &v, const Grid<Real> &phi, const Vec3 c) const inline void op(int i, int j, int k, MACGrid &v, const Grid<Real> &phi, const Vec3 c)
{ {
if (phi.getInterpolated(Vec3(i, j, k)) > 0) if (phi.getInterpolated(Vec3(i, j, k)) > 0)
v(i, j, k) = c; v(i, j, k) = c;
@@ -2146,37 +2116,35 @@ struct knSetMACFromLevelset : public KernelBase {
return c; return c;
} }
typedef Vec3 type2; typedef Vec3 type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knSetMACFromLevelset ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, v, phi, c);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, v, phi, c);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, v, phi, c);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, v, phi, c);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
MACGrid &v; MACGrid &v;
const Grid<Real> &phi; const Grid<Real> &phi;
const Vec3 c; const Vec3 c;
@@ -2268,7 +2236,7 @@ struct knFlipComputePotentialTrappedAir : public KernelBase {
const Real tauMax, const Real tauMax,
const Real scaleFromManta, const Real scaleFromManta,
const int itype = FlagGrid::TypeFluid, const int itype = FlagGrid::TypeFluid,
const int jtype = FlagGrid::TypeFluid) const const int jtype = FlagGrid::TypeFluid)
{ {
if (!(flags(i, j, k) & itype)) if (!(flags(i, j, k) & itype))
@@ -2342,37 +2310,35 @@ struct knFlipComputePotentialTrappedAir : public KernelBase {
return jtype; return jtype;
} }
typedef int type8; typedef int type8;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knFlipComputePotentialTrappedAir ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, pot, flags, v, radius, tauMin, tauMax, scaleFromManta, itype, jtype);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
Grid<Real> &pot; Grid<Real> &pot;
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &v; const MACGrid &v;
@@ -2472,7 +2438,7 @@ struct knFlipComputePotentialKineticEnergy : public KernelBase {
const Real tauMin, const Real tauMin,
const Real tauMax, const Real tauMax,
const Real scaleFromManta, const Real scaleFromManta,
const int itype = FlagGrid::TypeFluid) const const int itype = FlagGrid::TypeFluid)
{ {
if (!(flags(i, j, k) & itype)) if (!(flags(i, j, k) & itype))
@@ -2520,37 +2486,35 @@ struct knFlipComputePotentialKineticEnergy : public KernelBase {
return itype; return itype;
} }
typedef int type6; typedef int type6;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knFlipComputePotentialKineticEnergy ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, pot, flags, v, tauMin, tauMax, scaleFromManta, itype);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<Real> &pot; Grid<Real> &pot;
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &v; const MACGrid &v;
@@ -2650,7 +2614,7 @@ struct knFlipComputePotentialWaveCrest : public KernelBase {
const Real tauMax, const Real tauMax,
const Real scaleFromManta, const Real scaleFromManta,
const int itype = FlagGrid::TypeFluid, const int itype = FlagGrid::TypeFluid,
const int jtype = FlagGrid::TypeFluid) const const int jtype = FlagGrid::TypeFluid)
{ {
if (!(flags(i, j, k) & itype)) if (!(flags(i, j, k) & itype))
@@ -2736,19 +2700,41 @@ struct knFlipComputePotentialWaveCrest : public KernelBase {
return jtype; return jtype;
} }
typedef int type9; typedef int type9;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knFlipComputePotentialWaveCrest ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
#pragma omp parallel
{
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i,
j,
k,
pot,
flags,
v,
radius,
normal,
tauMin,
tauMax,
scaleFromManta,
itype,
jtype);
}
}
else {
const int k = 0;
#pragma omp parallel
{
#pragma omp for
for (int j = 1; j < _maxY; j++) for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++) for (int i = 1; i < _maxX; i++)
op(i, op(i,
@@ -2764,20 +2750,8 @@ struct knFlipComputePotentialWaveCrest : public KernelBase {
scaleFromManta, scaleFromManta,
itype, itype,
jtype); jtype);
}
} }
else {
const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, pot, flags, v, radius, normal, tauMin, tauMax, scaleFromManta, itype, jtype);
}
}
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
} }
Grid<Real> &pot; Grid<Real> &pot;
const FlagGrid &flags; const FlagGrid &flags;
@@ -2860,7 +2834,7 @@ struct knFlipComputeSurfaceNormals : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, Grid<Vec3> &normal, const Grid<Real> &phi) const inline void op(IndexInt idx, Grid<Vec3> &normal, const Grid<Real> &phi)
{ {
normal[idx] = getNormalized(normal[idx]); normal[idx] = getNormalized(normal[idx]);
} }
@@ -2874,21 +2848,17 @@ struct knFlipComputeSurfaceNormals : public KernelBase {
return phi; return phi;
} }
typedef Grid<Real> type1; typedef Grid<Real> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel knFlipComputeSurfaceNormals ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, normal, phi);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, normal, phi);
}
} }
Grid<Vec3> &normal; Grid<Vec3> &normal;
const Grid<Real> &phi; const Grid<Real> &phi;
@@ -2958,7 +2928,7 @@ struct knFlipUpdateNeighborRatio : public KernelBase {
Grid<Real> &neighborRatio, Grid<Real> &neighborRatio,
const int radius, const int radius,
const int itype = FlagGrid::TypeFluid, const int itype = FlagGrid::TypeFluid,
const int jtype = FlagGrid::TypeObstacle) const const int jtype = FlagGrid::TypeObstacle)
{ {
if (!(flags(i, j, k) & itype)) if (!(flags(i, j, k) & itype))
@@ -3008,37 +2978,35 @@ struct knFlipUpdateNeighborRatio : public KernelBase {
return jtype; return jtype;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knFlipUpdateNeighborRatio ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, neighborRatio, radius, itype, jtype);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, neighborRatio, radius, itype, jtype);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, neighborRatio, radius, itype, jtype);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, neighborRatio, radius, itype, jtype);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &neighborRatio; Grid<Real> &neighborRatio;
const int radius; const int radius;

View File

@@ -569,7 +569,7 @@ struct advectSurfacePoints : public KernelBase {
inline void op(IndexInt idx, inline void op(IndexInt idx,
BasicParticleSystemWrapper &surfacePoints, BasicParticleSystemWrapper &surfacePoints,
const BasicParticleSystemWrapper &coarseParticles, const BasicParticleSystemWrapper &coarseParticles,
const ParticleDataImplVec3Wrapper &coarseParticlesPrevPos) const const ParticleDataImplVec3Wrapper &coarseParticlesPrevPos)
{ {
if (surfacePoints.isActive(idx)) { if (surfacePoints.isActive(idx)) {
Vec3 avgDisplacement(0, 0, 0); Vec3 avgDisplacement(0, 0, 0);
@@ -606,21 +606,17 @@ struct advectSurfacePoints : public KernelBase {
return coarseParticlesPrevPos; return coarseParticlesPrevPos;
} }
typedef ParticleDataImplVec3Wrapper type2; typedef ParticleDataImplVec3Wrapper type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel advectSurfacePoints ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, coarseParticles, coarseParticlesPrevPos);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, coarseParticles, coarseParticlesPrevPos);
}
} }
BasicParticleSystemWrapper &surfacePoints; BasicParticleSystemWrapper &surfacePoints;
const BasicParticleSystemWrapper &coarseParticles; const BasicParticleSystemWrapper &coarseParticles;
@@ -673,7 +669,7 @@ struct computeSurfaceNormals : public KernelBase {
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
const BasicParticleSystemWrapper &coarseParticles, const BasicParticleSystemWrapper &coarseParticles,
ParticleDataImpl<Vec3> &surfaceNormals) const ParticleDataImpl<Vec3> &surfaceNormals)
{ {
Vec3 pos = surfacePoints.getPos(idx); Vec3 pos = surfacePoints.getPos(idx);
@@ -743,21 +739,17 @@ struct computeSurfaceNormals : public KernelBase {
return surfaceNormals; return surfaceNormals;
} }
typedef ParticleDataImpl<Vec3> type2; typedef ParticleDataImpl<Vec3> type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel computeSurfaceNormals ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, coarseParticles, surfaceNormals);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, coarseParticles, surfaceNormals);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
const BasicParticleSystemWrapper &coarseParticles; const BasicParticleSystemWrapper &coarseParticles;
@@ -780,7 +772,7 @@ struct computeAveragedNormals : public KernelBase {
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
const ParticleDataImpl<Vec3> &surfaceNormals) const const ParticleDataImpl<Vec3> &surfaceNormals)
{ {
Vec3 pos = surfacePoints.getPos(idx); Vec3 pos = surfacePoints.getPos(idx);
Vec3 newNormal = Vec3(0, 0, 0); Vec3 newNormal = Vec3(0, 0, 0);
@@ -800,21 +792,17 @@ struct computeAveragedNormals : public KernelBase {
return surfaceNormals; return surfaceNormals;
} }
typedef ParticleDataImpl<Vec3> type1; typedef ParticleDataImpl<Vec3> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel computeAveragedNormals ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceNormals);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceNormals);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals; const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -832,7 +820,7 @@ struct assignNormals : public KernelBase {
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Vec3> &surfaceNormals) const ParticleDataImpl<Vec3> &surfaceNormals)
{ {
surfaceNormals[idx] = tempSurfaceVec3[idx]; surfaceNormals[idx] = tempSurfaceVec3[idx];
} }
@@ -846,21 +834,17 @@ struct assignNormals : public KernelBase {
return surfaceNormals; return surfaceNormals;
} }
typedef ParticleDataImpl<Vec3> type1; typedef ParticleDataImpl<Vec3> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel assignNormals ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceNormals);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceNormals);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Vec3> &surfaceNormals; ParticleDataImpl<Vec3> &surfaceNormals;
@@ -963,7 +947,7 @@ struct computeSurfaceDensities : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, void *dummy) const inline void op(IndexInt idx, const BasicParticleSystemWrapper &surfacePoints, void *dummy)
{ {
Vec3 pos = surfacePoints.getPos(idx); Vec3 pos = surfacePoints.getPos(idx);
Real density = 0; Real density = 0;
@@ -984,21 +968,17 @@ struct computeSurfaceDensities : public KernelBase {
return dummy; return dummy;
} }
typedef void type1; typedef void type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel computeSurfaceDensities ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, dummy);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, dummy);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
void *dummy; void *dummy;
@@ -1016,7 +996,7 @@ struct computeSurfaceDisplacements : public KernelBase {
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
const ParticleDataImpl<Vec3> &surfaceNormals) const const ParticleDataImpl<Vec3> &surfaceNormals)
{ {
Vec3 pos = surfacePoints.getPos(idx); Vec3 pos = surfacePoints.getPos(idx);
Vec3 normal = surfaceNormals[idx]; Vec3 normal = surfaceNormals[idx];
@@ -1068,21 +1048,17 @@ struct computeSurfaceDisplacements : public KernelBase {
return surfaceNormals; return surfaceNormals;
} }
typedef ParticleDataImpl<Vec3> type1; typedef ParticleDataImpl<Vec3> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel computeSurfaceDisplacements ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceNormals);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceNormals);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals; const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1095,7 +1071,7 @@ struct applySurfaceDisplacements : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, void *dummy) const inline void op(IndexInt idx, BasicParticleSystemWrapper &surfacePoints, void *dummy)
{ {
surfacePoints.setPos(idx, surfacePoints.getPos(idx) + tempSurfaceVec3[idx]); surfacePoints.setPos(idx, surfacePoints.getPos(idx) + tempSurfaceVec3[idx]);
} }
@@ -1109,21 +1085,17 @@ struct applySurfaceDisplacements : public KernelBase {
return dummy; return dummy;
} }
typedef void type1; typedef void type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel applySurfaceDisplacements ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, dummy);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, dummy);
}
} }
BasicParticleSystemWrapper &surfacePoints; BasicParticleSystemWrapper &surfacePoints;
void *dummy; void *dummy;
@@ -1152,7 +1124,7 @@ struct constrainSurface : public KernelBase {
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
BasicParticleSystemWrapper &surfacePoints, BasicParticleSystemWrapper &surfacePoints,
const BasicParticleSystemWrapper &coarseParticles) const const BasicParticleSystemWrapper &coarseParticles)
{ {
Vec3 pos = surfacePoints.getPos(idx); Vec3 pos = surfacePoints.getPos(idx);
Real level = computeConstraintLevel(coarseParticles, surfacePoints.getPos(idx)); Real level = computeConstraintLevel(coarseParticles, surfacePoints.getPos(idx));
@@ -1179,21 +1151,17 @@ struct constrainSurface : public KernelBase {
return coarseParticles; return coarseParticles;
} }
typedef BasicParticleSystemWrapper type1; typedef BasicParticleSystemWrapper type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel constrainSurface ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, coarseParticles);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, coarseParticles);
}
} }
BasicParticleSystemWrapper &surfacePoints; BasicParticleSystemWrapper &surfacePoints;
const BasicParticleSystemWrapper &coarseParticles; const BasicParticleSystemWrapper &coarseParticles;
@@ -1220,7 +1188,7 @@ struct interpolateNewWaveData : public KernelBase {
ParticleDataImpl<Real> &surfaceWaveH, ParticleDataImpl<Real> &surfaceWaveH,
ParticleDataImpl<Real> &surfaceWaveDtH, ParticleDataImpl<Real> &surfaceWaveDtH,
ParticleDataImpl<Real> &surfaceWaveSeed, ParticleDataImpl<Real> &surfaceWaveSeed,
ParticleDataImpl<Real> &surfaceWaveSeedAmplitude) const ParticleDataImpl<Real> &surfaceWaveSeedAmplitude)
{ {
if (surfacePoints.getStatus(idx) & ParticleBase::PNEW) { if (surfacePoints.getStatus(idx) & ParticleBase::PNEW) {
Vec3 pos = surfacePoints.getPos(idx); Vec3 pos = surfacePoints.getPos(idx);
@@ -1270,26 +1238,22 @@ struct interpolateNewWaveData : public KernelBase {
return surfaceWaveSeedAmplitude; return surfaceWaveSeedAmplitude;
} }
typedef ParticleDataImpl<Real> type4; typedef ParticleDataImpl<Real> type4;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel interpolateNewWaveData ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx,
surfacePoints,
surfaceWaveH,
surfaceWaveDtH,
surfaceWaveSeed,
surfaceWaveSeedAmplitude);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i,
surfacePoints,
surfaceWaveH,
surfaceWaveDtH,
surfaceWaveSeed,
surfaceWaveSeedAmplitude);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveH; ParticleDataImpl<Real> &surfaceWaveH;
@@ -1345,7 +1309,7 @@ struct addSeed : public KernelBase {
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Real> &surfaceWaveH, ParticleDataImpl<Real> &surfaceWaveH,
const ParticleDataImpl<Real> &surfaceWaveSeed) const const ParticleDataImpl<Real> &surfaceWaveSeed)
{ {
surfaceWaveH[idx] += surfaceWaveSeed[idx]; surfaceWaveH[idx] += surfaceWaveSeed[idx];
} }
@@ -1364,21 +1328,17 @@ struct addSeed : public KernelBase {
return surfaceWaveSeed; return surfaceWaveSeed;
} }
typedef ParticleDataImpl<Real> type2; typedef ParticleDataImpl<Real> type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel addSeed ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceWaveH, surfaceWaveSeed);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceWaveH, surfaceWaveSeed);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveH; ParticleDataImpl<Real> &surfaceWaveH;
@@ -1400,7 +1360,7 @@ struct computeSurfaceWaveNormal : public KernelBase {
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
const ParticleDataImpl<Vec3> &surfaceNormals, const ParticleDataImpl<Vec3> &surfaceNormals,
const ParticleDataImpl<Real> &surfaceWaveH) const const ParticleDataImpl<Real> &surfaceWaveH)
{ {
Vec3 pos = surfacePoints.getPos(idx); Vec3 pos = surfacePoints.getPos(idx);
@@ -1464,21 +1424,17 @@ struct computeSurfaceWaveNormal : public KernelBase {
return surfaceWaveH; return surfaceWaveH;
} }
typedef ParticleDataImpl<Real> type2; typedef ParticleDataImpl<Real> type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel computeSurfaceWaveNormal ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceNormals, surfaceWaveH);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceNormals, surfaceWaveH);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals; const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1500,7 +1456,7 @@ struct computeSurfaceWaveLaplacians : public KernelBase {
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
const ParticleDataImpl<Vec3> &surfaceNormals, const ParticleDataImpl<Vec3> &surfaceNormals,
const ParticleDataImpl<Real> &surfaceWaveH) const const ParticleDataImpl<Real> &surfaceWaveH)
{ {
Real laplacian = 0; Real laplacian = 0;
Real wTotal = 0; Real wTotal = 0;
@@ -1561,21 +1517,17 @@ struct computeSurfaceWaveLaplacians : public KernelBase {
return surfaceWaveH; return surfaceWaveH;
} }
typedef ParticleDataImpl<Real> type2; typedef ParticleDataImpl<Real> type2;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel computeSurfaceWaveLaplacians ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceNormals, surfaceWaveH);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceNormals, surfaceWaveH);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals; const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1600,7 +1552,7 @@ struct evolveWave : public KernelBase {
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Real> &surfaceWaveH, ParticleDataImpl<Real> &surfaceWaveH,
ParticleDataImpl<Real> &surfaceWaveDtH, ParticleDataImpl<Real> &surfaceWaveDtH,
const ParticleDataImpl<Real> &surfaceWaveSeed) const const ParticleDataImpl<Real> &surfaceWaveSeed)
{ {
surfaceWaveDtH[idx] += params.waveSpeed * params.waveSpeed * params.dt * tempSurfaceFloat[idx]; surfaceWaveDtH[idx] += params.waveSpeed * params.waveSpeed * params.dt * tempSurfaceFloat[idx];
surfaceWaveDtH[idx] /= (1 + params.dt * params.waveDamping); surfaceWaveDtH[idx] /= (1 + params.dt * params.waveDamping);
@@ -1635,21 +1587,17 @@ struct evolveWave : public KernelBase {
return surfaceWaveSeed; return surfaceWaveSeed;
} }
typedef ParticleDataImpl<Real> type3; typedef ParticleDataImpl<Real> type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel evolveWave ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceWaveH, surfaceWaveDtH, surfaceWaveSeed);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceWaveH, surfaceWaveDtH, surfaceWaveSeed);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveH; ParticleDataImpl<Real> &surfaceWaveH;
@@ -1669,7 +1617,7 @@ struct computeSurfaceCurvature : public KernelBase {
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
const ParticleDataImpl<Vec3> &surfaceNormals) const const ParticleDataImpl<Vec3> &surfaceNormals)
{ {
Vec3 pPos = surfacePoints.getPos(idx); Vec3 pPos = surfacePoints.getPos(idx);
Real wTotal = 0; Real wTotal = 0;
@@ -1710,21 +1658,17 @@ struct computeSurfaceCurvature : public KernelBase {
return surfaceNormals; return surfaceNormals;
} }
typedef ParticleDataImpl<Vec3> type1; typedef ParticleDataImpl<Vec3> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel computeSurfaceCurvature ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceNormals);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceNormals);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
const ParticleDataImpl<Vec3> &surfaceNormals; const ParticleDataImpl<Vec3> &surfaceNormals;
@@ -1742,7 +1686,7 @@ struct smoothCurvature : public KernelBase {
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Real> &surfaceWaveSource) const ParticleDataImpl<Real> &surfaceWaveSource)
{ {
Vec3 pPos = surfacePoints.getPos(idx); Vec3 pPos = surfacePoints.getPos(idx);
Real curv = 0; Real curv = 0;
@@ -1768,21 +1712,17 @@ struct smoothCurvature : public KernelBase {
return surfaceWaveSource; return surfaceWaveSource;
} }
typedef ParticleDataImpl<Real> type1; typedef ParticleDataImpl<Real> type1;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel smoothCurvature ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceWaveSource);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceWaveSource);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveSource; ParticleDataImpl<Real> &surfaceWaveSource;
@@ -1806,7 +1746,7 @@ struct seedWaves : public KernelBase {
const BasicParticleSystemWrapper &surfacePoints, const BasicParticleSystemWrapper &surfacePoints,
ParticleDataImpl<Real> &surfaceWaveSeed, ParticleDataImpl<Real> &surfaceWaveSeed,
ParticleDataImpl<Real> &surfaceWaveSeedAmplitude, ParticleDataImpl<Real> &surfaceWaveSeedAmplitude,
ParticleDataImpl<Real> &surfaceWaveSource) const ParticleDataImpl<Real> &surfaceWaveSource)
{ {
Real source = smoothstep(params.waveSeedingCurvatureThresholdRegionCenter - Real source = smoothstep(params.waveSeedingCurvatureThresholdRegionCenter -
params.waveSeedingCurvatureThresholdRegionRadius, params.waveSeedingCurvatureThresholdRegionRadius,
@@ -1850,21 +1790,17 @@ struct seedWaves : public KernelBase {
return surfaceWaveSource; return surfaceWaveSource;
} }
typedef ParticleDataImpl<Real> type3; typedef ParticleDataImpl<Real> type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel seedWaves ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, surfacePoints, surfaceWaveSeed, surfaceWaveSeedAmplitude, surfaceWaveSource);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, surfacePoints, surfaceWaveSeed, surfaceWaveSeedAmplitude, surfaceWaveSource);
}
} }
const BasicParticleSystemWrapper &surfacePoints; const BasicParticleSystemWrapper &surfacePoints;
ParticleDataImpl<Real> &surfaceWaveSeed; ParticleDataImpl<Real> &surfaceWaveSeed;

File diff suppressed because it is too large


@@ -188,8 +188,7 @@ struct KnAcceleration : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op( inline void op(IndexInt idx, MACGrid &a, const MACGrid &v1, const MACGrid &v0, const Real idt)
IndexInt idx, MACGrid &a, const MACGrid &v1, const MACGrid &v0, const Real idt) const
{ {
a[idx] = (v1[idx] - v0[idx]) * idt; a[idx] = (v1[idx] - v0[idx]) * idt;
} }
@@ -213,21 +212,17 @@ struct KnAcceleration : public KernelBase {
return idt; return idt;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnAcceleration ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, a, v1, v0, idt);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, a, v1, v0, idt);
}
} }
MACGrid &a; MACGrid &a;
const MACGrid &v1; const MACGrid &v1;
@@ -576,18 +571,17 @@ void VICintegration(VortexSheetMesh &mesh,
// prepare CG solver // prepare CG solver
const int maxIter = (int)(cgMaxIterFac * vel.getSize().max()); const int maxIter = (int)(cgMaxIterFac * vel.getSize().max());
vector<Grid<Real> *> matA{&A0, &Ai, &Aj, &Ak};
GridCgInterface *gcg = new GridCg<ApplyMatrix>( GridCgInterface *gcg = new GridCg<ApplyMatrix>(
solution, rhs, residual, search, flags, temp1, matA); solution, rhs, residual, search, flags, temp1, &A0, &Ai, &Aj, &Ak);
gcg->setAccuracy(cgAccuracy); gcg->setAccuracy(cgAccuracy);
gcg->setUseL2Norm(true); gcg->setUseL2Norm(true);
gcg->setICPreconditioner( gcg->setICPreconditioner(
(GridCgInterface::PreconditionType)precondition, &pca0, &pca1, &pca2, &pca3); (GridCgInterface::PreconditionType)precondition, &pca0, &pca1, &pca2, &pca3);
// iterations // iterations
Real time = 0;
for (int iter = 0; iter < maxIter; iter++) { for (int iter = 0; iter < maxIter; iter++) {
if (!gcg->iterate()) if (!gcg->iterate(time))
iter = maxIter; iter = maxIter;
} }
debMsg("VICintegration CG iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(), debMsg("VICintegration CG iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(),
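The CG call site changes along with the kernels: the added side passes the four matrix grids as separate pointers rather than building a vector<Grid<Real> *>, and calls iterate(time) with a newly declared Real time = 0. A compilable stand-in showing only the changed call shape; GridCgSketch and its signatures are illustrative, the real Mantaflow API may differ:

struct Grid {};  // stand-in, not Manta's Grid<Real>
typedef float Real;

struct GridCgSketch {
  // Branch side: matrix grids passed individually (the removed side builds a std::vector instead).
  GridCgSketch(Grid *A0, Grid *Ai, Grid *Aj, Grid *Ak) { (void)A0; (void)Ai; (void)Aj; (void)Ak; }
  // Branch side: iterate() takes a time argument; pass-by-reference is an assumption here.
  bool iterate(Real &time) { time += 1.0f; return time < 3.0f; }
};

int main() {
  Grid A0, Ai, Aj, Ak;
  GridCgSketch *gcg = new GridCgSketch(&A0, &Ai, &Aj, &Ak);
  Real time = 0;
  const int maxIter = 10;
  for (int iter = 0; iter < maxIter; iter++) {
    if (!gcg->iterate(time))
      iter = maxIter;  // same early-exit idiom as in the hunk above
  }
  delete gcg;
  return 0;
}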


@@ -170,7 +170,7 @@ struct KnInterpolateMACGrid : public KernelBase {
const MACGrid &source, const MACGrid &source,
const Vec3 &sourceFactor, const Vec3 &sourceFactor,
const Vec3 &off, const Vec3 &off,
int orderSpace) const int orderSpace)
{ {
Vec3 pos = Vec3(i, j, k) * sourceFactor + off; Vec3 pos = Vec3(i, j, k) * sourceFactor + off;
@@ -207,37 +207,35 @@ struct KnInterpolateMACGrid : public KernelBase {
return orderSpace; return orderSpace;
} }
typedef int type4; typedef int type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnInterpolateMACGrid ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, target, source, sourceFactor, off, orderSpace);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, target, source, sourceFactor, off, orderSpace);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, target, source, sourceFactor, off, orderSpace);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, target, source, sourceFactor, off, orderSpace);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
MACGrid &target; MACGrid &target;
const MACGrid &source; const MACGrid &source;
const Vec3 &sourceFactor; const Vec3 &sourceFactor;
@@ -319,7 +317,7 @@ struct knApplySimpleNoiseVec3 : public KernelBase {
Grid<Vec3> &target, Grid<Vec3> &target,
const WaveletNoiseField &noise, const WaveletNoiseField &noise,
Real scale, Real scale,
const Grid<Real> *weight) const const Grid<Real> *weight)
{ {
if (!flags.isFluid(i, j, k)) if (!flags.isFluid(i, j, k))
return; return;
@@ -353,37 +351,35 @@ struct knApplySimpleNoiseVec3 : public KernelBase {
return weight; return weight;
} }
typedef Grid<Real> type4; typedef Grid<Real> type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knApplySimpleNoiseVec3 ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, target, noise, scale, weight);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, target, noise, scale, weight);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, target, noise, scale, weight);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, target, noise, scale, weight);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Vec3> &target; Grid<Vec3> &target;
const WaveletNoiseField &noise; const WaveletNoiseField &noise;
@@ -461,7 +457,7 @@ struct knApplySimpleNoiseReal : public KernelBase {
Grid<Real> &target, Grid<Real> &target,
const WaveletNoiseField &noise, const WaveletNoiseField &noise,
Real scale, Real scale,
const Grid<Real> *weight) const const Grid<Real> *weight)
{ {
if (!flags.isFluid(i, j, k)) if (!flags.isFluid(i, j, k))
return; return;
@@ -495,37 +491,35 @@ struct knApplySimpleNoiseReal : public KernelBase {
return weight; return weight;
} }
typedef Grid<Real> type4; typedef Grid<Real> type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knApplySimpleNoiseReal ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, target, noise, scale, weight);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, target, noise, scale, weight);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, target, noise, scale, weight);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, target, noise, scale, weight);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &target; Grid<Real> &target;
const WaveletNoiseField &noise; const WaveletNoiseField &noise;
@@ -615,7 +609,7 @@ struct knApplyNoiseVec3 : public KernelBase {
const Grid<Real> *weight, const Grid<Real> *weight,
const Grid<Vec3> *uv, const Grid<Vec3> *uv,
bool uvInterpol, bool uvInterpol,
const Vec3 &sourceFactor) const const Vec3 &sourceFactor)
{ {
if (!flags.isFluid(i, j, k)) if (!flags.isFluid(i, j, k))
return; return;
@@ -694,19 +688,40 @@ struct knApplyNoiseVec3 : public KernelBase {
return sourceFactor; return sourceFactor;
} }
typedef Vec3 type8; typedef Vec3 type8;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knApplyNoiseVec3 ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
#pragma omp parallel
{
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i,
j,
k,
flags,
target,
noise,
scale,
scaleSpatial,
weight,
uv,
uvInterpol,
sourceFactor);
}
}
else {
const int k = 0;
#pragma omp parallel
{
#pragma omp for
for (int j = 0; j < _maxY; j++) for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++) for (int i = 0; i < _maxX; i++)
op(i, op(i,
@@ -721,31 +736,8 @@ struct knApplyNoiseVec3 : public KernelBase {
uv, uv,
uvInterpol, uvInterpol,
sourceFactor); sourceFactor);
}
} }
else {
const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++)
for (int i = 0; i < _maxX; i++)
op(i,
j,
k,
flags,
target,
noise,
scale,
scaleSpatial,
weight,
uv,
uvInterpol,
sourceFactor);
}
}
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
} }
const FlagGrid &flags; const FlagGrid &flags;
Grid<Vec3> &target; Grid<Vec3> &target;
@@ -834,7 +826,7 @@ struct KnApplyComputeEnergy : public KernelBase {
run(); run();
} }
inline void op( inline void op(
int i, int j, int k, const FlagGrid &flags, const MACGrid &vel, Grid<Real> &energy) const int i, int j, int k, const FlagGrid &flags, const MACGrid &vel, Grid<Real> &energy)
{ {
Real e = 0.f; Real e = 0.f;
if (flags.isFluid(i, j, k)) { if (flags.isFluid(i, j, k)) {
@@ -858,37 +850,35 @@ struct KnApplyComputeEnergy : public KernelBase {
return energy; return energy;
} }
typedef Grid<Real> type2; typedef Grid<Real> type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnApplyComputeEnergy ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, energy);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, energy);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, flags, vel, energy);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, flags, vel, energy);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
const MACGrid &vel; const MACGrid &vel;
Grid<Real> &energy; Grid<Real> &energy;
@@ -1019,7 +1009,7 @@ struct KnComputeStrainRateMag : public KernelBase {
run(); run();
} }
inline void op( inline void op(
int i, int j, int k, const MACGrid &vel, const Grid<Vec3> &velCenter, Grid<Real> &prod) const int i, int j, int k, const MACGrid &vel, const Grid<Vec3> &velCenter, Grid<Real> &prod)
{ {
// compute Sij = 1/2 * (dU_i/dx_j + dU_j/dx_i) // compute Sij = 1/2 * (dU_i/dx_j + dU_j/dx_i)
Vec3 diag = Vec3(vel(i + 1, j, k).x, vel(i, j + 1, k).y, 0.) - vel(i, j, k); Vec3 diag = Vec3(vel(i + 1, j, k).x, vel(i, j + 1, k).y, 0.) - vel(i, j, k);
@@ -1056,37 +1046,35 @@ struct KnComputeStrainRateMag : public KernelBase {
return prod; return prod;
} }
typedef Grid<Real> type2; typedef Grid<Real> type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel KnComputeStrainRateMag ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, velCenter, prod);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, velCenter, prod);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, vel, velCenter, prod);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, vel, velCenter, prod);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const MACGrid &vel; const MACGrid &vel;
const Grid<Vec3> &velCenter; const Grid<Vec3> &velCenter;
Grid<Real> &prod; Grid<Real> &prod;
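The (i, j, k) grid kernels above follow a second variant of the rewrite: the TBB side parallelizes only the outermost index (k in 3D, j in 2D) through the blocked_range handed to operator(), whereas the OpenMP side keeps both the 3D and 2D branches inside run() and puts #pragma omp for on the outer loop. A minimal compilable sketch with a flat array standing in for the grid (sizes, minZ handling, and the element-wise work are illustrative, not from the patch):

#include <vector>

// Illustrative ijk kernel; "work" stands in for op(i, j, k, ...).
struct GridKernelSketch {
  GridKernelSketch(int maxX, int maxY, int maxZ, std::vector<float> &data)
      : maxX(maxX), maxY(maxY), maxZ(maxZ), minZ(0), data(data) {}

  inline void work(int i, int j, int k) {
    data[(k * maxY + j) * maxX + i] += 1.0f;
  }

  void run() {
    const int _maxX = maxX;
    const int _maxY = maxY;
    if (maxZ > 1) {  // 3D: parallelize over k
#pragma omp parallel
      {
#pragma omp for
        for (int k = minZ; k < maxZ; k++)
          for (int j = 0; j < _maxY; j++)
            for (int i = 0; i < _maxX; i++)
              work(i, j, k);
      }
    }
    else {  // 2D: single slice, parallelize over j
      const int k = 0;
#pragma omp parallel
      {
#pragma omp for
        for (int j = 0; j < _maxY; j++)
          for (int i = 0; i < _maxX; i++)
            work(i, j, k);
      }
    }
  }

  int maxX, maxY, maxZ, minZ;
  std::vector<float> &data;
};

int main() {
  std::vector<float> cells(16 * 16 * 16, 0.0f);
  GridKernelSketch(16, 16, 16, cells).run();
  return 0;
}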


@@ -38,7 +38,7 @@ struct knCalcSecDeriv2d : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, const Grid<Real> &v, Grid<Real> &ret) const inline void op(int i, int j, int k, const Grid<Real> &v, Grid<Real> &ret)
{ {
ret(i, j, k) = (-4. * v(i, j, k) + v(i - 1, j, k) + v(i + 1, j, k) + v(i, j - 1, k) + ret(i, j, k) = (-4. * v(i, j, k) + v(i - 1, j, k) + v(i + 1, j, k) + v(i, j - 1, k) +
v(i, j + 1, k)); v(i, j + 1, k));
@@ -53,37 +53,35 @@ struct knCalcSecDeriv2d : public KernelBase {
return ret; return ret;
} }
typedef Grid<Real> type1; typedef Grid<Real> type1;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knCalcSecDeriv2d ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, v, ret);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, v, ret);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, v, ret);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, v, ret);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const Grid<Real> &v; const Grid<Real> &v;
Grid<Real> &ret; Grid<Real> &ret;
}; };
@@ -151,44 +149,43 @@ struct knTotalSum : public KernelBase {
return h; return h;
} }
typedef Grid<Real> type0; typedef Grid<Real> type0;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel knTotalSum ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, h, sum); double sum = 0;
#pragma omp for nowait
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, h, sum);
#pragma omp critical
{
this->sum += sum;
}
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, h, sum); double sum = 0;
#pragma omp for nowait
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, h, sum);
#pragma omp critical
{
this->sum += sum;
}
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
knTotalSum(knTotalSum &o, tbb::split) : KernelBase(o), h(o.h), sum(0)
{
}
void join(const knTotalSum &o)
{
sum += o.sum;
}
Grid<Real> &h; Grid<Real> &h;
double sum; double sum;
}; };
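knTotalSum is a reduction kernel, so its rewrite differs from the plain loops above: the removed side used tbb::parallel_reduce with a splitting constructor and join(), while the added side gives each OpenMP thread a private double sum, fills it under #pragma omp for nowait, and merges it into the member inside #pragma omp critical (the local variable shadows the member, which is why the merge reads this->sum += sum). A compilable sketch of that pattern with an array stand-in:

#include <cstdio>
#include <vector>

struct SumKernelSketch {
  explicit SumKernelSketch(const std::vector<double> &v) : v(v), sum(0.0) {}

  inline void op(long long idx, double &acc) const { acc += v[idx]; }

  void run() {
    const long long _sz = (long long)v.size();
#pragma omp parallel
    {
      double local = 0;  // thread-private partial sum (named "sum" in the generated code)
#pragma omp for nowait
      for (long long i = 0; i < _sz; i++)
        op(i, local);
#pragma omp critical
      {
        this->sum += local;  // merge partial results, one thread at a time
      }
    }
  }

  const std::vector<double> &v;
  double sum;
};

int main() {
  std::vector<double> v(100, 0.5);
  SumKernelSketch k(v);
  k.run();
  std::printf("%g\n", k.sum);  // 50
  return 0;
}

A reduction(+ : sum) clause would be the more idiomatic OpenMP spelling for the additive case, but the explicit nowait/critical form generalizes to the min-combine used by minReduction later in this diff.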
@@ -296,7 +293,7 @@ struct MakeRhsWE : public KernelBase {
const Grid<Real> &ut, const Grid<Real> &ut,
const Grid<Real> &utm1, const Grid<Real> &utm1,
Real s, Real s,
bool crankNic = false) const bool crankNic = false)
{ {
rhs(i, j, k) = (2. * ut(i, j, k) - utm1(i, j, k)); rhs(i, j, k) = (2. * ut(i, j, k) - utm1(i, j, k));
if (crankNic) { if (crankNic) {
@@ -334,37 +331,35 @@ struct MakeRhsWE : public KernelBase {
return crankNic; return crankNic;
} }
typedef bool type5; typedef bool type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel MakeRhsWE ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 1; j < _maxY; j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 1; i < _maxX; i++) {
op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
#pragma omp for
for (int j = 1; j < _maxY; j++)
for (int i = 1; i < _maxX; i++)
op(i, j, k, flags, rhs, ut, utm1, s, crankNic);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
}
const FlagGrid &flags; const FlagGrid &flags;
Grid<Real> &rhs; Grid<Real> &rhs;
const Grid<Real> &ut; const Grid<Real> &ut;
@@ -423,21 +418,17 @@ void cgSolveWE(const FlagGrid &flags,
const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4); const int maxIter = (int)(cgMaxIterFac * flags.getSize().max()) * (flags.is3D() ? 1 : 4);
GridCgInterface *gcg; GridCgInterface *gcg;
vector<Grid<Real> *> matA{&A0, &Ai, &Aj}; if (flags.is3D())
gcg = new GridCg<ApplyMatrix>(out, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
if (flags.is3D()) { else
matA.push_back(&Ak); gcg = new GridCg<ApplyMatrix2D>(out, rhs, residual, search, flags, tmp, &A0, &Ai, &Aj, &Ak);
gcg = new GridCg<ApplyMatrix>(out, rhs, residual, search, flags, tmp, matA);
}
else {
gcg = new GridCg<ApplyMatrix2D>(out, rhs, residual, search, flags, tmp, matA);
}
gcg->setAccuracy(cgAccuracy); gcg->setAccuracy(cgAccuracy);
// no preconditioning for now... // no preconditioning for now...
Real time = 0;
for (int iter = 0; iter < maxIter; iter++) { for (int iter = 0; iter < maxIter; iter++) {
if (!gcg->iterate()) if (!gcg->iterate(time))
iter = maxIter; iter = maxIter;
} }
debMsg("cgSolveWaveEq iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(), 1); debMsg("cgSolveWaveEq iterations:" << gcg->getIterations() << ", res:" << gcg->getSigma(), 1);


@@ -89,7 +89,6 @@ extern void PbRegister_processBurn();
extern void PbRegister_updateFlame(); extern void PbRegister_updateFlame();
extern void PbRegister_getSpiralVelocity(); extern void PbRegister_getSpiralVelocity();
extern void PbRegister_setGradientYWeight(); extern void PbRegister_setGradientYWeight();
extern void PbRegister_PD_fluid_guiding();
extern void PbRegister_releaseBlurPrecomp(); extern void PbRegister_releaseBlurPrecomp();
extern void PbRegister_KEpsilonComputeProduction(); extern void PbRegister_KEpsilonComputeProduction();
extern void PbRegister_KEpsilonSources(); extern void PbRegister_KEpsilonSources();
@@ -145,7 +144,6 @@ extern void PbRegister_flipComputeSurfaceNormals();
extern void PbRegister_flipUpdateNeighborRatio(); extern void PbRegister_flipUpdateNeighborRatio();
extern void PbRegister_particleSurfaceTurbulence(); extern void PbRegister_particleSurfaceTurbulence();
extern void PbRegister_debugCheckParts(); extern void PbRegister_debugCheckParts();
extern void PbRegister_applyViscosity();
extern void PbRegister_markAsFixed(); extern void PbRegister_markAsFixed();
extern void PbRegister_texcoordInflow(); extern void PbRegister_texcoordInflow();
extern void PbRegister_meshSmokeInflow(); extern void PbRegister_meshSmokeInflow();
@@ -287,7 +285,6 @@ void MantaEnsureRegistration()
PbRegister_updateFlame(); PbRegister_updateFlame();
PbRegister_getSpiralVelocity(); PbRegister_getSpiralVelocity();
PbRegister_setGradientYWeight(); PbRegister_setGradientYWeight();
PbRegister_PD_fluid_guiding();
PbRegister_releaseBlurPrecomp(); PbRegister_releaseBlurPrecomp();
PbRegister_KEpsilonComputeProduction(); PbRegister_KEpsilonComputeProduction();
PbRegister_KEpsilonSources(); PbRegister_KEpsilonSources();
@@ -343,7 +340,6 @@ void MantaEnsureRegistration()
PbRegister_flipUpdateNeighborRatio(); PbRegister_flipUpdateNeighborRatio();
PbRegister_particleSurfaceTurbulence(); PbRegister_particleSurfaceTurbulence();
PbRegister_debugCheckParts(); PbRegister_debugCheckParts();
PbRegister_applyViscosity();
PbRegister_markAsFixed(); PbRegister_markAsFixed();
PbRegister_texcoordInflow(); PbRegister_texcoordInflow();
PbRegister_meshSmokeInflow(); PbRegister_meshSmokeInflow();


@@ -52,8 +52,7 @@ template<class T> struct ApplyShapeToGrid : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op( inline void op(int i, int j, int k, Grid<T> *grid, Shape *shape, T value, FlagGrid *respectFlags)
int i, int j, int k, Grid<T> *grid, Shape *shape, T value, FlagGrid *respectFlags) const
{ {
if (respectFlags && respectFlags->isObstacle(i, j, k)) if (respectFlags && respectFlags->isObstacle(i, j, k))
return; return;
@@ -80,37 +79,35 @@ template<class T> struct ApplyShapeToGrid : public KernelBase {
return respectFlags; return respectFlags;
} }
typedef FlagGrid type3; typedef FlagGrid type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel ApplyShapeToGrid ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, grid, shape, value, respectFlags);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, grid, shape, value, respectFlags);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, grid, shape, value, respectFlags);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, grid, shape, value, respectFlags);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<T> *grid; Grid<T> *grid;
Shape *shape; Shape *shape;
T value; T value;
@@ -141,7 +138,7 @@ template<class T> struct ApplyShapeToGridSmooth : public KernelBase {
Real sigma, Real sigma,
Real shift, Real shift,
T value, T value,
FlagGrid *respectFlags) const FlagGrid *respectFlags)
{ {
if (respectFlags && respectFlags->isObstacle(i, j, k)) if (respectFlags && respectFlags->isObstacle(i, j, k))
return; return;
@@ -181,37 +178,35 @@ template<class T> struct ApplyShapeToGridSmooth : public KernelBase {
return respectFlags; return respectFlags;
} }
typedef FlagGrid type5; typedef FlagGrid type5;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel ApplyShapeToGridSmooth ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, grid, phi, sigma, shift, value, respectFlags);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<T> *grid; Grid<T> *grid;
Grid<Real> &phi; Grid<Real> &phi;
Real sigma; Real sigma;
@@ -230,7 +225,7 @@ struct ApplyShapeToMACGrid : public KernelBase {
run(); run();
} }
inline void op( inline void op(
int i, int j, int k, MACGrid *grid, Shape *shape, Vec3 value, FlagGrid *respectFlags) const int i, int j, int k, MACGrid *grid, Shape *shape, Vec3 value, FlagGrid *respectFlags)
{ {
if (respectFlags && respectFlags->isObstacle(i, j, k)) if (respectFlags && respectFlags->isObstacle(i, j, k))
return; return;
@@ -261,37 +256,35 @@ struct ApplyShapeToMACGrid : public KernelBase {
return respectFlags; return respectFlags;
} }
typedef FlagGrid type3; typedef FlagGrid type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel ApplyShapeToMACGrid ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, grid, shape, value, respectFlags);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, grid, shape, value, respectFlags);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, grid, shape, value, respectFlags);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, grid, shape, value, respectFlags);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
MACGrid *grid; MACGrid *grid;
Shape *shape; Shape *shape;
Vec3 value; Vec3 value;
@@ -429,7 +422,7 @@ struct BoxSDF : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, Grid<Real> &phi, const Vec3 &p1, const Vec3 &p2) const inline void op(int i, int j, int k, Grid<Real> &phi, const Vec3 &p1, const Vec3 &p2)
{ {
const Vec3 p(i + 0.5, j + 0.5, k + 0.5); const Vec3 p(i + 0.5, j + 0.5, k + 0.5);
if (p.x <= p2.x && p.x >= p1.x && p.y <= p2.y && p.y >= p1.y && p.z <= p2.z && p.z >= p1.z) { if (p.x <= p2.x && p.x >= p1.x && p.y <= p2.y && p.y >= p1.y && p.z <= p2.z && p.z >= p1.z) {
@@ -505,37 +498,35 @@ struct BoxSDF : public KernelBase {
return p2; return p2;
} }
typedef Vec3 type2; typedef Vec3 type2;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel BoxSDF ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phi, p1, p2);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phi, p1, p2);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phi, p1, p2);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phi, p1, p2);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<Real> &phi; Grid<Real> &phi;
const Vec3 &p1; const Vec3 &p1;
const Vec3 &p2; const Vec3 &p2;
@@ -647,7 +638,7 @@ struct SphereSDF : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 scale) const inline void op(int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 scale)
{ {
phi(i, j, k) = norm((Vec3(i + 0.5, j + 0.5, k + 0.5) - center) / scale) - radius; phi(i, j, k) = norm((Vec3(i + 0.5, j + 0.5, k + 0.5) - center) / scale) - radius;
} }
@@ -671,37 +662,35 @@ struct SphereSDF : public KernelBase {
return scale; return scale;
} }
typedef Vec3 type3; typedef Vec3 type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel SphereSDF ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phi, center, radius, scale);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phi, center, radius, scale);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phi, center, radius, scale);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phi, center, radius, scale);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<Real> &phi; Grid<Real> &phi;
Vec3 center; Vec3 center;
Real radius; Real radius;
@@ -774,7 +763,7 @@ struct CylinderSDF : public KernelBase {
run(); run();
} }
inline void op( inline void op(
int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 zaxis, Real maxz) const int i, int j, int k, Grid<Real> &phi, Vec3 center, Real radius, Vec3 zaxis, Real maxz)
{ {
Vec3 p = Vec3(i + 0.5, j + 0.5, k + 0.5) - center; Vec3 p = Vec3(i + 0.5, j + 0.5, k + 0.5) - center;
Real z = fabs(dot(p, zaxis)); Real z = fabs(dot(p, zaxis));
@@ -820,37 +809,35 @@ struct CylinderSDF : public KernelBase {
return maxz; return maxz;
} }
typedef Real type4; typedef Real type4;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel CylinderSDF ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phi, center, radius, zaxis, maxz);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phi, center, radius, zaxis, maxz);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, phi, center, radius, zaxis, maxz);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, phi, center, radius, zaxis, maxz);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
Grid<Real> &phi; Grid<Real> &phi;
Vec3 center; Vec3 center;
Real radius; Real radius;
@@ -920,13 +907,8 @@ struct SlopeSDF : public KernelBase {
runMessage(); runMessage();
run(); run();
} }
inline void op(int i, inline void op(
int j, int i, int j, int k, const Vec3 &n, Grid<Real> &phiObs, const Real &fac, const Real &origin)
int k,
const Vec3 &n,
Grid<Real> &phiObs,
const Real &fac,
const Real &origin) const
{ {
phiObs(i, j, k) = (n.x * (double)i + n.y * (double)j + n.z * (double)k - origin) * fac; phiObs(i, j, k) = (n.x * (double)i + n.y * (double)j + n.z * (double)k - origin) * fac;
@@ -951,37 +933,35 @@ struct SlopeSDF : public KernelBase {
return origin; return origin;
} }
typedef Real type3; typedef Real type3;
void runMessage() void runMessage(){};
{ void run()
debMsg("Executing kernel SlopeSDF ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{ {
const int _maxX = maxX; const int _maxX = maxX;
const int _maxY = maxY; const int _maxY = maxY;
if (maxZ > 1) { if (maxZ > 1) {
for (int k = __r.begin(); k != (int)__r.end(); k++)
for (int j = 0; j < _maxY; j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, n, phiObs, fac, origin);
#pragma omp for
for (int k = minZ; k < maxZ; k++)
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, n, phiObs, fac, origin);
}
} }
else { else {
const int k = 0; const int k = 0;
for (int j = __r.begin(); j != (int)__r.end(); j++) #pragma omp parallel
for (int i = 0; i < _maxX; i++) {
op(i, j, k, n, phiObs, fac, origin);
#pragma omp for
for (int j = 0; j < _maxY; j++)
for (int i = 0; i < _maxX; i++)
op(i, j, k, n, phiObs, fac, origin);
}
} }
} }
void run()
{
if (maxZ > 1)
tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
else
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
}
const Vec3 &n; const Vec3 &n;
Grid<Real> &phiObs; Grid<Real> &phiObs;
const Real &fac; const Real &fac;


@@ -269,7 +269,6 @@ class Shape : public PbClass {
protected: protected:
GridType mType; GridType mType;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -320,7 +319,6 @@ class NullShape : public Shape {
{ {
gridSetConst<Real>(phi, 1000.0f); gridSetConst<Real>(phi, 1000.0f);
} }
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -396,7 +394,6 @@ class Box : public Shape {
protected: protected:
Vec3 mP0, mP1; Vec3 mP0, mP1;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -458,7 +455,6 @@ class Sphere : public Shape {
protected: protected:
Vec3 mCenter, mScale; Vec3 mCenter, mScale;
Real mRadius; Real mRadius;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -583,7 +579,6 @@ class Cylinder : public Shape {
protected: protected:
Vec3 mCenter, mZDir; Vec3 mCenter, mZDir;
Real mRadius, mZ; Real mRadius, mZ;
public: public:
PbArgs _args; PbArgs _args;
} }
@@ -660,7 +655,6 @@ class Slope : public Shape {
Real mAnglexy, mAngleyz; Real mAnglexy, mAngleyz;
Real mOrigin; Real mOrigin;
Vec3 mGs; Vec3 mGs;
public: public:
PbArgs _args; PbArgs _args;
} }


@@ -50,28 +50,21 @@ struct reductionTest : public KernelBase {
return v; return v;
} }
typedef Grid<Real> type0; typedef Grid<Real> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel reductionTest ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, v, sum);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
reductionTest(reductionTest &o, tbb::split) : KernelBase(o), v(o.v), sum(0) {
{ double sum = 0;
} #pragma omp for nowait
void join(const reductionTest &o) for (IndexInt i = 0; i < _sz; i++)
{ op(i, v, sum);
sum += o.sum; #pragma omp critical
{
this->sum += sum;
}
}
} }
const Grid<Real> &v; const Grid<Real> &v;
double sum; double sum;
@@ -101,28 +94,21 @@ struct minReduction : public KernelBase {
return v; return v;
} }
typedef Grid<Real> type0; typedef Grid<Real> type0;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel minReduction ", 3);
debMsg("Kernel range"
<< " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r)
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, v, sum);
}
void run() void run()
{ {
tbb::parallel_reduce(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
} #pragma omp parallel
minReduction(minReduction &o, tbb::split) : KernelBase(o), v(o.v), sum(0) {
{ double sum = 0;
} #pragma omp for nowait
void join(const minReduction &o) for (IndexInt i = 0; i < _sz; i++)
{ op(i, v, sum);
sum = min(sum, o.sum); #pragma omp critical
{
this->sum = min(sum, this->sum);
}
}
} }
const Grid<Real> &v; const Grid<Real> &v;
double sum; double sum;
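Both reductions above follow the same conversion: the tbb::parallel_reduce split/join machinery becomes an OpenMP parallel region with a thread-local accumulator that is merged in a critical section. A self-contained sketch of that pattern (names are illustrative, not the generated kernel):

#include <vector>

static double sumGrid(const std::vector<double> &v)
{
  const long long _sz = (long long)v.size();
  double sum = 0.0;
#pragma omp parallel
  {
    double local = 0.0;  // per-thread partial result, like the split copy's `sum`
#pragma omp for nowait
    for (long long i = 0; i < _sz; i++)
      local += v[i];     // stand-in for op(i, v, sum)
#pragma omp critical
    {
      sum += local;      // join step; minReduction uses min(sum, local) here instead
    }
  }
  return sum;
}

For a plain sum a reduction(+ : sum) clause would be equivalent and shorter; the explicit critical section is what the generated code uses, since it also covers min/max-style joins.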
View File
@@ -136,7 +136,7 @@ struct KnSynthesizeTurbulence : public KernelBase {
int octaves, int octaves,
Real scale, Real scale,
Real invL0, Real invL0,
Real kmin) const Real kmin)
{ {
const Real PERSISTENCE = 0.56123f; const Real PERSISTENCE = 0.56123f;
@@ -217,21 +217,17 @@ struct KnSynthesizeTurbulence : public KernelBase {
return kmin; return kmin;
} }
typedef Real type9; typedef Real type9;
void runMessage() void runMessage(){};
{
debMsg("Executing kernel KnSynthesizeTurbulence ", 3);
debMsg("Kernel range"
<< " size " << size << " ",
4);
};
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, p, flags, noise, kGrid, alpha, dt, octaves, scale, invL0, kmin);
}
void run() void run()
{ {
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this); const IndexInt _sz = size;
#pragma omp parallel
{
#pragma omp for
for (IndexInt i = 0; i < _sz; i++)
op(i, p, flags, noise, kGrid, alpha, dt, octaves, scale, invL0, kmin);
}
} }
TurbulenceParticleSystem &p; TurbulenceParticleSystem &p;
FlagGrid &flags; FlagGrid &flags;
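Flat-index kernels such as KnSynthesizeTurbulence get the one-dimensional form of the same conversion: tbb::parallel_for over [0, size) turns into an OpenMP for over the element index, and op() loses its const qualifier because no const functor is needed. Sketch with placeholder data and per-element work:

#include <vector>

static void scaleAll(std::vector<float> &p, float factor)
{
  const long long _sz = (long long)p.size();
#pragma omp parallel
  {
#pragma omp for
    for (long long i = 0; i < _sz; i++)
      p[i] *= factor;  // stand-in for op(i, p, flags, noise, kGrid, ...)
  }
}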
View File
@@ -199,7 +199,6 @@ class TurbulenceParticleSystem : public ParticleSystem<TurbulenceParticleData> {
private: private:
WaveletNoiseField &noise; WaveletNoiseField &noise;
public: public:
PbArgs _args; PbArgs _args;
} }
View File
@@ -60,56 +60,24 @@ inline Vec3 VortexKernel(const Vec3 &p, const vector<VortexParticleData> &vp, Re
return u; return u;
} }
struct _KnVpAdvectMesh : public KernelBase { struct KnVpAdvectMesh : public KernelBase {
_KnVpAdvectMesh(const KernelBase &base, KnVpAdvectMesh(vector<Node> &nodes, const vector<VortexParticleData> &vp, Real scale)
vector<Node> &nodes, : KernelBase(nodes.size()), nodes(nodes), vp(vp), scale(scale), u((size))
const vector<VortexParticleData> &vp,
Real scale,
vector<Vec3> &u)
: KernelBase(base), nodes(nodes), vp(vp), scale(scale), u(u)
{ {
runMessage();
run();
} }
inline void op(IndexInt idx, inline void op(IndexInt idx,
vector<Node> &nodes, vector<Node> &nodes,
const vector<VortexParticleData> &vp, const vector<VortexParticleData> &vp,
Real scale, Real scale,
vector<Vec3> &u) const vector<Vec3> &u)
{ {
if (nodes[idx].flags & Mesh::NfFixed) if (nodes[idx].flags & Mesh::NfFixed)
u[idx] = 0.0; u[idx] = 0.0;
else else
u[idx] = VortexKernel(nodes[idx].pos, vp, scale); u[idx] = VortexKernel(nodes[idx].pos, vp, scale);
} }
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, nodes, vp, scale, u);
}
void run()
{
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
}
vector<Node> &nodes;
const vector<VortexParticleData> &vp;
Real scale;
vector<Vec3> &u;
};
struct KnVpAdvectMesh : public KernelBase {
KnVpAdvectMesh(vector<Node> &nodes, const vector<VortexParticleData> &vp, Real scale)
: KernelBase(nodes.size()),
_inner(KernelBase(nodes.size()), nodes, vp, scale, u),
nodes(nodes),
vp(vp),
scale(scale),
u((size))
{
runMessage();
run();
}
void run()
{
_inner.run();
}
inline operator vector<Vec3>() inline operator vector<Vec3>()
{ {
return u; return u;
@@ -133,63 +101,38 @@ struct KnVpAdvectMesh : public KernelBase {
return scale; return scale;
} }
typedef Real type2; typedef Real type2;
void runMessage() void runMessage(){};
void run()
{ {
debMsg("Executing kernel KnVpAdvectMesh ", 3); const IndexInt _sz = size;
debMsg("Kernel range" #pragma omp parallel
<< " size " << size << " ", {
4);
}; #pragma omp for
_KnVpAdvectMesh _inner; for (IndexInt i = 0; i < _sz; i++)
op(i, nodes, vp, scale, u);
}
}
vector<Node> &nodes; vector<Node> &nodes;
const vector<VortexParticleData> &vp; const vector<VortexParticleData> &vp;
Real scale; Real scale;
vector<Vec3> u; vector<Vec3> u;
}; };
struct _KnVpAdvectSelf : public KernelBase { struct KnVpAdvectSelf : public KernelBase {
_KnVpAdvectSelf(const KernelBase &base, KnVpAdvectSelf(vector<VortexParticleData> &vp, Real scale)
vector<VortexParticleData> &vp, : KernelBase(vp.size()), vp(vp), scale(scale), u((size))
Real scale,
vector<Vec3> &u)
: KernelBase(base), vp(vp), scale(scale), u(u)
{ {
runMessage();
run();
} }
inline void op(IndexInt idx, vector<VortexParticleData> &vp, Real scale, vector<Vec3> &u) const inline void op(IndexInt idx, vector<VortexParticleData> &vp, Real scale, vector<Vec3> &u)
{ {
if (vp[idx].flag & ParticleBase::PDELETE) if (vp[idx].flag & ParticleBase::PDELETE)
u[idx] = 0.0; u[idx] = 0.0;
else else
u[idx] = VortexKernel(vp[idx].pos, vp, scale); u[idx] = VortexKernel(vp[idx].pos, vp, scale);
} }
void operator()(const tbb::blocked_range<IndexInt> &__r) const
{
for (IndexInt idx = __r.begin(); idx != (IndexInt)__r.end(); idx++)
op(idx, vp, scale, u);
}
void run()
{
tbb::parallel_for(tbb::blocked_range<IndexInt>(0, size), *this);
}
vector<VortexParticleData> &vp;
Real scale;
vector<Vec3> &u;
};
struct KnVpAdvectSelf : public KernelBase {
KnVpAdvectSelf(vector<VortexParticleData> &vp, Real scale)
: KernelBase(vp.size()),
_inner(KernelBase(vp.size()), vp, scale, u),
vp(vp),
scale(scale),
u((size))
{
runMessage();
run();
}
void run()
{
_inner.run();
}
inline operator vector<Vec3>() inline operator vector<Vec3>()
{ {
return u; return u;
@@ -208,14 +151,18 @@ struct KnVpAdvectSelf : public KernelBase {
return scale; return scale;
} }
typedef Real type1; typedef Real type1;
void runMessage() void runMessage(){};
void run()
{ {
debMsg("Executing kernel KnVpAdvectSelf ", 3); const IndexInt _sz = size;
debMsg("Kernel range" #pragma omp parallel
<< " size " << size << " ", {
4);
}; #pragma omp for
_KnVpAdvectSelf _inner; for (IndexInt i = 0; i < _sz; i++)
op(i, vp, scale, u);
}
}
vector<VortexParticleData> &vp; vector<VortexParticleData> &vp;
Real scale; Real scale;
vector<Vec3> u; vector<Vec3> u;
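The two vortex kernels above also drop the _KnVpAdvectMesh/_KnVpAdvectSelf wrapper structs that existed only to carry the TBB functor: the outer struct now runs the OpenMP loop directly from run(). Reduced to its skeleton, the surviving shape of such a kernel looks roughly like this (KernelBase and the op() signature are stand-ins, not the real generated code):

#include <cstddef>

// Minimal stand-in for Mantaflow's KernelBase: it just records the iteration size.
struct KernelBase {
  explicit KernelBase(std::size_t s) : size(s) {}
  std::size_t size;
};

struct KnExample : public KernelBase {
  explicit KnExample(std::size_t n) : KernelBase(n)
  {
    run();  // kernels still execute from their constructor
  }

  inline void op(std::size_t idx)
  {
    (void)idx;  // per-element work goes here
  }

  void runMessage() {}  // debug output is dropped in this branch

  void run()
  {
    const long long _sz = (long long)size;
#pragma omp parallel
    {
#pragma omp for
      for (long long i = 0; i < _sz; i++)
        op((std::size_t)i);
    }
  }
};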
View File
@@ -127,7 +127,6 @@ class VortexParticleSystem : public ParticleSystem<VortexParticleData> {
} }
virtual ParticleBase *clone(); virtual ParticleBase *clone();
public: public:
PbArgs _args; PbArgs _args;
} }
View File
@@ -240,7 +240,6 @@ class VortexSheetMesh : public Mesh {
VorticityChannel mVorticity; VorticityChannel mVorticity;
TexCoord3Channel mTex1, mTex2; TexCoord3Channel mTex1, mTex2;
TurbulenceChannel mTurb; TurbulenceChannel mTurb;
public: public:
PbArgs _args; PbArgs _args;
} }
View File
@@ -59,8 +59,8 @@ int MANTA::with_debug(0);
MANTA::MANTA(int *res, FluidModifierData *fmd) MANTA::MANTA(int *res, FluidModifierData *fmd)
: mCurrentID(++solverID), mMaxRes(fmd->domain->maxres) : mCurrentID(++solverID), mMaxRes(fmd->domain->maxres)
{ {
if (with_debug) //if (with_debug)
cout << "FLUID: " << mCurrentID << " with res(" << res[0] << ", " << res[1] << ", " << res[2] cout << "============= FLUID: " << mCurrentID << " with res(" << res[0] << ", " << res[1] << ", " << res[2]
<< ")" << endl; << ")" << endl;
FluidDomainSettings *fds = fmd->domain; FluidDomainSettings *fds = fmd->domain;
@@ -279,6 +279,7 @@ MANTA::MANTA(int *res, FluidModifierData *fmd)
} }
/* All requested initializations must not fail in constructor. */ /* All requested initializations must not fail in constructor. */
BLI_assert(initSuccess); BLI_assert(initSuccess);
UNUSED_VARS(initSuccess);
updatePointers(fmd); updatePointers(fmd);
} }
View File
@@ -273,13 +273,32 @@ def fluid_adapt_time_step_$ID$():\n\
const std::string fluid_alloc = const std::string fluid_alloc =
"\n\ "\n\
mantaMsg('Fluid alloc data')\n\ mantaMsg('Fluid alloc data')\n\
flags_s$ID$ = s$ID$.create(FlagGrid, name='$NAME_FLAGS$')\n\ flags_s$ID$ = s$ID$.create(FlagGrid, name='$NAME_FLAGS$', offload=True)\n\
vel_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITY$', sparse=True)\n\ mantaMsg('Fluid alloc data vel')\n\
vel_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITY$', sparse=True, offload=True)\n\
velTmp_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITYTMP$', sparse=True)\n\ velTmp_s$ID$ = s$ID$.create(MACGrid, name='$NAME_VELOCITYTMP$', sparse=True)\n\
x_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_X$')\n\ x_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_X$')\n\
y_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Y$')\n\ y_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Y$')\n\
z_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Z$')\n\ z_vel_s$ID$ = s$ID$.create(RealGrid, name='$NAME_VELOCITY_Z$')\n\
pressure_s$ID$ = s$ID$.create(RealGrid, name='$NAME_PRESSURE$')\n\ mantaMsg('Fluid alloc data pressure')\n\
pressure_s$ID$ = s$ID$.create(RealGrid, name='$NAME_PRESSURE$', offload=True)\n\
mantaMsg('Fluid alloc data rhs')\n\
rhs_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data A0')\n\
A0_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data Ai')\n\
Ai_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data Aj')\n\
Aj_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data Ak')\n\
Ak_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data search')\n\
search_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data residual')\n\
residual_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data tmp')\n\
tmp_s$ID$ = s$ID$.create(RealGrid, offload=True)\n\
mantaMsg('Fluid alloc data 6')\n\
phiObs_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIOBS$')\n\ phiObs_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIOBS$')\n\
phiSIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHISIN$') # helper for static flow objects\n\ phiSIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHISIN$') # helper for static flow objects\n\
phiIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIIN$')\n\ phiIn_s$ID$ = s$ID$.create(LevelsetGrid, name='$NAME_PHIIN$')\n\
@@ -298,7 +317,8 @@ phiOut_s$ID$.setConst(9999)\n\
\n\ \n\
# Keep track of important objects in dict to load them later on\n\ # Keep track of important objects in dict to load them later on\n\
fluid_data_dict_final_s$ID$ = { 'vel' : vel_s$ID$ }\n\ fluid_data_dict_final_s$ID$ = { 'vel' : vel_s$ID$ }\n\
fluid_data_dict_resume_s$ID$ = { 'phiObs' : phiObs_s$ID$, 'phiIn' : phiIn_s$ID$, 'phiOut' : phiOut_s$ID$, 'flags' : flags_s$ID$, 'velTmp' : velTmp_s$ID$ }\n"; fluid_data_dict_resume_s$ID$ = { 'phiObs' : phiObs_s$ID$, 'phiIn' : phiIn_s$ID$, 'phiOut' : phiOut_s$ID$, 'flags' : flags_s$ID$, 'velTmp' : velTmp_s$ID$ }\n\
mantaMsg('Fluid alloc DONE')\n";
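The allocation script now creates the flag, velocity and pressure grids with offload=True and adds the grids the split pressure solve keeps on the device (rhs, A0/Ai/Aj/Ak, search, residual, tmp). What offload=True plausibly amounts to on the C++ side is a persistent device mirror of the grid data, allocated via OpenMP target mapping; the sketch below is an assumption for illustration, not the actual grid implementation.

#include <cstddef>
#include <vector>

// Hypothetical grid with a persistent device copy (assumed behaviour of offload=True).
struct OffloadGrid {
  std::vector<float> data;

  explicit OffloadGrid(std::size_t cells) : data(cells, 0.0f)
  {
    float *ptr = data.data();
    const std::size_t n = data.size();
    // Reserve device storage for the grid for the lifetime of the object.
#pragma omp target enter data map(alloc : ptr[0 : n])
  }

  ~OffloadGrid()
  {
    float *ptr = data.data();
    const std::size_t n = data.size();
#pragma omp target exit data map(delete : ptr[0 : n])
  }
};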
const std::string fluid_alloc_obstacle = const std::string fluid_alloc_obstacle =
"\n\ "\n\
@@ -477,7 +497,8 @@ mantaMsg('Delete guiding solver')\n\
if 'sg$ID$' in globals(): del sg$ID$\n\ if 'sg$ID$' in globals(): del sg$ID$\n\
\n\ \n\
# Release unreferenced memory (if there is some left)\n\ # Release unreferenced memory (if there is some left)\n\
gc.collect()\n"; gc.collect()\n\
mantaMsg('Done deleting')\n";
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// BAKE // BAKE
View File
@@ -274,9 +274,14 @@ def liquid_step_$ID$():\n\
velTmp_s$ID$.copyFrom(vel_s$ID$)\n\ velTmp_s$ID$.copyFrom(vel_s$ID$)\n\
\n\ \n\
mantaMsg('Advecting phi')\n\ mantaMsg('Advecting phi')\n\
#phi_s$ID$.updateToOmp()\n\
#vel_s$ID$.updateToOmp()\n\
advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=phi_s$ID$, order=1) # first order is usually enough\n\ advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=phi_s$ID$, order=1) # first order is usually enough\n\
\n\
mantaMsg('Advecting velocity')\n\ mantaMsg('Advecting velocity')\n\
advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=vel_s$ID$, order=2)\n\ advectSemiLagrange(flags=flags_s$ID$, vel=vel_s$ID$, grid=vel_s$ID$, order=2)\n\
#phi_s$ID$.updateFromOmp()\n\
#vel_s$ID$.updateFromOmp()\n\
\n\ \n\
# create level set of particles\n\ # create level set of particles\n\
gridParticleIndex(parts=pp_s$ID$, flags=flags_s$ID$, indexSys=pindex_s$ID$, index=gpi_s$ID$)\n\ gridParticleIndex(parts=pp_s$ID$, flags=flags_s$ID$, indexSys=pindex_s$ID$, index=gpi_s$ID$)\n\
@@ -323,21 +328,45 @@ def liquid_step_$ID$():\n\
getLaplacian(laplacian=curvature_s$ID$, grid=phi_s$ID$)\n\ getLaplacian(laplacian=curvature_s$ID$, grid=phi_s$ID$)\n\
curvature_s$ID$.clamp(-1.0, 1.0)\n\ curvature_s$ID$.clamp(-1.0, 1.0)\n\
\n\ \n\
#vel_s$ID$.updateToOmp()\n\
setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\ setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\
#vel_s$ID$.updateFromOmp()\n\
\n\
if using_viscosity_s$ID$:\n\ if using_viscosity_s$ID$:\n\
viscosity_s$ID$.setConst(viscosityValue_s$ID$)\n\ viscosity_s$ID$.setConst(viscosityValue_s$ID$)\n\
applyViscosity(flags=flags_s$ID$, phi=phi_s$ID$, vel=vel_s$ID$, volumes=volumes_s$ID$, viscosity=viscosity_s$ID$)\n\ applyViscosity(flags=flags_s$ID$, phi=phi_s$ID$, vel=vel_s$ID$, volumes=volumes_s$ID$, viscosity=viscosity_s$ID$)\n\
\n\ \n\
#vel_s$ID$.updateToOmp()\n\
setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\ setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\
#vel_s$ID$.updateFromOmp()\n\
\n\
if using_guiding_s$ID$:\n\ if using_guiding_s$ID$:\n\
mantaMsg('Guiding and pressure')\n\ mantaMsg('Guiding and pressure')\n\
PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, zeroPressureFixing=domainClosed_s$ID$)\n\ PD_fluid_guiding(vel=vel_s$ID$, velT=velT_s$ID$, flags=flags_s$ID$, phi=phi_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, weight=weightGuide_s$ID$, blurRadius=beta_sg$ID$, pressure=pressure_s$ID$, tau=tau_sg$ID$, sigma=sigma_sg$ID$, theta=theta_sg$ID$, zeroPressureFixing=domainClosed_s$ID$)\n\
else:\n\ else:\n\
mantaMsg('Pressure')\n\ print('Pressure')\n\
solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None, zeroPressureFixing=domainClosed_s$ID$)\n\ # openmp sync to device\n\
flags_s$ID$.updateToOmp()\n\
vel_s$ID$.updateToOmp()\n\
print('Pressure 2')\n\
\n\
#solvePressure(flags=flags_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, curv=curvature_s$ID$, surfTens=surfaceTension_s$ID$, fractions=fractions_s$ID$, obvel=obvel_s$ID$ if using_fractions_s$ID$ else None, zeroPressureFixing=domainClosed_s$ID$)\n\
computePressureRhs(rhs=rhs_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, preconditioner=PcNone)\n\
print('Pressure 21')\n\
solvePressureSystem(rhs=rhs_s$ID$, vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, useL2Norm=True, preconditioner=PcNone, residual=residual_s$ID$, search=search_s$ID$, A0=A0_s$ID$, Ai=Ai_s$ID$, Aj=Aj_s$ID$, Ak=Ak_s$ID$, tmp=tmp_s$ID$)\n\
print('Pressure 22')\n\
correctVelocity(vel=vel_s$ID$, pressure=pressure_s$ID$, flags=flags_s$ID$, preconditioner=PcNone)\n\
\n\
print('Pressure 3')\n\
# openmp sync from device\n\
pressure_s$ID$.updateFromOmp()\n\
vel_s$ID$.updateFromOmp()\n\
\n\ \n\
extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$, distance=4, intoObs=True if using_fractions_s$ID$ else False)\n\ extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$, distance=4, intoObs=True if using_fractions_s$ID$ else False)\n\
\n\
#vel_s$ID$.updateToOmp()\n\
setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\ setWallBcs(flags=flags_s$ID$, vel=vel_s$ID$, obvel=None if using_fractions_s$ID$ else obvel_s$ID$, phiObs=phiObs_s$ID$, fractions=fractions_s$ID$)\n\
#vel_s$ID$.updateFromOmp()\n\
\n\ \n\
if not using_fractions_s$ID$:\n\ if not using_fractions_s$ID$:\n\
extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$)\n\ extrapolateMACSimple(flags=flags_s$ID$, vel=vel_s$ID$)\n\
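The step script brackets the pressure stage with explicit host/device synchronization (the "# openmp sync to device" / "# openmp sync from device" calls) and replaces solvePressure() with the three-stage computePressureRhs() / solvePressureSystem() / correctVelocity() sequence, using the offload grids allocated above as intermediates. A hedged sketch of what updateToOmp()/updateFromOmp() presumably do with OpenMP target update; member and function names are assumptions, not the real API, and the data must already be mapped on the device (see the allocation sketch earlier):

#include <cstddef>
#include <vector>

struct GridSync {
  std::vector<float> data;

  void updateToOmp()  // host -> device, called before the offloaded pressure kernels
  {
    float *ptr = data.data();
    const std::size_t n = data.size();
#pragma omp target update to(ptr[0 : n])
  }

  void updateFromOmp()  // device -> host, called once the result is needed on the CPU
  {
    float *ptr = data.data();
    const std::size_t n = data.size();
#pragma omp target update from(ptr[0 : n])
  }
};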
View File
@@ -76,7 +76,7 @@
.adapt_margin = 4, \ .adapt_margin = 4, \
.adapt_res = 0, \ .adapt_res = 0, \
.adapt_threshold = 0.02f, \ .adapt_threshold = 0.02f, \
.maxres = 32, \ .maxres = 64, \
.solver_res = 3, \ .solver_res = 3, \
.border_collisions = 0, \ .border_collisions = 0, \
.flags = FLUID_DOMAIN_USE_DISSOLVE_LOG | FLUID_DOMAIN_USE_ADAPTIVE_TIME, \ .flags = FLUID_DOMAIN_USE_DISSOLVE_LOG | FLUID_DOMAIN_USE_ADAPTIVE_TIME, \
View File
@@ -688,6 +688,13 @@ if(UNIX AND NOT APPLE)
DESTINATION ${TARGETDIR_VER}/python/lib/python${PYTHON_VERSION}/site-packages DESTINATION ${TARGETDIR_VER}/python/lib/python${PYTHON_VERSION}/site-packages
) )
endif() endif()
if(WITH_MOD_FLUID)
message(STATUS "============== Here")
install(
PROGRAMS $<TARGET_FILE:extern_mantaflow>
DESTINATION ${TARGETDIR_VER}/python/lib/python${PYTHON_VERSION}/site-packages
)
endif()
elseif(WIN32) elseif(WIN32)
set(BLENDER_TEXT_FILES_DESTINATION ".") set(BLENDER_TEXT_FILES_DESTINATION ".")