forked from blender/blender
Merge latest distributed rendering changes from GHE master-cluster into master-cluster on blender.org #1
extern/CMakeLists.txt (vendored): 6 changed lines
@@ -17,7 +17,11 @@ endif()

add_subdirectory(rangetree)
add_subdirectory(wcwidth)
add_subdirectory(perceptualdiff)
#FRL_CLR_BEGIN
if(UNIX AND NOT APPLE)
  add_subdirectory(perceptualdiff)
endif()
#FRL_CLR_END

if(WITH_BULLET)
  if(NOT WITH_SYSTEM_BULLET)
BIN extern/nvidia/Video_Codec_SDK_11.1.5/Deprecation_Notices.pdf (vendored, new file): binary file not shown
extern/nvidia/Video_Codec_SDK_11.1.5/Interface/cuviddec.h (vendored, new file): 1201 lines (file diff suppressed because it is too large)
extern/nvidia/Video_Codec_SDK_11.1.5/Interface/nvEncodeAPI.h (vendored, new file): 3907 lines (file diff suppressed because it is too large)
extern/nvidia/Video_Codec_SDK_11.1.5/Interface/nvcuvid.h (vendored, new file): 436 lines
@@ -0,0 +1,436 @@
/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2010-2021 NVIDIA Corporation
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/********************************************************************************************************************/
//! \file nvcuvid.h
//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
//! \date 2015-2020
//! This file contains the interface constants, structure definitions and function prototypes.
/********************************************************************************************************************/

#if !defined(__NVCUVID_H__)
#define __NVCUVID_H__

#include "cuviddec.h"

#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */


/***********************************************/
//!
//! High-level helper APIs for video sources
//!
/***********************************************/

typedef void *CUvideosource;
typedef void *CUvideoparser;
typedef long long CUvideotimestamp;


/************************************************************************/
//! \enum cudaVideoState
//! Video source state enums
//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
/************************************************************************/
typedef enum {
    cudaVideoState_Error   = -1,    /**< Error state (invalid source)                  */
    cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
    cudaVideoState_Started = 1      /**< Source is running and delivering data         */
} cudaVideoState;

/************************************************************************/
//! \enum cudaAudioCodec
//! Audio compression enums
//! Used in CUAUDIOFORMAT structure
/************************************************************************/
typedef enum {
    cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio               */
    cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio               */
    cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio     */
    cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
    cudaAudioCodec_LPCM,            /**< PCM Audio                  */
    cudaAudioCodec_AAC,             /**< AAC Audio                  */
} cudaAudioCodec;

/************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMAT
//! Video format
//! Used in cuvidGetSourceVideoFormat API
/************************************************************************************************/
typedef struct
{
    cudaVideoCodec codec;                   /**< OUT: Compression format */
    /**
     * OUT: frame rate = numerator / denominator (for example: 30000/1001)
     */
    struct {
        /**< OUT: frame rate numerator   (0 = unspecified or variable frame rate) */
        unsigned int numerator;
        /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
        unsigned int denominator;
    } frame_rate;
    unsigned char progressive_sequence;     /**< OUT: 0=interlaced, 1=progressive */
    unsigned char bit_depth_luma_minus8;    /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
    unsigned char bit_depth_chroma_minus8;  /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
    unsigned char min_num_decode_surfaces;  /**< OUT: Minimum number of decode surfaces to be allocated for correct
                                                      decoding. The client can send this value in ulNumDecodeSurfaces
                                                      (in CUVIDDECODECREATEINFO structure).
                                                      This guarantees correct functionality and optimal video memory
                                                      usage but not necessarily the best performance, which depends on
                                                      the design of the overall application. The optimal number of
                                                      decode surfaces (in terms of performance and memory utilization)
                                                      should be decided by experimentation for each application, but it
                                                      cannot go below min_num_decode_surfaces.
                                                      If this value is used for ulNumDecodeSurfaces then it must be
                                                      returned to parser during sequence callback. */
    unsigned int coded_width;               /**< OUT: coded frame width in pixels  */
    unsigned int coded_height;              /**< OUT: coded frame height in pixels */
    /**
     * area of the frame that should be displayed
     * typical example:
     * coded_width = 1920, coded_height = 1088
     * display_area = { 0,0,1920,1080 }
     */
    struct {
        int left;                           /**< OUT: left position of display rect   */
        int top;                            /**< OUT: top position of display rect    */
        int right;                          /**< OUT: right position of display rect  */
        int bottom;                         /**< OUT: bottom position of display rect */
    } display_area;
    cudaVideoChromaFormat chroma_format;    /**< OUT: Chroma format                  */
    unsigned int bitrate;                   /**< OUT: video bitrate (bps, 0=unknown) */
    /**
     * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
     */
    struct {
        int x;
        int y;
    } display_aspect_ratio;
    /**
     * Video Signal Description
     * Refer section E.2.1 (VUI parameters semantics) of H264 spec file
     */
    struct {
        unsigned char video_format          : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified     */
        unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range           */
        unsigned char reserved_zero_bits    : 4; /**< Reserved bits                                                      */
        unsigned char color_primaries;           /**< OUT: chromaticity coordinates of source primaries                  */
        unsigned char transfer_characteristics;  /**< OUT: opto-electronic transfer characteristic of the source picture */
        unsigned char matrix_coefficients;       /**< OUT: used in deriving luma and chroma signals from RGB primaries   */
    } video_signal_description;
    unsigned int seqhdr_data_length;             /**< OUT: Additional bytes following (CUVIDEOFORMATEX)                  */
} CUVIDEOFORMAT;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDOPERATINGPOINTINFO
//! Operating point information of scalable bitstream
/****************************************************************/
typedef struct
{
    cudaVideoCodec codec;
    union
    {
        struct
        {
            unsigned char  operating_points_cnt;
            unsigned char  reserved24_bits[3];
            unsigned short operating_points_idc[32];
        } av1;
        unsigned char CodecReserved[1024];
    };
} CUVIDOPERATINGPOINTINFO;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
//! AV1 specific sequence header information
/****************************************************************/
typedef struct {
    unsigned int max_width;
    unsigned int max_height;
    unsigned char reserved[1016];
} CUVIDAV1SEQHDR;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMATEX
//! Video format including raw sequence header information
//! Used in cuvidGetSourceVideoFormat API
/****************************************************************/
typedef struct
{
    CUVIDEOFORMAT format;                    /**< OUT: CUVIDEOFORMAT structure */
    union {
        CUVIDAV1SEQHDR av1;
        unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data    */
    };
} CUVIDEOFORMATEX;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUAUDIOFORMAT
//! Audio formats
//! Used in cuvidGetSourceAudioFormat API
/****************************************************************/
typedef struct
{
    cudaAudioCodec codec;       /**< OUT: Compression format                                              */
    unsigned int channels;      /**< OUT: number of audio channels                                        */
    unsigned int samplespersec; /**< OUT: sampling frequency                                              */
    unsigned int bitrate;       /**< OUT: For uncompressed, can also be used to determine bits per sample */
    unsigned int reserved1;     /**< Reserved for future use                                              */
    unsigned int reserved2;     /**< Reserved for future use                                              */
} CUAUDIOFORMAT;


/***************************************************************/
//! \enum CUvideopacketflags
//! Data packet flags
//! Used in CUVIDSOURCEDATAPACKET structure
/***************************************************************/
typedef enum {
    CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream              */
    CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid                                            */
    CUVID_PKT_DISCONTINUITY = 0x04,   /**< Set when a discontinuity has to be signalled                  */
    CUVID_PKT_ENDOFPICTURE  = 0x08,   /**< Set when the packet contains exactly one frame or one field   */
    CUVID_PKT_NOTIFY_EOS    = 0x10,   /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy)
                                           display callback will be invoked with null value of CUVIDPARSERDISPINFO which
                                           should be interpreted as end of the stream. */
} CUvideopacketflags;

/*****************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEDATAPACKET
//! Data Packet
//! Used in cuvidParseVideoData API
//! IN for cuvidParseVideoData
/*****************************************************************************/
typedef struct _CUVIDSOURCEDATAPACKET
{
    unsigned long flags;            /**< IN: Combination of CUVID_PKT_XXX flags                              */
    unsigned long payload_size;     /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
    const unsigned char *payload;   /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
    CUvideotimestamp timestamp;     /**< IN: Presentation time stamp (10MHz clock), only valid if
                                             CUVID_PKT_TIMESTAMP flag is set */
} CUVIDSOURCEDATAPACKET;

// Callback for packet delivery
typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);

/**************************************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEPARAMS
//! Describes parameters needed in cuvidCreateVideoSource API
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
/**************************************************************************************************************************/
typedef struct _CUVIDSOURCEPARAMS
{
    unsigned int ulClockRate;                   /**< IN: Time stamp units in Hz (0=default=10000000Hz)    */
    unsigned int bAnnexb : 1;                   /**< IN: AV1 annexB stream                                */
    unsigned int uReserved : 31;                /**< Reserved for future use - set to zero                */
    unsigned int uReserved1[6];                 /**< Reserved for future use - set to zero                */
    void *pUserData;                            /**< IN: User private data passed in to the data handlers */
    PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< IN: Called to deliver video packets                  */
    PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< IN: Called to deliver audio packets.                 */
    void *pvReserved2[8];                       /**< Reserved for future use - set to NULL                */
} CUVIDSOURCEPARAMS;


/**********************************************/
//! \ingroup ENUMS
//! \enum CUvideosourceformat_flags
//! CUvideosourceformat_flags
//! Used in cuvidGetSourceVideoFormat API
/**********************************************/
typedef enum {
    CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
} CUvideosourceformat_flags;

#if !defined(__APPLE__)
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks:
//! pfnVideoDataHandler() and pfnAudioDataHandler()
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);

/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create video source
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);

/********************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
//! Destroy video source
/********************************************************************/
CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);

/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
//! Set video source state to:
//! cudaVideoState_Started - to signal the source to run and deliver data
//! cudaVideoState_Stopped - to stop the source from delivering the data
//! cudaVideoState_Error - invalid source
/******************************************************************************************/
CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);

/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
//! Get video source state
//! Returns:
//! cudaVideoState_Started - if Source is running and delivering data
//! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream
//! cudaVideoState_Error - if Source is in error state
/******************************************************************************************/
cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);

/******************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
//! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement
/******************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);

/**************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
//! Get audio source format
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
/**************************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);

#endif
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERDISPINFO
//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
/**********************************************************************************/
typedef struct _CUVIDPARSERDISPINFO
{
    int picture_index;          /**< OUT: Index of the current picture                                            */
    int progressive_frame;      /**< OUT: 1 if progressive frame; 0 otherwise                                     */
    int top_field_first;        /**< OUT: 1 if top field is displayed first; 0 otherwise                          */
    int repeat_first_field;     /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling,
                                          -1=unpaired field)                                                      */
    CUvideotimestamp timestamp; /**< OUT: Presentation time stamp                                                 */
} CUVIDPARSERDISPINFO;

/***********************************************************************************************************************/
//! Parser callbacks
//! The parser will call these synchronously from within cuvidParseVideoData(), whenever there is sequence change or a picture
//! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS
//! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by
//! cuvidParseVideoData() to the application.
//! Parser picks default operating point as 0 and outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is
//! -1 or invalid operating point.
//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces
//! while creating parser)
//! PFNVIDDECODECALLBACK   : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK  : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK  : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
/***********************************************************************************************************************/
typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);

/**************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERPARAMS
//! Used in cuvidCreateVideoParser API
/**************************************/
typedef struct _CUVIDPARSERPARAMS
{
    cudaVideoCodec CodecType;                    /**< IN: cudaVideoCodec_XXX                                                  */
    unsigned int ulMaxNumDecodeSurfaces;         /**< IN: Max # of decode surfaces (parser will cycle through these)          */
    unsigned int ulClockRate;                    /**< IN: Timestamp units in Hz (0=default=10000000Hz)                        */
    unsigned int ulErrorThreshold;               /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always
                                                      IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */
    unsigned int ulMaxDisplayDelay;              /**< IN: Max display queue delay (improves pipelining of decode with display)
                                                          0=no delay (recommended values: 2..4)                               */
    unsigned int bAnnexb : 1;                    /**< IN: AV1 annexB stream                                                   */
    unsigned int uReserved : 31;                 /**< Reserved for future use - set to zero                                   */
    unsigned int uReserved1[4];                  /**< IN: Reserved for future use - set to 0                                  */
    void *pUserData;                             /**< IN: User data for callbacks                                             */
    PFNVIDSEQUENCECALLBACK pfnSequenceCallback;  /**< IN: Called before decoding frames and/or whenever there is a fmt change */
    PFNVIDDECODECALLBACK pfnDecodePicture;       /**< IN: Called when a picture is ready to be decoded (decode order)         */
    PFNVIDDISPLAYCALLBACK pfnDisplayPicture;     /**< IN: Called whenever a picture is ready to be displayed (display order)  */
    PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint;  /**< IN: Called from AV1 sequence header to get operating point of a AV1
                                                          scalable bitstream                                                  */
    void *pvReserved2[6];                        /**< Reserved for future use - set to NULL                                   */
    CUVIDEOFORMATEX *pExtVideoInfo;              /**< IN: [Optional] sequence header data from system layer                   */
} CUVIDPARSERPARAMS;

/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
//! Create video parser object and initialize
/************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);

/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
//! Parse the video data from source data packet in pPacket
//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and
//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking of HW decoding
//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when
//! the decoder encounters a video format change
//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
/************************************************************************************************/
CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);

/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
//! Destroy the video parser
/************************************************************************************************/
CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);

/**********************************************************************************/

#if defined(__cplusplus)
}
#endif /* __cplusplus */

#endif // __NVCUVID_H__
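For orientation, here is a minimal sketch of how the parser API declared above is typically driven. The sketch is not part of the vendored SDK or of this change; cudaVideoCodec_H264, CUVIDPICPARAMS and CUDAAPI come from the companion cuviddec.h (whose diff is suppressed above), and the callback bodies are application-specific placeholders.

#include "nvcuvid.h"

// Placeholder callbacks; the parser calls these synchronously from inside
// cuvidParseVideoData().
static int CUDAAPI onSequence(void *user, CUVIDEOFORMAT *fmt) {
  // Returning a value > 1 overrides the parser's decode-surface count; the
  // header above says min_num_decode_surfaces must be returned if it is used
  // for ulNumDecodeSurfaces.
  return fmt->min_num_decode_surfaces;
}
static int CUDAAPI onDecode(void *user, CUVIDPICPARAMS *pic)      { return 1; }
static int CUDAAPI onDisplay(void *user, CUVIDPARSERDISPINFO *di) { return 1; }

void parseElementaryStream(const unsigned char *data, unsigned long size) {
  CUVIDPARSERPARAMS params = {};
  params.CodecType = cudaVideoCodec_H264;  // enum from cuviddec.h
  params.ulMaxNumDecodeSurfaces = 1;       // may be raised via onSequence
  params.pfnSequenceCallback = onSequence;
  params.pfnDecodePicture = onDecode;
  params.pfnDisplayPicture = onDisplay;

  CUvideoparser parser = nullptr;
  cuvidCreateVideoParser(&parser, &params);  // error handling omitted

  CUVIDSOURCEDATAPACKET pkt = {};
  pkt.payload = data;
  pkt.payload_size = size;
  cuvidParseVideoData(parser, &pkt);  // fires the callbacks above

  CUVIDSOURCEDATAPACKET eos = {};
  eos.flags = CUVID_PKT_ENDOFSTREAM;  // flush: no more data will follow
  cuvidParseVideoData(parser, &eos);

  cuvidDestroyVideoParser(parser);
}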
BIN extern/nvidia/Video_Codec_SDK_11.1.5/LicenseAgreement.pdf (vendored, new file): binary file not shown

extern/nvidia/Video_Codec_SDK_11.1.5/NOTICES.txt (vendored, new file): 167 lines
@@ -0,0 +1,167 @@
This SDK includes portions of FFMPEG, under the following license:

                   GNU LESSER GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.


  This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.

  0. Additional Definitions.

  As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.

  "The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.

  An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.

  A "Combined Work" is a work produced by combining or linking an
Application with the Library.  The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".

  The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.

  The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.

  1. Exception to Section 3 of the GNU GPL.

  You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.

  2. Conveying Modified Versions.

  If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:

   a) under this License, provided that you make a good faith effort to
   ensure that, in the event an Application does not supply the
   function or data, the facility still operates, and performs
   whatever part of its purpose remains meaningful, or

   b) under the GNU GPL, with none of the additional permissions of
   this License applicable to that copy.

  3. Object Code Incorporating Material from Library Header Files.

  The object code form of an Application may incorporate material from
a header file that is part of the Library.  You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:

   a) Give prominent notice with each copy of the object code that the
   Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the object code with a copy of the GNU GPL and this license
   document.

  4. Combined Works.

  You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:

   a) Give prominent notice with each copy of the Combined Work that
   the Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the Combined Work with a copy of the GNU GPL and this license
   document.

   c) For a Combined Work that displays copyright notices during
   execution, include the copyright notice for the Library among
   these notices, as well as a reference directing the user to the
   copies of the GNU GPL and this license document.

   d) Do one of the following:

       0) Convey the Minimal Corresponding Source under the terms of this
       License, and the Corresponding Application Code in a form
       suitable for, and under terms that permit, the user to
       recombine or relink the Application with a modified version of
       the Linked Version to produce a modified Combined Work, in the
       manner specified by section 6 of the GNU GPL for conveying
       Corresponding Source.

       1) Use a suitable shared library mechanism for linking with the
       Library.  A suitable mechanism is one that (a) uses at run time
       a copy of the Library already present on the user's computer
       system, and (b) will operate properly with a modified version
       of the Library that is interface-compatible with the Linked
       Version.

   e) Provide Installation Information, but only if you would otherwise
   be required to provide such information under section 6 of the
   GNU GPL, and only to the extent that such information is
   necessary to install and execute a modified version of the
   Combined Work produced by recombining or relinking the
   Application with a modified version of the Linked Version.  (If
   you use option 4d0, the Installation Information must accompany
   the Minimal Corresponding Source and Corresponding Application
   Code.  If you use option 4d1, you must provide the Installation
   Information in the manner specified by section 6 of the GNU GPL
   for conveying Corresponding Source.)

  5. Combined Libraries.

  You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:

   a) Accompany the combined library with a copy of the same work based
   on the Library, uncombined with any other library facilities,
   conveyed under the terms of this License.

   b) Give prominent notice with the combined library that part of it
   is a work based on the Library, and explaining where to find the
   accompanying uncombined form of the same work.

  6. Revised Versions of the GNU Lesser General Public License.

  The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.

  Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.

  If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.
BIN extern/nvidia/Video_Codec_SDK_11.1.5/Read_Me.pdf (vendored, new file): binary file not shown

BIN extern/nvidia/Video_Codec_SDK_11.1.5/Release_Notes.pdf (vendored, new file): binary file not shown

extern/nvidia/Video_Codec_SDK_11.1.5/rl_readme.txt (vendored, new file): 9 lines
@@ -0,0 +1,9 @@
Reality Labs notes.

This is a shortened set of the NVIDIA Video Codec SDK.

The Doc, Lib and Samples folders are removed since we do not use them anyway.
Only the Interface folder and the readme and license files are left.
BIN extern/perceptualdiff/test/Aqsis_vase.png (vendored, new file): binary file not shown (34 KiB)
BIN extern/perceptualdiff/test/Aqsis_vase_ref.png (vendored, new file): binary file not shown (34 KiB)
BIN extern/perceptualdiff/test/Bug1102605.tif (vendored, new file): binary file not shown
BIN extern/perceptualdiff/test/Bug1102605_ref.tif (vendored, new file): binary file not shown
BIN extern/perceptualdiff/test/Bug1471457.tif (vendored, new file): binary file not shown
BIN extern/perceptualdiff/test/Bug1471457_ref.tif (vendored, new file): binary file not shown
BIN extern/perceptualdiff/test/cam_mb.tif (vendored, new file): binary file not shown
BIN extern/perceptualdiff/test/cam_mb_ref.tif (vendored, new file): binary file not shown
BIN extern/perceptualdiff/test/fish1.png (vendored, new file): binary file not shown (222 KiB)
BIN extern/perceptualdiff/test/fish2.png (vendored, new file): binary file not shown (250 KiB)
extern/perceptualdiff/test/run_tests.sh (vendored, new executable file): 49 lines
@@ -0,0 +1,49 @@
#!/bin/bash

# Script to run pdiff against a set of image file pairs, and check that the
# PASS or FAIL status is as expected.

#------------------------------------------------------------------------------
# Image files and expected perceptualdiff PASS/FAIL status. Line format is
# (PASS|FAIL) image1.(tif|png) image2.(tif|png)
#
# Edit the following lines to add additional tests.
function all_tests {
    cat <<EOF
FAIL Bug1102605_ref.tif Bug1102605.tif
PASS Bug1471457_ref.tif Bug1471457.tif
PASS cam_mb_ref.tif cam_mb.tif
FAIL fish2.png fish1.png
EOF
}

# Modify pdiffBinary to point to your compiled pdiff executable if desired.
pdiffBinary=../perceptualdiff

#------------------------------------------------------------------------------

totalTests=0
numTestsFailed=0

# Run all tests.
while read expectedResult image1 image2 ; do
    if $pdiffBinary -verbose $image1 $image2 | grep -q "^$expectedResult" ; then
        totalTests=$(($totalTests+1))
    else
        numTestsFailed=$(($numTestsFailed+1))
        echo "Regression failure: expected $expectedResult for \"$pdiffBinary $image1 $image2\"" >&2
    fi
done <<EOF
$(all_tests)
EOF
# (the above with the EOF's is a stupid bash trick to stop while from running
# in a subshell)

# Give some diagnostics:
if [[ $numTestsFailed == 0 ]] ; then
    echo "*** all $totalTests tests passed"
else
    echo "*** $numTestsFailed failed tests of $totalTests"
fi

exit $numTestsFailed
@@ -276,7 +276,12 @@ if(WITH_CYCLES_DEVICE_METAL)
endif()

if(WITH_CYCLES_DEVICE_ONEAPI)
  add_definitions(-DWITH_ONEAPI)
  # FRL_CLR_BEGIN
  # TODO: T146077207
  message(STATUS "Temporarily turn WITH_ONEAPI off on the monaco-cluster branch because it does not compile there. TODO: fix and put it back. Task: T146077207")
  # add_definitions(-DWITH_ONEAPI)
  # FRL_CLR_END

endif()

if(WITH_CYCLES_EMBREE)
@@ -8,58 +8,63 @@ set(INC
  ../../../../extern # for flatbuffers
  ${TurboJPEG_INCLUDE_DIRS}
  ${OPENIMAGEIO_INCLUDE_DIR}
  # ${CUDA_TOOLKIT_INCLUDE}
  ../../cluster_rendering/libcluster
  ../../cluster_rendering/libcluster/compression # for nv_encoder
  ../../cluster_rendering/libnetwork
  ${NVCODEC_PUBLIC_INTERFACE_DIR}
)

if(WITH_WEBRTC)
  list(APPEND INC ../../cluster_rendering/libstream/include)
endif(WITH_WEBRTC)
  ${CMAKE_CURRENT_BINARY_DIR}
)

set(INC_SYS
)
if(NOT DEFINED NVENCODEAPI_LIB)
  if(WIN32)
    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
      find_library(NVENCODEAPI_LIB NAMES nvencodeapi HINTS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../../lib/Video_Codec_SDK_11.1.5/Lib/" PATH_SUFFIXES x64/)
    else()
      find_library(NVENCODEAPI_LIB NAMES nvencodeapi HINTS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../../lib/Video_Codec_SDK_11.1.5/Lib/" PATH_SUFFIXES Win32/)
    endif()
  else()
    find_library(NVENCODEAPI_LIB NAMES nvidia-encode HINTS "${CMAKE_CURRENT_SOURCE_DIR}/../../../../../lib/Video_Codec_SDK_11.1.5/Lib/" PATH_SUFFIXES linux/stubs/x86_64/)
    if(NOT DEFINED NVENCODEAPI_LIB)
      find_library(NVENCODEAPI_LIB NAMES nvidia-encode HINTS "/Lib64/")
    endif()
  endif()
  if(NVENCODEAPI_LIB)
    message(STATUS "Found NVidia encoding library: ${NVENCODEAPI_LIB}")
  else()
    message(WARNING "NVidia encoding library not found.")
  endif()
else()
  message(STATUS "Using existing NVENCODEAPI_LIB, skipping library search")
endif()

find_package(Boost 1.48 COMPONENTS program_options serialization thread filesystem)

add_definitions(-DHEADLESS_CLIENT)

set(LIBRARIES
  cycles_libnetwork
  cycles_libcluster
  cycles_libnetwork
  ${TurboJPEG_LIBRARIES}
  ${OPENCOLORIO_LIBRARIES}
  ${OPENIMAGEIO_LIBRARY}
  ${OPENIMAGEIO_UTIL_LIBRARY}
  ${PNG_LIBRARIES}
  ${JPEG_LIBRARIES}
  ${TIFF_LIBRARY}
  ${OPENEXR_LIBRARIES}
  ${OPENJPEG_LIBRARIES}
  ${ZLIB_LIBRARIES}
  ${PUGIXML_LIBRARY}
  ${WEBP_LIBRARIES}
  ${Boost_LIBRARIES}
  ${GLOG_LIBRARIES}
)

if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
  list(APPEND LIBRARIES ${NVENCODEAPI_LIB}) # For NVCODEC
if(UNIX AND NOT APPLE)
  list(APPEND LIB
    bf_intern_libc_compat
  )
endif()

if(WITH_CYCLES_DEVICE_CUDA AND UNIX)
  if(NOT DEFINED NVENCODEAPI_LIB)
    find_library(NVENCODEAPI_LIB nvidia-encode)
    find_library(NVCUVID_LIB nvcuvid HINTS "/lib64/") # the same folder on the dev laptop and the docker image
    if(NOT NVENCODEAPI_LIB)
      message(FATAL_ERROR "NVidia encoding library not found.")
    endif()
  else()
    message(STATUS "Using pre-defined NVENCODEAPI_LIB: ${NVENCODEAPI_LIB}")
  endif()
  list(APPEND LIBRARIES ${NVENCODEAPI_LIB})
  if(WITH_CUDA_DYNLOAD AND NOT CUDA_CUDA_LIBRARY)
    list(APPEND INC "../../../../extern/cuew/include/")
    list(APPEND LIBRARIES extern_cuew)
    add_definitions(-DWITH_CUDA_DYNLOAD)
  else()
    list(APPEND INC ${CUDA_TOOLKIT_INCLUDE})
    list(APPEND LIBRARIES ${CUDA_CUDA_LIBRARY})
    remove_definitions(-DWITH_CUDA_DYNLOAD)
  endif()
endif()

# Only use cuew if CUDA_CUDA_LIBRARY is not specified
@@ -81,21 +86,12 @@ else()
  remove_definitions(-DWITH_CUDA_DYNLOAD)
endif()
list(APPEND LIBRARIES ${CMAKE_DL_LIBS})

link_directories(${BOOST_LIBPATH})

include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})

set(CMAKE_EXE_LINKER_FLAGS "-std=c++11 -pthread")
file(GLOB SRC *.cpp)
# Normally we should just link against libcluster and not compile against source files in libcluster;
# however, libcluster pulls in tons of dependencies, including server/master side dependencies,
# most of which we do not need, which makes this light client not light at all.
# We may refactor libcluster to split it into client and master/server parts.
# Since this light client only needs the functionality of the classes listed below,
# we work around the dependency issue by compiling only against the source files we need:
list(APPEND SRC ../../cluster_rendering/libcluster/rpc_blender_protocol.cpp)
list(APPEND SRC ../../cluster_rendering/libcluster/net_camera.cpp)
file(GLOB SRC *.cpp *.cu)

add_executable(${TARGET} ${SRC})
target_link_libraries(${TARGET} ${LIBRARIES})
@@ -28,8 +28,11 @@ const string Config::SAVE_IMAGES_PATH = "save_images_path";
const string Config::SAVE_VIDEO_STREAM_PATH = "save_video_stream_path";
const string Config::USER_EVENTS_PATH = "user_events_path";
const string Config::MASTER_IMAGE_COLOR_FORMAT = "master_image_color_format";
const string Config::MASTER_IMAGE_COMPRESSOR = "master_image_compressor";
const string Config::COLOR_FORMAT_LINEAR = "linear";
const string Config::COLOR_FORMAT_SRGB = "srgb";
const string Config::IMAGE_COMPRESSOR_JPEG = "JPEG";
const string Config::IMAGE_COMPRESSOR_NVENCODER = "NVENCODER";


Config::Config() :
@@ -55,6 +58,12 @@ bool Config::init(int argc, char* argv[]) {
    COLOR_FORMAT_SRGB + ". " +
    "Default is: " + COLOR_FORMAT_SRGB;

  const std::string supported_image_compressors =
    IMAGE_COMPRESSOR_JPEG + ", " +
    IMAGE_COMPRESSOR_NVENCODER + ". " +
    "Default is: " + IMAGE_COMPRESSOR_JPEG;


  namespace po = boost::program_options;
  po::options_description options("Allowed options");
  options.add_options()
@@ -78,6 +87,10 @@ if not provided, client uses the resolution net camera object was saved with")
    ((USER_EVENTS_PATH + ",u").c_str(), po::value<string>(), "path to a file with user events")
    ((MASTER_IMAGE_COLOR_FORMAT + ",i").c_str(), po::value<string>(),
      ("master image color format. Options: " + supported_color_formats).c_str())

    ((MASTER_IMAGE_COMPRESSOR + ",i").c_str(), po::value<string>(),
      ("master image compressor. Options: " + supported_image_compressors).c_str())

    ((DENOISER + ",d").c_str(), po::value<string>(), denoiser_help.c_str());
  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, options), vm);
@@ -191,6 +204,19 @@ if not provided, client uses the resolution net camera object was saved with")
    }
  }

  if (vm.count(MASTER_IMAGE_COMPRESSOR)) {
    string master_image_compressor_str = vm[MASTER_IMAGE_COMPRESSOR].as<string>();
    if(master_image_compressor_str == IMAGE_COMPRESSOR_JPEG) {
      master_image_compressor = ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_JPEG;
    } else if(master_image_compressor_str == IMAGE_COMPRESSOR_NVENCODER) {
      master_image_compressor = ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_NVENCODER;
    } else {
      VLOG(1) << "FATAL. Requested image compressor is not supported. Requested: " << master_image_compressor_str <<
        " Supported: " << supported_image_compressors;
      return false;
    }
  }

  return true;
}
@@ -35,8 +35,11 @@ class Config {
  static const string SAVE_VIDEO_STREAM_PATH;
  static const string USER_EVENTS_PATH;
  static const string MASTER_IMAGE_COLOR_FORMAT;
  static const string MASTER_IMAGE_COMPRESSOR;
  static const string COLOR_FORMAT_LINEAR;
  static const string COLOR_FORMAT_SRGB;
  static const string IMAGE_COMPRESSOR_JPEG;
  static const string IMAGE_COMPRESSOR_NVENCODER;

public:
  size_t start_frame_id = 0;
@@ -60,6 +63,8 @@ public:
  ClusterSessionParams::MasterDenoiser master_denoiser;
  ClusterSessionParams::MasterImageColorFormat master_image_color_format =
    ClusterSessionParams::MasterImageColorFormat::MASTER_IMAGE_COLOR_FORMAT_SRGB;
  ClusterSessionParams::MasterImageCompressor master_image_compressor =
    ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_JPEG;

  Config();
  ~Config();
@@ -8,15 +8,23 @@

#include "camera_provider.h"
#include "cluster_session_params.h"
#ifdef WITH_OPTIX
#include "cuda_context.h"
#ifdef WITH_CUDA
#include "cuda_context_provider.h"
#endif
#include "image_io_util.h"
#include "light_cluster_rendering_client.h"
#include "modify_object_message.h"
#include "net_camera.h"
#include "user_events_provider.h"

#ifdef WITH_CUDA
using cgr_libcluster::CudaContextProvider;
using cgr_libcluster::CUDAContextScope;
#endif
using cgr_libcluster::ImageIOUtil;
using cluster_rendering_net_lib::Retry;
using OpenImageIO_v2_4::ImageOutput;
using OpenImageIO_v2_4::TypeDesc;

using namespace std::chrono;

@@ -25,6 +33,7 @@ namespace headless_light_client {
const std::string LightClusterRenderingClient::IMAGE = "Image";
const std::string LightClusterRenderingClient::CAMERA = "Camera";
const std::string LightClusterRenderingClient::JPG = ".jpg";
const std::string LightClusterRenderingClient::PNG = ".png";
const double LightClusterRenderingClient::WAIT_LAST_FRAME_TIMEOUT_MS = 1500;

LightClusterRenderingClient::LightClusterRenderingClient(const Config & config, CameraProvider & camera_provider,
@@ -42,16 +51,12 @@ LightClusterRenderingClient::LightClusterRenderingClient(const Config & config,
  image_client_events_handler(IMAGE),
  netlib_camera_client(camera_client_events_handler),
  netlib_image_client(image_client_events_handler)
#ifdef WITH_OPTIX
  ,encoded_image(INITIAL_SIZE_OF_BUFFER_FOR_ENCODED_IMAGE)
#endif
{
}

LightClusterRenderingClient::~LightClusterRenderingClient() {
#ifdef WITH_OPTIX
  video_stream_file.close();
#endif
}

void LightClusterRenderingClient::run() {
@@ -69,10 +74,12 @@ void LightClusterRenderingClient::run() {
    camera.cam_width = config.frame_width;
    camera.cam_height = config.frame_height;
  }
  VLOG(3) << "client send camera for frame: " << camera.frame << " cam_width: " << camera.cam_width << " cam_height: " << camera.cam_height;
  VLOG(3) << "client send camera for frame: " << camera.frame << " cam_width: " << camera.cam_width <<
    " cam_height: " << camera.cam_height;
  camera.sampleCount = samples_count;
  camera.master_denoiser = master_denoiser;
  camera.master_image_color_format = config.master_image_color_format;
  camera.master_image_compressor = config.master_image_compressor;
  if(is_first_camera) {
    camera.frame = 0;
    is_first_camera = false;
@@ -127,9 +134,9 @@ void LightClusterRenderingClient::nextCameraDelay(const int frame_id) {
  }
}

#ifdef WITH_OPTIX
#ifdef WITH_CUDA
void LightClusterRenderingClient::encodeImage(NvEncoder * nv_encoder_ptr,
    const std::vector<uint8_t> & raw_image, AutoEnlargingBuffer<uint8_t> & encoded_image_out) {
    const std::vector<uint8_t> & raw_image, std::vector<uint8_t> & encoded_image_out) {
  auto start = high_resolution_clock::now();
  if(nv_encoder_ptr->encode(raw_image.data(), encoded_image_out)) {
    std::chrono::duration<float> duration_sec = high_resolution_clock::now() - start;
@@ -140,9 +147,13 @@ void LightClusterRenderingClient::encodeImage(NvEncoder * nv_encoder_ptr,
    throw std::runtime_error(message);
  }
}
#endif

void LightClusterRenderingClient::writeImageToVideoStreamFile(std::ofstream &video_stream_file,
    const AutoEnlargingBuffer<uint8_t> &encoded_image) {
    const std::vector<uint8_t> &encoded_image) {
  if(!video_stream_file.is_open()) {
    video_stream_file.open(config.save_video_stream_path, std::ios::app | std::ios::binary);
  }
  if(video_stream_file.is_open()) {
    video_stream_file.write(reinterpret_cast<const char*>(encoded_image.data()), encoded_image.size());
    VLOG(3) << "Wrote encoded image of size: " << encoded_image.size();
@@ -151,7 +162,7 @@ void LightClusterRenderingClient::writeImageToVideoStreamFile(std::ofstream &vid
    throw std::invalid_argument(message);
  }
}
#endif

// Simple RGB to RGBA conversion.
// The headless client gets the image in JPEG format from the master and decompresses it into RGB format.
// This method is used for the conversion from RGB to RGBA since NVEncoder expects the RGBA flavor.
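The body of the RGB to RGBA helper falls outside this hunk; a straightforward implementation of the conversion the comment describes could look like the following sketch (an illustration, not necessarily the code in this change):

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical sketch of the RGB -> RGBA conversion described above; the real
// rgbToRgba body is not shown in this hunk. Each 3-byte RGB pixel is widened
// to 4 bytes with an opaque alpha value appended.
static void rgbToRgba(const std::vector<uint8_t> &rgb, std::vector<uint8_t> &rgba) {
  const size_t pixel_count = rgb.size() / 3;
  rgba.resize(pixel_count * 4);
  for (size_t i = 0; i < pixel_count; ++i) {
    rgba[i * 4 + 0] = rgb[i * 3 + 0]; // R
    rgba[i * 4 + 1] = rgb[i * 3 + 1]; // G
    rgba[i * 4 + 2] = rgb[i * 3 + 2]; // B
    rgba[i * 4 + 3] = 255;            // A: fully opaque
  }
}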
@ -181,6 +192,8 @@ void LightClusterRenderingClient::imageChannelLoop() {
|
||||
VLOG(3) << "Image channel received from master buffer size: " << buf_size;
|
||||
NetCamera net_camera;
|
||||
netlib_image_client.receive(&net_camera, sizeof(NetCamera));
|
||||
// log line below is used by get_metrics.py to calculate stats. If you change it
|
||||
// please make sure get_metrics.py still works correctly. Update if needed.
|
||||
VLOG(3) << "net camera received for frame: " << net_camera.frame << " "
|
||||
<< net_camera.cam_width << " " << net_camera.cam_height;
|
||||
std::unique_ptr<std::vector<uint8_t> > image_buffer_uptr(new std::vector<uint8_t>(buf_size));
|
||||
@ -190,6 +203,7 @@ void LightClusterRenderingClient::imageChannelLoop() {
|
||||
received_image_from_master = true;
|
||||
++num_received_frames;
|
||||
outputImageIfRequested(image_buffer_uptr.get(), net_camera);
|
||||
outputVideoStreamIfRequested(image_buffer_uptr.get(), net_camera);
|
||||
image_cv.notify_one();
|
||||
}
|
||||
} else {
|
||||
@ -201,49 +215,88 @@ void LightClusterRenderingClient::imageChannelLoop() {
|
||||
VLOG(3) << "Finished image channel thread.";
|
||||
}

void LightClusterRenderingClient::outputImageIfRequested(std::vector<uint8_t> * jpeg_image, const NetCamera & net_camera) {
if(config.save_images_path.length() < 1 && config.save_video_stream_path.length() < 1) {
return; // no output is requested
void LightClusterRenderingClient::outputImageIfRequested(std::vector<uint8_t> * image, const NetCamera & net_camera) {
if(config.save_images_path.length() < 1) {
return; // no image output is requested
}
if(jpeg_image == nullptr) {
std::string message("FATAL. Cannot output an image, a pointer to the image is null");
VLOG(3) << message;
throw std::runtime_error(message);
}
uint8_t * flipped_jpeg = nullptr;
unsigned long flipped_jpeg_size = 0;
if(jpeg_tools_ptr == nullptr) {
jpeg_tools_ptr.reset(new JpegTools());
}
// Images that we receive from the master are upside down, we need to flip images before saving
jpeg_tools_ptr->flipJpegImage(jpeg_image->data(), jpeg_image->size(), &flipped_jpeg, flipped_jpeg_size);
if(config.save_images_path.length() > 0) {
// Saving of noisy images (which are significantly larger due to salt & pepper noise)
// to /mnt/graphics_ssd may take about 30 ms, which limits the read image thread to about 30 fps.
// Since we do not want IO operations to affect overall system performance at high fps,
// we save images in a different thread
boost::asio::post(save_image_thread_pool, std::bind(&LightClusterRenderingClient::saveImage, this,
getImagePath(net_camera.frame, JPG), flipped_jpeg, flipped_jpeg_size));
}
if(config.save_video_stream_path.length() > 0) {
#ifdef WITH_OPTIX
if(!nv_encoder_ptr) {
// create the NvEncoder once we get the first frame with the width and height of the image
VLOG(3) << "Creating NvEncoder";
nv_encoder_ptr.reset(new NvEncoder(NvEncoder::BUFFER_FORMAT_FOR_IMAGES_FROM_MASTER,
CudaContext::createCudaContext(),
net_camera.cam_width, net_camera.cam_height));
video_stream_file.open(config.save_video_stream_path, std::ios::app | std::ios::binary);
if(net_camera.master_image_compressor ==
ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_JPEG) {
if(image == nullptr) {
std::string message("FATAL. Cannot output an image, a pointer to the image is null");
VLOG(3) << message;
throw std::runtime_error(message);
}
uint8_t * flipped_jpeg = nullptr;
unsigned long flipped_jpeg_size = 0;
if(jpeg_tools_ptr == nullptr) {
jpeg_tools_ptr.reset(new JpegTools());
}
// Images that we receive from the master are upside down, we need to flip images before saving
jpeg_tools_ptr->flipJpegImage(image->data(), image->size(), &flipped_jpeg, flipped_jpeg_size);
// Saving of noisy images (which are significantly larger due to salt & pepper noise)
// to /mnt/graphics_ssd may take about 30 ms, which limits the read image thread to about 30 fps.
// Since we do not want IO operations to affect overall system performance at high fps,
// we save images in a different thread
boost::asio::post(save_image_thread_pool, std::bind(&LightClusterRenderingClient::saveImageAsIs, this,
getImagePath(net_camera.frame, JPG), flipped_jpeg, flipped_jpeg_size));
}
#ifdef WITH_CUDA
else if(net_camera.master_image_compressor ==
ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_NVENCODER) {
if(!nv_decoder_ptr) {
VLOG(3) << "Creating NvDecoder";
CUcontext cuda_context = CudaContextProvider::getPrimaryContext(CUDA_DEVICE_NUM);
const CUDAContextScope scope(cuda_context);
nv_decoder_ptr.reset(new NvDecoder(cuda_context));
}
nv_decoder_ptr->decode(*image, net_camera.frame, &decoded_image);
boost::asio::post(save_image_thread_pool, std::bind(&LightClusterRenderingClient::saveImageInFormat, this,
PNG, net_camera.frame, decoded_image.data(), net_camera.cam_width, net_camera.cam_height));
}
#endif
else {
VLOG(1) << "Cannot save image, unknown compressor: " << net_camera.master_image_compressor;
}
}
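
The comments above explain why saves are pushed off the receive thread: blocking file IO would cap the frame rate. A minimal sketch of that pattern with boost::asio::thread_pool (file name and buffer contents are placeholders):

  #include <boost/asio/post.hpp>
  #include <boost/asio/thread_pool.hpp>
  #include <cstdint>
  #include <fstream>
  #include <string>
  #include <vector>

  int main() {
    boost::asio::thread_pool save_pool(4); // mirrors save_image_thread_pool above
    std::vector<uint8_t> jpeg_bytes = {0xFF, 0xD8, 0xFF, 0xD9}; // placeholder bytes
    std::string path = "frame_0001.jpg";
    // The hot receive loop only posts the work; a pool thread does the blocking write.
    boost::asio::post(save_pool, [jpeg_bytes, path] {
      std::ofstream out(path, std::ios::out | std::ios::binary);
      out.write(reinterpret_cast<const char *>(jpeg_bytes.data()), jpeg_bytes.size());
    });
    save_pool.join(); // wait for outstanding saves before shutdown
    return 0;
  }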

void LightClusterRenderingClient::outputVideoStreamIfRequested(std::vector<uint8_t> * image, const NetCamera & net_camera) {
if(config.save_video_stream_path.length() < 1) {
return; // no video stream output is requested
}
if(net_camera.master_image_compressor ==
ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_JPEG) {
#ifdef WITH_CUDA
if(!nv_encoder_ptr) {
VLOG(3) << "Creating NvEncoder";
CUcontext cuda_context = CudaContextProvider::getPrimaryContext(CUDA_DEVICE_NUM);
const CUDAContextScope scope(cuda_context);
nv_encoder_ptr.reset(new NvEncoder(NV_ENC_BUFFER_FORMAT_ABGR,
cuda_context,
net_camera.cam_width, net_camera.cam_height));
}
std::vector<uint8_t> decompressed_image_rgb(net_camera.cam_width * net_camera.cam_height * 3);
std::vector<uint8_t> decompressed_image_rgba(net_camera.cam_width * net_camera.cam_height * 4);
uint8_t * flipped_jpeg = nullptr;
unsigned long flipped_jpeg_size = 0;
if(jpeg_tools_ptr == nullptr) {
jpeg_tools_ptr.reset(new JpegTools());
}
// Images that we receive from the master are upside down, we need to flip images before saving
jpeg_tools_ptr->flipJpegImage(image->data(), image->size(), &flipped_jpeg, flipped_jpeg_size);
jpeg_tools_ptr->decompressJpeg(flipped_jpeg, flipped_jpeg_size, decompressed_image_rgb);
rgbToRgba(decompressed_image_rgb, decompressed_image_rgba);
#ifdef WITH_OPTIX
encodeImage(nv_encoder_ptr.get(), decompressed_image_rgba, encoded_image);
writeImageToVideoStreamFile(video_stream_file, encoded_image);
#else
throw std::runtime_error("ERROR. Client is compiled without CUDA support so has no nvencoder and\
cannot encode received image and save video stream");
#endif
} else if(net_camera.master_image_compressor ==
ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_NVENCODER) {
// image is already video-encoded by nvencoder, save it as is
writeImageToVideoStreamFile(video_stream_file, *image);
} else {
VLOG(1) << "Cannot save video stream, unknown compressor: " << net_camera.master_image_compressor;
}
}
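
The JPEG path above decompresses to 3-channel RGB and then expands to the 4-channel layout the encoder expects. rgbToRgba() is a project helper; a plausible implementation, assuming tightly packed rows and an opaque alpha channel (the real helper may differ, and NV_ENC_BUFFER_FORMAT_ABGR may require a different byte order):

  #include <cstdint>
  #include <vector>

  void rgbToRgba(const std::vector<uint8_t> &rgb, std::vector<uint8_t> &rgba) {
    const size_t num_pixels = rgb.size() / 3;
    rgba.resize(num_pixels * 4);
    for (size_t i = 0; i < num_pixels; ++i) {
      rgba[i * 4 + 0] = rgb[i * 3 + 0]; // R
      rgba[i * 4 + 1] = rgb[i * 3 + 1]; // G
      rgba[i * 4 + 2] = rgb[i * 3 + 2]; // B
      rgba[i * 4 + 3] = 255;            // opaque alpha
    }
  }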

@ -254,17 +307,24 @@ std::string LightClusterRenderingClient::getImagePath(const int frame_id, const
return path_ostream.str();
}

void LightClusterRenderingClient::saveImage(const std::string file_path, unsigned char * jpeg_image, int jpeg_image_size) {
void LightClusterRenderingClient::saveImageAsIs(const std::string file_path, unsigned char * image, int image_size) {
VLOG(3) << "Saving image into: " << file_path;
auto start = high_resolution_clock::now();
std::ofstream wf(file_path, std::ios::out | std::ios::binary);
wf.write(reinterpret_cast<const char*>(jpeg_image), jpeg_image_size);
wf.write(reinterpret_cast<const char*>(image), image_size);
wf.close();
auto end = high_resolution_clock::now();
double time_taken = duration_cast<milliseconds>(end - start).count();
VLOG(3) << "Image saved successfully. Save image time: " << time_taken << " ms";
}

void LightClusterRenderingClient::saveImageInFormat(std::string format_extension, int frame_id, void * image,
int width, int height) {
std::string file_path = getImagePath(frame_id, format_extension);
std::unique_ptr<ImageOutput> image_output = std::unique_ptr<ImageOutput>(ImageOutput::create(file_path));
ImageIOUtil::saveFrame(file_path, TypeDesc::UCHAR, image_output.get(), image, width, height);
}
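
saveImageInFormat() leans on OpenImageIO's ImageOutput, which picks the output format from the file extension. ImageIOUtil::saveFrame is project code, but presumably wraps something close to the following sketch, assuming a recent OpenImageIO where ImageOutput::create returns a std::unique_ptr and an 8-bit RGB buffer:

  #include <OpenImageIO/imageio.h>
  #include <memory>
  #include <string>

  using namespace OIIO;

  bool saveRgbImage(const std::string &file_path, const unsigned char *pixels, int width, int height) {
    std::unique_ptr<ImageOutput> out = ImageOutput::create(file_path); // format chosen from extension
    if (!out) return false;
    ImageSpec spec(width, height, 3, TypeDesc::UINT8); // 3 channels, 8 bits each
    if (!out->open(file_path, spec)) return false;
    bool ok = out->write_image(TypeDesc::UINT8, pixels);
    out->close();
    return ok;
  }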

bool LightClusterRenderingClient::connectToMaster() {
bool connected = false;

@ -11,8 +11,9 @@

#include "jpeg_tools.h"

#ifdef WITH_OPTIX
#ifdef WITH_CUDA
#include "nv_encoder.h"
#include "nv_decoder.h"
#endif

#include "camera_provider.h"
@ -20,14 +21,15 @@
#include "netclient.h"
#include "netlib_event_handler.h"
#include "rpc_blender_protocol.h"
#include "./utils/vector_types.h" // for uchar3

namespace headless_light_client {

using cgr_libcluster::ClusterSessionParams;
using cgr_libcluster::RpcBlenderProtocol;
#ifdef WITH_OPTIX
#ifdef WITH_CUDA
using cgr_libcluster::NvDecoder;
using cgr_libcluster::NvEncoder;
using cgr_libcluster::AutoEnlargingBuffer;
#endif

class CameraProvider;
@ -51,6 +53,8 @@ private:
static const std::string IMAGE;
static const std::string CAMERA;
static const std::string JPG;
static const std::string PNG;
static const size_t CUDA_DEVICE_NUM = 0;
static const size_t MAX_NUM_RETRIES = 5;
static const size_t RETRY_INTERVAL_MS = 2000;
static const size_t INITIAL_SIZE_OF_BUFFER_FOR_ENCODED_IMAGE = 50000;
@ -81,11 +85,13 @@ private:
std::condition_variable image_cv;
std::atomic<bool> received_image_from_master = false;
std::unique_ptr<JpegTools> jpeg_tools_ptr;
#ifdef WITH_OPTIX
std::unique_ptr<NvEncoder> nv_encoder_ptr;
AutoEnlargingBuffer<uint8_t> encoded_image;
#ifdef WITH_CUDA
std::unique_ptr<NvEncoder> nv_encoder_ptr;
std::unique_ptr<NvDecoder> nv_decoder_ptr;
#endif
std::vector<uint8_t> encoded_image;
std::vector<cgr_libcluster::uchar3> decoded_image;
std::ofstream video_stream_file;
#endif

boost::asio::thread_pool save_image_thread_pool = boost::asio::thread_pool(4);

@ -99,13 +105,16 @@ private:
void imageChannelLoop();
void nextCameraDelay(const int frame_id);
std::string getImagePath(const int frame_id, const std::string & file_extension);
void outputImageIfRequested(uint8_t * jpeg_image, int jpeg_image_size, const NetCamera & net_camera);
void outputImageIfRequested(std::vector<uint8_t> * jpeg_image, const NetCamera & net_camera);
void saveImage(const std::string file_path, unsigned char * jpeg_image, int jpeg_image_size);
#ifdef WITH_OPTIX
void writeImageToVideoStreamFile(std::ofstream &video_stream_file, const AutoEnlargingBuffer<uint8_t> &encoded_image);
void outputVideoStreamIfRequested(std::vector<uint8_t> * image, const NetCamera & net_camera);
void outputImageIfRequested(std::vector<uint8_t> * image, const NetCamera & net_camera);
void saveImageAsIs(const std::string file_path, unsigned char * image, int image_size);
// Saves image in a format specified as a file extension
void saveImageInFormat(std::string format_extension, int frame_id, void * image, int width, int height);
void writeImageToVideoStreamFile(std::ofstream &video_stream_file,
const std::vector<uint8_t> &encoded_image);
#ifdef WITH_CUDA
void encodeImage(NvEncoder * nv_encoder_ptr, const std::vector<uint8_t> & raw_image,
AutoEnlargingBuffer<uint8_t> & encoded_image_out);
std::vector<uint8_t> & encoded_image_out);
#endif
};

@ -29,8 +29,8 @@ using headless_light_client::LightClusterRenderingClient;
using headless_light_client::UserEventsProvider;

static void initLogging(const char* argv0) {
setenv("GLOG_v", "3", 1);
setenv("GLOG_vmodule", "session_network*=3", 1);
//setenv("GLOG_v", "3", 1);
//setenv("GLOG_vmodule", "session_network*=3", 1);
google::InitGoogleLogging(argv0);
GFLAGS_NAMESPACE::SetCommandLineOption("logtostderr", "1");
}

@ -163,6 +163,7 @@ class CYCLES_RENDER_PT_render_session_mode(CyclesButtonsPanel, Panel):
netsub.enabled = net.enabled and render.render_session_mode == 'MASTER'
netsub.prop(render, "num_servers")
netport.prop(render, "master_image_color_format")
netport.prop(render, "master_image_compressor")
#Temp turned off. TODO: [pmishchuk] enable when ready
# import _cycles
# if _cycles.with_webrtc:
@ -833,7 +834,7 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa

@classmethod
def poll(cls, context):
return not use_optix(context)
return not use_optix(context)

def draw(self, context):
import _cycles

@ -871,6 +871,13 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
// use the default color format.
master_image_color_format_num = cgr_libcluster::ClusterSessionParams::DEFAULT_MASTER_IMAGE_COLOR_FORMAT;
}
int master_image_compressor_num = r.master_image_compressor();
if (master_image_compressor_num <= cgr_libcluster::ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_NONE ||
master_image_compressor_num >= cgr_libcluster::ClusterSessionParams::MasterImageCompressor::MASTER_IMAGE_COMPRESSOR_END) {
// Blend file has no saved value for the image compressor (it was saved before we added this property),
// use the default compressor.
master_image_compressor_num = cgr_libcluster::ClusterSessionParams::DEFAULT_MASTER_IMAGE_COMPRESSOR;
}

params.cluster_session_params = cgr_libcluster::ClusterSessionParams(
static_cast<cgr_libcluster::ClusterSessionParams::SessionMode>(r.render_session_mode()),
@ -878,7 +885,8 @@
static_cast<cgr_libcluster::ClusterSessionParams::MasterDenoiser>(r.master_denoiser()),
r.save_denoise_io(), r.save_streamed_image(), save_every_n_images,
r.save_cameras(), r.filepath(),
static_cast<cgr_libcluster::ClusterSessionParams::MasterImageColorFormat>(master_image_color_format_num));
static_cast<cgr_libcluster::ClusterSessionParams::MasterImageColorFormat>(master_image_color_format_num),
static_cast<cgr_libcluster::ClusterSessionParams::MasterImageCompressor>(master_image_compressor_num));
params.cluster_session_params.fps = r.fps();

cgr_libcluster::ModifyObjectParams & modify_object_params = params.cluster_session_params.modify_object_params;

@ -1,5 +1,5 @@
if(WITH_CYCLES_DEVICE_CUDA)
add_subdirectory(libcluster_cuda_kernels)
endif()
add_subdirectory(libcluster)
add_subdirectory(libnetwork)
if(WITH_WEBRTC)
add_subdirectory(libstream)
endif(WITH_WEBRTC)

@ -1,7 +1,7 @@
add_subdirectory(test)
#add_subdirectory(../../../../extern/flatbuffers)
# ${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-build
# EXCLUDE_FROM_ALL)
# EXCLUDE_FROM_ALL)
#SET(OIDN_PATH ${PROJECT_SOURCE_DIR}/../lib)
#if(APPLE)
#SET(OIDN_FOLDER ${OIDN_PATH}/oidn.macos)
@ -9,7 +9,9 @@ add_subdirectory(test)
#SET(OIDN_FOLDER ${OIDN_PATH}/oidn.linux)
#endif()

find_package(Boost 1.48 COMPONENTS serialization REQUIRED)
# From William: find_package(Boost 1.48 COMPONENTS serialization REQUIRED)
# Does not work on Windows at the moment, so turning this REQUIRED off
find_package(Boost 1.48 COMPONENTS serialization)
find_package(TurboJPEG REQUIRED)
# Uncomment instead of the above if using VCPKG
# find_package(libjpeg-turbo REQUIRED PATHS)
@ -54,34 +56,52 @@ set(SRC
net_client.cpp
rpc_blender_protocol.cpp
image_io_util.cpp
compression/turbojpeg_compressor.cpp
utils/timer.cpp
utils/vector_types.cpp
utils/image.cpp
denoising/denoise_buffer.cpp
denoising/denoising_context.cpp
denoising/master_denoiser.cpp
denoising/master_oidn_denoiser.cpp
)

if(WITH_CYCLES_DEVICE_OPTIX)
list(APPEND SRC denoising/master_optix_denoiser.cpp)
endif()

set(FLAT_BUFFER_FILES
net_camera.fbs
)
function(FLATBUFFERS_GENERATE_C_HEADERS Name)
if(NOT FLATBUFFERS_FLATC_EXECUTABLE)
set(FLATBUFFERS_FLATC_EXECUTABLE $<TARGET_FILE:flatc>)
if(NOT WIN32)
set(FLATBUFFERS_FLATC_EXECUTABLE ${CMAKE_BINARY_DIR}/bin/flatc)
message(WARNING "Using flatc binary FLATBUFFERS_FLATC_EXECUTABLE")
else()
# There seems to be a bug on Windows that when the CMAKE_BUILD_TYPE is undefined the
# CMAKE_BUILD_TYPE_INIT is set to "Debug" when it should be "Release"
message("Foo is located at:${CONFIG}")
# if($<CONFIG:Debug>)
set(FLATBUFFERS_FLATC_EXECUTABLE $<TARGET_FILE:flatc>) # ${CMAKE_BINARY_DIR}/bin/Debug/flatc.exe)
#else()
# set(FLATBUFFERS_FLATC_EXECUTABLE ${CMAKE_BINARY_DIR}/bin/Release/flatc.exe)
#endif()
message(WARNING "Using Windows flatc binary:${FLATBUFFERS_FLATC_EXECUTABLE} for build type:${CMAKE_BUILD_TYPE} ${CMAKE_BUILD_TYPE_INIT}")
endif()
# if(NOT WIN32)
# #if(APPLE)
# # There seems to be a bug on Windows that when the CMAKE_BUILD_TYPE is undefined the
# # CMAKE_BUILD_TYPE_INIT is set to "Debug" when it should be "Release"
# message("Foo is located at:${CONFIG}")
# # if($<CONFIG:Debug>)
# set(FLATBUFFERS_FLATC_EXECUTABLE $<TARGET_FILE:flatc>) # ${CMAKE_BINARY_DIR}/bin/Debug/flatc)
# #else()
# # set(FLATBUFFERS_FLATC_EXECUTABLE ${CMAKE_BINARY_DIR}/bin/Release/flatc)
# #endif()
# message(WARNING "Using MacOS flatc binary:${FLATBUFFERS_FLATC_EXECUTABLE}")
# #else()
# set(FLATBUFFERS_FLATC_EXECUTABLE ${CMAKE_BINARY_DIR}/bin/flatc)
# message(WARNING "Using flatc binary FLATBUFFERS_FLATC_EXECUTABLE")
# #endif()
# else()
# # There seems to be a bug on Windows that when the CMAKE_BUILD_TYPE is undefined the
# # CMAKE_BUILD_TYPE_INIT is set to "Debug" when it should be "Release"
# message("Foo is located at:${CONFIG}")
# # if($<CONFIG:Debug>)
# set(FLATBUFFERS_FLATC_EXECUTABLE $<TARGET_FILE:flatc>) # ${CMAKE_BINARY_DIR}/bin/Debug/flatc.exe)
# #else()
# # set(FLATBUFFERS_FLATC_EXECUTABLE ${CMAKE_BINARY_DIR}/bin/Release/flatc.exe)
# #endif()
# message(WARNING "Using Windows flatc binary:${FLATBUFFERS_FLATC_EXECUTABLE} for build type:${CMAKE_BUILD_TYPE} ${CMAKE_BUILD_TYPE_INIT}")
# endif()
endif()
set(FLATC_OUTPUTS)
foreach(FILE ${ARGN})
@ -93,18 +113,18 @@ function(FLATBUFFERS_GENERATE_C_HEADERS Name)
set(INPUT_FBS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${FILE}")
cmake_path(NATIVE_PATH INPUT_FBS_PATH NATIVE_INPUT_FBS_PATH)
set(OUTPUT_DIR_PATH "${CMAKE_CURRENT_BINARY_DIR}/")
cmake_path(NATIVE_PATH OUTPUT_DIR_PATH NATIVE_OUTPUT_DIR_PATH)
cmake_path(NATIVE_PATH OUTPUT_DIR_PATH NATIVE_OUTPUT_DIR_PATH)
add_custom_command(OUTPUT ${FLATC_OUTPUT}
COMMAND ${FLATBUFFERS_FLATC_EXECUTABLE}
ARGS -c -o "${NATIVE_OUTPUT_DIR_PATH}" ${INPUT_FBS_PATH}
DEPENDS ${DEPS}
COMMENT "Building C++ header for ${FILE}"
COMMENT "Building C++ header for ${FILE}"
COMMENT "Running ${FLATBUFFERS_FLATC_EXECUTABLE} -c -o \"${NATIVE_OUTPUT_DIR_PATH}\" ${NATIVE_INPUT_FBS_PATH}"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endforeach()
set(${Name}_OUTPUTS ${FLATC_OUTPUTS} PARENT_SCOPE)
endfunction()

#FLATBUFFERS_GENERATE_C_HEADERS(FLAT_BUFFER_FILES)
# compile_flatbuffers_schema_to_cpp(net_camera.fbs)
flatbuffers_generate_c_headers(cycles_libcluster_flat_headers net_camera.fbs)
@ -140,6 +160,7 @@ set(SRC_HEADERS
utils/logging.h
utils/timer.h
utils/vector_types.h
utils/image.h
denoising/denoise_buffer.h
denoising/denoising_context.h
denoising/master_denoiser.h
@ -152,22 +173,31 @@ set(LIB
${Boost_LIBRARIES}
${TurboJPEG_LIBRARIES}
flatbuffers
# rt
)

if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
message(STATUS "Using CUDA adding nv_encoder")
list(APPEND INC ${OPTIX_INCLUDE_DIRS})
list(APPEND SRC denoising/master_optix_denoiser.cpp compression/nv_encoder.cpp)
list(APPEND SRC_HEADERS denoising/master_optix_denoiser.h compression/nv_encoder.h)
if(WITH_CYCLES_DEVICE_OPTIX)
list(APPEND INC ${OPTIX_INCLUDE_DIRS})
endif()

if(WITH_CYCLES_DEVICE_CUDA AND UNIX)
message(STATUS "Building with CUDA, adding nv_encoder to the libcluster")
find_library(NVENCODEAPI_LIB nvidia-encode)
find_library(NVCUVID_LIB nvcuvid HINTS "/lib64/") # the same folder on dev laptop and docker image
list(APPEND SRC compression/nv_encoder.cpp)
list(APPEND SRC_HEADERS compression/nv_encoder.h)
list(APPEND SRC compression/nv_decoder.cpp)
list(APPEND SRC_HEADERS compression/nv_decoder.h)
list(APPEND LIB cycles_libcluster_cuda_kernels)
list(APPEND LIB ${NVENCODEAPI_LIB})
list(APPEND LIB ${NVCUVID_LIB})
if(WITH_CUDA_DYNLOAD)
list(APPEND LIB
extern_cuew
)
list(APPEND LIB extern_cuew)
else()
list(APPEND LIB
${CUDA_CUDA_LIBRARY}
)
list(APPEND LIB ${CUDA_CUDA_LIBRARY})
endif()
else()
message(STATUS "No CUDA or we are not on Linux so building libcluster without nv_encoder")
endif()

#if(APPLE)

@ -61,12 +61,24 @@ public:
// END must go last
MASTER_IMAGE_COLOR_FORMAT_END,
};

enum MasterImageCompressor{
MASTER_IMAGE_COMPRESSOR_NONE = 0,
// Add new options here between _NONE and _END
// List of formats must be in sync with list in DNA_scene_types.h
MASTER_IMAGE_COMPRESSOR_JPEG = 1,
MASTER_IMAGE_COMPRESSOR_NVENCODER = 2,
// END must go last
MASTER_IMAGE_COMPRESSOR_END,
};
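
Because _NONE and _END bracket the valid values, an integer read from an older .blend file can be validated with two comparisons before the cast, which is exactly what the sync code later in this change does. A standalone sketch of that pattern (the enum is copied here only for illustration):

  enum MasterImageCompressor {
    MASTER_IMAGE_COMPRESSOR_NONE = 0,
    MASTER_IMAGE_COMPRESSOR_JPEG = 1,
    MASTER_IMAGE_COMPRESSOR_NVENCODER = 2,
    MASTER_IMAGE_COMPRESSOR_END, // sentinel: always last
  };

  MasterImageCompressor toCompressor(int stored) {
    if (stored <= MASTER_IMAGE_COMPRESSOR_NONE || stored >= MASTER_IMAGE_COMPRESSOR_END) {
      return MASTER_IMAGE_COMPRESSOR_JPEG; // fall back to the default for legacy files
    }
    return static_cast<MasterImageCompressor>(stored);
  }

New enumerators added between the sentinels are accepted automatically without touching the validation.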

static const MasterImageColorFormat DEFAULT_MASTER_IMAGE_COLOR_FORMAT = MASTER_IMAGE_COLOR_FORMAT_LINEAR;
static const MasterImageCompressor DEFAULT_MASTER_IMAGE_COMPRESSOR = MASTER_IMAGE_COMPRESSOR_JPEG;

ClusterSessionParams(SessionMode session_mode, std::string master_address, int master_port, int num_servers,
int master_compression_quality, MasterDenoiser master_denoiser, bool save_denoise_io, bool save_streamed_image,
int save_every_n_images, bool save_cameras, const std::string & output_folder_path,
MasterImageColorFormat master_image_color_format) :
MasterImageColorFormat master_image_color_format, MasterImageCompressor master_image_compressor) :
session_mode(session_mode),
master_address(master_address),
master_port(master_port),
@ -74,6 +86,7 @@ public:
master_compression_quality(master_compression_quality),
master_denoiser(master_denoiser),
master_image_color_format(master_image_color_format),
master_image_compressor(master_image_compressor),
save_denoise_io(save_denoise_io),
save_streamed_image(save_streamed_image),
save_every_n_images(save_every_n_images),
@ -89,6 +102,7 @@ public:
master_compression_quality = 85;
master_denoiser = MASTER_DENOISER_NONE;
master_image_color_format = MASTER_IMAGE_COLOR_FORMAT_LINEAR;
master_image_compressor = MASTER_IMAGE_COMPRESSOR_JPEG;
}

bool modified(const ClusterSessionParams &params) const
@ -103,6 +117,7 @@ public:
save_every_n_images == params.save_every_n_images &&
save_cameras == params.save_cameras &&
master_image_color_format == params.master_image_color_format &&
master_image_compressor == params.master_image_compressor &&
output_folder_path == params.output_folder_path);
// modified method should not compare modify_object_params.
// When modified method returns true a session is restarted.
@ -116,6 +131,7 @@ public:
int master_compression_quality;
MasterDenoiser master_denoiser;
MasterImageColorFormat master_image_color_format;
MasterImageCompressor master_image_compressor;
bool save_denoise_io = false;
bool save_streamed_image = false;
// When save_every_n_images is 10, we only save 0th, 10th, 20th, 30th images

@ -0,0 +1,262 @@

#include <chrono>
#include <cmath>
#include <stdexcept>
#include <string>

#include "nv_decoder.h"
#include "../cuda_context_provider.h"
#include "../utils/cuda_utils.h"
#include "../utils/image.h"

namespace cgr_libcluster {

static const char * getVideoCodecString(cudaVideoCodec video_codec) {
static struct {
cudaVideoCodec codec;
const char *name;
} CodecToName [] = {
{ cudaVideoCodec_MPEG1, "MPEG-1" },
{ cudaVideoCodec_MPEG2, "MPEG-2" },
{ cudaVideoCodec_MPEG4, "MPEG-4 (ASP)" },
{ cudaVideoCodec_VC1, "VC-1/WMV" },
{ cudaVideoCodec_H264, "AVC/H.264" },
{ cudaVideoCodec_JPEG, "M-JPEG" },
{ cudaVideoCodec_H264_SVC, "H.264/SVC" },
{ cudaVideoCodec_H264_MVC, "H.264/MVC" },
{ cudaVideoCodec_HEVC, "H.265/HEVC" },
{ cudaVideoCodec_VP8, "VP8" },
{ cudaVideoCodec_VP9, "VP9" },
{ cudaVideoCodec_AV1, "AV1" },
{ cudaVideoCodec_NumCodecs, "Invalid" },
{ cudaVideoCodec_YUV420, "YUV 4:2:0" },
{ cudaVideoCodec_YV12, "YV12 4:2:0" },
{ cudaVideoCodec_NV12, "NV12 4:2:0" },
{ cudaVideoCodec_YUYV, "YUYV 4:2:2" },
{ cudaVideoCodec_UYVY, "UYVY 4:2:2" },
};
if (video_codec >= 0 && video_codec <= cudaVideoCodec_NumCodecs) {
return CodecToName[video_codec].name;
}
for (int i = cudaVideoCodec_NumCodecs + 1; i < (int)(sizeof(CodecToName) / sizeof(CodecToName[0])); i++) {
if (video_codec == CodecToName[i].codec) {
return CodecToName[i].name;
}
}
return "Unknown";
}

static const char * getVideoChromaFormatString(cudaVideoChromaFormat chroma_format) {
static struct {
cudaVideoChromaFormat chroma_format;
const char *name;
} ChromaFormatToName[] = {
{ cudaVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)" },
{ cudaVideoChromaFormat_420, "YUV 420" },
{ cudaVideoChromaFormat_422, "YUV 422" },
{ cudaVideoChromaFormat_444, "YUV 444" },
};
if (chroma_format >= 0 && chroma_format < sizeof(ChromaFormatToName) / sizeof(ChromaFormatToName[0])) {
return ChromaFormatToName[chroma_format].name;
}
return "Unknown";
}

static float getChromaHeightFactor(cudaVideoSurfaceFormat surface_format) {
float factor = 0.5;
switch (surface_format) {
case cudaVideoSurfaceFormat_NV12:
case cudaVideoSurfaceFormat_P016:
factor = 0.5;
break;
case cudaVideoSurfaceFormat_YUV444:
case cudaVideoSurfaceFormat_YUV444_16Bit:
factor = 1.0;
break;
}
return factor;
}

static int getChromaPlaneCount(cudaVideoSurfaceFormat surface_format) {
int numPlane = 1;
switch (surface_format) {
case cudaVideoSurfaceFormat_NV12:
case cudaVideoSurfaceFormat_P016:
numPlane = 1;
break;
case cudaVideoSurfaceFormat_YUV444:
case cudaVideoSurfaceFormat_YUV444_16Bit:
numPlane = 2;
break;
}
return numPlane;
}
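
The two helpers above drive the host-buffer sizing in HandleVideoSequence below: size = width * (luma_height + chroma_height * num_chroma_planes) * bytes_per_pixel. A worked example for an 8-bit 1920x1080 NV12 frame:

  #include <cstdio>

  int main() {
    const int width = 1920, luma_height = 1080;
    const int chroma_height = luma_height / 2; // chroma height factor 0.5 for NV12
    const int num_chroma_planes = 1;           // interleaved UV counts as one plane
    const int bytes_per_pixel = 1;             // 8-bit content
    const int size = width * (luma_height + chroma_height * num_chroma_planes) * bytes_per_pixel;
    std::printf("NV12 frame buffer: %d bytes (1.5 bytes per pixel)\n", size); // 3110400
    return 0;
  }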

NvDecoder::NvDecoder(CUcontext cuda_context):
m_cuda_context(cuda_context) {
LOG(INFO) << "NvDecoder constructor. Creating video parser";
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = cudaVideoCodec_H264;
videoParserParameters.ulMaxNumDecodeSurfaces = 1;
videoParserParameters.ulClockRate = 1000;
constexpr int low_latency_display_delay = 0;
videoParserParameters.ulMaxDisplayDelay = low_latency_display_delay;
videoParserParameters.pUserData = this;
videoParserParameters.pfnSequenceCallback = HandleVideoSequenceCallback;
videoParserParameters.pfnDecodePicture = HandlePictureDecodeCallback;
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayCallback;
CUDA_API_CALL(cuvidCreateVideoParser(&m_parser, &videoParserParameters), THROW_IF_ERROR);
LOG(INFO) << "Created video parser";
}

NvDecoder::~NvDecoder() {
LOG(INFO) << "NvDecoder destructor";
if (m_parser) {
LOG(INFO) << "Destroying video parser";
CUDA_API_CALL(cuvidDestroyVideoParser(m_parser), DO_NOT_THROW);
}
if (m_decoder) {
LOG(INFO) << "Destroying video decoder";
CUDA_API_CALL(cuvidDestroyDecoder(m_decoder), DO_NOT_THROW);
}
LOG(INFO) << "NvDecoder released resources and is destroyed";
}

// Return values:
// 0 : fail
// 1 : succeeded, but driver should not override CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces
// >1: succeeded, and driver should override CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces with this return value
int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *video_format) {
LOG(INFO) << "HandleVideoSequence callback. Creating nvdecoder";
LOG(INFO) << "Video Input Information:";
LOG(INFO) << "\tCodec : " << getVideoCodecString(video_format->codec);
LOG(INFO) << "\tSequence : " << (video_format->progressive_sequence ? "Progressive" : "Interlaced");
LOG(INFO) << "\tCoded size : [" << video_format->coded_width << ", " << video_format->coded_height << "]";
LOG(INFO) << "\tDisplay area : [" << video_format->display_area.left << ", " << video_format->display_area.top << ", "
<< video_format->display_area.right << ", " << video_format->display_area.bottom << "]";
LOG(INFO) << "\tChroma : " << getVideoChromaFormatString(video_format->chroma_format);
LOG(INFO) << "\tBit depth : " << video_format->bit_depth_luma_minus8 + 8;

CUVIDDECODECREATEINFO video_decode_create_info = { 0 };
video_decode_create_info.CodecType = video_format->codec;
video_decode_create_info.ChromaFormat = video_format->chroma_format;
video_decode_create_info.OutputFormat = cudaVideoSurfaceFormat_NV12;
video_decode_create_info.bitDepthMinus8 = video_format->bit_depth_luma_minus8;
video_decode_create_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
video_decode_create_info.ulNumOutputSurfaces = 2;
video_decode_create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID;
// This is how nvidia recommends calculating ulNumDecodeSurfaces:
// https://developer.nvidia.com/blog/optimizing-video-memory-usage-with-the-nvdecode-api-and-nvidia-video-codec-sdk/
video_decode_create_info.ulNumDecodeSurfaces = video_format->min_num_decode_surfaces + 3;
video_decode_create_info.ulWidth = video_format->coded_width;
video_decode_create_info.ulHeight = video_format->coded_height;
video_decode_create_info.ulMaxWidth = video_decode_create_info.ulWidth;
video_decode_create_info.ulMaxHeight = video_decode_create_info.ulHeight;
video_decode_create_info.ulTargetWidth = video_decode_create_info.ulWidth;
video_decode_create_info.ulTargetHeight = video_decode_create_info.ulHeight;

m_image_width_in_pixels = video_format->display_area.right - video_format->display_area.left;
// NV12/P016 output format width is 2 byte aligned because of U and V interleave
if (m_output_format == cudaVideoSurfaceFormat_NV12 ||
m_output_format == cudaVideoSurfaceFormat_P016) {
m_image_width_in_pixels = (m_image_width_in_pixels + 1) & ~1;
}
m_luma_height = video_format->display_area.bottom - video_format->display_area.top;
m_bytes_per_pixel = video_decode_create_info.bitDepthMinus8 > 0 ? 2 : 1;
m_chroma_height = (int)(std::ceil(m_luma_height * getChromaHeightFactor(video_decode_create_info.OutputFormat)));
m_num_chroma_planes = getChromaPlaneCount(video_decode_create_info.OutputFormat);
m_surface_height = video_decode_create_info.ulTargetHeight;
const int size_of_decoded_image_yuv_in_bytes = m_image_width_in_pixels *
(m_luma_height + (m_chroma_height * m_num_chroma_planes)) * m_bytes_per_pixel;
m_decoded_image_yuv.resize(size_of_decoded_image_yuv_in_bytes);

CUDAContextScope cuda_context_scope(m_cuda_context);
CUDA_API_CALL(cuvidCreateDecoder(&m_decoder, &video_decode_create_info), THROW_IF_ERROR);
return video_decode_create_info.ulNumDecodeSurfaces;
}

void NvDecoder::decode(const std::vector<uint8_t> & encoded_image_in, const int frame_id,
std::vector<cgr_libcluster::uchar3> * decoded_image_out_ptr) {
if(!decoded_image_out_ptr) {
std::string message = "Pointer to the decoded image buffer is null. There is nowhere to put the output image.";
LOG(ERROR) << message;
throw std::invalid_argument(message);
}
m_decoded_image_rgb_out_ptr = decoded_image_out_ptr;
CUVIDSOURCEDATAPACKET packet = { 0 };
packet.payload = encoded_image_in.data();
packet.payload_size = encoded_image_in.size();
packet.flags = CUVID_PKT_ENDOFPICTURE | CUVID_PKT_TIMESTAMP;
packet.timestamp = 0;
if (encoded_image_in.size() == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
CUDA_API_CALL(cuvidParseVideoData(m_parser, &packet), THROW_IF_ERROR);
}

// 0: fail, >=1: succeeded
int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pic_params) {
CUDAContextScope cuda_context_scope(m_cuda_context);
CUDA_API_CALL(cuvidDecodePicture(m_decoder, pic_params), THROW_IF_ERROR);
return 1;
}

// 0: fail; >=1: succeeded
int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *parser_disp_info) {
CUVIDPROCPARAMS video_processing_parameters = {};
video_processing_parameters.progressive_frame = parser_disp_info->progressive_frame;
video_processing_parameters.second_field = parser_disp_info->repeat_first_field + 1;
video_processing_parameters.top_field_first = parser_disp_info->top_field_first;
video_processing_parameters.unpaired_field = parser_disp_info->repeat_first_field < 0;
video_processing_parameters.output_stream = m_cuvid_stream;

CUdeviceptr src_frame_device_ptr = 0;
unsigned int src_pitch = 0;
CUDAContextScope cuda_context_scope(m_cuda_context);
CUDA_API_CALL(cuvidMapVideoFrame(m_decoder, parser_disp_info->picture_index, &src_frame_device_ptr,
&src_pitch, &video_processing_parameters), THROW_IF_ERROR);

CUVIDGETDECODESTATUS decode_status;
memset(&decode_status, 0, sizeof(decode_status));
CUresult result = cuvidGetDecodeStatus(m_decoder, parser_disp_info->picture_index, &decode_status);
if (result == CUDA_SUCCESS &&
(decode_status.decodeStatus == cuvidDecodeStatus_Error ||
decode_status.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
LOG(INFO) << "Image decoding failed with status: " << decode_status.decodeStatus;
}
uint8_t *decoded_image_yuv_ptr = m_decoded_image_yuv.data();
// Copy luma plane
CUDA_MEMCPY2D mem_cpy_2d = { 0 };
mem_cpy_2d.srcMemoryType = CU_MEMORYTYPE_DEVICE;
mem_cpy_2d.srcDevice = src_frame_device_ptr;
mem_cpy_2d.srcPitch = src_pitch;
mem_cpy_2d.dstMemoryType = CU_MEMORYTYPE_HOST;
mem_cpy_2d.dstDevice = (CUdeviceptr)(mem_cpy_2d.dstHost = decoded_image_yuv_ptr);
mem_cpy_2d.dstPitch = m_device_frame_pitch ? m_device_frame_pitch : m_image_width_in_pixels * m_bytes_per_pixel;
mem_cpy_2d.WidthInBytes = m_image_width_in_pixels * m_bytes_per_pixel;
mem_cpy_2d.Height = m_luma_height;
CUDA_API_CALL(cuMemcpy2DAsync(&mem_cpy_2d, m_cuvid_stream), THROW_IF_ERROR);
// Copy chroma plane
// NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning height
mem_cpy_2d.srcDevice = (CUdeviceptr)((uint8_t *)src_frame_device_ptr + mem_cpy_2d.srcPitch * ((m_surface_height + 1) & ~1));
mem_cpy_2d.dstDevice = (CUdeviceptr)(mem_cpy_2d.dstHost = decoded_image_yuv_ptr + mem_cpy_2d.dstPitch * m_luma_height);
mem_cpy_2d.Height = m_chroma_height;
CUDA_API_CALL(cuMemcpy2DAsync(&mem_cpy_2d, m_cuvid_stream), THROW_IF_ERROR);

if (m_num_chroma_planes == 2) {
mem_cpy_2d.srcDevice = (CUdeviceptr)((uint8_t *)src_frame_device_ptr + mem_cpy_2d.srcPitch * ((m_surface_height + 1) & ~1) * 2);
mem_cpy_2d.dstDevice = (CUdeviceptr)(mem_cpy_2d.dstHost = decoded_image_yuv_ptr + mem_cpy_2d.dstPitch * m_luma_height * 2);
mem_cpy_2d.Height = m_chroma_height;
CUDA_API_CALL(cuMemcpy2DAsync(&mem_cpy_2d, m_cuvid_stream), THROW_IF_ERROR);
}
CUDA_API_CALL(cuStreamSynchronize(m_cuvid_stream), THROW_IF_ERROR);
CUDA_API_CALL(cuvidUnmapVideoFrame(m_decoder, src_frame_device_ptr), THROW_IF_ERROR);
const int num_pixels = m_image_width_in_pixels * m_luma_height;
if(m_decoded_image_rgb_out_ptr->size() != num_pixels) {
m_decoded_image_rgb_out_ptr->resize(num_pixels);
}
yuv2Rgb(decoded_image_yuv_ptr, m_image_width_in_pixels, m_luma_height, m_decoded_image_rgb_out_ptr);
return 1;
}
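
The cuMemcpy2DAsync calls above exist because decoded device frames are pitched: each row is padded out to src_pitch bytes, wider than the visible row, so rows must be copied one by one into the tightly packed host buffer. A CPU-only model of what such a 2D copy does:

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Copy `height` rows of `width_in_bytes` from a pitched source to a pitched destination.
  void copyPitched(const uint8_t *src, size_t src_pitch,
                   uint8_t *dst, size_t dst_pitch,
                   size_t width_in_bytes, size_t height) {
    for (size_t row = 0; row < height; ++row) {
      std::memcpy(dst + row * dst_pitch, src + row * src_pitch, width_in_bytes);
    }
  }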

}

@ -0,0 +1,81 @@
#ifndef __NV_DECODER_H__
#define __NV_DECODER_H__

#ifdef WITH_CUDA_DYNLOAD
#include <cuew.h>
// Do not use CUDA SDK headers when using CUEW
// The macro below is used by the Optix SDK and is necessary to avoid a DSO loading collision
// See device_optix.cpp for an example.
#define OPTIX_DONT_INCLUDE_CUDA
#else
#include <cuda.h>
#endif

#include "nvcuvid.h"

#include "../utils/logging.h"
#include "../utils/vector_types.h" // for uchar3

namespace cgr_libcluster {

class NvDecoder {
public:

NvDecoder(CUcontext cuda_context);
~NvDecoder();

// Decodes the input image, converts it into RGB format and puts it into decoded_image_out_ptr.
// The decoder resizes the output vector so it can accommodate the decoded image if needed.
void decode(const std::vector<uint8_t> & encoded_image_in, const int frame_id,
std::vector<cgr_libcluster::uchar3> * decoded_image_out_ptr);

private:
const CUcontext m_cuda_context;

CUvideoparser m_parser = nullptr;
CUvideodecoder m_decoder = nullptr;
CUstream m_cuvid_stream = 0;

unsigned int m_image_width_in_pixels = 0;
unsigned int m_luma_height = 0;
unsigned int m_chroma_height = 0;
unsigned int m_num_chroma_planes = 0;
int m_bytes_per_pixel = 1;
size_t m_device_frame_pitch = 0;
int m_surface_height = 0;
cudaVideoSurfaceFormat m_output_format = cudaVideoSurfaceFormat_NV12;
std::vector<uint8_t> m_decoded_image_yuv;
std::vector<cgr_libcluster::uchar3> * m_decoded_image_rgb_out_ptr = nullptr;

// Callback registered with the parser; invoked when decoding of a sequence starts
static int CUDAAPI HandleVideoSequenceCallback(void *pUserData, CUVIDEOFORMAT *pVideoFormat) {
return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
}

// Callback registered with the parser; invoked when a picture is ready to be decoded
static int CUDAAPI HandlePictureDecodeCallback(void *pUserData, CUVIDPICPARAMS *pPicParams) {
return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams);
}

// Callback registered with the parser; invoked when a decoded frame is available for display
static int CUDAAPI HandlePictureDisplayCallback(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) {
return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo);
}

// This function gets called when a sequence is ready to be decoded. The function also gets called
// when there is a format change. It initializes the video decoder.
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);

// This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from
// this function to decode the picture.
int HandlePictureDecode(CUVIDPICPARAMS *pPicParams);

// This function gets called after a picture is decoded and available for display. Frames are fetched
// and stored in an internal buffer.
int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);

};
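
The three static wrappers above implement the usual trampoline for C callback APIs: the library stores an opaque pUserData pointer, and the static function casts it back to the object and forwards to a member function. The pattern in isolation (toy C-style API, illustrative names):

  #include <cstdio>

  typedef int (*frame_callback_t)(void *user_data, int frame_id);

  // Stand-in for a C API that stores a callback plus an opaque user pointer.
  void fake_parser_run(frame_callback_t cb, void *user_data) {
    cb(user_data, 42); // the library invokes the callback later
  }

  class Decoder {
  public:
    void run() { fake_parser_run(&Decoder::trampoline, this); }
  private:
    static int trampoline(void *user_data, int frame_id) {
      return static_cast<Decoder *>(user_data)->handleFrame(frame_id);
    }
    int handleFrame(int frame_id) {
      std::printf("frame %d\n", frame_id);
      return 1;
    }
  };

  int main() {
    Decoder d;
    d.run();
    return 0;
  }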

}

#endif

@ -6,6 +6,7 @@
#include <glog/logging.h>

#include "nv_encoder.h"
#include "../cuda_context_provider.h"

namespace cgr_libcluster {

@ -24,22 +25,56 @@ NvEncoder::NvEncoder(NV_ENC_BUFFER_FORMAT buffer_format,
}

NvEncoder::~NvEncoder() {
VLOG(3) << "NvEncoder destructor";
if(m_encoder_session_handle.get()) {
nvencode_api_function_list.nvEncDestroyBitstreamBuffer(m_encoder_session_handle.get(), m_bitstream_output_buffer.get());
nvencode_api_function_list.nvEncUnregisterResource(m_encoder_session_handle.get(), m_input_buffer_registration.get());
nvencode_api_function_list.nvEncDestroyEncoder(m_encoder_session_handle.get());
NV_ENC_PIC_PARAMS end_of_input_stream_pic_params = {NV_ENC_PIC_PARAMS_VER};
end_of_input_stream_pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
NVENCSTATUS nv_error_code = m_nvencode_api_function_list.nvEncEncodePicture(
m_encoder_session_handle.get(), &end_of_input_stream_pic_params);
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. nvEncEncodePicture call with end of stream failed with error: "
+ std::to_string(nv_error_code);
VLOG(1) << message;
} else {
VLOG(3) << "Communicated end of stream to the NvEncoder";
}

nv_error_code = m_nvencode_api_function_list.nvEncUnregisterResource(m_encoder_session_handle.get(),
m_input_buffer_registration.get());
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. NvEncoder input buffer un-registration failed with error: "
+ std::to_string(nv_error_code);
VLOG(1) << message;
} else {
VLOG(3) << "Unregistered NvEncoder input buffer successfully";
}

nv_error_code = m_nvencode_api_function_list.nvEncDestroyBitstreamBuffer(m_encoder_session_handle.get(),
m_bitstream_output_buffer.get());
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. nvEncDestroyBitstreamBuffer failed with error: "
+ std::to_string(nv_error_code);
VLOG(1) << message;
} else {
VLOG(3) << "Destroyed NvEncoder bitstream buffer successfully";
}
m_nvencode_api_function_list.nvEncDestroyEncoder(m_encoder_session_handle.get());
VLOG(3) << "Destroyed NvEncoder session successfully";
}

if(m_cuda_context) {
cuCtxPushCurrent(m_cuda_context);
CUDAContextScope cuda_context_scope(m_cuda_context);
cuMemFree(m_cuda_device_ptr.get());
cuCtxPopCurrent(NULL);
VLOG(3) << "Released cuda input buffer successfully";
}

VLOG(3) << "NvEncoder released resources and is destroyed";
}

void NvEncoder::initNvEncoder() {
// most of the values that are currently set here are taken from the NVIDIA samples NvEncoder.cpp
// we can adjust them later on if needed
VLOG(3) << "Initializing NVENCODER for width: " << m_width << " height: " << m_height;
uint32_t driver_version = 0;
uint32_t header_version = (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
NVENCSTATUS nv_error_code_v = NvEncodeAPIGetMaxSupportedVersion(&driver_version);
@ -56,15 +91,15 @@ void NvEncoder::initNvEncoder() {
VLOG(1) << message;
throw std::runtime_error(message);
}
nvencode_api_function_list = { NV_ENCODE_API_FUNCTION_LIST_VER };
NVENCSTATUS nv_error_code = NvEncodeAPICreateInstance(&nvencode_api_function_list);
m_nvencode_api_function_list = { NV_ENCODE_API_FUNCTION_LIST_VER };
NVENCSTATUS nv_error_code = NvEncodeAPICreateInstance(&m_nvencode_api_function_list);
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. NvEncodeAPICreateInstance failed with error: "
+ std::to_string(nv_error_code);
VLOG(1) << message;
throw std::runtime_error(message);
}
if(!nvencode_api_function_list.nvEncOpenEncodeSessionEx) {
if(!m_nvencode_api_function_list.nvEncOpenEncodeSessionEx) {
std::string message = "FATAL. nvEncOpenEncodeSessionEx API not found";
VLOG(1) << message;
throw std::runtime_error(message);
@ -74,13 +109,13 @@ void NvEncoder::initNvEncoder() {
encode_session_ex_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
encode_session_ex_params.apiVersion = NVENCAPI_VERSION;
void * encoder_session_handle;
nv_error_code = nvencode_api_function_list.nvEncOpenEncodeSessionEx(&encode_session_ex_params, &encoder_session_handle);
nv_error_code = m_nvencode_api_function_list.nvEncOpenEncodeSessionEx(&encode_session_ex_params, &encoder_session_handle);
m_encoder_session_handle.init(encoder_session_handle, __FILE__, __FUNCTION__, __LINE__);
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. nvEncOpenEncodeSessionEx failed with error: "
+ std::to_string(nv_error_code);
VLOG(1) << message;
m_nvencode_api_function_list.nvEncDestroyEncoder(m_encoder_session_handle.get());
throw std::runtime_error(message);
}
if(!m_encoder_session_handle.get()) {
@ -92,66 +127,66 @@ void NvEncoder::initNvEncoder() {
// extern/nvidia/Video_Codec_SDK_11.1.5/Interface/nvEncodeAPI.h
// github link:
// https://ghe.oculus-rep.com/FRL-Graphics-Research/distributed_blender_cycles/blob/cluster_blender_32_main_nvenc/extern/nvidia/Video_Codec_SDK_11.1.5/Interface/nvEncodeAPI.h#L1667
NV_ENC_INITIALIZE_PARAMS encoder_init_params = { 0 };
encoder_init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
GUID preset = NV_ENC_PRESET_P1_GUID;
encoder_init_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
encoder_init_params.presetGUID = preset;
encoder_init_params.encodeWidth = m_width;
encoder_init_params.encodeHeight = m_height;
encoder_init_params.darWidth = m_width;
encoder_init_params.darHeight = m_height;
encoder_init_params.frameRateNum = 30;
encoder_init_params.frameRateDen = 1;
encoder_init_params.enablePTD = 1;
encoder_init_params.reportSliceOffsets = 0;
encoder_init_params.enableSubFrameWrite = 0;
encoder_init_params.maxEncodeWidth = m_width;
encoder_init_params.maxEncodeHeight = m_height;
encoder_init_params.enableMEOnlyMode = false;
encoder_init_params.enableOutputInVidmem = false;
encoder_init_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY;
//encoder_init_params.tuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
m_encoder_init_params = { 0 };
m_encoder_init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
GUID preset_guid = NV_ENC_PRESET_P1_GUID;
m_encoder_init_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
m_encoder_init_params.presetGUID = preset_guid;
m_encoder_init_params.encodeWidth = m_width;
m_encoder_init_params.encodeHeight = m_height;
m_encoder_init_params.darWidth = m_width;
m_encoder_init_params.darHeight = m_height;
m_encoder_init_params.frameRateNum = 30;
m_encoder_init_params.frameRateDen = 1;
m_encoder_init_params.enablePTD = 1;
m_encoder_init_params.reportSliceOffsets = 0;
m_encoder_init_params.enableSubFrameWrite = 0;
m_encoder_init_params.maxEncodeWidth = m_width;
m_encoder_init_params.maxEncodeHeight = m_height;
m_encoder_init_params.enableMEOnlyMode = false;
m_encoder_init_params.enableOutputInVidmem = false;
m_encoder_init_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY;
//m_encoder_init_params.tuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;

NV_ENC_CONFIG encode_config = { 0 };
encoder_init_params.encodeConfig = &encode_config;
NV_ENC_PRESET_CONFIG presetConfig_2 = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } };
nvencode_api_function_list.nvEncGetEncodePresetConfigEx(m_encoder_session_handle.get(),
NV_ENC_CODEC_H264_GUID, preset, encoder_init_params.tuningInfo, &presetConfig_2);
memcpy(encoder_init_params.encodeConfig, &presetConfig_2.presetCfg, sizeof(NV_ENC_CONFIG));
m_encode_config = { 0 };
m_encoder_init_params.encodeConfig = &m_encode_config;
NV_ENC_PRESET_CONFIG preset_config_ex = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } };
m_nvencode_api_function_list.nvEncGetEncodePresetConfigEx(m_encoder_session_handle.get(),
m_encoder_init_params.encodeGUID, preset_guid, m_encoder_init_params.tuningInfo, &preset_config_ex);
memcpy(m_encoder_init_params.encodeConfig, &preset_config_ex.presetCfg, sizeof(NV_ENC_CONFIG));

encode_config.version = NV_ENC_CONFIG_VER;
encode_config.frameIntervalP = 1;
encode_config.gopLength = NVENC_INFINITE_GOPLENGTH;
encode_config.encodeCodecConfig.h264Config.idrPeriod = encode_config.gopLength;
encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
encode_config.rcParams.multiPass = NV_ENC_TWO_PASS_FULL_RESOLUTION;
m_encode_config.frameIntervalP = 1;
m_encode_config.gopLength = NVENC_INFINITE_GOPLENGTH;
m_encode_config.encodeCodecConfig.h264Config.idrPeriod = m_encode_config.gopLength;
m_encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
m_encode_config.rcParams.multiPass = NV_ENC_TWO_PASS_FULL_RESOLUTION;
// This produces images with acceptable quality. Tested on Tesla and Trudy scenes on flat screen.
// We may adjust the value of the bitrate based on results of further testing.
encode_config.rcParams.averageBitRate = 2800000;
m_encode_config.rcParams.averageBitRate = 2800000;
// Below is the bitrate calculation from the NVIDIA samples app. It's too low for us.
// Keep it here for now for reference. Remove once bitrate calculation is finalised.
// encode_config.rcParams.averageBitRate = (static_cast<unsigned int>(
// 5.0f * encoder_init_params.encodeWidth * encoder_init_params.encodeHeight) /
// m_encode_config.rcParams.averageBitRate = (static_cast<unsigned int>(
// 5.0f * m_encoder_init_params.encodeWidth * m_encoder_init_params.encodeHeight) /
// (width * height)) * 100000;
VLOG(3) << "Average bitrate: " << encode_config.rcParams.averageBitRate;
encode_config.rcParams.vbvBufferSize = (encode_config.rcParams.averageBitRate * encoder_init_params.frameRateDen /
encoder_init_params.frameRateNum) * 5;
encode_config.rcParams.maxBitRate = encode_config.rcParams.averageBitRate * 2;
encode_config.rcParams.vbvInitialDelay = encode_config.rcParams.vbvBufferSize;
VLOG(3) << "Average bitrate: " << m_encode_config.rcParams.averageBitRate;
m_encode_config.rcParams.vbvBufferSize = (m_encode_config.rcParams.averageBitRate * m_encoder_init_params.frameRateDen /
m_encoder_init_params.frameRateNum) * 5;
m_encode_config.rcParams.maxBitRate = m_encode_config.rcParams.averageBitRate * 2;
m_encode_config.rcParams.vbvInitialDelay = m_encode_config.rcParams.vbvBufferSize;

nv_error_code = nvencode_api_function_list.nvEncInitializeEncoder(m_encoder_session_handle.get(), &encoder_init_params);
nv_error_code = m_nvencode_api_function_list.nvEncInitializeEncoder(m_encoder_session_handle.get(), &m_encoder_init_params);
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. nvEncInitializeEncoder failed with error: "
+ std::to_string(nv_error_code);
VLOG(1) << message;
throw std::runtime_error(message);
}
VLOG(3) << "NvEncoder initialization completed";
}
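
For the rate-control numbers set above, the arithmetic works out as follows at 2.8 Mbps CBR and 30 fps: the VBV buffer ends up holding five average-sized frames.

  #include <cstdio>

  int main() {
    const unsigned avg_bitrate = 2800000;                   // bits per second, as above
    const unsigned frame_rate_num = 30, frame_rate_den = 1; // 30 fps
    const unsigned bits_per_frame = avg_bitrate * frame_rate_den / frame_rate_num; // 93333
    const unsigned vbv_buffer = bits_per_frame * 5;         // 466665 bits
    const unsigned max_bitrate = avg_bitrate * 2;           // 5600000 bits per second
    std::printf("per frame: %u, vbv: %u, max: %u\n", bits_per_frame, vbv_buffer, max_bitrate);
    return 0;
  }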

void NvEncoder::allocateOutputBuffer() {
NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer_struct = { NV_ENC_CREATE_BITSTREAM_BUFFER_VER };
NVENCSTATUS nv_error_code = nvencode_api_function_list.nvEncCreateBitstreamBuffer(m_encoder_session_handle.get(),
NVENCSTATUS nv_error_code = m_nvencode_api_function_list.nvEncCreateBitstreamBuffer(m_encoder_session_handle.get(),
&create_bitstream_buffer_struct);
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. nvEncCreateBitstreamBuffer failed with error: "
@ -165,14 +200,7 @@ void NvEncoder::allocateOutputBuffer() {
}

void NvEncoder::allocateInputBuffer() {
CUresult cu_result = cuCtxPushCurrent(m_cuda_context);
if(cu_result != CUDA_SUCCESS) {
const char *error_name = NULL;
cuGetErrorName(cu_result, &error_name);
std::string message = "FATAL. cuCtxPushCurrent failed with error: " + std::string(error_name);
VLOG(1) << message;
throw std::runtime_error(message);
}
CUDAContextScope cuda_context_scope(m_cuda_context);
const int chroma_height = getChromaHeight(m_buffer_format, m_height);
const int height_in_rows = m_height + chroma_height;
const int width_in_bytes = getWidthInBytes(m_buffer_format, m_width);
@ -180,7 +208,7 @@
CUdeviceptr cuda_device_ptr;
VLOG(3) << "Allocating input buffer with cuMemAllocPitch cuda_pitch: " << m_cuda_pitch.get() << " width_in_bytes: " <<
width_in_bytes << " height in rows: " << height_in_rows;
cu_result = cuMemAllocPitch((CUdeviceptr *)&cuda_device_ptr,
CUresult cu_result = cuMemAllocPitch((CUdeviceptr *)&cuda_device_ptr,
&cuda_pitch,
width_in_bytes,
height_in_rows,
@ -195,14 +223,6 @@
m_cuda_pitch.init(cuda_pitch, __FILE__, __FUNCTION__, __LINE__);
m_cuda_device_ptr.init(cuda_device_ptr, __FILE__, __FUNCTION__, __LINE__);
VLOG(3) << "Successfully allocated input buffer with cuMemAllocPitch";
cu_result = cuCtxPopCurrent(NULL);
if(cu_result != CUDA_SUCCESS) {
const char *error_name = NULL;
cuGetErrorName(cu_result, &error_name);
std::string message = "FATAL. cuCtxPopCurrent failed with error: " + std::string(error_name);
VLOG(1) << message;
throw std::runtime_error(message);
}
VLOG(3) << "Input CUDA buffer allocation completed";
registerInputResources(cuda_device_ptr);
}
@ -221,7 +241,7 @@ void NvEncoder::registerInputResources(CUdeviceptr cuda_device_ptr) {
register_resource_struct.pOutputFencePoint = nullptr;
VLOG(3) << "nvEncRegisterResource with width: " << register_resource_struct.width <<
" height: " << register_resource_struct.height << " pitch: " << register_resource_struct.pitch;
NVENCSTATUS nv_error_code = nvencode_api_function_list.nvEncRegisterResource(m_encoder_session_handle.get(),
NVENCSTATUS nv_error_code = m_nvencode_api_function_list.nvEncRegisterResource(m_encoder_session_handle.get(),
&register_resource_struct);
if(nv_error_code != NV_ENC_SUCCESS) {
std::string message = "FATAL. nvEncRegisterResource failed with error: "
@ -245,41 +265,56 @@ void NvEncoder::registerInputResources(CUdeviceptr cuda_device_ptr) {
VLOG(3) << "Successfully registered input buffer resource";
}

bool NvEncoder::getEncodedBuffer(NV_ENC_MAP_INPUT_RESOURCE &map_input_resource, AutoEnlargingBuffer<uint8_t> &encoded_buffer_out,
bool NvEncoder::getEncodedBuffer(NV_ENC_MAP_INPUT_RESOURCE &map_input_resource, std::vector<uint8_t> &encoded_buffer_out,
high_resolution_clock::time_point & encoding_done) const {
NV_ENC_LOCK_BITSTREAM lock_bitstream_data = { NV_ENC_LOCK_BITSTREAM_VER };
lock_bitstream_data.outputBitstream = m_bitstream_output_buffer.get();
lock_bitstream_data.doNotWait = false;
NVENCSTATUS nv_error_code = nvencode_api_function_list.nvEncLockBitstream(m_encoder_session_handle.get(), &lock_bitstream_data);
NVENCSTATUS nv_error_code = m_nvencode_api_function_list.nvEncLockBitstream(m_encoder_session_handle.get(), &lock_bitstream_data);
if(nv_error_code != NV_ENC_SUCCESS) {
VLOG(1) << "ERROR. nvEncLockBitstream failed with error: " << nv_error_code;
return false;
}
encoding_done = high_resolution_clock::now();
uint8_t * encoded_data_ptr = (uint8_t *)lock_bitstream_data.bitstreamBufferPtr;
encoded_buffer_out.insert(&encoded_data_ptr[0], lock_bitstream_data.bitstreamSizeInBytes);
nv_error_code = nvencode_api_function_list.nvEncUnlockBitstream(m_encoder_session_handle.get(), lock_bitstream_data.outputBitstream);
encoded_buffer_out.assign(encoded_data_ptr, encoded_data_ptr + lock_bitstream_data.bitstreamSizeInBytes);
nv_error_code = m_nvencode_api_function_list.nvEncUnlockBitstream(m_encoder_session_handle.get(), lock_bitstream_data.outputBitstream);
if(nv_error_code != NV_ENC_SUCCESS) {
VLOG(1) << "ERROR. nvEncUnlockBitstream failed with error: " << nv_error_code;
return false;
}
nv_error_code = nvencode_api_function_list.nvEncUnmapInputResource(m_encoder_session_handle.get(), map_input_resource.mappedResource);
nv_error_code = m_nvencode_api_function_list.nvEncUnmapInputResource(m_encoder_session_handle.get(), map_input_resource.mappedResource);
if(nv_error_code != NV_ENC_SUCCESS) {
VLOG(1) << "ERROR. nvEncUnmapInputResource failed with error: " << nv_error_code;
return false;
}
std::chrono::duration<float> copy_to_cpu_time_sec = high_resolution_clock::now() - encoding_done;
VLOG(3) << "Time to copy encoded image buffer to CPU memory: " << copy_to_cpu_time_sec.count() << " sec";
VLOG(3) << "Time to copy encoded image buffer to CPU memory: " << copy_to_cpu_time_sec.count() <<
" sec. Size: " << encoded_buffer_out.size();
return true;
}

bool NvEncoder::encode(const uint8_t* input_buffer_on_host, AutoEnlargingBuffer<uint8_t> &encoded_buffer_out) const {
// Do explicit template instantiation for encode() and
// copyInputBufferIntoGpuMappedMemory() methods (see below) for the following reasons:
// we only support buffers in the CPU/host memory as uint8_t* and buffers in the GPU/device memory as CUdeviceptr,
// so instantiate explicitly only for these types. This also allows us to keep the template
// methods' implementation in the .cpp file instead of the .h to keep the header clean.

// Explicit template instantiation of encode for input buffers in cpu memory as uint8_t*
template bool NvEncoder::encode(const uint8_t*, std::vector<uint8_t> &) const;

// Explicit template instantiation of encode for input buffers in gpu memory as CUdeviceptr
template bool NvEncoder::encode(const CUdeviceptr, std::vector<uint8_t> &) const;
|
||||
|
||||
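As an aside on the instantiation pattern above, here is a minimal, self-contained sketch (the Codec/process names are hypothetical, not part of this change) of why the template definition can live in the .cpp:

// codec.h -- clients only ever see the declaration
template <typename T>
struct Codec {
  bool process(T input) const;  // defined in codec.cpp, not in the header
};

// codec.cpp -- definition plus the only two instantiations that get linked
template <typename T>
bool Codec<T>::process(T input) const { return input != T{}; }
template struct Codec<const unsigned char*>;   // host-memory flavour
template struct Codec<unsigned long long>;     // stand-in for CUdeviceptr

Any other T fails at link time, which is exactly the restriction described above: only uint8_t* (host) and CUdeviceptr (device) buffers are supported.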
template<typename T>
bool NvEncoder::encode(const T input_buffer, std::vector<uint8_t> &encoded_buffer_out) const {
CUDAContextScope cuda_context_scope(m_cuda_context);
if(m_encoder_session_handle.get() == nullptr) {
VLOG(1) << "ERROR. encoder_session_handle is null. Encoder is not initialised or initialization failed.";
return false;
}
NV_ENC_MAP_INPUT_RESOURCE map_input_resource = { NV_ENC_MAP_INPUT_RESOURCE_VER };
if(!copyInputBufferToGpu(input_buffer_on_host, map_input_resource)) {
if(!copyInputBufferIntoGpuMappedMemory(input_buffer, map_input_resource)) {
return false;
}
auto start_encoding = high_resolution_clock::now();
@ -305,24 +340,31 @@ bool NvEncoder::encode(const uint8_t* input_buffer_on_host, AutoEnlargingBuffer<
}

const NVENCSTATUS NvEncoder::startEncoding(NV_ENC_MAP_INPUT_RESOURCE & map_input_resource) const {
NV_ENC_PIC_PARAMS picParams = {};
picParams.version = NV_ENC_PIC_PARAMS_VER;
picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
picParams.inputBuffer = map_input_resource.mappedResource;
picParams.bufferFmt = m_buffer_format;
picParams.inputWidth = m_width;
picParams.inputHeight = m_height;
picParams.outputBitstream = m_bitstream_output_buffer.get();
picParams.completionEvent = nullptr;
return nvencode_api_function_list.nvEncEncodePicture(m_encoder_session_handle.get(), &picParams);
NV_ENC_PIC_PARAMS pic_params = {};
pic_params.version = NV_ENC_PIC_PARAMS_VER;
pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
pic_params.inputBuffer = map_input_resource.mappedResource;
pic_params.bufferFmt = m_buffer_format;
pic_params.inputWidth = m_width;
pic_params.inputHeight = m_height;
pic_params.outputBitstream = m_bitstream_output_buffer.get();
pic_params.completionEvent = nullptr;
return m_nvencode_api_function_list.nvEncEncodePicture(m_encoder_session_handle.get(), &pic_params);
}

bool NvEncoder::copyInputBufferToGpu(const uint8_t* input_buffer_on_host, NV_ENC_MAP_INPUT_RESOURCE &map_input_resource) const {
// Explicit template instantiation of copyInputBufferIntoGpuMappedMemory for input buffers in CPU memory as uint8_t*
template bool NvEncoder::copyInputBufferIntoGpuMappedMemory(const uint8_t*, NV_ENC_MAP_INPUT_RESOURCE &) const;

// Explicit template instantiation of copyInputBufferIntoGpuMappedMemory for input buffers in GPU memory as CUdeviceptr
template bool NvEncoder::copyInputBufferIntoGpuMappedMemory(const CUdeviceptr, NV_ENC_MAP_INPUT_RESOURCE &) const;

template<typename T>
bool NvEncoder::copyInputBufferIntoGpuMappedMemory(T input_buffer,
NV_ENC_MAP_INPUT_RESOURCE &map_input_resource) const {
auto start_copy_to_gpu = high_resolution_clock::now();
const int width_in_bytes = getWidthInBytes(m_buffer_format, m_width);
CUDA_MEMCPY2D memcpy2d = { 0 };
memcpy2d.srcMemoryType = CU_MEMORYTYPE_HOST;
memcpy2d.srcHost = input_buffer_on_host;
setSourceBuffer(input_buffer, memcpy2d);
memcpy2d.srcPitch = getWidthInBytes(m_buffer_format, m_width);
memcpy2d.dstMemoryType = CU_MEMORYTYPE_DEVICE;
memcpy2d.dstDevice = m_cuda_device_ptr.get();
@ -331,7 +373,7 @@ bool NvEncoder::copyInputBufferToGpu(const uint8_t* input_buffer_on_host, NV_ENC
memcpy2d.Height = m_height;
cuMemcpy2D(&memcpy2d);
for (int i = 0; i < m_src_chroma_offsets.get().size(); ++i) {
memcpy2d.srcHost = (input_buffer_on_host + m_src_chroma_offsets.get()[i]);
setSourceBuffer(input_buffer + m_src_chroma_offsets.get()[i], memcpy2d);
memcpy2d.dstDevice = (CUdeviceptr)((uint8_t *)m_cuda_device_ptr.get() + m_dst_chroma_offsets.get()[i]);
memcpy2d.srcPitch = getChromaPitch(m_buffer_format, memcpy2d.srcPitch);
memcpy2d.dstPitch = getChromaPitch(m_buffer_format, memcpy2d.dstPitch);
@ -340,7 +382,7 @@ bool NvEncoder::copyInputBufferToGpu(const uint8_t* input_buffer_on_host, NV_ENC
cuMemcpy2D(&memcpy2d);
}
map_input_resource.registeredResource = m_input_buffer_registration.get();
NVENCSTATUS nv_error_code = nvencode_api_function_list.nvEncMapInputResource(m_encoder_session_handle.get(), &map_input_resource);
NVENCSTATUS nv_error_code = m_nvencode_api_function_list.nvEncMapInputResource(m_encoder_session_handle.get(), &map_input_resource);
if(nv_error_code != NV_ENC_SUCCESS) {
VLOG(3) << "ERROR. nvEncMapInputResource failed with error: " << std::to_string(nv_error_code);
return false;
@ -350,6 +392,16 @@ bool NvEncoder::copyInputBufferToGpu(const uint8_t* input_buffer_on_host, NV_ENC
return true;
}
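For orientation, a hedged sketch of the NV12 layout that the luma copy plus the chroma-offset loop above walk through (the helper is illustrative only; pitch/height mirror the members used above):

// NV12: a full-resolution Y plane followed by one interleaved UV plane at half height.
static size_t nv12FrameSizeBytes(size_t pitch, size_t height) {
  const size_t luma_bytes   = pitch * height;      // plane 0: Y
  const size_t chroma_bytes = pitch * height / 2;  // plane 1: interleaved UV
  return luma_bytes + chroma_bytes;                // the chroma offset equals luma_bytes
}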

void NvEncoder::setSourceBuffer(const uint8_t * input_buffer, CUDA_MEMCPY2D &memcpy2d) const {
memcpy2d.srcHost = input_buffer;
memcpy2d.srcMemoryType = CU_MEMORYTYPE_HOST;
}

void NvEncoder::setSourceBuffer(const CUdeviceptr input_buffer, CUDA_MEMCPY2D &memcpy2d) const {
memcpy2d.srcDevice = input_buffer;
memcpy2d.srcMemoryType = CU_MEMORYTYPE_DEVICE;
}

uint32_t NvEncoder::getWidthInBytes(const NV_ENC_BUFFER_FORMAT buffer_format, const uint32_t width) const {
switch (buffer_format) {
case NV_ENC_BUFFER_FORMAT_NV12:

@ -1,76 +1,36 @@
#ifndef __NV_ENCODER_H__
#define __NV_ENCODER_H__

#include <chrono>
#include <vector>
//#include <cuda.h>
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
# else
# include <cuda.h>
# include <cudaGL.h>
# endif

#ifdef WITH_CUDA_DYNLOAD
#include <cuew.h>
// Do not use CUDA SDK headers when using CUEW.
// The macro below is used by the Optix SDK and is necessary to avoid a DSO loading collision.
// See device_optix.cpp for an example.
#define OPTIX_DONT_INCLUDE_CUDA
#else
#include <cuda.h>
#endif

#include "final.h"
#include "nvEncodeAPI.h"
#include "../utils/logging.h"

namespace cgr_libcluster {

using namespace std::chrono;

// This class holds a preallocated buffer with the requested initial size.
// It enlarges the buffer automatically if it cannot accommodate incoming data.
// The buffer never shrinks; it only grows when needed, to reduce the number of memory re-allocations.
template <typename T>
class AutoEnlargingBuffer {
public:
AutoEnlargingBuffer(size_t init_size){
if(init_size > 0) {
resize(init_size);
}
}

~AutoEnlargingBuffer(){
delete [] data_buffer;
}

void insert(const T* input_data, const size_t input_data_size) {
if(input_data_size > m_capacity) {
VLOG(3) << "Current capacity of the AutoEnlargingBuffer: " << m_capacity <<
" Reallocating to support larger data size: " << input_data_size;
resize(input_data_size);
}
memcpy(data_buffer, input_data, input_data_size * sizeof(T));
m_data_size = input_data_size;
}

T* data() const {
return data_buffer;
}

size_t size() const {
return m_data_size;
}

private:
T* data_buffer = nullptr;
size_t m_data_size = 0;
size_t m_capacity = 0;

void resize(const size_t new_buffer_size) {
delete [] data_buffer;
data_buffer = new T[new_buffer_size];
m_capacity = new_buffer_size;
m_data_size = 0;
}
};
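A hedged usage sketch for the class above (sizes are arbitrary; note that this diff migrates the call sites to std::vector, so this documents the behaviour being replaced):

AutoEnlargingBuffer<uint8_t> encoded(4 * 1024 * 1024);  // preallocate 4 MiB once
// Per frame: insert() overwrites from the start and only reallocates
// when a frame is larger than anything seen so far:
// encoded.insert(bitstream_ptr, bitstream_size);
// send(encoded.data(), encoded.size());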

class NvEncoder {
public:
static const NV_ENC_BUFFER_FORMAT BUFFER_FORMAT_FOR_IMAGES_FROM_MASTER = NV_ENC_BUFFER_FORMAT_ABGR;

NvEncoder(NV_ENC_BUFFER_FORMAT buffer_format, CUcontext cuda_context, const int width, const int height);
~NvEncoder();
bool encode(const uint8_t* input_buffer_on_host, AutoEnlargingBuffer<uint8_t> &encoded_buffer_out) const;

// Encodes an image within the input_buffer
template<typename T>
bool encode(const T input_buffer, std::vector<uint8_t> &encoded_buffer_out) const;

private:
static constexpr int DEVICE_NUM = 0;
@ -80,7 +40,9 @@ private:
const int m_width;
const int m_height;
const NV_ENC_BUFFER_FORMAT m_buffer_format;
NV_ENCODE_API_FUNCTION_LIST nvencode_api_function_list;
NV_ENC_INITIALIZE_PARAMS m_encoder_init_params;
NV_ENC_CONFIG m_encode_config;
NV_ENCODE_API_FUNCTION_LIST m_nvencode_api_function_list;
Final<void *> m_encoder_session_handle;
Final<size_t> m_cuda_pitch;
Final<CUdeviceptr> m_cuda_device_ptr;
@ -111,11 +73,17 @@ private:
// Registers the preallocated input CUDA buffer for the NvEncoder with nvEncRegisterResource
void registerInputResources(CUdeviceptr cuda_device_ptr);

// Copies the input image buffer (in CPU memory) to the preallocated and registered encoder input buffer on the GPU (CUDA device)
bool copyInputBufferToGpu(const uint8_t* input_buffer_on_host, NV_ENC_MAP_INPUT_RESOURCE &map_input_resource) const;
// Copies the input image buffer (in CPU or GPU memory) to the preallocated and registered
// encoder input buffer on the GPU (CUDA device)
template<typename T>
bool copyInputBufferIntoGpuMappedMemory(T input_buffer, NV_ENC_MAP_INPUT_RESOURCE &map_input_resource) const;
void setSourceBuffer(const uint8_t * input_buffer, CUDA_MEMCPY2D &memcpy2d) const;
void setSourceBuffer(const CUdeviceptr input_buffer, CUDA_MEMCPY2D &memcpy2d) const;


// Copies the encoded image from the encoder to the preallocated output buffer on the CPU
bool getEncodedBuffer(NV_ENC_MAP_INPUT_RESOURCE &map_input_resource, AutoEnlargingBuffer<uint8_t> &encoded_buffer_out,
bool getEncodedBuffer(NV_ENC_MAP_INPUT_RESOURCE &map_input_resource,
std::vector<uint8_t> &encoded_buffer_out,
high_resolution_clock::time_point & encoding_done) const;

// Starts image encoding

@ -0,0 +1,124 @@

//#include "./utils/timer.h" // for scoped_timer
#include "../utils/logging.h"
#include "turbojpeg_compressor.h"

namespace cgr_libcluster {

TurbojpegCompressor::TurbojpegCompressor() {
m_jpeg_compressor = tjInitCompress();
m_jpeg_decompressor = tjInitDecompress();
}

TurbojpegCompressor::~TurbojpegCompressor() {
if (m_jpeg_compressor != nullptr) {
tjDestroy(m_jpeg_compressor);
}
if (m_jpeg_decompressor != nullptr) {
tjDestroy(m_jpeg_decompressor);
}
}

//#define TIME_JPEG
size_t TurbojpegCompressor::compress(void* src_buffer, int width, int height, int compression_quality,
unsigned char*& jpeg_image) {
// The source buffer is interpreted as unsigned char* with 3 channels (RGB)
const int subsampling = TJSAMP_444;
size_t jpeg_length = 0; // tjCompress2 will allocate the jpeg_image buffer
jpeg_image = nullptr;

#ifdef TIME_JPEG
struct timespec start_time, end_time;
clock_gettime(CLOCK_MONOTONIC, &start_time);
#endif
if (m_jpeg_compressor == nullptr) {
LOG(ERROR) << "JPEG compressor is not initialized";
return 0;
}
int jpeg_error = tjCompress2(m_jpeg_compressor,
(unsigned char*) src_buffer,
width,
0,
height,
TJPF_RGB,
&jpeg_image,
(unsigned long *)&jpeg_length,
subsampling,
compression_quality,
TJFLAG_FASTDCT);
if (jpeg_error < 0) {
const char *jpeg_error_str = tjGetErrorStr();
LOG(ERROR) << "JPEG compression error: " << jpeg_error_str;
return 0;
}

#ifdef TIME_JPEG
clock_gettime(CLOCK_MONOTONIC, &end_time);
// elapsed time in ms
double elapsed_time = (end_time.tv_sec - start_time.tv_sec) * 1e3 +
(end_time.tv_nsec - start_time.tv_nsec) / 1e6;
LOG(INFO) << "TIMING: JPEG compression: " << elapsed_time << "ms"
<< ", resolution " << width << "x" << height
<< ", sizes " << (width * height * 3) << " (" << jpeg_length << ")";
#endif
return jpeg_length;
}

bool TurbojpegCompressor::decompress(std::vector<uint8_t> & jpeg_image_buffer, int & width_out, int & height_out,
std::vector<cgr_libcluster::uchar3> & decompressed_image_out) {
#ifdef TIME_JPEG
struct timespec start_time, end_time;
clock_gettime(CLOCK_MONOTONIC, &start_time);
#endif
// Use TurboJPEG to decompress the buffer
int subsampling = 0;
if (m_jpeg_decompressor == nullptr) {
LOG(ERROR) << "JPEG decompressor is not initialized";
return false;
}
int width = 0, height = 0;
int jpeg_error = tjDecompressHeader2(m_jpeg_decompressor, jpeg_image_buffer.data(),
jpeg_image_buffer.size(), &width, &height, &subsampling);
if (jpeg_error < 0) {
LOG(ERROR) << "Cannot decode JPEG header from incoming image buffer";
return false;
}
width_out = width;
height_out = height;
const size_t num_pixels = width_out * height_out;
if(decompressed_image_out.size() != num_pixels) {
decompressed_image_out.resize(num_pixels);
}
jpeg_error = tjDecompress2(m_jpeg_decompressor,
jpeg_image_buffer.data(),
jpeg_image_buffer.size(),
(unsigned char*) decompressed_image_out.data(),
width_out,
0,
height_out,
TJPF_RGB,
TJFLAG_ACCURATEDCT);
// tjDestroy for the decompressor handle is done in the destructor
if (jpeg_error < 0) {
const char *jpeg_error_str = tjGetErrorStr();
LOG(ERROR) << "JPEG decompression error: " << jpeg_error_str;
return false;
}

#ifdef TIME_JPEG
clock_gettime(CLOCK_MONOTONIC, &end_time);
// elapsed time in ms
double elapsed_time = (end_time.tv_sec - start_time.tv_sec) * 1e3 +
(end_time.tv_nsec - start_time.tv_nsec) / 1e6;
LOG(INFO) << "TIMING: JPEG decompression: " << elapsed_time << "ms"
<< ", resolution " << width_out << "x" << height_out
<< ", sizes " << jpeg_image_buffer.size() << " ("
<< decompressed_image_out.size() * sizeof(cgr_libcluster::uchar3) << ")";
#endif

return true;
}

void TurbojpegCompressor::free(uint8_t *memory) {
tjFree(memory);
}


} // cgr_libcluster
@ -0,0 +1,34 @@
#ifndef __TURBOJPEG_COMPRESSOR_H__
#define __TURBOJPEG_COMPRESSOR_H__

#include <vector>

#include <turbojpeg.h>

#include "../utils/vector_types.h" // for uchar3

//#include "net_camera.h"

namespace cgr_libcluster {

class TurbojpegCompressor {
public:
TurbojpegCompressor();
~TurbojpegCompressor();

// Compresses the image buffer into a JPEG image; returns the JPEG size in bytes
size_t compress(void* src_buffer, int width, int height, int compression_quality, unsigned char*& jpeg_image);
// Decompresses a JPEG stream buffer into the output pixel vector
bool decompress(std::vector<uint8_t> & jpeg_image_buffer, int & width_out, int & height_out,
std::vector<cgr_libcluster::uchar3> & decompressed_image_out);

void free(uint8_t *memory);

private:
tjhandle m_jpeg_compressor = nullptr;
tjhandle m_jpeg_decompressor = nullptr;
};

} // cgr_libcluster

#endif
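A hedged round-trip sketch for the wrapper above (a dummy gray frame; actual call sites in this codebase may differ):

TurbojpegCompressor tj;
std::vector<uint8_t> rgb(640 * 480 * 3, 128);            // flat gray RGB frame
unsigned char *jpeg = nullptr;
size_t jpeg_size = tj.compress(rgb.data(), 640, 480, 85, jpeg);
if (jpeg_size > 0) {
  std::vector<uint8_t> jpeg_buf(jpeg, jpeg + jpeg_size);
  int w = 0, h = 0;
  std::vector<cgr_libcluster::uchar3> pixels;
  tj.decompress(jpeg_buf, w, h, pixels);                 // w == 640, h == 480
  tj.free(jpeg);                                         // buffer was allocated by tjCompress2
}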
@ -0,0 +1,92 @@
#ifndef __CUDA_CONTEXT_PROVIDER_H__
#define __CUDA_CONTEXT_PROVIDER_H__

#include "./utils/logging.h"

#ifdef WITH_CUDA_DYNLOAD
#include <cuew.h>
// Do not use CUDA SDK headers when using CUEW.
// The macro below is used by the Optix SDK and is necessary to avoid a DSO loading collision.
// See device_optix.cpp for an example.
#define OPTIX_DONT_INCLUDE_CUDA
#else
#include <cuda.h>
#endif

namespace cgr_libcluster {

struct CUDAContextScope {
CUDAContextScope(CUcontext ctx) {
CUresult cu_result = cuCtxPushCurrent(ctx);
if(cu_result != CUDA_SUCCESS) {
const char *error_name = NULL;
cuGetErrorName(cu_result, &error_name);
std::string message = "FATAL. cuCtxPushCurrent failed with error: " + std::string(error_name);
LOG(ERROR) << message;
throw std::runtime_error(message);
}
}

~CUDAContextScope() {
CUresult cu_result = cuCtxPopCurrent(NULL);
if(cu_result != CUDA_SUCCESS) {
const char *error_name = NULL;
cuGetErrorName(cu_result, &error_name);
std::string message = "FATAL. cuCtxPopCurrent failed with error: " + std::string(error_name);
LOG(ERROR) << message;
throw std::runtime_error(message);
}
}
};
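A hedged usage sketch of the RAII scope above (encodeFrame is a hypothetical caller). One caveat: the destructor throws on failure, which terminates the process if it fires during stack unwinding, so the pop is expected to always succeed in practice:

void encodeFrame(CUcontext ctx) {
  const CUDAContextScope scope(ctx);  // cuCtxPushCurrent
  // ... cuMemAlloc / cuMemcpy2D / NVENC calls against ctx ...
}                                     // ~CUDAContextScope -> cuCtxPopCurrent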

class CudaContextProvider {

public:

static CUcontext getPrimaryContext(const int device_num) {
static CUcontext cuda_context = 0;
if(cuda_context) {
return cuda_context;
}
#ifdef WITH_CUDA_DYNLOAD
LOG(INFO) << "WITH_CUDA_DYNLOAD is on, call cuewInit to load the CUDA library";
// When we run as part of Blender, the CUDA lib is normally already loaded by CUDADevice.
// However, it is not loaded when this is called from unit tests, so call cuewInit here to be sure
// that the CUDA lib is loaded; it is safe to call cuewInit multiple times.
if (cuewInit(CUEW_INIT_CUDA) != CUEW_SUCCESS) {
throw std::runtime_error("Error. CUEW failed to load CUDA lib");
}
#endif

CUdevice cuda_device = 0;
CUresult cu_result = cuDeviceGet(&cuda_device, device_num);
// CUDA is normally initialised by CUDADevice; however, it is not initialised
// when called from unit tests. Check whether CUDA is initialised and run cuInit if not.
if(cu_result == CUDA_ERROR_NOT_INITIALIZED) {
LOG(WARNING) << "Cuda is not initialised, run cuInit";
cu_result = cuInit(0);
if (cu_result != CUDA_SUCCESS) {
std::string message = "Error. Cannot initialise CUDA, cuInit failed "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}
cu_result = cuDeviceGet(&cuda_device, device_num);
}
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Cannot get primary cuda context because cuDeviceGet failed "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}
cu_result = cuDevicePrimaryCtxRetain(&cuda_context, cuda_device);
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Failed to get primary cuda context "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}
return cuda_context;
}
};

} // namespace cgr_libcluster

#endif
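A hedged usage sketch (device 0 assumed present). Because the retained primary context is cached in a function-local static and handed out for the lifetime of the process, there is deliberately no matching cuDevicePrimaryCtxRelease anywhere:

CUcontext ctx = CudaContextProvider::getPrimaryContext(0);
{
  const CUDAContextScope scope(ctx);
  CUdeviceptr buf = 0;
  cuMemAlloc(&buf, 1024);  // driver-API work now targets the primary context
  cuMemFree(buf);
}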
@ -1,3 +1,6 @@
#ifdef WITH_CUDA
#include "../cuda_context_provider.h"
#endif
#include "denoising_context.h"
#include "master_oidn_denoiser.h"
#ifdef WITH_OPTIX
@ -26,7 +29,8 @@ MasterDenoiser * DenoisersProvider::makeOptixDenoiser(bool save_denoise_io, int
const std::string & output_folder_path, bool is_denoising_passes_on,
const ImageOutputProvider & image_output_provider, int max_img_width, int max_img_height) const {
try {
return new MasterOptixDenoiser(save_denoise_io, save_every_n_images, output_folder_path,
CUcontext cuda_context = CudaContextProvider::getPrimaryContext(MasterOptixDenoiser::DEVICE_NUM);
return new MasterOptixDenoiser(cuda_context, save_denoise_io, save_every_n_images, output_folder_path,
is_denoising_passes_on, image_output_provider, max_img_width, max_img_height);
} catch(std::runtime_error & ex) {
LOG(ERROR) << "DenoisersProvider::makeDenoiser. ERROR. Failed to create an instance of MasterOptixDenoiser due to: "

@ -1,3 +1,6 @@

#include "../cuda_context_provider.h"
#include "../utils/cuda_utils.h"
#include "../utils/timer.h" // for scoped_timer
#include "../utils/logging.h"
#include "../server_image.h" // for ServerImage
@ -7,28 +10,24 @@
#include <optix_denoiser_tiling.h>
#include <optix.h>


namespace cgr_libcluster {

struct CUDAContextScope {
CUDAContextScope(CUcontext ctx) {
cuCtxPushCurrent(ctx);
}

~CUDAContextScope() {
cuCtxPopCurrent(NULL);
}
};

// The constructor throws an exception if it cannot create a functional instance.
// This prevents creation of an object that is not functional.
// It doesn't affect rendering performance, as it happens once at the init stage.
MasterOptixDenoiser::MasterOptixDenoiser(bool save_denoise_io, int save_every_n_images, const std::string & output_folder_path,
bool is_denoising_passes_on, const ImageOutputProvider & image_output_provider, int max_img_width, int max_img_height) :
MasterOptixDenoiser::MasterOptixDenoiser(CUcontext cuda_context, bool save_denoise_io, int save_every_n_images,
const std::string & output_folder_path, bool is_denoising_passes_on,
const ImageOutputProvider & image_output_provider, int max_img_width, int max_img_height) :
MasterDenoiser(
ClusterSessionParams::MasterDenoiser::MASTER_DENOISER_OPTIX, save_denoise_io, save_every_n_images,
output_folder_path, is_denoising_passes_on, image_output_provider),
max_img_width(max_img_width), max_img_height(max_img_height) {
LOG(INFO) << "Creating MasterOptixDenoiser";
max_img_width(max_img_width), max_img_height(max_img_height),
cuda_context(cuda_context) {
LOG(INFO) << "Creating MasterOptixDenoiser with width: " << max_img_width << " height: " << max_img_height <<
" save_io: " << save_denoise_io << " save_every_n_images: " << save_every_n_images << " output_folder_path: " <<
output_folder_path << " is_denoising_passes_on: " << is_denoising_passes_on;

OptixDeviceContext optix_context = nullptr;
OptixDenoiserOptions denoiser_options;
if(is_denoising_passes_on) {
@ -39,166 +38,69 @@ MasterOptixDenoiser::MasterOptixDenoiser(bool save_denoise_io, int save_every_n_
denoiser_options.guideNormal = 0;
}
//denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
CUdevice cuda_device = 0;

OptixDeviceContextOptions options = {};

#ifdef WITH_CUDA_DYNLOAD
LOG(INFO) << "WITH_CUDA_DYNLOAD is on, call cuewInit to load the CUDA library";
// When we run as part of Blender, the CUDA lib is normally already loaded by CUDADevice.
// However, it is not loaded when this is called from unit tests, so call cuewInit here to be sure
// that the CUDA lib is loaded; it is safe to call cuewInit multiple times.
if (cuewInit(CUEW_INIT_CUDA) != CUEW_SUCCESS) {
throw std::runtime_error("Error. CUEW failed to load CUDA lib");
}
#endif

CUresult cu_result = cuDeviceGet(&cuda_device, DEVICE_NUM);
// CUDA is normally initialised by CUDADevice; however, it is not initialised
// when called from unit tests. Check whether CUDA is initialised and run cuInit if not.
if(cu_result == CUDA_ERROR_NOT_INITIALIZED) {
LOG(WARNING) << "Cuda is not initialised, run cuInit";
cu_result = cuInit(0);
if (cu_result != CUDA_SUCCESS) {
std::string message = "Error. Cannot initialise CUDA, cuInit failed "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}
cu_result = cuDeviceGet(&cuda_device, DEVICE_NUM);
}

if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because cuDeviceGet failed "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}

unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
cu_result = cuCtxCreate(&cuda_context, ctx_flags, cuda_device);
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because cuCtxCreate failed "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}
const CUDAContextScope scope(cuda_context);

if (g_optixFunctionTable.optixDeviceContextCreate != NULL) {
LOG(INFO) << "Optix function table is already initialized, continue.";
} else {
LOG(INFO) << "Initializing Optix...";
OptixResult optix_result = optixInit();
if(optix_result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {
std::string message = "Error. OptiX initialization failed because the installed driver does not support "
"ABI version: " + std::to_string(OPTIX_ABI_VERSION);
throw std::runtime_error(message);
}
else if(optix_result != OPTIX_SUCCESS) {
std::string message = "Error. OptiX initialization failed with error code: " + std::to_string(optix_result);
throw std::runtime_error(message);
}
OPTIX_API_CALL(optixInit(), THROW_IF_ERROR);
LOG(INFO) << "Initialization of Optix function table complete";
}

OptixResult optix_result = optixDeviceContextCreate(cuda_context, &options, &optix_context);
if(optix_result != OPTIX_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because optixDeviceContextCreate failed "
"with error code: " + std::to_string(optix_result);
throw std::runtime_error(message);
}
OPTIX_API_CALL(optixDeviceContextCreate(cuda_context, &options, &optix_context), THROW_IF_ERROR);

//TODO: figure out how to support KIND_TEMPORAL, needs velocity vector
optix_result = optixDenoiserCreate(optix_context, OPTIX_DENOISER_MODEL_KIND_HDR,
&denoiser_options, &denoiser);
if(optix_result != OPTIX_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because optixDenoiserCreate failed "
"with error code: " + std::to_string(optix_result);
throw std::runtime_error(message);
}
OPTIX_API_CALL(optixDenoiserCreate(optix_context, OPTIX_DENOISER_MODEL_KIND_HDR,
&denoiser_options, &denoiser), THROW_IF_ERROR);
if(denoiser == nullptr) {
throw std::runtime_error("Error. Cannot create MasterOptixDenoiser because optixDenoiserCreate returned no denoiser");
}

/*
optix_result = optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, nullptr, 0);
if(optix_result != OPTIX_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because optixDenoiserSetModel failed "
"with error code: " + std::to_string(optix_result);
throw std::runtime_error(message);
}
*/
// OPTIX_API_CALL(optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, nullptr, 0), THROW_IF_ERROR);

memset(&denoiser_sizes, 0, sizeof(OptixDenoiserSizes));
optix_result = optixDenoiserComputeMemoryResources(denoiser, max_img_width, max_img_height, &denoiser_sizes);
if(optix_result != OPTIX_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because optixDenoiserComputeMemoryResources failed "
"with error code: " + std::to_string(optix_result);
throw std::runtime_error(message);
}
OPTIX_API_CALL(optixDenoiserComputeMemoryResources(denoiser, max_img_width, max_img_height, &denoiser_sizes),
THROW_IF_ERROR);

cu_result = cuMemAlloc(&state_denoiser, denoiser_sizes.stateSizeInBytes);
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because cuMemAlloc for state_denoiser failed "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}

cu_result = cuMemAlloc(&scratch_denoiser, denoiser_sizes.withoutOverlapScratchSizeInBytes);
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because cuMemAlloc for scratch_denoiser failed "
"with error code: " + std::to_string(cu_result);
throw std::runtime_error(message);
}
optix_result = optixDenoiserSetup(
CUDA_API_CALL(cuMemAlloc(&state_denoiser, denoiser_sizes.stateSizeInBytes), THROW_IF_ERROR);
CUDA_API_CALL(cuMemAlloc(&scratch_denoiser, denoiser_sizes.withoutOverlapScratchSizeInBytes), THROW_IF_ERROR);
CUDA_API_CALL(cuStreamCreate(&cuda_stream, CU_STREAM_DEFAULT), THROW_IF_ERROR);
OPTIX_API_CALL(optixDenoiserSetup(
denoiser,
0, // cuda stream,
cuda_stream,
max_img_width,
max_img_height,
state_denoiser,
denoiser_sizes.stateSizeInBytes,
scratch_denoiser,
denoiser_sizes.withoutOverlapScratchSizeInBytes);
if(optix_result != OPTIX_SUCCESS) {
std::string message = "Error. Cannot create MasterOptixDenoiser because optixDenoiserSetup failed "
"with error code: " + std::to_string(optix_result);
throw std::runtime_error(message);
}
denoiser_sizes.withoutOverlapScratchSizeInBytes), THROW_IF_ERROR);
allocateCudaBuffer(&cuda_pixels_buffer, getMaxBufferSize(), "input/noisy pixels buffer");

if(is_denoising_passes_on) {
allocateCudaBuffer(&cuda_albedo_buffer, getMaxBufferSize(), "input albedo buffer");
allocateCudaBuffer(&cuda_normal_buffer, getMaxBufferSize(), "input normal buffer");
}
allocateCudaBuffer(&cuda_denoised_pixels_buffer, getMaxBufferSize(), "denoised pixels buffer");

LOG(INFO) << "MasterOptixDenoiser creation is complete";
}

MasterOptixDenoiser::~MasterOptixDenoiser() {
LOG(INFO) << "Destroying MasterOptixDenoiser";
CUresult cu_result = cuMemFree(scratch_denoiser);
if(cu_result != CUDA_SUCCESS) {
LOG(ERROR) << "Error. cuMemFree failed for scratch_denoiser with error code: " << std::to_string(cu_result);
}
cu_result = cuMemFree(state_denoiser);
if(cu_result != CUDA_SUCCESS) {
LOG(ERROR) << "Error. cuMemFree failed for state_denoiser with error code: " << std::to_string(cu_result);
}
OptixResult optix_result = optixDenoiserDestroy(denoiser);
if(optix_result != OPTIX_SUCCESS) {
LOG(ERROR) << "Error. optixDenoiserDestroy failed with error code: " << optix_result;
}
const CUDAContextScope scope(cuda_context);
CUDA_API_CALL(cuMemFree(scratch_denoiser), DO_NOT_THROW);
CUDA_API_CALL(cuMemFree(state_denoiser), DO_NOT_THROW);
CUDA_API_CALL(cuStreamDestroy(cuda_stream), DO_NOT_THROW);
OPTIX_API_CALL(optixDenoiserDestroy(denoiser), DO_NOT_THROW);
releaseCudaBuffers();
cu_result = cuCtxDestroy(cuda_context);
if(cu_result != CUDA_SUCCESS) {
LOG(ERROR) << "Error. cuCtxDestroy failed with error code: " << std::to_string(cu_result);
}
LOG(INFO) << "Destroyed MasterOptixDenoiser";
}

void MasterOptixDenoiser::allocateCudaBuffer(CUdeviceptr * buffer_ptr, size_t buffer_size, const std::string & buffer_name) {
LOG(INFO) << "Allocating cuda memory for " << buffer_name;
CUresult cu_result = cuMemAlloc(buffer_ptr, buffer_size);
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Could not allocate memory for " + buffer_name + " on device. Cuda error code: "
+ std::to_string(cu_result);
throw std::runtime_error(message);
}
CUDA_API_CALL(cuMemAlloc(buffer_ptr, buffer_size), THROW_IF_ERROR);
}

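The CUDA_API_CALL/OPTIX_API_CALL wrappers adopted above come from ../utils/cuda_utils.h, which is not part of this diff. Purely as an assumption for readers, a plausible shape consistent with the call sites is:

// Hypothetical sketch only -- the real macro lives in ../utils/cuda_utils.h.
#define CUDA_API_CALL(call, throw_if_error)                             \
  do {                                                                  \
    CUresult rc_ = (call);                                              \
    if (rc_ != CUDA_SUCCESS) {                                          \
      std::string msg_ = std::string("CUDA call failed: ") + #call +    \
                         " error: " + std::to_string(rc_);              \
      LOG(ERROR) << msg_;                                               \
      if (throw_if_error) throw std::runtime_error(msg_);               \
    }                                                                   \
  } while (0)

Whatever the real definition is, the call sites imply the same contract: every failed driver call gets logged, and THROW_IF_ERROR/DO_NOT_THROW select whether the failure is fatal.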
size_t MasterOptixDenoiser::getMaxBufferSize() {
@ -206,32 +108,17 @@ size_t MasterOptixDenoiser::getMaxBufferSize() {
}

void MasterOptixDenoiser::releaseCudaBuffers() {
CUresult cu_result = cuMemFree(cuda_pixels_buffer);
if(cu_result != CUDA_SUCCESS) {
"Error. cuMemFree failed for cuda_pixels_buffer with error code:" + std::to_string(cu_result);
}
CUDA_API_CALL(cuMemFree(cuda_pixels_buffer), DO_NOT_THROW);
cuda_pixels_buffer = 0;

if(cuda_albedo_buffer) {
cu_result = cuMemFree(cuda_albedo_buffer);
if(cu_result != CUDA_SUCCESS) {
"Error. cuMemFree failed for cuda_albedo_buffer with error code:" + std::to_string(cu_result);
}
CUDA_API_CALL(cuMemFree(cuda_albedo_buffer), DO_NOT_THROW);
cuda_albedo_buffer = 0;
}

if(cuda_normal_buffer) {
cu_result = cuMemFree(cuda_normal_buffer);
if(cu_result != CUDA_SUCCESS) {
"Error. cuMemFree failed for cuda_normal_buffer with error code:" + std::to_string(cu_result);
}
CUDA_API_CALL(cuMemFree(cuda_normal_buffer), DO_NOT_THROW);
cuda_normal_buffer = 0;
}

cu_result = cuMemFree(cuda_denoised_pixels_buffer);
if(cu_result != CUDA_SUCCESS) {
"Error. cuMemFree failed for cuda_denoised_pixels_buffer with error code:" + std::to_string(cu_result);
}
CUDA_API_CALL(cuMemFree(cuda_denoised_pixels_buffer), DO_NOT_THROW);
cuda_denoised_pixels_buffer = 0;
}

@ -252,11 +139,7 @@ bool MasterOptixDenoiser::initInputOptixImage(OptixImage2D &input_image, CUdevic
const int row_stride = server_image.getWidth() * pixel_stride;
const size_t buffer_size_bytes = sizeof(cgr_libcluster::float3) * server_image.getWidth() * server_image.getHeight();

CUresult cu_result = cuMemcpyHtoD(input_buffer_device, input_buffer_host, buffer_size_bytes);
if(cu_result != CUDA_SUCCESS) {
LOG(ERROR) << "Error. Could not copy input buffer from host to device memory. CUDA error code: " << cu_result;
return false;
}
CUDA_API_CALL(cuMemcpyHtoD(input_buffer_device, input_buffer_host, buffer_size_bytes), THROW_IF_ERROR);
input_image.data = input_buffer_device;
input_image.width = server_image.getWidth();
input_image.height = server_image.getHeight();
@ -267,6 +150,7 @@ bool MasterOptixDenoiser::initInputOptixImage(OptixImage2D &input_image, CUdevic
}

DenoisingResult MasterOptixDenoiser::denoise(ServerImage &server_image) {
const CUDAContextScope scope(cuda_context);
if(save_denoise_io) {
if (save_every_n_images <= 0) {
LOG(ERROR) << "Error: invalid save_every_n_images: " << save_every_n_images;
@ -314,7 +198,7 @@ DenoisingResult MasterOptixDenoiser::denoise(ServerImage &server_image) {

OptixResult optix_result = optixDenoiserInvoke(
denoiser,
0, // cuda stream
cuda_stream,
&params_denoiser,
state_denoiser,
denoiser_sizes.stateSizeInBytes,
@ -329,24 +213,28 @@ DenoisingResult MasterOptixDenoiser::denoise(ServerImage &server_image) {
LOG(ERROR) << "Error. Optix denoise failed. OPTIX error code: " << optix_result;
return DenoisingResult::FAILED;
}

const size_t buffer_size_bytes = sizeof(cgr_libcluster::float3) * server_image.getWidth() * server_image.getHeight();
CUresult cu_result = cuMemcpyDtoH(server_image.denoised_pixels, cuda_denoised_pixels_buffer, buffer_size_bytes);
if(cu_result != CUDA_SUCCESS) {
LOG(ERROR) << "Error. Could not copy image buffer from device to host memory. CUDA error code: " << cu_result;
return DenoisingResult::FAILED;
}

server_image.denoised = true;
// explicitly wait till denoising is complete
cuStreamSynchronize(cuda_stream);
denoised_buffer_size_bytes = sizeof(cgr_libcluster::float3) * server_image.getWidth() * server_image.getHeight();
LOG(INFO) << "denoising time for frame: " << server_image.camera.frame << " " << denoizer_timer.elapsed();
if(save_denoise_io) {
if (save_every_n_images <= 0) {
LOG(ERROR) << "Error: invalid save_every_n_images: " << save_every_n_images;
} else if (server_image.getFrame() % save_every_n_images == 0) {
saveDenoisedImage(server_image);
}
}
return server_image.denoised ? DenoisingResult::OK : DenoisingResult::FAILED;
return DenoisingResult::OK;
}

CUdeviceptr MasterOptixDenoiser::getCudaMemPointerToDenoisedImage() const {
return cuda_denoised_pixels_buffer;
}

CUstream MasterOptixDenoiser::getCudaStream() const {
return cuda_stream;
}

void MasterOptixDenoiser::copyDenoisedImageFromCudaToHostMemory(uint8_t * dest_host_memory) {
scoped_timer copy_device_2_host_timer;
const CUDAContextScope scope(cuda_context);
CUDA_API_CALL(cuMemcpyDtoHAsync(dest_host_memory, cuda_denoised_pixels_buffer,
denoised_buffer_size_bytes, cuda_stream), THROW_IF_ERROR);
CUDA_API_CALL(cuStreamSynchronize(cuda_stream), THROW_IF_ERROR);
LOG(INFO) << "time to copy denoised image from device to host: " << copy_device_2_host_timer.elapsed();
}

} // cgr_libcluster

@ -8,6 +8,8 @@
// The macro below is used by the Optix SDK and is necessary to avoid a DSO loading collision.
// See device_optix.cpp for an example.
#define OPTIX_DONT_INCLUDE_CUDA
#else
#include <cuda.h>
#endif

#include <optix_stubs.h>
@ -32,21 +34,27 @@ public:
static const size_t DEFAULT_IMAGE_WIDTH_PIXELS = 1024;
static const size_t DEFAULT_IMAGE_HEIGHT_PIXELS = 1024;

MasterOptixDenoiser(bool save_denoise_io, int save_every_n_images, const std::string & output_folder_path,
bool is_denoising_passes_on, const ImageOutputProvider & image_output_provider, int max_img_width, int max_img_height);
MasterOptixDenoiser(CUcontext cuda_context, bool save_denoise_io, int save_every_n_images,
const std::string & output_folder_path, bool is_denoising_passes_on,
const ImageOutputProvider & image_output_provider, int max_img_width, int max_img_height);
virtual ~MasterOptixDenoiser();

virtual DenoisingResult denoise(ServerImage &server_image) override;

MasterOptixDenoiser(MasterOptixDenoiser const&) = delete;
void operator=(MasterOptixDenoiser const&) = delete;
void operator=(MasterOptixDenoiser const&) = delete;
CUstream getCudaStream() const;
CUdeviceptr getCudaMemPointerToDenoisedImage() const;

void copyDenoisedImageFromCudaToHostMemory(uint8_t * dest_host_memory);

private:
// For denoising we always use device #0;
// rendering can be assigned to devices starting from #1,
// so denoising and rendering do not run on the same device and do not fight for GPU resources.
static const int DEVICE_NUM = 0;

private:

void allocateCudaBuffer(CUdeviceptr * buffer_ptr, size_t buffer_size, const std::string & buffer_name);
void releaseCudaBuffers();
size_t getMaxBufferSize();
@ -54,6 +62,18 @@ private:
ServerImage &server_image, ImagePixel * input_buffer_host);
void initOutputOptixImage(OptixImage2D &output_image, ServerImage &server_image);

// We set the cuda stream explicitly so downstream components that use the denoised image,
// like pixel conversion and image compression, can run within the same cuda stream
// and therefore be implicitly synchronized for the best possible performance.
// For now we synchronize explicitly by calling cuStreamSynchronize() in denoise(),
// so the denoise time logging outputs the correct denoising duration. Denoising time calculation
// and logging need to be updated upon switching to implicit synchronization by the CUDA runtime.
// Here is a task for that improvement: T143205954.
// Currently the cuda stream is owned by this class for simplicity. Alternatively, the stream
// could be passed in from outside, but this requires more changes in ServerImageBuffer and DenoisingContext.
// Not making this change at this point to limit the scope to the nvdecoder integration;
// this change can be done as part of the task for synchronization improvement mentioned above.
CUstream cuda_stream = nullptr;
CUcontext cuda_context = nullptr;
OptixDenoiser denoiser = nullptr;
OptixDenoiserSizes denoiser_sizes;
@ -64,7 +84,11 @@ private:
CUdeviceptr cuda_pixels_buffer = 0;
CUdeviceptr cuda_albedo_buffer = 0;
CUdeviceptr cuda_normal_buffer = 0;

CUdevice m_cuda_device;

CUdeviceptr cuda_denoised_pixels_buffer = 0;
size_t denoised_buffer_size_bytes = 0;
int max_img_width = DEFAULT_IMAGE_WIDTH_PIXELS;
int max_img_height = DEFAULT_IMAGE_HEIGHT_PIXELS;
};
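A hedged sketch of the chaining the comment above anticipates (everything except getCudaStream/getCudaMemPointerToDenoisedImage is illustrative):

CUstream s = denoiser.getCudaStream();
CUdeviceptr denoised = denoiser.getCudaMemPointerToDenoisedImage();
// Enqueue the downstream stages on the same stream so they run back-to-back
// without host-side waits, then synchronize once at the very end:
// convertPixelsKernel<<<grid, block, 0, s>>>(denoised, ...);
// nv_encoder.encode(denoised, encoded_out);
cuStreamSynchronize(s);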

@ -90,6 +90,7 @@ bool NetCamera::operator==(const NetCamera &other_camera) const
this->compression_quality == other_camera.compression_quality &&
this->master_denoiser == other_camera.master_denoiser &&
this->master_image_color_format == other_camera.master_image_color_format &&
this->master_image_compressor == other_camera.master_image_compressor &&
this->worker_map == other_camera.worker_map;
}


@ -3,6 +3,7 @@ namespace cgr_libcluster;
enum CameraTypeFlatBuffer:byte { CAMERA_PERSPECTIVE = 0, CAMERA_ORTHOGRAPHIC, CAMERA_PANORAMA }
enum MasterDenoiserFlatBuffer:byte { MASTER_DENOISER_NONE = 0, MASTER_DENOISER_OIDN, MASTER_DENOISER_OPTIX, MASTER_DENOISER_BARCELONA }
enum MasterImageColorFormatFlatBuffer:byte { MASTER_IMAGE_COLOR_FORMAT_LINEAR = 1, MASTER_IMAGE_COLOR_FORMAT_SRGB = 2 }
enum MasterImageCompressorFlatBuffer:byte { MASTER_IMAGE_COMPRESSOR_JPEG = 1, MASTER_IMAGE_COMPRESSOR_NVENCODER = 2 }

table NetCameraFlatBuffer {
cam_matrix:TransformFlatBuffer;
@ -28,6 +29,7 @@ table NetCameraFlatBuffer {
worker_map:WorkerMapFlatBuffer;
scene_frame:int;
master_image_color_format:MasterImageColorFormatFlatBuffer = MASTER_IMAGE_COLOR_FORMAT_LINEAR;
master_image_compressor:MasterImageCompressorFlatBuffer = MASTER_IMAGE_COMPRESSOR_JPEG;
}

table TransformFlatBuffer {
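A hedged compatibility note on the schema change above (the accessor below is the conventional FlatBuffers-generated form, assumed rather than taken from this diff):

// Buffers serialized before this change simply lack the new field; readers
// then see the schema default, so pre-upgrade masters keep using JPEG:
// auto cam = flatbuffers::GetRoot<NetCameraFlatBuffer>(buffer_pointer);
// cam->master_image_compressor();  // == MASTER_IMAGE_COMPRESSOR_JPEG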
@ -90,6 +90,8 @@ public:
bool expect_modify_object_message = false;
ClusterSessionParams::MasterImageColorFormat master_image_color_format =
ClusterSessionParams::DEFAULT_MASTER_IMAGE_COLOR_FORMAT;
ClusterSessionParams::MasterImageCompressor master_image_compressor =
ClusterSessionParams::DEFAULT_MASTER_IMAGE_COMPRESSOR;
};


@ -8,9 +8,19 @@
#include "denoising/denoising_context.h"
#include "streamed_image.h"

// Not sure that this is the best place to set OS_LINUX;
// IMO it should be set somewhere at a higher level.
// Setting it here for now to get the system built.
// TODO: revisit this flag later
#if defined(linux) || defined(__linux) || defined(__linux__)
# ifndef OS_LINUX
# define OS_LINUX
# endif
#endif

namespace cgr_libcluster {

NetServer::~NetServer()
NetServer::~NetServer()
{
}

@ -41,7 +51,7 @@ void NetServer::stop()
theClient->reset();

//wait for main client to connect
bool newClient = false;
bool newClient = false;
while (!newClient) {
acceptConnection(masterPort);
//tell main client to start
@ -54,8 +64,8 @@ void NetServer::stop()
netThread = new std::thread(std::bind(&NetServer::run, this));
}
}
tbb::mutex::scoped_lock imageStoplock(serverImage.serverImageStopMutex);
serverImage.reset();
tbb::mutex::scoped_lock imageStoplock(server_image_buffer.serverImageStopMutex);
server_image_buffer.reset();
}

void NetServer::createStreamedImage(unsigned int readIndex, ServerImage &currentImage, DenoisingContext & denoising_context)
@ -65,8 +75,8 @@ void NetServer::createStreamedImage(unsigned int readIndex, ServerImage &current
#ifndef WORKER_IMAGE_STREAMING_TEST
mergeWithWorkers = theClient->mergeWith(currentImage);
#endif
serverImage.createStreamedImage(readIndex, mergeWithWorkers, denoising_context);
}
server_image_buffer.createStreamedImage(readIndex, mergeWithWorkers, denoising_context);
}
}

bool NetServer::isMaster(){
@ -87,6 +97,7 @@ void NetServer::set_save_every_n_images(int save_every_n_images) {

void NetServer::set_output_folder_path(const std::string & output_folder_path) {
this->output_folder_path = output_folder_path;
server_image_buffer.set_output_folder_path(output_folder_path);
}

void NetServer::run()
@ -100,13 +111,15 @@ void NetServer::run()
for (;;) {
if (stopped) break;

tbb::mutex::scoped_lock imageStopLock(serverImage.serverImageStopMutex);
if (!serverImage.isEmpty()) {
int readIndex = serverImage.readIndex;
LOG(INFO) << "server image indices: " << serverImage.writeIndex << " " << serverImage.readIndex;
ServerImage& currentImage = serverImage.readImage();
tbb::mutex::scoped_lock imageStopLock(server_image_buffer.serverImageStopMutex);
if (!server_image_buffer.isEmpty()) {
int readIndex = server_image_buffer.readIndex;
LOG(INFO) << "server image indices: " << server_image_buffer.writeIndex << " " << server_image_buffer.readIndex;
ServerImage& currentImage = server_image_buffer.readImage();
//if it has a client, combine with the correct client image before sending
if (isMaster()) createStreamedImage(readIndex, currentImage, denoising_context);
if (isMaster()) {
createStreamedImage(readIndex, currentImage, denoising_context);
}

RPCSend streamSnd(image_socket, &error_func, SERVER_STREAM_IMAGE_CMD);
if (error_func.have_error()) {
@ -114,7 +127,7 @@ void NetServer::run()
}
if (isMaster()) {
// Create a StreamedImage
StreamedImage &streamedImage = serverImage.streamedImageBuffer[readIndex];
StreamedImage &streamedImage = server_image_buffer.streamedImageBuffer[readIndex];
send_streamed_image(streamSnd, streamedImage);
if (save_streamed_image) {
if (save_every_n_images <= 0) {
@ -135,12 +148,16 @@ void NetServer::run()
time_sleep(NET_IMAGE_PAUSE);
}
} //endfor
#if defined(OS_LINUX) && defined(WITH_CUDA)
server_image_buffer.deleteNvEncoder();
#endif
VLOG(3) << "Exit NetServer run loop";
}

NetServer::NetServer(
const char *masterAddr, unsigned short masterPort)
: NetBase(SERVER_PORT, masterAddr),
theClient(NULL), serverImage(IMAGE_FRAME_COUNT, true, true),
: NetBase(SERVER_PORT, masterAddr),
theClient(NULL), server_image_buffer(IMAGE_FRAME_COUNT, true, true),
masterPort(masterPort), net_camera_command(net_camera, modify_object_message)
{
// acceptConnection();
@ -159,7 +176,7 @@ ClusterRenderCommand& NetServer::wait_for_client_command() {
rcv.read_buffer(&cam, sizeof(cam));
LOG(INFO) << "server receive camera for frame: " << cam.frame << " cam_width: " << cam.cam_width << " cam_height: " << cam.cam_height;
numCameraReceived++;
serverImage.set_master_image_color_format(cam.master_image_color_format);
server_image_buffer.set_master_image_color_format(cam.master_image_color_format);
if(cam.expect_modify_object_message) {
rcv.read_buffer(&modify_object_message, sizeof(modify_object_message));
if (theClient) {
@ -171,16 +188,16 @@ ClusterRenderCommand& NetServer::wait_for_client_command() {
//master prepares to receive images from workers
//FENGTOFIX: this is going to cause a problem
//if the client camera for frame n is received
//before worker images for frame n-1 have been
//before worker images for frame n-1 have been
//processed!!!!

//make workers do 2xworker_count work
//make workers do 2xworker_count work
NetCamera childCam(cam);
childCam.integrator_seed = cam.integrator_seed+5;
childCam.sampleCount = cam.sampleCount * theClient->getChildrenCount();
theClient->send_camera(childCam);
}
serverImage.begin_frame(cam);
server_image_buffer.begin_frame(cam);
return net_camera_command;
} else if (rcv.name == KILL_CMD) {
LOG(INFO) << "received terminate message...exiting";
@ -199,8 +216,8 @@ bool NetServer::send_tile(const NetRenderTile& rtile)
//because rtile has no information about the NetCamera,
//we have to insert the NetCamera associated with the image
//before the first tile insertion

ServerImage* activeImage = serverImage.getActiveImage();

ServerImage* activeImage = server_image_buffer.getActiveImage();
assert(activeImage);
if (activeImage) {
//no locking because tile insertions are for different elements in the buffer
@ -208,11 +225,11 @@ bool NetServer::send_tile(const NetRenderTile& rtile)
if (activeImage->tile_count == getMaxTileCount()) {
activeImage->imageCount = 1;
activeImage->sampleCount = activeImage->camera.sampleCount;
serverImage.endInsertImage();
server_image_buffer.endInsertImage();
}
std::cout << "Insert tile progress: "<< activeImage->tile_count << " "<< getMaxTileCount() << "\n";
return true;
}
}
return false;
}

@ -230,7 +247,7 @@ void NetServer::serverConnectionsThread(NetClient *theClient, unsigned short mas
delete slave;
maxSlaves--;
}
}
}
theClient->slaveCount = slaveCount;
}

@ -22,7 +22,7 @@ class NetServer: public NetBase {
NetClient *theClient; //client of worker servers

void createStreamedImage(unsigned int index, ServerImage& curImage, DenoisingContext & denoising_context);
ServerImageBuffer serverImage;
ServerImageBuffer server_image_buffer;

unsigned short masterPort;


@ -10,9 +10,9 @@ namespace cgr_libcluster {

//using namespace ClusterRenderer;
class NetCamera;
class Float4FlatBuffer;
class TransformFlatBuffer;

struct Float4FlatBuffer;
struct TransformFlatBuffer;

class Serializer {
public:
class Buffer {
@ -23,16 +23,14 @@ public:
size_t size;
};

// Pass in the FlatBuffer, otherwise the memory won't exist when the method returns
// Pass in the FlatBufferBuilder, otherwise the memory won't exist when the method returns
static Buffer serialize(flatbuffers::FlatBufferBuilder &builder, const NetCamera & net_camera);
static void deserialize(uint8_t *buffer_pointer, NetCamera &net_camera_out);
private:

static void bufferToFloat4(const Float4FlatBuffer * float4_flatbuffer, cgr_libcluster::float4 & float4_out);

static void bufferToTransform(const TransformFlatBuffer * cam_matrix_flatbuffer, cgr_libcluster::Transform & cam_matrix_out);
};
};

} // cgr_libcluster

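A hedged usage sketch of the lifetime rule in the comment above (the Buffer payload field name is assumed; only its size member appears in this diff):

NetCamera net_camera;  // populated elsewhere
flatbuffers::FlatBufferBuilder builder;
Serializer::Buffer buf = Serializer::serialize(builder, net_camera);
// The builder owns the bytes buf points into, so it must stay alive until
// the payload has been written out, e.g. send(socket, buf.data, buf.size);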
@ -125,6 +125,8 @@ size_t ServerImage::read(RPCReceive &rcv)
NetCamera net_camera;
rcv.read_buffer(&net_camera, sizeof(NetCamera));
size_t buf_size = net_camera.cam_width * net_camera.cam_height * sizeof (ImagePixel);
// The log line below is used by get_metrics.py to calculate stats. If you change it,
// please make sure get_metrics.py still works correctly. Update it if needed.
LOG(INFO) << "net camera received for frame: " << net_camera.frame << " " << net_camera.cam_width << " " << net_camera.cam_height;
rcv.read_buffer(pixels, buf_size);
LOG(INFO) << "read image for frame: " << net_camera.frame << " with sample: " << net_camera.sampleCount << " of size: " << buf_size;
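Since read() above pulls NetCamera off the wire as raw bytes, both ends must agree on its layout; a hedged guard that could sit next to the NetCamera definition (requires <type_traits>):

static_assert(std::is_trivially_copyable<NetCamera>::value,
              "NetCamera is sent as raw bytes over the socket; keep it trivially copyable");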
@ -1,22 +1,56 @@
|
||||
|
||||
#include "net_base.h" // for NET_IMAGE_TIMEOUT
|
||||
|
||||
#include <fstream> // to save video stream
|
||||
|
||||
#include <tbb/parallel_for.h>
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
#ifdef WITH_CUDA_DYNLOAD
|
||||
#include <cuew.h>
|
||||
// Do not use CUDA SDK headers when using CUEW
|
||||
// The macro below is used by Optix SDK and is necessary to avoid DSO loading collision
|
||||
// See device_optix.cpp for example.
|
||||
#define OPTIX_DONT_INCLUDE_CUDA
|
||||
#else
|
||||
#include <cuda.h>
|
||||
#endif
|
||||
#include "compression/nv_decoder.h"
|
||||
#include "compression/nv_encoder.h"
|
||||
#endif
|
||||
#include "compression/turbojpeg_compressor.h"
|
||||
#ifdef WITH_CUDA
|
||||
#include "cuda_context_provider.h"
|
||||
#endif
|
||||
#include "./utils/timer.h" // for time_dt
|
||||
#include "denoising/denoising_context.h"
|
||||
#include "net_base.h" // for NET_IMAGE_TIMEOUT
|
||||
#ifdef WITH_OPTIX
|
||||
#include "denoising/master_optix_denoiser.h"
|
||||
#endif
|
||||
#include "server_image_buffer.h"
|
||||
#include "server_image.h"
|
||||
#ifdef WITH_CUDA
|
||||
#include "../libcluster_cuda_kernels/gpu_image_utils.h"
|
||||
#endif
|
||||
|
||||
namespace cgr_libcluster {
|
||||
|
||||
const string ServerImageBuffer::VIDEO_FULL_FILE_NAME = "master_nvencoded_video_stream.h264";

ServerImageBuffer::ServerImageBuffer(int s, bool hasServerImage, bool hasStreamImage): imageBuffer(NULL), buffer_size(s)
{
if (hasServerImage) imageBuffer = new ServerImage[buffer_size];
if (hasStreamImage) streamedImageBuffer.resize(buffer_size);
turbojpeg_compressor_uptr.reset(new TurbojpegCompressor());
reset();
}

ServerImageBuffer::~ServerImageBuffer()
{
if (imageBuffer) delete []imageBuffer;
#if defined(OS_LINUX) && defined(WITH_CUDA)
releaseImageCompressionCudaBuffers();
#endif
}

ServerImage* ServerImageBuffer::getActiveImage() {
@ -29,6 +63,12 @@ ServerImage& ServerImageBuffer::getReadImage() {
return imageBuffer[readIndex];
}

void ServerImageBuffer::set_output_folder_path(const std::string & output_folder_path) {
this->output_folder_path = output_folder_path;
encoded_videostream_filename = output_folder_path + "/" + VIDEO_FULL_FILE_NAME;
VLOG(3) << "Path to the nvencoded video stream file: " << encoded_videostream_filename;
}

void ServerImageBuffer::reset()
{
readIndex = 0;
@ -95,6 +135,7 @@ void ServerImageBuffer::endInsertImage()
//but I am leaving it fixed for now
void ServerImageBuffer::parallel_convert(const ServerImage& src, std::vector<cgr_libcluster::uchar3>& dst)
{
scoped_timer pixel_convert_timer;
int ts = src.getWidth() * src.getHeight();
int thread_count = PIXEL_THREAD_COUNT;
int interval = ts/thread_count;
@ -118,6 +159,7 @@ void ServerImageBuffer::parallel_convert(const ServerImage& src, std::vector<cgr
VLOG(1) << "ERROR. Unknown master image color format. We should not get here. Requested color format value: "
<< master_image_color_format;
}
LOG(INFO) << "pixel conversion time for frame: " << src.camera.frame << " " << pixel_convert_timer.elapsed();
}

void ServerImageBuffer::normalizeImage(ServerImage &server_image) {
@ -134,22 +176,220 @@ void ServerImageBuffer::normalizeImage(ServerImage &server_image) {
}
}

void ServerImageBuffer::writeImageToVideoStreamFile(
const std::vector<uint8_t> &encoded_image) const {
if(encoded_videostream_filename.length() < 1) {
return;
}
std::ofstream video_stream_file;
video_stream_file.open(encoded_videostream_filename, std::ios::app | std::ios::binary);
if(video_stream_file.is_open()) {
video_stream_file.write(reinterpret_cast<const char*>(encoded_image.data()), encoded_image.size());
VLOG(3) << "Wrote encoded image of size: " << encoded_image.size();
video_stream_file.close();
} else {
std::string message = "FATAL. Unable to open video stream output file: " + encoded_videostream_filename;
throw std::invalid_argument(message);
}
}

#if defined(OS_LINUX) && defined(WITH_CUDA)

void ServerImageBuffer::allocateCudaBuffer(CUdeviceptr * buffer_ptr, size_t buffer_size, const std::string & buffer_name) {
LOG(INFO) << "Allocating cuda memory for: " << buffer_name;
CUresult cu_result = cuMemAlloc(buffer_ptr, buffer_size);
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Could not allocate memory for " + buffer_name + " on device. Cuda error code: "
+ std::to_string(cu_result);
throw std::runtime_error(message);
}
}

void ServerImageBuffer::resetNvEncoder(const ServerImage &server_image) {
VLOG(3) << "Resetting NvEncoder";
CUcontext cuda_context = CudaContextProvider::getPrimaryContext(CUDA_DEVICE_NUM);
const CUDAContextScope scope(cuda_context);
// Renderer, optix denoiser and nvencoder share the same cuda context; call cuCtxSynchronize
// to make sure all operations on the cuda context are completed before we start creating NvEncoder.
// Not doing this may lead to sporadic problems such as the nvEncDestroyEncoder call hanging.
cuCtxSynchronize();
nv_encoder_uptr.reset(new NvEncoder(
NV_ENC_BUFFER_FORMAT_NV12,
cuda_context,
server_image.getWidth(),
server_image.getHeight()));
VLOG(3) << "Created NvEncoder successfully";
releaseImageCompressionCudaBuffers();
allocateImageCompressionCudaBuffers(server_image);
VLOG(3) << "Resetting NvEncoder done";
}

void ServerImageBuffer::copyServerImageToGpuMemory(const ServerImage & server_image, CUdeviceptr & linear_image_buffer_gpu) {
CUcontext cuda_context = CudaContextProvider::getPrimaryContext(CUDA_DEVICE_NUM);
const CUDAContextScope scope(cuda_context);
uint8_t* linear_image_buffer_cpu = server_image.denoised ? (uint8_t*)server_image.denoised_pixels :
(uint8_t*)server_image.pixels;
CUresult cu_result = cuMemcpyHtoD(linear_image_buffer_gpu, linear_image_buffer_cpu, server_image.getBufferSize());
if(cu_result != CUDA_SUCCESS) {
std::string message = "Error. Could not copy input buffer from host to device memory. CUDA error code: "
+ std::to_string(cu_result);
throw std::runtime_error(message);
}
}

void ServerImageBuffer::allocateImageCompressionCudaBuffers(const ServerImage &server_image) {
const size_t num_pixels = server_image.getImageSize();
// 1 byte per color channel
const int nvencoder_yuv_nv12_buffer_size = num_pixels + (num_pixels)/2;
allocateCudaBuffer(&nvencoder_input_buffer_gpu, nvencoder_yuv_nv12_buffer_size,
"input cuda buffer for nvencoder");
allocateCudaBuffer(&linear_image_buffer_gpu, server_image.getBufferSize(),
"cuda buffer to accept linear images from CPU memory");
}
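
As a worked example of the NV12 sizing above: a 1280x720 frame has num_pixels = 921,600, so the encoder input buffer is 921,600 luma bytes plus 921,600/2 = 460,800 interleaved chroma bytes, 1,382,400 bytes in total, i.e. 1.5 bytes per pixel, which is exactly what num_pixels + num_pixels/2 computes.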

void ServerImageBuffer::releaseImageCompressionCudaBuffers() {
CUresult cu_result = cuMemFree(nvencoder_input_buffer_gpu);
if(cu_result != CUDA_SUCCESS) {
LOG(ERROR) << "Error. cuMemFree failed for nvencoder_input_buffer_gpu with error code: " << std::to_string(cu_result);
}
nvencoder_input_buffer_gpu = 0;

cu_result = cuMemFree(linear_image_buffer_gpu);
if(cu_result != CUDA_SUCCESS) {
LOG(ERROR) << "Error. cuMemFree failed for linear_image_buffer_gpu with error code: " << std::to_string(cu_result);
}
linear_image_buffer_gpu = 0;
}

void ServerImageBuffer::deleteNvEncoder() {
nv_encoder_uptr.reset(nullptr);
}

#endif // OS_LINUX && WITH_CUDA

void ServerImageBuffer::createStreamedImage(int index, bool scaled, DenoisingContext & denoising_context)
{
ServerImage &server_image = imageBuffer[index];
StreamedImage &streamed_image = streamedImageBuffer[index];
streamed_image.initImage(server_image.camera);
server_image.denoised = false;
if(!scaled) {
normalizeImage(server_image);
}
float sampleCount = server_image.sampleCount;

#if defined(OS_LINUX) && defined(WITH_CUDA)
const bool use_nvencoder_for_compression = server_image.camera.master_image_compressor ==
ClusterSessionParams::MASTER_IMAGE_COMPRESSOR_NVENCODER;
if(use_nvencoder_for_compression) {
if(!nv_encoder_uptr || server_image.camera.frame == 0) {
resetNvEncoder(server_image);
}
}
#else
if(server_image.camera.master_image_compressor == ClusterSessionParams::MASTER_IMAGE_COMPRESSOR_NVENCODER) {
throw std::runtime_error("ERROR. NVENCODER compressor is requested. Server is compiled without CUDA support\
so has no nvencoder and can not encode images with nvencoder.\
Recompile with CUDA or use JPEG compressor instead. Terminating.");
}
#endif

if(server_image.camera.master_denoiser) {
bool denoising_ok = denoiseImage(server_image, denoising_context);
if(!denoising_ok) {
throw std::runtime_error("Image denoising failed.");
}
}
MasterDenoiser * master_denoiser = denoising_context.getDenoiser(server_image.camera.master_denoiser);
const bool image_is_denoised_by_optix_denoiser = master_denoiser &&
master_denoiser->getType() == ClusterSessionParams::MasterDenoiser::MASTER_DENOISER_OPTIX;

#ifdef WITH_OPTIX
MasterOptixDenoiser* optix_denoiser = nullptr;
if(image_is_denoised_by_optix_denoiser) {
optix_denoiser = dynamic_cast<MasterOptixDenoiser*>(master_denoiser);
}
#endif

scoped_timer compression_timer;
#if defined(OS_LINUX) && defined(WITH_CUDA)
if(use_nvencoder_for_compression) {
#ifdef WITH_OPTIX
compressWithNvencoder(server_image, streamed_image, optix_denoiser);
#else
compressWithNvencoder(server_image, streamed_image);
#endif
} else {
#ifdef WITH_OPTIX
compressWithTurbojpeg(server_image, streamed_image, optix_denoiser);
#else
compressWithTurbojpeg(server_image, streamed_image);
#endif
}
#else
#ifdef WITH_OPTIX
compressWithTurbojpeg(server_image, streamed_image, optix_denoiser);
#else
compressWithTurbojpeg(server_image, streamed_image);
#endif
#endif

double compression_time = compression_timer.elapsed();
LOG(INFO) << "compression time for frame: " << server_image.camera.frame << " " << compression_time <<
" (includes pixel conversion time)";
}

#if defined(OS_LINUX) && defined(WITH_CUDA)
#ifdef WITH_OPTIX
void ServerImageBuffer::compressWithNvencoder(ServerImage &server_image, StreamedImage &streamed_image,
MasterOptixDenoiser* optix_denoiser) {
#else
void ServerImageBuffer::compressWithNvencoder(ServerImage &server_image, StreamedImage &streamed_image) {
#endif
CUstream cuda_stream = nullptr;
CUdeviceptr image_to_compress_gpu_ptr = 0;
#ifdef WITH_OPTIX
if(optix_denoiser) {
cuda_stream = optix_denoiser->getCudaStream();
image_to_compress_gpu_ptr = optix_denoiser->getCudaMemPointerToDenoisedImage();
} else
#endif
{
copyServerImageToGpuMemory(server_image, linear_image_buffer_gpu);
image_to_compress_gpu_ptr = linear_image_buffer_gpu;
}
const bool useSrgbColorSpace =
master_image_color_format == ClusterSessionParams::MasterImageColorFormat::MASTER_IMAGE_COLOR_FORMAT_SRGB;
gpuRawRgbToGammaCorrectedYuvNv12(image_to_compress_gpu_ptr, server_image.getWidth(),
server_image.getHeight(), useSrgbColorSpace, cuda_stream, nvencoder_input_buffer_gpu);
nv_encoder_uptr->encode(nvencoder_input_buffer_gpu, streamed_image.getCompressedImage());
}
#endif // OS_LINUX && WITH_CUDA

#ifdef WITH_OPTIX
void ServerImageBuffer::compressWithTurbojpeg(ServerImage &server_image, StreamedImage &streamed_image,
MasterOptixDenoiser* optix_denoiser) {
#else
void ServerImageBuffer::compressWithTurbojpeg(ServerImage &server_image, StreamedImage &streamed_image) {
#endif
#ifdef WITH_OPTIX
if(optix_denoiser) {
optix_denoiser->copyDenoisedImageFromCudaToHostMemory(
(uint8_t*)server_image.denoised_pixels);
server_image.denoised = true;
}
#endif
parallel_convert(server_image, streamed_image.getByteBuffer());
uint8_t *jpeg_data = NULL;
size_t buf_size = turbojpeg_compressor_uptr->compress(streamed_image.getByteBuffer().data(),
server_image.getWidth(), server_image.getHeight(),
server_image.camera.compression_quality,
jpeg_data);
if (jpeg_data) {
streamed_image.copyInCompressedImage(jpeg_data, buf_size);
turbojpeg_compressor_uptr->free(jpeg_data);
}
}

bool ServerImageBuffer::denoiseImage(ServerImage &bufferImage, DenoisingContext & denoising_context) {
@ -162,7 +402,22 @@ bool ServerImageBuffer::denoiseImage(ServerImage &bufferImage, DenoisingContext
if(result == DenoisingResult::OK) {
return true;
}

#ifdef WITH_OPTIX
else if (result == DenoisingResult::IMAGE_TOO_BIG &&
master_denoiser->getType() == ClusterSessionParams::MasterDenoiser::MASTER_DENOISER_OPTIX) {
bool replacement_ok = denoising_context.replaceOptixDenoiser(bufferImage.getWidth(), bufferImage.getHeight());
if(replacement_ok) {
master_denoiser = denoising_context.getDenoiser(bufferImage.camera.master_denoiser);
if(master_denoiser) {
return master_denoiser->denoise(bufferImage) == DenoisingResult::OK;
} else {
LOG(WARNING) << "WARNING. No denoiser instance of requested type: " << bufferImage.camera.master_denoiser;
}
} else {
LOG(ERROR) << "ERROR. replaceOptixDenoiser failed, image will not be denoised";
}
}
#endif
return false;
}

@ -190,18 +445,18 @@ int ServerImageBuffer::add_frame(RPCReceive &rcv, std::atomic<bool> &stopped) {

//Feng TODO: Add a TIMEOUT in case the thin client gets stuck and is unable
//to process the streamed images.
//Not having this TIMEOUT requires the main SessionThread
//to stay alive until we have killed theClient.
//The same applies to add_frame(RPCReceive &),
//where the network thread of the server for master
//needs to keep pulling from the queue for there to
//be space in the queue to add new worker frames.

int ServerImageBuffer::add_streamed_image(RPCReceive &rcv,
std::atomic<bool> &stopped, bool save_streamed_image,
const std::string &output_folder_path, int frame_id) {
LOG(INFO) << "thin client tries to find space in the display image buffer";

// while (isFull() && numIterations++ < maxIterations) {
while (isFull()) {
if (stopped) return -1;
@ -209,19 +464,43 @@ int ServerImageBuffer::add_streamed_image(RPCReceive &rcv,
}
if (!isFull() ) {
int frameIndex = writeIndex;
StreamedImage &streamed_image = streamedImageBuffer[frameIndex];
streamed_image.read(rcv);
scoped_timer decompress_timer;
#if defined(OS_LINUX) && defined(WITH_CUDA)
const bool use_nvdecoder_for_decompression = streamed_image.getNetCamera().master_image_compressor ==
ClusterSessionParams::MASTER_IMAGE_COMPRESSOR_NVENCODER;
if(use_nvdecoder_for_decompression) {
if(!nv_decoder_uptr || streamed_image.getFrame() == 0) {
VLOG(3) << "Creating nvdecoder";
CUcontext cuda_context = CudaContextProvider::getPrimaryContext(CUDA_DEVICE_NUM);
const CUDAContextScope scope(cuda_context);
nv_decoder_uptr.reset(new NvDecoder(cuda_context));
}
nv_decoder_uptr->decode(streamed_image.getCompressedImage(),
streamed_image.getFrame(), &streamed_image.getRgbImageBuffer());
} else
#endif
{
int width = 0, height = 0;
streamed_image.getImage(width, height);
if (!turbojpeg_compressor_uptr->decompress(streamed_image.getCompressedImage(), width, height,
streamed_image.getRgbImageBuffer())) {
LOG(ERROR) << "jpeg decompression failed";
}
}
LOG(INFO) << "decompress image for frame: " << streamed_image.getFrame() << " " << decompress_timer.elapsed();
if (save_streamed_image) {
if (!streamed_image.saveImage(output_folder_path, "client_streamed_image_rgb_", frame_id)) {
LOG(ERROR) << "failed to save streamed image";
}
}
incWriteIndex();
LOG(INFO) << "thin client insert master image frame: "<< streamed_image.getFrame();
return frameIndex;
} else {
StreamedImage streamed_image;
streamed_image.read(rcv);
return -1;
}
}

@ -11,41 +11,54 @@
#define IMAGE_FRAME_COUNT 10
#define PIXEL_THREAD_COUNT 60

// Not sure that this is the best place to set OS_LINUX.
// IMO it should be set somewhere at a higher level;
// setting it here for now to get the system built.
// TODO: revisit this flag later
#if defined(linux) || defined(__linux) || defined(__linux__)
# ifndef OS_LINUX
# define OS_LINUX
# endif
#endif

namespace cgr_libcluster {

using std::string;

class Camera;
class MasterOptixDenoiser;
class NetCamera;
#if defined(OS_LINUX) && defined(WITH_CUDA)
class NvEncoder;
class NvDecoder;
#endif
class RPCSend;
class RPCReceive;
class PathTraceDisplay;
class ServerImage;
class DenoisingContext;

class TurbojpegCompressor;

class ServerImageBuffer {
public:
ServerImageBuffer(int s=IMAGE_FRAME_COUNT, bool hasServerImage = true, bool hasStreamImage = false);
~ServerImageBuffer();

void init(bool hasServerImage = true, bool hasStreamImage = true);

bool isEmpty();
bool isFull();
ServerImage& getReadImage();

ServerImage* getActiveImage();

ServerImage* beginInsertImage(NetCamera&);
void endInsertImage();
bool begin_frame(NetCamera&);

int add_frame(RPCReceive& rcv, std::atomic<bool>& stopped);
int add_streamed_image(RPCReceive& rcv, std::atomic<bool>& stopped,
bool save_streamed_image, const std::string &output_folder_path, int frame_id);
bool wait_for_streamed_image(std::atomic<bool>& stopped );
const StreamedImage* get_streamed_image();

void set_output_folder_path(const std::string & output_folder_path);

void reset();

@ -57,7 +70,10 @@ public:
void set_master_image_color_format(ClusterSessionParams::MasterImageColorFormat master_image_color_format_in) {
master_image_color_format = master_image_color_format_in;
}

#if defined(OS_LINUX) && defined(WITH_CUDA)
void deleteNvEncoder();
#endif

//only accessed by insertion thread

std::vector<StreamedImage> streamedImageBuffer;
@ -72,6 +88,9 @@ public:
tbb::mutex serverImageStopMutex;

private:
static const int CUDA_DEVICE_NUM = 0;
static const string VIDEO_FULL_FILE_NAME;

bool denoiseImage(ServerImage &bufferImage, DenoisingContext & denoising_context);
// In the master-workers configuration (which is the target production setup) images are normalised
// upon arriving at the master. It's done in different threads, which benefits from the
@ -82,6 +101,55 @@ private:
void normalizeImage(ServerImage &server_image);
ClusterSessionParams::MasterImageColorFormat master_image_color_format =
ClusterSessionParams::DEFAULT_MASTER_IMAGE_COLOR_FORMAT;

#ifdef WITH_CUDA
void allocateCudaBuffer(long long unsigned int * buffer_ptr, size_t buffer_size,
const std::string & buffer_name);
void allocateImageCompressionCudaBuffers(const ServerImage &server_image);
void releaseImageCompressionCudaBuffers();
void resetNvEncoder(const ServerImage &server_image);

void copyServerImageToGpuMemory(const ServerImage &server_image, long long unsigned int &linear_image_buffer_gpu);
#endif // WITH_CUDA
void writeImageToVideoStreamFile(const std::vector<uint8_t> &encoded_image) const;

#ifdef WITH_CUDA
#ifdef WITH_OPTIX
void compressWithNvencoder(ServerImage &server_image, StreamedImage &streamed_image, MasterOptixDenoiser* optix_denoiser);
void compressWithTurbojpeg(ServerImage &server_image, StreamedImage &streamed_image, MasterOptixDenoiser* optix_denoiser);
#else
void compressWithNvencoder(ServerImage &server_image, StreamedImage &streamed_image);
void compressWithTurbojpeg(ServerImage &server_image, StreamedImage &streamed_image);
#endif // WITH_OPTIX
#else
void compressWithTurbojpeg(ServerImage &server_image, StreamedImage &streamed_image);
#endif // WITH_CUDA

std::string output_folder_path;
std::string encoded_videostream_filename;

bool image_buffers_allocated = false;

// Use long long unsigned int instead of CUdeviceptr since cuda headers are not included
// in this header file: server_image_buffer.h is included from cycles/session/buffers.cpp
// via libcluster/net_server.h, but cycles/session is unaware of CUDA, so compilation would fail.
// To minimize changes to cycles/session and keep it cuda independent, cuda headers are
// included only in server_image_buffer.cpp.
// We may come up with better encapsulation and a libcluster interface towards the Blender code.
long long unsigned int nvencoder_input_buffer_gpu = 0;

// When the rendered image is denoised by the OIDN denoiser, or is not denoised at all,
// it is hosted in CPU memory.
// In these cases we copy such images into this CUDA memory when nvencoder is used for image compression,
// so we can do gamma correction and rgb-to-yuv conversion before passing the image to the nvencoder.
long long unsigned int linear_image_buffer_gpu = 0;

// image compressors
#if defined(OS_LINUX) && defined(WITH_CUDA)
std::unique_ptr<NvEncoder> nv_encoder_uptr;
std::unique_ptr<NvDecoder> nv_decoder_uptr;
#endif
std::unique_ptr<TurbojpegCompressor> turbojpeg_compressor_uptr;
};

} // cgr_libcluster
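
Because the header above deliberately aliases CUdeviceptr as long long unsigned int, a compile-time check in server_image_buffer.cpp (where the CUDA headers are visible) can guard that assumption. This is an illustrative sketch, not part of the patch:

// Hypothetical guard for server_image_buffer.cpp: fail the build if the
// header's long long unsigned int members ever stop matching CUdeviceptr.
static_assert(sizeof(long long unsigned int) == sizeof(CUdeviceptr),
              "CUdeviceptr no longer fits in long long unsigned int");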

@ -1,15 +1,41 @@
#include "image_io_util.h"
#include <turbojpeg.h> // for tjFree
#include "./utils/timer.h" // for scoped_timer
#include "./utils/logging.h"
#include "streamed_image.h"
#include "net_simple.h"

#include "compression/turbojpeg_compressor.h"

namespace cgr_libcluster {

using OpenImageIO_v2_4::ImageOutput;
using OpenImageIO_v2_4::TypeDesc;

StreamedImage::StreamedImage() : allocated(false), w(0), h(0),
jpeg_compressor_uptr(new TurbojpegCompressor()) {
}

StreamedImage::~StreamedImage(){
}
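
// Note on the copy/move special members below: they intentionally do not copy
// any state from the source object; each instance just constructs its own
// TurbojpegCompressor, and image data is repopulated via initImage()/read().
// (This reading is inferred from the surrounding code, not stated in the patch.)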

StreamedImage::StreamedImage(const StreamedImage& ) :
jpeg_compressor_uptr(new TurbojpegCompressor()) {
}

StreamedImage::StreamedImage( StreamedImage&& ) :
jpeg_compressor_uptr(new TurbojpegCompressor()) {
}

StreamedImage& StreamedImage::operator=(const StreamedImage& ) {
jpeg_compressor_uptr.reset(new TurbojpegCompressor());
return *this;
}

StreamedImage& StreamedImage::operator=(StreamedImage&& ) {
jpeg_compressor_uptr.reset(new TurbojpegCompressor());
return *this;
}

size_t StreamedImage::read(RPCReceive &rcv)
{
size_t buf_size = 0;
@ -18,163 +44,47 @@ size_t StreamedImage::read(RPCReceive &rcv)

NetCamera net_camera;
rcv.read_buffer(&net_camera, sizeof(NetCamera));
// log line below is used by get_metrics.py to calculate stats. If you change it
// please make sure get_metrics.py still works correctly. Update if needed.
LOG(INFO) << "net camera received for frame: " << net_camera.frame << " " << net_camera.cam_width << " " << net_camera.cam_height;

initImage(net_camera);
if(compressed_image.size() != buf_size) {
compressed_image.resize(buf_size);
}
rcv.read_buffer(reinterpret_cast<char *>(compressed_image.data()), buf_size);
LOG(INFO) << "read image for frame: " << camera.frame << " with sample: " << camera.sampleCount << " size: " << buf_size;
return buf_size;
}

size_t StreamedImage::write(RPCSend &snd)
{
const size_t compressed_image_size = compressed_image.size();
if (compressed_image_size > 0) {
// get_metrics.py script depends on formatting of this log line, if you change it,
// please make sure that script still works or update it accordingly
LOG(INFO) << "stream image for frame: " << camera.frame << " with sample " << camera.sampleCount << " quality: " <<
camera.compression_quality << " buffer size " << compressed_image_size + sizeof(compressed_image_size) + sizeof(NetCamera);
snd.write();
snd.write_buffer(&compressed_image_size, sizeof(compressed_image_size));
snd.write_buffer(&camera, sizeof (NetCamera) );
snd.write_buffer(compressed_image.data(), compressed_image_size);
}
return compressed_image_size;
}
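
Read together with StreamedImage::read() above, write() defines a simple length-prefixed wire format for one streamed frame. A sketch of the layout (field names are illustrative only):

// On-the-wire layout produced by StreamedImage::write(RPCSend&):
//   size_t    compressed_image_size;           // payload length in bytes
//   NetCamera camera;                          // frame id, dimensions, sample count, ...
//   uint8_t   payload[compressed_image_size];  // JPEG or NVENC bitstream
// The receiver must consume the fields in exactly this order.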

#ifdef WITH_WEBRTC
size_t StreamedImage::write(cgr_streaming::WebrtcPeer *webrtcPeer)
{
if (webrtcPeer == nullptr) {
LOG(ERROR) << "webrtcPeer is nullptr";
return 0;
}
size_t buf_size = getBufferSize();
if (buf_size > 0) {
// TODO(fangy): Might need to work with pmishchuk@ to update the LOG line below so get_metrics.py works when WebRTC is used.
// get_metrics.py script depends on formatting of this log line, if you change it,
// please make sure that script still works or update it accordingly
LOG(INFO) << "stream image for frame: " << camera.frame << " with sample " << camera.sampleCount << " quality: " << camera.compression_quality << " buffer size " << buf_size + sizeof(buf_size) + sizeof(NetCamera) << " using WebRTC";
int width = 0, height = 0;
const void *image_buffer = getImage(width, height);
LOG(INFO) << "WebRTC sendFrame id " << getFrame() << " width " << width << " height " << height;
scoped_timer send_frame_timer;
cgr_streaming::WebrtcFrame frame(getFrame(), static_cast<const uint8_t*>(image_buffer), width, height);
if (!webrtcPeer->sendFrame(frame)) {
LOG(ERROR) << "failed to send frame via WebRTC";
}
LOG(INFO) << "send frame time for frame number: " << camera.frame << " " << send_frame_timer.elapsed();
}
return buf_size;
}
#endif

void StreamedImage::copyInCompressedImage(const uint8_t * compressed_image_ptr, const size_t size_in_bytes) {
// for uint8_t the number_of_elements is the same as size_in_bytes
const size_t number_of_elements = size_in_bytes;
compressed_image.assign(compressed_image_ptr, compressed_image_ptr + number_of_elements);
}

std::vector<uint8_t>& StreamedImage::getCompressedImage() {
return compressed_image;
}

std::vector<cgr_libcluster::uchar3>& StreamedImage::getByteBuffer() {
return byte_buffer;
}

bool StreamedImage::saveImage(const std::string &output_folder_path, const std::string &file_name_prefix, int frame_id) {
@ -183,4 +93,12 @@ bool StreamedImage::saveImage(const std::string &output_folder_path, const std::
return ImageIOUtil::saveFrame(file_path, TypeDesc::UCHAR, image_output.get(), getByteBuffer().data(), w, h);
}

std::vector<cgr_libcluster::uchar3>& StreamedImage::getRgbImageBuffer() {
return byte_buffer;
}

NetCamera& StreamedImage::getNetCamera() {
return camera;
}

} // cgr_libcluster

@ -10,29 +10,26 @@ namespace cgr_libcluster {
class PathTraceDisplay;
class RPCSend;
class RPCReceive;
class TurbojpegCompressor;

class StreamedImage {
public:
StreamedImage();
~StreamedImage();

StreamedImage(const StreamedImage& );
StreamedImage(StreamedImage&& );
StreamedImage& operator=(const StreamedImage& );
StreamedImage& operator=(StreamedImage&& );

public:
const void* getImage(int &width, int &height) const
{
width = w;
height = h;
return byte_buffer.data();
}

void initImage(NetCamera &cam) {
camera = cam;
if (allocated) {
@ -48,26 +45,28 @@ class StreamedImage {
size_t getBufferSize() const { return w * h * sizeof(cgr_libcluster::uchar3); }
size_t read(RPCReceive &rcv);
size_t write(RPCSend &snd);
#ifdef WITH_WEBRTC
size_t write(cgr_streaming::WebrtcPeer *webrtcPeer);
#endif
int getFrame() const { return camera.frame; }

bool saveImage(const std::string &output_folder_path, const std::string &file_name_prefix, int frame_id);

void copyInCompressedImage(const uint8_t * compressed_image_ptr, const size_t size_in_bytes);
std::vector<cgr_libcluster::uchar3>& getByteBuffer();
std::vector<uint8_t>& getCompressedImage();
std::vector<cgr_libcluster::uchar3>& getRgbImageBuffer();
NetCamera& getNetCamera();

private:
bool allocated;
int w;
int h;
std::vector<cgr_libcluster::uchar3> byte_buffer; // raw image as rgb
std::vector<uint8_t> compressed_image;

NetCamera camera;
// Have TurbojpegCompressor as a pointer here so we can use a forward declaration
// and avoid including the TurboJpeg header in this header, minimizing changes in the Blender code
// which includes this header but is not aware of TurboJpeg.
std::unique_ptr<TurbojpegCompressor> jpeg_compressor_uptr;
};

} // cgr_libcluster
@ -34,11 +34,13 @@ set(LIBRARIES

add_definitions(-DWITH_CYCLES_LOGGING)

if(WITH_CYCLES_DEVICE_CUDA)
  if(WITH_CUDA_DYNLOAD)
    list(APPEND LIBRARIES extern_cuew)
    add_definitions(-DWITH_CUDA_DYNLOAD)
  else()
    list(APPEND LIBRARIES ${CUDA_CUDA_LIBRARY})
  endif()
endif()

if(WITH_CYCLES_LOGGING)
@ -1,7 +1,9 @@
#include "testing/testing.h"
#include "gmock/gmock.h"
#ifdef WITH_OPTIX
#include <optix_stubs.h>
#include <optix_function_table_definition.h>
#endif

#include "mocks.h"
#include "denoising/denoising_context.h"
@ -21,10 +23,10 @@ public:
bool is_denoising_passes_on, const ImageOutputProvider & image_output_provider) :
MasterDenoiser(ClusterSessionParams::MasterDenoiser::MASTER_DENOISER_OIDN,
save_denoise_io, save_every_n_images, output_folder_path, is_denoising_passes_on, image_output_provider) {};

MOCK_METHOD0(destructor, void());
virtual ~MockOidnMasterDenoiser() override { destructor(); };

virtual DenoisingResult denoise(ServerImage &server_image) override { return DenoisingResult::OK; };
};

@ -35,7 +37,7 @@ public:
bool is_denoising_passes_on, const ImageOutputProvider & image_output_provider) :
MasterDenoiser(ClusterSessionParams::MasterDenoiser::MASTER_DENOISER_OPTIX,
save_denoise_io, save_every_n_images, output_folder_path, is_denoising_passes_on, image_output_provider) {};

MOCK_METHOD0(destructor, void());
virtual ~MockOptixMasterDenoiser() override { destructor(); };

@ -53,7 +55,7 @@ public:

// Tests

TEST(LibclusterDenoisingContextTest, uninitialized_context_getDenoiser_returns_null) {
const bool save_denoise_io = false;
const int save_every_n_images = 1;
const bool is_denoising_passes_on = false;
@ -109,7 +111,7 @@ TEST(LibclusterDenoisingContextTest, init_creates_all_denoisers_success) {
ASSERT_EQ(oidn_master_denoiser->getType(), ClusterSessionParams::MasterDenoiser::MASTER_DENOISER_OIDN);
ASSERT_EQ(oidn_master_denoiser, mock_oidn_master_denoiser_ptr);

#ifdef WITH_OPTIX
MasterDenoiser * optix_master_denoiser = denoising_context.getDenoiser(
ClusterSessionParams::MasterDenoiser::MASTER_DENOISER_OPTIX);
ASSERT_TRUE(optix_master_denoiser != nullptr);
@ -0,0 +1,46 @@
#ifndef __CUDA_UTILS_H__
#define __CUDA_UTILS_H__

namespace cgr_libcluster {

#define THROW_IF_ERROR true
#define DO_NOT_THROW false

#define CUDA_API_CALL(cuda_api, throw_if_error) \
do { \
CUresult cu_result = cuda_api; \
if (cu_result != CUDA_SUCCESS) { \
const char *error_name = NULL; \
cuGetErrorName(cu_result, &error_name); \
std::string message = std::string("ERROR. ") + #cuda_api + " failed with error: " + std::string(error_name); \
LOG(ERROR) << message; \
if (throw_if_error) { \
throw std::runtime_error(message); \
} \
} \
} while (0)

#define OPTIX_API_CALL(optix_api, throw_if_error) \
do { \
OptixResult optix_result = optix_api; \
if(optix_result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) { \
std::string message = std::string("ERROR. ") + #optix_api + " failed because the installed driver " \
"does not support ABI version: " + std::to_string(OPTIX_ABI_VERSION); \
LOG(ERROR) << message; \
if(throw_if_error) { \
throw std::runtime_error(message); \
} \
} else if(optix_result != OPTIX_SUCCESS) { \
std::string message = std::string("ERROR. ") + #optix_api + " failed with error: " + \
std::to_string(optix_result); \
LOG(ERROR) << message; \
if(throw_if_error) { \
throw std::runtime_error(message); \
} \
} \
} while (0)

} // end of namespace cgr_libcluster

#endif
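
For illustration, a hypothetical call site for these macros might look as follows (the specific driver calls are assumptions, not from the patch):

// Allocation on the hot path: log and throw so the failure is not ignored.
CUdeviceptr buffer = 0;
CUDA_API_CALL(cuMemAlloc(&buffer, 1280 * 720 * 3 * sizeof(float)), THROW_IF_ERROR);
// Cleanup path: log the error but never throw (e.g. from a destructor).
CUDA_API_CALL(cuMemFree(buffer), DO_NOT_THROW);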

41
intern/cycles/cluster_rendering/libcluster/utils/image.cpp
Normal file
@ -0,0 +1,41 @@
#include <algorithm> // for std::max/std::min
#include <cstdint>
#include <vector>

#include "vector_types.h" // for uchar3

#include "image.h"

namespace cgr_libcluster {

#define PIXEL_THREAD_COUNT 60

int clip(int n, int lower, int upper) {
return std::max(lower, std::min(n, upper));
}

void yuv2Rgb(uint8_t *yuv_image, int width, int height,
std::vector<cgr_libcluster::uchar3> * rgb_image) {
const int num_pixels = width * height;
for(int i = 0; i < num_pixels; ++i) {
const int pixel_row = i/width;
const int pixel_column = i - (width * pixel_row);
const int uv_row = pixel_row / 2;
const int uv_column = pixel_column / 2;
const int u_i = num_pixels + uv_row * width + uv_column*2;
const int v_i = u_i + 1;
const int y = yuv_image[i];
const int u = yuv_image[u_i];
const int v = yuv_image[v_i];
const int c = y - 16;
const int d = u - 128;
const int e = v - 128;
const uint8_t r = clip((298*c + 409*e + 128) >> 8, 0, 255);
const uint8_t g = clip((298*c - 100*d - 208*e + 128) >> 8, 0, 255);
const uint8_t b = clip((298*c + 516*d + 128) >> 8, 0, 255);
uchar3 & rgb_pixel = (*rgb_image)[i];
rgb_pixel.x = r;
rgb_pixel.y = g;
rgb_pixel.z = b;
}
}

}
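
Per the contract stated in image.h, the caller must size the destination before calling yuv2Rgb. A minimal hypothetical call site:

// Decode a 1920x1080 NV12 frame (nv12_buffer: 1920*1080 luma bytes followed
// by the interleaved UV plane) into an RGB vector pre-sized by the caller.
std::vector<cgr_libcluster::uchar3> rgb(1920 * 1080);
yuv2Rgb(nv12_buffer, 1920, 1080, &rgb);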

12
intern/cycles/cluster_rendering/libcluster/utils/image.h
Normal file
@ -0,0 +1,12 @@
#ifndef __IMAGE_H__
#define __IMAGE_H__

#include <cstdint>
#include <vector>

#include "vector_types.h" // for cgr_libcluster::uchar3

namespace cgr_libcluster {

// This function expects that rgb_image is not null and is properly sized by the caller to accommodate all pixels
void yuv2Rgb(uint8_t *yuv_image, int width, int height,
std::vector<cgr_libcluster::uchar3> * rgb_image);

}

#endif
@ -0,0 +1,8 @@

file(GLOB SRC *.cpp *.cu)

if(NOT WITH_CYCLES_CUDA_BINARIES)
find_package(CUDA)
endif()

cuda_add_library(cycles_libcluster_cuda_kernels "${LIB}" ${SRC} ${SRC_HEADERS})
@ -0,0 +1,91 @@

#include <stdio.h>
#include <stdint.h>

#include <cuda.h>

#include "gpu_image_utils.h"

constexpr int NUM_COLOR_CHANNELS = 3;
constexpr int NUM_THREADS = 10240;
constexpr int NUM_THREADS_PER_BLOCK = 512;

template<typename T>
struct PixelRgb {
T r;
T g;
T b;
};

__device__
float color_linear_to_srgb(float c) {
if (c < 0.0031308f)
return (c < 0.0f) ? 0.0f : c * 12.92f;
else
return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f;
}

// Simple RGB to YUV NV12 (4:2:0 12 bpp) conversion based on a description in this doc:
// https://learn.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering
// It only supports even resolutions to keep it simple. Supporting odd resolutions
// adds extra edge-case complexity that is unnecessary for our use cases, since we only
// need standard resolutions like 720p, 1080p, 1024x1024, 2048x2048 etc., which are all even.
// RGB to YUV conversion also appears to be available in the NVIDIA Performance Primitives (npp),
// https://developer.nvidia.com/npp,
// but we currently do not use npp, so we won't introduce this dependency just for a single method.
// If there is a standard or better GPU implementation of RGB-to-YUV conversion available we can switch to it.
__global__
void gpuRawRgbToGammaCorrectedYuvNv12Impl(CUdeviceptr cu_image_in_ptr, int width, int height,
bool useSrgbColorSpace, CUdeviceptr cu_image_yuv_nv12_out_ptr) {
const int num_pixels = width * height;
const int num_threads_per_block = blockDim.x;
const int num_blocks_in_grid = gridDim.x;
const int total_num_of_threads = num_threads_per_block * num_blocks_in_grid;
const int num_pixels_to_process_in_thread = (num_pixels + total_num_of_threads) / total_num_of_threads;
const int start_i = blockIdx.x * num_threads_per_block * num_pixels_to_process_in_thread +
threadIdx.x * num_pixels_to_process_in_thread;
const int end_i = min(start_i + num_pixels_to_process_in_thread, num_pixels);
const PixelRgb<float> * cu_image_in_rgb_float_ptr = (PixelRgb<float>*)cu_image_in_ptr;
uint8_t* cu_image_yuv_nv12_out_uint_ptr = (uint8_t*)cu_image_yuv_nv12_out_ptr;
float r_f, g_f, b_f;
uint8_t r, g, b;
for(int i = start_i; i < end_i; ++i) {
// Gamma correction
const PixelRgb<float> & raw_pixel = cu_image_in_rgb_float_ptr[i];
if(useSrgbColorSpace) {
r_f = color_linear_to_srgb(raw_pixel.r);
g_f = color_linear_to_srgb(raw_pixel.g);
b_f = color_linear_to_srgb(raw_pixel.b);
} else {
r_f = raw_pixel.r;
g_f = raw_pixel.g;
b_f = raw_pixel.b;
}
r = (uint8_t)(__saturatef(r_f) * 255.0f + 0.5f);
g = (uint8_t)(__saturatef(g_f) * 255.0f + 0.5f);
b = (uint8_t)(__saturatef(b_f) * 255.0f + 0.5f);
// Convert sRGB to YUV
const int pixel_row = i/width;
const int num_pixels_above_the_current_row = pixel_row * width;
const int pixel_column = i - num_pixels_above_the_current_row;
const uint8_t y = (( 66 * r + 129 * g + 25 * b + 128) >> 8) + 16;
cu_image_yuv_nv12_out_uint_ptr[i] = y;
if(pixel_column % 2 == 0 && pixel_row % 2 == 0) {
const uint8_t u = ((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128;
const uint8_t v = ((112 * r - 94 * g - 18 * b + 128) >> 8) + 128;
const int u_i = num_pixels + i - (pixel_row/2 * width);
cu_image_yuv_nv12_out_uint_ptr[u_i] = u;
cu_image_yuv_nv12_out_uint_ptr[u_i + 1] = v;
}
}
}

void gpuRawRgbToGammaCorrectedYuvNv12(CUdeviceptr cu_image_in_ptr, int width, int height,
bool useSrgbColorSpace, CUstream cuda_stream, CUdeviceptr cu_image_yuv_nv12_out_ptr) {
const int num_blocks = NUM_THREADS / NUM_THREADS_PER_BLOCK;
const size_t size_of_dynamically_allocated_shared_memory = 0;
gpuRawRgbToGammaCorrectedYuvNv12Impl<<<num_blocks, NUM_THREADS_PER_BLOCK,
size_of_dynamically_allocated_shared_memory, cuda_stream>>>(
cu_image_in_ptr, width, height, useSrgbColorSpace, cu_image_yuv_nv12_out_ptr);
}
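
With NUM_THREADS = 10240, the kernel's ceiling-style division assigns each thread a contiguous run of pixels; for a 1280x720 frame (921,600 pixels) that is (921600 + 10240) / 10240 = 91 pixels per thread. A hypothetical host-side launch, assuming buffers allocated as in ServerImageBuffer::allocateImageCompressionCudaBuffers:

// rgb_in_gpu: width * height PixelRgb<float> values in device memory.
// nv12_out_gpu: width * height * 3 / 2 bytes of device memory for NV12 output.
gpuRawRgbToGammaCorrectedYuvNv12(rgb_in_gpu, 1280, 720,
    /*useSrgbColorSpace=*/true, /*cuda_stream=*/0, nv12_out_gpu);
// The launch is asynchronous; synchronize the stream before handing
// nv12_out_gpu to a consumer that runs on a different stream.
cuStreamSynchronize(0);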
@ -0,0 +1,7 @@
#ifndef __IMAGE_UTILS_H__
#define __IMAGE_UTILS_H__

#include <cuda.h> // for CUdeviceptr and CUstream

void gpuRawRgbToGammaCorrectedYuvNv12(CUdeviceptr cu_image_in_ptr, int width, int height,
bool useSrgbColorSpace, CUstream cuda_stream, CUdeviceptr cu_image_yuv_nv12_out_ptr);

#endif
@ -979,8 +979,8 @@ def get_resolution_stats(config):
print("Can not get resolution from the log line, it could be corrupted. Log line:\n" + log_line, file = sys.stderr)
log_line = log_file.readline()
continue
width = log_items[-2]
height = log_items[-1]
resolution = width + "x" + height
if current_resolution_stats is None or current_resolution_stats.name != resolution:
current_resolution_stats = MetricStats(resolution)

@ -16,7 +16,7 @@ from pprint import pprint

DEFAULT_NUM_GPU = 10

SUPPORTED_DEVICE_TYPES = ["OPTIX", "CUDA", "METAL"]
class ProcUnitType(enum.Enum):
CPU = "CPU"
GPU = "GPU"
@ -103,12 +103,16 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
}

/* Create context. */

// FRL_CGR BEGIN
// result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
result = cuDevicePrimaryCtxRetain(&cuContext, cuDevice);

if (result != CUDA_SUCCESS) {
set_error(string_printf("Failed to create CUDA context (%s)", cuewErrorString(result)));
return;
}
// FRL_CGR END

int major, minor;
cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
@ -116,14 +120,20 @@ CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
cuDevArchitecture = major * 100 + minor * 10;

/* Pop context set by cuCtxCreate. */
// FRL_CGR BEGIN
// with cuDevicePrimaryCtxRetain we do not need to pop the current context
// cuCtxPopCurrent(NULL);
// FRL_CGR_END
}

CUDADevice::~CUDADevice()
{
texture_info.free();

// FRL_CGR BEGIN
// do not destroy the primary context
//cuda_assert(cuCtxDestroy(cuContext));
// FRL_CGR END
}
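
Switching from cuCtxCreate to cuDevicePrimaryCtxRetain lets the renderer, denoiser and encoder share one context per device. The retain is reference counted, so a matching teardown would release rather than destroy; a sketch of the pairing (the patch itself leaves the context retained for the life of the process):

// Sketch of the primary-context lifetime pairing (assumes cuInit(0) already ran).
CUdevice dev = 0;
CUcontext ctx = nullptr;
cuDeviceGet(&dev, 0);
cuDevicePrimaryCtxRetain(&ctx, dev);  // refcounted; all callers get the same context
// ... use ctx, e.g. via cuCtxPushCurrent()/cuCtxPopCurrent() ...
cuDevicePrimaryCtxRelease(dev);       // drop the refcount instead of cuCtxDestroy(ctx)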

bool CUDADevice::support_device(const uint /*kernel_features*/)
@ -73,7 +73,7 @@ class CUDADevice : public GPUDevice {
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size, size_t offset) override;
void host_mem_alloc(size_t size, int alignment, void **p_mem) override;
void host_mem_free(void *p_mem) override;

void mem_alloc(device_memory &mem) override;

void mem_copy_to(device_memory &mem) override;

@ -963,7 +963,7 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
// static thread_mutex mutex;
// thread_scoped_lock lock(mutex);

const CUDAContextScope scope(this);

//const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
// FRL_CGR
@ -6,6 +6,7 @@
CCL_NAMESPACE_BEGIN

using namespace std::chrono;
using cgr_libcluster::ClusterSessionParams;

ccl::float3 &toCyclesFloat3(cgr_libcluster::float3 &value);
cgr_libcluster::float3 &fromCyclesFloat3(ccl::float3 &value);
@ -82,6 +83,7 @@ void Session::logRenderSessionPanelSettings() {
" num servers: " << params.cluster_session_params.num_servers << std::endl <<
" compression quality: " << params.cluster_session_params.master_compression_quality << std::endl <<
" master_image_color_format: " << params.cluster_session_params.master_image_color_format << std::endl <<
" master_image_compressor: " << params.cluster_session_params.master_image_compressor << std::endl <<
" master denoiser: " << params.cluster_session_params.master_denoiser;
}

@ -139,7 +141,7 @@ void Session::modify_shader_object(const std::string & object_name, const std::s
attribute_name << " value is not set";
}

//helpers used by the servers session to set buffer params and scene camera according to
//NetCamera command

static void setBufferParams(BufferParams &buffer_params_, cgr_libcluster::NetCamera& netCamera)
@ -242,9 +244,9 @@ void Session::resetNetCamera(cgr_libcluster::NetCamera& netCamera)
VLOG(3) << "set net integrator seed: " << integrator->get_seed();
}
}

tile_manager_.reset_scheduling(buffer_params_, get_effective_tile_size());
render_scheduler_.reset(buffer_params_, netCamera.sampleCount, 0);
tile_manager_.update(buffer_params_, scene);

params.samples = netCamera.sampleCount;
@ -302,10 +304,11 @@ bool Session::server_wait_for_camera()
}
}

//-------Methods used by client and standalone session to create NetCamera objects, serialize and transport (client) them
//-------------------------------------------------------------------------------------------------------------------------
//used by client session to initialize a NetCamera for transport
static void initializeNetCamera(cgr_libcluster::NetCamera& netCamera, Camera &cam, int s, int f, int sceneFrame, int iseed,
cgr_libcluster::ClusterSessionParams& csp)
{
netCamera.cam_matrix = fromCyclesTransform(cam.get_matrix());
netCamera.cam_type = fromCyclesCameraType(cam.get_camera_type());
@ -329,11 +332,29 @@ static void initializeNetCamera(cgr_libcluster::NetCamera& netCamera, Camera &ca

netCamera.master_denoiser = csp.master_denoiser;
netCamera.master_image_color_format = csp.master_image_color_format;
netCamera.master_image_compressor = csp.master_image_compressor;
VLOG(3) << "Constructed net camera: " << netCamera.cam_width << " " << netCamera.cam_height;
}

void Session::client_send_camera(int samples)
{
#ifdef WITH_CUDA
if (theClient &&
params.cluster_session_params.master_image_compressor == ClusterSessionParams::MASTER_IMAGE_COMPRESSOR_NVENCODER &&
(scene->camera->get_full_width() % 2 || scene->camera->get_full_height() % 2)) {
std::string message = "NVENCODER compressor is requested for images with odd dimension: " +
std::to_string(scene->camera->get_full_width()) + "x" + std::to_string(scene->camera->get_full_height()) +
" Current implementation of NVENCODER only supports images with even dimensions.\
To use NVENCODER compressor please resize image so it has even dimension like 1280x720";
throw std::runtime_error(message);
}
#else
if(params.cluster_session_params.master_image_compressor == ClusterSessionParams::MASTER_IMAGE_COMPRESSOR_NVENCODER) {
throw std::runtime_error("ERROR. NVENCODER compressor is requested. Client is compiled without CUDA support\
so has no nvencoder and will not be able to decode received images which are nvencoded.\
Recompile with CUDA or use JPEG compressor instead. Terminating.");
}
#endif
//TODO Modify object msg needs to be handled properly
if (theClient) {
cgr_libcluster::ModifyObjectParams & modify_object_params = params.cluster_session_params.modify_object_params;
@ -349,7 +370,6 @@ void Session::client_send_camera(int samples)
int scene_frame = scene->getCurrentFrame();
initializeNetCamera(netCamera, *(scene->camera), samples, frame_count, scene_frame, iseed, params.cluster_session_params);
if (theClient) {

//client sends NetCamera object to master
theClient->send_camera(netCamera);

@ -380,9 +400,9 @@ void Session::client_set_modify_object_message(std::string & object_name, std::s
bool Session::ready_to_reset()
{
bool ready_to_reset = path_trace_->ready_to_reset();

//
//this logic is a bit complicated but it basically tracks
//timeout as between the last "not" ready_to_reset
//
if (ready_to_reset) {
@ -5,4 +5,16 @@ if(WITH_FREESTYLE)
add_definitions(-DWITH_FREESTYLE)
endif()

#FRL_CLR_BEGIN

if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()

if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()

#FRL_CLR_END

add_subdirectory(intern)
@ -146,6 +146,7 @@ enum {
CAM_SHOWSENSOR = (1 << 8),
CAM_SHOW_SAFE_CENTER = (1 << 9),
CAM_SHOW_BG_IMAGE = (1 << 10),
CAM_SHOW_KINECT_AZURE = (1 << 11), // Custom FB
};

/* Sensor fit */

@ -1657,6 +1657,7 @@ typedef struct NodeShaderMix {
#define SHD_GLOSSY_GGX 2
#define SHD_GLOSSY_ASHIKHMIN_SHIRLEY 3
#define SHD_GLOSSY_MULTI_GGX 4
#define SHD_GLOSSY_GGX_FRESNEL_REFRACTION 5

/* vector transform */
#define SHD_VECT_TRANSFORM_TYPE_VECTOR 0
@ -850,7 +850,8 @@ typedef struct RenderData {
|
||||
int schedule_modify_object_message;
|
||||
int device_scale_factor;
|
||||
int master_image_color_format;
|
||||
char _pad_frl_fields[4]; // add padding if needed to satify aligning check (by 8 bytes)
|
||||
int master_image_compressor;
|
||||
//char _pad_frl_fields[4]; // add padding if needed to satify aligning check (by 8 bytes)
|
||||
|
||||
/* WebRTC related */
|
||||
int use_webrtc;
|
||||
@ -898,7 +899,9 @@ typedef enum {
|
||||
typedef enum {
|
||||
MASTER_DENOISER_NONE = 0,
|
||||
MASTER_DENOISER_OIDN = 1,
|
||||
#ifdef WITH_OPTIX
|
||||
MASTER_DENOISER_OPTIX = 2,
|
||||
#endif //WITH_OPTIX
|
||||
MASTER_DENOISER_BARCELONA = 3,
|
||||
} eMasterDenoiser;
|
||||
|
||||
@ -908,6 +911,14 @@ typedef enum {
MASTER_IMAGE_COLOR_FORMAT_SRGB = 2,
} eMasterImageColorFormat;

/* RenderData.master_image_compressor */
typedef enum {
MASTER_IMAGE_COMPRESSOR_JPEG = 1,
#ifdef WITH_CUDA
MASTER_IMAGE_COMPRESSOR_NVENCODER = 2,
#endif //WITH_CUDA
} eMasterImageCompressor;

/* RenderData.peer_connection_protocol */
typedef enum {
PEER_CONNECTION_PROTOCOL_ANY = 0,
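For orientation, a hedged sketch of how a consumer would dispatch on the new enum; compress_jpeg and compress_nvenc are placeholder names, not functions from this patch:

switch ((eMasterImageCompressor)rd->master_image_compressor) {
  case MASTER_IMAGE_COMPRESSOR_JPEG:
    compress_jpeg(frame);            /* always available */
    break;
#ifdef WITH_CUDA
  case MASTER_IMAGE_COMPRESSOR_NVENCODER:
    compress_nvenc(frame);           /* CUDA builds only */
    break;
#endif
}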
@ -20,6 +20,17 @@ set(LIB
)

add_definitions(-DWITH_DNA_GHASH)
#FRL_CLR_BEGIN

#if(WITH_CYCLES_DEVICE_CUDA)
# add_definitions(-DWITH_CUDA)
#endif()

if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()

#FRL_CLR_END

# Needed for `mallocn.c`.
if(HAVE_MALLOC_STATS_H)
@ -241,6 +241,18 @@ if(WITH_PYTHON)
)
endif()

#FRL_CLR_BEGIN

if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()

if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()

#FRL_CLR_END

if(WITH_IMAGE_OPENEXR)
add_definitions(-DWITH_OPENEXR)
endif()

@ -6282,12 +6282,14 @@ static void rna_def_scene_render_data(BlenderRNA *brna)
"OIDN",
0,
"OpenImageDenoise",
"Master denoises images with Intel Open Image Denoise"},
#ifdef WITH_OPTIX
{MASTER_DENOISER_OPTIX,
"OPTIX",
0,
"OptiX",
"Master denoises images with NVIDIA OptiX AI-Accelerated denoiser"},
#endif
{MASTER_DENOISER_BARCELONA,
"BARCELONA",
0,
@ -6310,6 +6312,22 @@ static void rna_def_scene_render_data(BlenderRNA *brna)
{0, NULL, 0, NULL, NULL},
};

static const EnumPropertyItem master_image_compressor_items[] = {
{MASTER_IMAGE_COMPRESSOR_JPEG,
"JPEG",
0,
"JPEG",
"Master compresses images with JPEG"},
#ifdef WITH_CUDA
{MASTER_IMAGE_COMPRESSOR_NVENCODER,
"NVENCODER",
0,
"NVENCODER",
"Master compresses images with NVENCODER"},
#endif //WITH_CUDA
{0, NULL, 0, NULL, NULL},
};

static const EnumPropertyItem render_session_mode_items[] = {
{RENDER_SESSION_MODE_STANDALONE,
"STANDALONE",
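Note that EnumPropertyItem arrays such as master_image_compressor_items are sentinel-terminated; RNA walks them until the all-NULL entry. A small illustrative loop (not from the patch):

for (const EnumPropertyItem *it = master_image_compressor_items; it->identifier != NULL; it++) {
  printf("%s = %d\n", it->identifier, it->value);  /* e.g. "JPEG = 1" */
}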
@ -6455,6 +6473,12 @@ static void rna_def_scene_render_data(BlenderRNA *brna)
RNA_def_property_ui_text(prop, "Color format", "");
RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, NULL);

prop = RNA_def_property(srna, "master_image_compressor", PROP_ENUM, PROP_NONE);
RNA_def_property_enum_items(prop, master_image_compressor_items);
RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
RNA_def_property_ui_text(prop, "Image compressor", "");
RNA_def_property_update(prop, NC_SCENE | ND_RENDER_OPTIONS, NULL);

// Modify object name section
prop = RNA_def_property(srna, "modify_object_name", PROP_STRING, PROP_NONE);
RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);

@ -111,6 +111,18 @@ if(WITH_OPENCOLORIO)
add_definitions(-DWITH_OCIO)
endif()

#FRL_CLR_BEGIN

if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()

if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()

#FRL_CLR_END

# Setup the EXE sources and `buildinfo`.
set(SRC
creator.c

@ -2020,9 +2020,13 @@ static int arg_handle_master_denoiser(int argc, const char **argv, void *data)
const char *rmtype = argv[1];
if (BLI_strcasecmp(rmtype, "OIDN") == 0) {
scene->r.master_denoiser = MASTER_DENOISER_OIDN;
}
#ifdef WITH_OPTIX
else if (BLI_strcasecmp(rmtype, "OPTIX") == 0) {
scene->r.master_denoiser = MASTER_DENOISER_OPTIX;
}
#endif
else if (BLI_strcasecmp(rmtype, "BARCELONA") == 0) {
scene->r.master_denoiser = MASTER_DENOISER_BARCELONA;
} else {
printf("\nError: Unknown master denoiser %s (--master-denoiser <OIDN> or <OPTIX> or <BARCELONA>).\n", rmtype);
@ -2064,11 +2068,48 @@ static int arg_handle_master_image_color_format(int argc, const char **argv, void *data)
else {
printf(
"\nError: no blend loaded. "
"order the arguments so '--master-image-color-format' is after the blend is loaded.\n");
return 0;
}
}

static const char arg_handle_master_image_compressor_doc[] =
"<master_image_compressor>\n"
"\tCompressor that the master uses to compress images before sending them to a client.\n"
"\tSupported values: JPEG, NVENCODER\n";

static int arg_handle_master_image_compressor(int argc, const char **argv, void *data)
{
bContext *C = data;
Scene *scene = CTX_data_scene(C);

if (scene) {
/* Default to JPEG unless a supported compressor is named below. */
scene->r.master_image_compressor = MASTER_IMAGE_COMPRESSOR_JPEG;
if (argc > 1) {
const char *rmtype = argv[1];
if (BLI_strcasecmp(rmtype, "JPEG") == 0) {
scene->r.master_image_compressor = MASTER_IMAGE_COMPRESSOR_JPEG;
}
#ifdef WITH_CUDA
else if (BLI_strcasecmp(rmtype, "NVENCODER") == 0) {
scene->r.master_image_compressor = MASTER_IMAGE_COMPRESSOR_NVENCODER;
}
#endif //WITH_CUDA
else {
printf("\nError: Unknown compressor %s (--master-image-compressor <JPEG> or <NVENCODER>).\n", rmtype);
}
}
return 1;
}
else {
printf(
"\nError: no blend loaded. "
"order the arguments so '--master-image-compressor' is after the blend is loaded.\n");
return 0;
}
}
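Like the neighbouring handlers, this one appears to return the number of extra argv entries it consumed (1 when a value follows the flag, 0 on the no-blend error). A hedged usage sketch once the flag is registered below; the blend file name is illustrative:

/* blender --background scene.blend --master-image-compressor NVENCODER */
const char *demo_argv[] = {"--master-image-compressor", "JPEG"};
int consumed = arg_handle_master_image_compressor(2, demo_argv, C);  /* expected: 1 */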
|

#ifdef WITH_WEBRTC

static int arg_handle_webrtc_int_param(int argc, const char **argv, const char *arg_id, int *param, int default_param_value)
@ -3049,6 +3090,7 @@ void main_args_setup(bContext *C, bArgs *ba)
BLI_args_add(ba, NULL, "--save_denoise_io", CB(arg_handle_save_denoise_io), C);
BLI_args_add(ba, NULL, "--save_cameras", CB(arg_handle_save_cameras), C);
BLI_args_add(ba, NULL, "--master-image-color-format", CB(arg_handle_master_image_color_format), C);
BLI_args_add(ba, NULL, "--master-image-compressor", CB(arg_handle_master_image_compressor), C);
#ifdef WITH_WEBRTC
BLI_args_add(ba, NULL, "--use-webrtc", CB(arg_handle_use_webrtc), C);
BLI_args_add(ba, NULL, "--signaling-server-address", CB(arg_handle_signaling_server_address), C);