Skip to content
This repository was archived by the owner on Jun 10, 2024. It is now read-only.

Commit d230bfd

Browse files
RomanArzumanyan authored and theHamsta committed
Update NvDecoder.cpp
Reduce vRAM usage by allocating pVideoFormat->min_num_decode_surfaces + 3 surfaces.
1 parent ebbf718 commit d230bfd

File tree

1 file changed

+4
-43
lines changed

1 file changed

+4
-43
lines changed

src/TC/src/NvDecoder.cpp

Lines changed: 4 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -98,43 +98,6 @@ static int GetChromaPlaneCount(cudaVideoChromaFormat eChromaFormat)
9898
return numPlane;
9999
}
100100

101-
unsigned long GetNumDecodeSurfaces(cudaVideoCodec eCodec, unsigned int nWidth,
102-
unsigned int nHeight)
103-
{
104-
if (eCodec == cudaVideoCodec_VP9) {
105-
return 12;
106-
}
107-
108-
if (eCodec == cudaVideoCodec_H264 || eCodec == cudaVideoCodec_H264_SVC ||
109-
eCodec == cudaVideoCodec_H264_MVC) {
110-
// assume worst-case of 20 decode surfaces for H264
111-
return 20;
112-
}
113-
114-
if (eCodec == cudaVideoCodec_HEVC) {
115-
// ref HEVC spec: A.4.1 General tier and level limits
116-
// currently assuming level 6.2, 8Kx4K
117-
auto MaxLumaPS = 35651584U;
118-
int MaxDpbPicBuf = 6;
119-
int PicSizeInSamplesY = (int)(nWidth * nHeight);
120-
int MaxDpbSize;
121-
122-
if (PicSizeInSamplesY <= (MaxLumaPS >> 2U)) {
123-
MaxDpbSize = MaxDpbPicBuf * 4;
124-
} else if (PicSizeInSamplesY <= (MaxLumaPS >> 1U)) {
125-
MaxDpbSize = MaxDpbPicBuf * 2;
126-
} else if (PicSizeInSamplesY <= ((3U * MaxLumaPS) >> 2U)) {
127-
MaxDpbSize = (MaxDpbPicBuf * 4) / 3;
128-
} else {
129-
MaxDpbSize = MaxDpbPicBuf;
130-
}
131-
132-
return (min)(MaxDpbSize, 16) + 4;
133-
}
134-
135-
return 8;
136-
}
137-
138101
struct Rect {
139102
int l, t, r, b;
140103
};
@@ -200,9 +163,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) noexcept
200163
p_impl->decoder_recon++;
201164
CudaCtxPush ctxPush(p_impl->m_cuContext);
202165

203-
int nDecodeSurface =
204-
GetNumDecodeSurfaces(pVideoFormat->codec, pVideoFormat->coded_width,
205-
pVideoFormat->coded_height);
166+
// Shall be enough according to NVIDIA Nvdec mem optimization blog article
167+
// (https://developer.nvidia.com/blog/optimizing-video-memory-usage-with-the-nvdecode-api-and-nvidia-video-codec-sdk/)
168+
int nDecodeSurface = pVideoFormat->min_num_decode_surfaces + 3;
206169

207170
CUVIDDECODECAPS decodecaps;
208171
memset(&decodecaps, 0, sizeof(decodecaps));
@@ -379,9 +342,7 @@ int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT* pVideoFormat)
379342
pVideoFormat->display_area.right ==
380343
p_impl->m_videoFormat.display_area.right);
381344

382-
int nDecodeSurface =
383-
GetNumDecodeSurfaces(pVideoFormat->codec, pVideoFormat->coded_width,
384-
pVideoFormat->coded_height);
345+
int nDecodeSurface = pVideoFormat->min_num_decode_surfaces + 3;
385346

386347
if ((pVideoFormat->coded_width > p_impl->m_nMaxWidth) ||
387348
(pVideoFormat->coded_height > p_impl->m_nMaxHeight)) {

0 commit comments

Comments
 (0)