Discussion:
[Libav-user] Using H264 Hardware decoding on windows = several problems
Jérôme SALAYET
2018-09-06 16:28:34 UTC
Permalink
Hello, I use FFMPEG 4.0.2 to decode h264/hevc video streams from RTSP cameras on a Windows system.
However, because of CPU limitations, I want to use a hardware device (hwdevice) for decoding.


Based on the hw_decode.c sample file, I am trying to use CUDA or D3D11VA.

I can get CUDA working.

My code is :

AVCodecHWConfig* pHWConfig ;
AVBufferRef* pHWDeviceCtx;
enum AVPixelFormat HwPixfmt;
AVHWDeviceType HwDeviceType;
AVPixelFormat hwPixelFormat;
AVHWDeviceType hwDeviceType;
int methods;
AVFrame* pSWFrame;

////////////////////////////////////////////////////////////////////////////////
// get_format callback: picks the hardware surface format out of the list the
// decoder offers.
//
// ctx      - codec context invoking the callback (unused).
// pix_fmts - candidate formats, terminated by AV_PIX_FMT_NONE.
//
// Returns HwPixfmt (the pix_fmt copied from the selected AVCodecHWConfig) when
// the decoder offers it, otherwise AV_PIX_FMT_NONE.
////////////////////////////////////////////////////////////////////////////////
static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
{
    const enum AVPixelFormat *p;

    (void)ctx;

    // AV_PIX_FMT_RGB0 is a software pixel format, not a hardware surface
    // format; match the format reported by the codec's hardware configuration.
    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
        if (*p == HwPixfmt) {
            return *p;
        }
    }

    printf("Failed to get HW surface format\r\n");
    return AV_PIX_FMT_NONE;
}

////////////////////////////////////////////////////////////////////////////////
// Creates a hardware device context of the requested type and attaches a new
// reference to it to the codec context.
//
// ctx  - codec context that receives the hw_device_ctx reference.
// type - hardware device type to open.
//
// Returns the non-negative result of av_hwdevice_ctx_create() on success, or a
// negative AVERROR code on failure.
//
// NOTE(review): every call creates a new device context and overwrites
// pHWDeviceCtx without releasing the previous reference; consider creating
// one context and sharing it between decoders.
////////////////////////////////////////////////////////////////////////////////
int hw_decoder_init(AVCodecContext *ctx, const enum AVHWDeviceType type)
{
    int err;

    // av_hwdevice_ctx_create() returns a context that is already initialised,
    // so av_hwdevice_ctx_init() must not be called on it again.
    err = av_hwdevice_ctx_create(&pHWDeviceCtx, type, NULL, NULL, 0);
    if (err < 0) {
        return err;
    }

    // The codec context keeps its own reference; pHWDeviceCtx stays owned here.
    ctx->hw_device_ctx = av_buffer_ref(pHWDeviceCtx);
    if (ctx->hw_device_ctx == NULL) {
        return AVERROR(ENOMEM);
    }

    return err;
}
///////////////////////////////////////////////////////////////
in the decode part (1 per decoding Thread)
////////////////////////////////////////////////////////////////
...
pCodec = avcodec_find_decoder_by_name("h264_cuvid");
if (pCodec != NULL)
{ hwPixelFormat = AV_PIX_FMT_NONE;
hwDeviceType = AV_HWDEVICE_TYPE_NONE;
methods = 0;

for (int i = 0;; i++)
{ const AVCodecHWConfig* hw_config = lpfnavcodec_get_hw_config(m_lpCodec, i);
if (!hw_config)
{ pCodec = NULL;
break;
}
if (hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && hw_config->device_type == AV_HWDEVICE_TYPE_CUDA)
{ HwPixfmt = hw_config->pix_fmt;
pHWConfig = (AVCodecHWConfig*)hw_config;

if (!(hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL))
{ // Second Options
methods = hw_config->methods;
HwPixfmt = hw_config->pix_fmt;
hwDeviceType = hw_config->device_type;
break;
}
break;
}
}
enum AVHWDeviceType iterateType = AV_HWDEVICE_TYPE_NONE;
while ((iterateType = av_hwdevice_iterate_types (iterateType)) != AV_HWDEVICE_TYPE_NONE)
{ if (iterateType == AV_HWDEVICE_TYPE_CUDA)
{ HwDeviceType = iterateType;
}
}
....

if ((pCodec != NULL) && (pCodecCtx == NULL))
{ pCodecCtx = avcodec_alloc_context3(pCodec);
if (pCodecCtx != NULL)
{ if (pHWConfig != NULL)
{ pCodecCtx->get_format = get_hw_format;
hw_decoder_init( pCodecCtx, HwDeviceType)
}

// Init CodecContext
if (pCodecCtx->width == 0)
{ pCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
pCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtx->codec_id = AV_CODEC_ID_H264;
pCodecCtx->coded_width = 1280
pCodecCtx->coded_height = 720
pCodecCtx->width = 1280
pCodecCtx->height = 720
pCodecCtx->thread_count = 1;
pCodecCtx->thread_type = FF_THREAD_FRAME|FF_THREAD_SLICE;
pCodecCtx->err_recognition = AV_EF_EXPLODE;
}
}
}
if ((pCodecCtx!=NULL) && (pCodec!=NULL)&&(!avcodec_is_open(pCodecCtx)))
{ if ( pCodec != NULL)
{ ......
iResult = avcodec_open2( pCodecCtx, pCodec, &optionsDict);

iRes = avcodec_send_packet( pCodecCtx, &pPacket);
if (iRes>=0)
{ iRes = avcodec_receive_frame( pCodecCtx, pFrame );
if (iRes == AVERROR(EAGAIN) || iRes == AVERROR_EOF)
{ av_frame_free(&pFrame);
}
else
if (iRes < 0)
{ printf("receive_frame Error while decoding\r\n");
}
else
if (pFrame->format == HwPixfmt)
{ sw_frame = av_frame_alloc();
if ((iRes = av_hwframe_transfer_data( sw_frame, pFrame, 0)) < 0)
{ tmp_frame = pFrame;
}
else
{ tmp_frame = sw_frame;
}
}
...

When I set the av_log, I can see when it works :

[h264_cuvid @ 0e389540] Format cuda chosen by get_format().
[h264_cuvid @ 0e389540] Loaded lib: nvcuvid.dll
[h264_cuvid @ 0e389540] Loaded sym: cuvidGetDecoderCaps
[h264_cuvid @ 0e389540] Loaded sym: cuvidCreateDecoder
[h264_cuvid @ 0e389540] Loaded sym: cuvidDestroyDecoder
[h264_cuvid @ 0e389540] Loaded sym: cuvidDecodePicture
[h264_cuvid @ 0e389540] Loaded sym: cuvidMapVideoFrame
[h264_cuvid @ 0e389540] Loaded sym: cuvidUnmapVideoFrame
[h264_cuvid @ 0e389540] Loaded sym: cuvidCtxLockCreate
[h264_cuvid @ 0e389540] Loaded sym: cuvidCtxLockDestroy
[h264_cuvid @ 0e389540] Loaded sym: cuvidCtxLock
[h264_cuvid @ 0e389540] Loaded sym: cuvidCtxUnlock
[h264_cuvid @ 0e389540] Loaded sym: cuvidCreateVideoSource
[h264_cuvid @ 0e389540] Loaded sym: cuvidCreateVideoSourceW
[h264_cuvid @ 0e389540] Loaded sym: cuvidDestroyVideoSource
[h264_cuvid @ 0e389540] Loaded sym: cuvidSetVideoSourceState
[h264_cuvid @ 0e389540] Loaded sym: cuvidGetVideoSourceState
[h264_cuvid @ 0e389540] Loaded sym: cuvidGetSourceVideoFormat
[h264_cuvid @ 0e389540] Loaded sym: cuvidGetSourceAudioFormat
[h264_cuvid @ 0e389540] Loaded sym: cuvidCreateVideoParser
[h264_cuvid @ 0e389540] Loaded sym: cuvidParseVideoData
[h264_cuvid @ 0e389540] Loaded sym: cuvidDestroyVideoParser
[AVHWDeviceContext @ 0e36d800] Loaded lib: nvcuda.dll
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuInit
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuDeviceGetCount
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuDeviceGet
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuDeviceGetName
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuDeviceComputeCapability
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuCtxCreate_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuCtxSetLimit
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuCtxPushCurrent_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuCtxPopCurrent_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuCtxDestroy_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuMemAlloc_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuMemFree_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuMemcpy2D_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuMemcpy2DAsync_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGetErrorName
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGetErrorString
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuStreamCreate
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuStreamQuery
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuStreamSynchronize
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuStreamDestroy_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuStreamAddCallback
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuEventCreate
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuEventDestroy_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuEventSynchronize
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuEventQuery
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuEventRecord
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGLGetDevices_v2
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGraphicsGLRegisterImage
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGraphicsUnregisterResource
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGraphicsMapResources
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGraphicsUnmapResources
[AVHWDeviceContext @ 0e36d800] Loaded sym: cuGraphicsSubResourceGetMappedArray
[AVBSFContext @ 0e3befc0] The input looks like it is Annex B already
[h264_cuvid @ 0e389540] CUVID capabilities for h264_cuvid:
[h264_cuvid @ 0e389540] 8 bit: supported: 1, min_width: 48, max_width: 4096, min_height: 16, max_height: 4096
[h264_cuvid @ 0e389540] 10 bit: supported: 0, min_width: 0, max_width: 0, min_height: 0, max_height: 0
[h264_cuvid @ 0e389540] 12 bit: supported: 0, min_width: 0, max_width: 0, min_height: 0, max_height: 0
[h264_cuvid @ 0e389540] Invalid pkt_timebase, passing timestamps as-is.
[h264_cuvid @ 0e389540] Format cuda chosen by get_format().
[h264_cuvid @ 0e389540] Formats: Original: cuda | HW: cuda | SW: nv12

But when I try to use it a seventh time, I get the following error. Is there a limitation?
[AVHWDeviceContext @ 0e3ca9c0] Error creating a CUDA context


So I want to try D3D11VA instead,
changing only some parts of my code:

////////////////////////////////////////////////////////////////////////////////
// get_format callback (D3D11VA variant): picks the hardware surface format out
// of the list the decoder offers.
//
// ctx      - codec context invoking the callback (unused).
// pix_fmts - candidate formats, terminated by AV_PIX_FMT_NONE.
//
// Returns HwPixfmt (the pix_fmt copied from the selected AVCodecHWConfig) when
// the decoder offers it, otherwise AV_PIX_FMT_NONE.
////////////////////////////////////////////////////////////////////////////////
static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
{
    const enum AVPixelFormat *p;

    (void)ctx;

    // AV_PIX_FMT_RGB0 is a software pixel format, not a hardware surface
    // format; match the format reported by the codec's hardware configuration.
    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
        if (*p == HwPixfmt) {
            return *p;
        }
    }

    printf("Failed to get HW surface format\r\n");
    return AV_PIX_FMT_NONE;
}

And

///////////////////////////////////////////////////////////////
in the decode part (1 per decoding Thread)
////////////////////////////////////////////////////////////////
...
pCodec = avcodec_find_decoder_by_name("h264 ");
if (pCodec != NULL)
{ hwPixelFormat = AV_PIX_FMT_NONE;
hwDeviceType = AV_HWDEVICE_TYPE_NONE;
methods = 0;

for (int i = 0;; i++)
{ const AVCodecHWConfig* hw_config = lpfnavcodec_get_hw_config(m_lpCodec, i);
if (!hw_config)
{ pCodec = NULL;
break;
}
if (hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && hw_config->device_type == AV_HWDEVICE_TYPE_D3D11VA)
{ HwPixfmt = hw_config->pix_fmt;
pHWConfig = (AVCodecHWConfig*)hw_config;

if (!(hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL))
{ // Second Options
methods = hw_config->methods;
HwPixfmt = hw_config->pix_fmt;
hwDeviceType = hw_config->device_type;
break;
}
break;
}
}
// Check that this FFmpeg build lists the D3D11VA device type. The type must be
// the same one used when selecting the codec's hardware configuration above
// (AV_HWDEVICE_TYPE_D3D11VA), not DXVA2.
enum AVHWDeviceType iterateType = AV_HWDEVICE_TYPE_NONE;
while ((iterateType = av_hwdevice_iterate_types(iterateType)) != AV_HWDEVICE_TYPE_NONE) {
    if (iterateType == AV_HWDEVICE_TYPE_D3D11VA) {
        HwDeviceType = iterateType;
    }
}


But in this case, FFmpeg reports the following errors:
[h264 @ 0e4d7fc0] Format d3d11va_vld chosen by get_format().
[h264 @ 0e4d7fc0] Format d3d11va_vld requires hwaccel initialisation.

How do I perform the hwaccel initialisation?

[h264 @ 0e4d7fc0] A hardware frames or device context is required for hardware accelerated decoding.
[h264 @ 0e4d7fc0] Failed setup for format d3d11va_vld: hwaccel initialisation returned error.
[h264 @ 0e4d7fc0] Format d3d11va_vld not usable, retrying get_format() without it.


Thanks for your answers...

Loading...