From 74ce1d2d11a56b522a343d944bf5b184bb2d9212 Mon Sep 17 00:00:00 2001
From: Wenbin Chen
Date: Thu, 21 Sep 2023 09:26:31 +0800
Subject: [PATCH] libavfilter/dnn: add layout option to openvino backend

DNN models can have different input layouts (NCHW or NHWC), so a
"layout" option is added. Use OpenVINO's API to do layout conversion
for input data. Use swscale to do layout conversion for output data,
as OpenVINO doesn't have a similar C API for output.

Signed-off-by: Wenbin Chen
---
 libavfilter/dnn/dnn_backend_openvino.c |  47 +++++++-
 libavfilter/dnn/dnn_io_proc.c          | 151 ++++++++++++++++++++++---
 libavfilter/dnn_interface.h            |   7 ++
 3 files changed, 185 insertions(+), 20 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index ded156289b..ae53488837 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -45,6 +45,7 @@ typedef struct OVOptions{
     uint8_t async;
     int batch_size;
     int input_resizable;
+    DNNLayout layout;
 } OVOptions;
 
 typedef struct OVContext {
@@ -100,6 +101,10 @@ static const AVOption dnn_openvino_options[] = {
     DNN_BACKEND_COMMON_OPTIONS
     { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
     { "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+    { "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, "layout" },
+    { "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"},
+    { "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"},
+    { "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"},
     { NULL }
 };
 
@@ -235,9 +240,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
         avpriv_report_missing_feature(ctx, "Do not support dynamic model.");
         return AVERROR(ENOSYS);
     }
-    input.height = dims[2];
-    input.width = dims[3];
-    input.channels = dims[1];
+    input.height = dims[1];
+    input.width = dims[2];
+    input.channels = dims[3];
     input.dt = precision_to_datatype(precision);
     input.data = av_malloc(input.height * input.width * input.channels * get_datatype_size(input.dt));
     if (!input.data) {
@@ -412,6 +417,7 @@ static void infer_completion_callback(void *args)
     av_assert0(request->lltask_count <= dims.dims[0]);
 #endif
     output.dt = precision_to_datatype(precision);
+    output.layout = ctx->options.layout;
 
     av_assert0(request->lltask_count >= 1);
     for (int i = 0; i < request->lltask_count; ++i) {
@@ -540,11 +546,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     OVContext *ctx = &ov_model->ctx;
 #if HAVE_OPENVINO2
     ov_status_e status;
-    ov_preprocess_input_tensor_info_t* input_tensor_info;
-    ov_preprocess_output_tensor_info_t* output_tensor_info;
+    ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
+    ov_preprocess_output_tensor_info_t* output_tensor_info = NULL;
+    ov_preprocess_input_model_info_t* input_model_info = NULL;
     ov_model_t *tmp_ov_model;
     ov_layout_t* NHWC_layout = NULL;
+    ov_layout_t* NCHW_layout = NULL;
     const char* NHWC_desc = "NHWC";
+    const char* NCHW_desc = "NCHW";
     const char* device = ctx->options.device_type;
 #else
     IEStatusCode status;
@@ -589,6 +598,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     //set input layout
     status = ov_layout_create(NHWC_desc, &NHWC_layout);
+    status |= ov_layout_create(NCHW_desc, &NCHW_layout);
     if (status != OK) {
         av_log(ctx, AV_LOG_ERROR, "Failed to create layout for input.\n");
         ret = ov2_map_error(status, NULL);
         goto err;
     }
@@ -602,6 +612,22 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
         goto err;
     }
 
+    status = ov_preprocess_input_info_get_model_info(ov_model->input_info, &input_model_info);
+    if (status != OK) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to get input model info\n");
+        ret = ov2_map_error(status, NULL);
+        goto err;
+    }
+    if (ctx->options.layout == DL_NCHW)
+        status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout);
+    else if (ctx->options.layout == DL_NHWC)
+        status = ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout);
+    if (status != OK) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to set input model layout\n");
+        ret = ov2_map_error(status, NULL);
+        goto err;
+    }
+
     if (ov_model->model->func_type != DFT_PROCESS_FRAME)
         //set precision only for detect and classify
         status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
@@ -639,6 +665,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
         ret = ov2_map_error(status, NULL);
         goto err;
     }
+    ov_preprocess_input_model_info_free(input_model_info);
+    ov_layout_free(NCHW_layout);
+    ov_layout_free(NHWC_layout);
 #else
     if (ctx->options.batch_size > 1) {
         input_shapes_t input_shapes;
@@ -783,6 +812,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     return 0;
 
 err:
+#if HAVE_OPENVINO2
+    if (NCHW_layout)
+        ov_layout_free(NCHW_layout);
+    if (NHWC_layout)
+        ov_layout_free(NHWC_layout);
+    if (input_model_info)
+        ov_preprocess_input_model_info_free(input_model_info);
+#endif
     dnn_free_model_ov(&ov_model->model);
     return ret;
 }
diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index 7961bf6b95..dfa0d5e5da 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -27,6 +27,12 @@
 int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
 {
     struct SwsContext *sws_ctx;
+    int ret = 0;
+    int linesize[4] = { 0 };
+    void **dst_data = NULL;
+    void *middle_data = NULL;
+    uint8_t *planar_data[4] = { 0 };
+    int plane_size = frame->width * frame->height * sizeof(uint8_t);
     int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
     if (bytewidth < 0) {
         return AVERROR(EINVAL);
@@ -35,6 +41,17 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
         avpriv_report_missing_feature(log_ctx, "data type rather than DNN_FLOAT");
         return AVERROR(ENOSYS);
     }
 
+    dst_data = (void **)frame->data;
+    linesize[0] = frame->linesize[0];
+    if (output->layout == DL_NCHW) {
+        middle_data = av_malloc(plane_size * output->channels);
+        if (!middle_data) {
+            ret = AVERROR(ENOMEM);
+            goto err;
+        }
+        dst_data = &middle_data;
+        linesize[0] = frame->width * 3;
+    }
     switch (frame->format) {
     case AV_PIX_FMT_RGB24:
@@ -51,18 +68,52 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
                                 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
                                 av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width * 3, frame->height,
                                 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8),   frame->width * 3, frame->height);
-            return AVERROR(EINVAL);
+            ret = AVERROR(EINVAL);
+            goto err;
         }
         sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
                            (const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0}, 0, frame->height,
-                           (uint8_t * const*)frame->data, frame->linesize);
+                           (uint8_t * const*)dst_data, linesize);
         sws_freeContext(sws_ctx);
-        return 0;
+        // convert data from planar to packed
+        if (output->layout == DL_NCHW) {
+            sws_ctx = sws_getContext(frame->width,
+                                     frame->height,
+                                     AV_PIX_FMT_GBRP,
+                                     frame->width,
+                                     frame->height,
+                                     frame->format,
+                                     0, NULL, NULL, NULL);
+            if (!sws_ctx) {
+                av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
+                       "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
+                       av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height,
+                       av_get_pix_fmt_name(frame->format), frame->width, frame->height);
+                ret = AVERROR(EINVAL);
+                goto err;
+            }
+            if (frame->format == AV_PIX_FMT_RGB24) {
+                planar_data[0] = (uint8_t *)middle_data + plane_size;
+                planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
+                planar_data[2] = (uint8_t *)middle_data;
+            } else if (frame->format == AV_PIX_FMT_BGR24) {
+                planar_data[0] = (uint8_t *)middle_data + plane_size;
+                planar_data[1] = (uint8_t *)middle_data;
+                planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
+            }
+            sws_scale(sws_ctx, (const uint8_t * const *)planar_data,
+                      (const int [4]){frame->width * sizeof(uint8_t),
+                                      frame->width * sizeof(uint8_t),
+                                      frame->width * sizeof(uint8_t), 0},
+                      0, frame->height, frame->data, frame->linesize);
+            sws_freeContext(sws_ctx);
+        }
+        break;
     case AV_PIX_FMT_GRAYF32:
         av_image_copy_plane(frame->data[0], frame->linesize[0],
                             output->data, bytewidth,
                             bytewidth, frame->height);
-        return 0;
+        break;
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUV422P:
     case AV_PIX_FMT_YUV444P:
@@ -82,24 +133,34 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
                                 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
                                 av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width, frame->height,
                                 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8),   frame->width, frame->height);
-            return AVERROR(EINVAL);
+            ret = AVERROR(EINVAL);
+            goto err;
         }
         sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
                            (const int[4]){frame->width * sizeof(float), 0, 0, 0}, 0, frame->height,
                            (uint8_t * const*)frame->data, frame->linesize);
         sws_freeContext(sws_ctx);
-        return 0;
+        break;
     default:
         avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
-        return AVERROR(ENOSYS);
+        ret = AVERROR(ENOSYS);
+        goto err;
     }
 
-    return 0;
+err:
+    av_free(middle_data);
+    return ret;
 }
 
 int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
 {
     struct SwsContext *sws_ctx;
+    int ret = 0;
+    int linesize[4] = { 0 };
+    void **src_data = NULL;
+    void *middle_data = NULL;
+    uint8_t *planar_data[4] = { 0 };
+    int plane_size = frame->width * frame->height * sizeof(uint8_t);
     int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
     if (bytewidth < 0) {
         return AVERROR(EINVAL);
@@ -109,9 +170,54 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
         return AVERROR(ENOSYS);
     }
 
+    src_data = (void **)frame->data;
+    linesize[0] = frame->linesize[0];
+    if (input->layout == DL_NCHW) {
+        middle_data = av_malloc(plane_size * input->channels);
+        if (!middle_data) {
+            ret = AVERROR(ENOMEM);
+            goto err;
+        }
+        src_data = &middle_data;
+        linesize[0] = frame->width * 3;
+    }
+
     switch (frame->format) {
     case AV_PIX_FMT_RGB24:
     case AV_PIX_FMT_BGR24:
+        // convert data from packed to planar
+        if (input->layout == DL_NCHW) {
+            sws_ctx = sws_getContext(frame->width,
+                                     frame->height,
+                                     frame->format,
+                                     frame->width,
+                                     frame->height,
+                                     AV_PIX_FMT_GBRP,
+                                     0, NULL, NULL, NULL);
+            if (!sws_ctx) {
+                av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", + av_get_pix_fmt_name(frame->format), frame->width, frame->height, + av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height); + ret = AVERROR(EINVAL); + goto err; + } + if (frame->format == AV_PIX_FMT_RGB24) { + planar_data[0] = (uint8_t *)middle_data + plane_size; + planar_data[1] = (uint8_t *)middle_data + plane_size * 2; + planar_data[2] = (uint8_t *)middle_data; + } else if (frame->format == AV_PIX_FMT_BGR24) { + planar_data[0] = (uint8_t *)middle_data + plane_size; + planar_data[1] = (uint8_t *)middle_data; + planar_data[2] = (uint8_t *)middle_data + plane_size * 2; + } + sws_scale(sws_ctx, (const uint8_t * const *)frame->data, + frame->linesize, 0, frame->height, planar_data, + (const int [4]){frame->width * sizeof(uint8_t), + frame->width * sizeof(uint8_t), + frame->width * sizeof(uint8_t), 0}); + sws_freeContext(sws_ctx); + } sws_ctx = sws_getContext(frame->width * 3, frame->height, AV_PIX_FMT_GRAY8, @@ -124,10 +230,11 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height, av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width * 3, frame->height); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + goto err; } - sws_scale(sws_ctx, (const uint8_t **)frame->data, - frame->linesize, 0, frame->height, + sws_scale(sws_ctx, (const uint8_t **)src_data, + linesize, 0, frame->height, (uint8_t * const [4]){input->data, 0, 0, 0}, (const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0}); sws_freeContext(sws_ctx); @@ -156,7 +263,8 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height, av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width, frame->height); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + goto err; } sws_scale(sws_ctx, (const uint8_t **)frame->data, frame->linesize, 0, frame->height, @@ -166,10 +274,12 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) break; default: avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format)); - return AVERROR(ENOSYS); + ret = AVERROR(ENOSYS); + goto err; } - - return 0; +err: + av_free(middle_data); + return ret; } static enum AVPixelFormat get_pixel_format(DNNData *data) @@ -205,6 +315,11 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); av_assert0(sd); + if (input->layout == DL_NCHW) { + av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n"); + return AVERROR(ENOSYS); + } + header = (const AVDetectionBBoxHeader *)sd->data; bbox = av_get_detection_bbox(header, bbox_index); @@ -257,6 +372,12 @@ int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx) int linesizes[4]; int ret = 0; enum AVPixelFormat fmt = get_pixel_format(input); + + if (input->layout == DL_NCHW) { + av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n"); + return AVERROR(ENOSYS); + } + sws_ctx = sws_getContext(frame->width, frame->height, frame->format, input->width, input->height, fmt, SWS_FAST_BILINEAR, NULL, NULL, NULL); diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 20c6a0a896..956a63443a 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -56,12 +56,19 @@ 
     DFT_ANALYTICS_CLASSIFY, // classify for each bounding box
 }DNNFunctionType;
 
+typedef enum {
+    DL_NONE,
+    DL_NCHW,
+    DL_NHWC,
+} DNNLayout;
+
 typedef struct DNNData{
     void *data;
     int width, height, channels;
     // dt and order together decide the color format
     DNNDataType dt;
     DNNColorOrder order;
+    DNNLayout layout;
 } DNNData;
 
 typedef struct DNNExecBaseParams {
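
Usage sketch (illustrative, not part of the patch): the new "layout" key is an
OpenVINO backend option, so it is expected to be passed through a dnn filter's
backend_configs string. The model path and tensor names below are hypothetical,
and the exact escaping of the inner '=' inside a filtergraph may vary with the
option parser:

    ffmpeg -i input.png -vf "dnn_processing=dnn_backend=openvino:model=model.xml:input=x:output=y:backend_configs=layout=nchw" output.png

With the default value "none" no layout is set on the model and existing
command lines keep their previous behavior; "nchw" or "nhwc" tells the backend
which layout the model's input tensor expects.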