mirror of
https://github.com/librempeg/librempeg
synced 2024-11-22 18:49:58 +00:00
libavfilter/dnn: add layout option to openvino backend
DNN models have different input layouts (NCHW or NHWC), so a "layout" option is added. Use OpenVINO's API to do layout conversion for input data. Use swscale to do layout conversion for output data, as OpenVINO doesn't have a similar C API for output. Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
This commit is contained in:
parent
58d9b5caf3
commit
74ce1d2d11
@ -45,6 +45,7 @@ typedef struct OVOptions{
|
||||
uint8_t async;
|
||||
int batch_size;
|
||||
int input_resizable;
|
||||
DNNLayout layout;
|
||||
} OVOptions;
|
||||
|
||||
typedef struct OVContext {
|
||||
@ -100,6 +101,10 @@ static const AVOption dnn_openvino_options[] = {
|
||||
DNN_BACKEND_COMMON_OPTIONS
|
||||
{ "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
|
||||
{ "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
|
||||
{ "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, "layout" },
|
||||
{ "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"},
|
||||
{ "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"},
|
||||
{ "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@ -235,9 +240,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
|
||||
avpriv_report_missing_feature(ctx, "Do not support dynamic model.");
|
||||
return AVERROR(ENOSYS);
|
||||
}
|
||||
input.height = dims[2];
|
||||
input.width = dims[3];
|
||||
input.channels = dims[1];
|
||||
input.height = dims[1];
|
||||
input.width = dims[2];
|
||||
input.channels = dims[3];
|
||||
input.dt = precision_to_datatype(precision);
|
||||
input.data = av_malloc(input.height * input.width * input.channels * get_datatype_size(input.dt));
|
||||
if (!input.data) {
|
||||
@ -412,6 +417,7 @@ static void infer_completion_callback(void *args)
|
||||
av_assert0(request->lltask_count <= dims.dims[0]);
|
||||
#endif
|
||||
output.dt = precision_to_datatype(precision);
|
||||
output.layout = ctx->options.layout;
|
||||
|
||||
av_assert0(request->lltask_count >= 1);
|
||||
for (int i = 0; i < request->lltask_count; ++i) {
|
||||
@ -540,11 +546,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
|
||||
OVContext *ctx = &ov_model->ctx;
|
||||
#if HAVE_OPENVINO2
|
||||
ov_status_e status;
|
||||
ov_preprocess_input_tensor_info_t* input_tensor_info;
|
||||
ov_preprocess_output_tensor_info_t* output_tensor_info;
|
||||
ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
|
||||
ov_preprocess_output_tensor_info_t* output_tensor_info = NULL;
|
||||
ov_preprocess_input_model_info_t* input_model_info = NULL;
|
||||
ov_model_t *tmp_ov_model;
|
||||
ov_layout_t* NHWC_layout = NULL;
|
||||
ov_layout_t* NCHW_layout = NULL;
|
||||
const char* NHWC_desc = "NHWC";
|
||||
const char* NCHW_desc = "NCHW";
|
||||
const char* device = ctx->options.device_type;
|
||||
#else
|
||||
IEStatusCode status;
|
||||
@ -589,6 +598,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
|
||||
|
||||
//set input layout
|
||||
status = ov_layout_create(NHWC_desc, &NHWC_layout);
|
||||
status |= ov_layout_create(NCHW_desc, &NCHW_layout);
|
||||
if (status != OK) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to create layout for input.\n");
|
||||
ret = ov2_map_error(status, NULL);
|
||||
@ -602,6 +612,22 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
|
||||
goto err;
|
||||
}
|
||||
|
||||
status = ov_preprocess_input_info_get_model_info(ov_model->input_info, &input_model_info);
|
||||
if (status != OK) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to get input model info\n");
|
||||
ret = ov2_map_error(status, NULL);
|
||||
goto err;
|
||||
}
|
||||
if (ctx->options.layout == DL_NCHW)
|
||||
status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout);
|
||||
else if (ctx->options.layout == DL_NHWC)
|
||||
status = ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout);
|
||||
if (status != OK) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n");
|
||||
ret = ov2_map_error(status, NULL);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (ov_model->model->func_type != DFT_PROCESS_FRAME)
|
||||
//set precision only for detect and classify
|
||||
status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
|
||||
@ -639,6 +665,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
|
||||
ret = ov2_map_error(status, NULL);
|
||||
goto err;
|
||||
}
|
||||
ov_preprocess_input_model_info_free(input_model_info);
|
||||
ov_layout_free(NCHW_layout);
|
||||
ov_layout_free(NHWC_layout);
|
||||
#else
|
||||
if (ctx->options.batch_size > 1) {
|
||||
input_shapes_t input_shapes;
|
||||
@ -783,6 +812,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
|
||||
return 0;
|
||||
|
||||
err:
|
||||
#if HAVE_OPENVINO2
|
||||
if (NCHW_layout)
|
||||
ov_layout_free(NCHW_layout);
|
||||
if (NHWC_layout)
|
||||
ov_layout_free(NHWC_layout);
|
||||
if (input_model_info)
|
||||
ov_preprocess_input_model_info_free(input_model_info);
|
||||
#endif
|
||||
dnn_free_model_ov(&ov_model->model);
|
||||
return ret;
|
||||
}
|
||||
|
@ -27,6 +27,12 @@
|
||||
int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
|
||||
{
|
||||
struct SwsContext *sws_ctx;
|
||||
int ret = 0;
|
||||
int linesize[4] = { 0 };
|
||||
void **dst_data = NULL;
|
||||
void *middle_data = NULL;
|
||||
uint8_t *planar_data[4] = { 0 };
|
||||
int plane_size = frame->width * frame->height * sizeof(uint8_t);
|
||||
int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
|
||||
if (bytewidth < 0) {
|
||||
return AVERROR(EINVAL);
|
||||
@ -35,6 +41,17 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
|
||||
avpriv_report_missing_feature(log_ctx, "data type rather than DNN_FLOAT");
|
||||
return AVERROR(ENOSYS);
|
||||
}
|
||||
dst_data = (void **)frame->data;
|
||||
linesize[0] = frame->linesize[0];
|
||||
if (output->layout == DL_NCHW) {
|
||||
middle_data = av_malloc(plane_size * output->channels);
|
||||
if (!middle_data) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto err;
|
||||
}
|
||||
dst_data = &middle_data;
|
||||
linesize[0] = frame->width * 3;
|
||||
}
|
||||
|
||||
switch (frame->format) {
|
||||
case AV_PIX_FMT_RGB24:
|
||||
@ -51,18 +68,52 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
|
||||
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width * 3, frame->height,
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);
|
||||
return AVERROR(EINVAL);
|
||||
ret = AVERROR(EINVAL);
|
||||
goto err;
|
||||
}
|
||||
sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
|
||||
(const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0}, 0, frame->height,
|
||||
(uint8_t * const*)frame->data, frame->linesize);
|
||||
(uint8_t * const*)dst_data, linesize);
|
||||
sws_freeContext(sws_ctx);
|
||||
return 0;
|
||||
// convert data from planar to packed
|
||||
if (output->layout == DL_NCHW) {
|
||||
sws_ctx = sws_getContext(frame->width,
|
||||
frame->height,
|
||||
AV_PIX_FMT_GBRP,
|
||||
frame->width,
|
||||
frame->height,
|
||||
frame->format,
|
||||
0, NULL, NULL, NULL);
|
||||
if (!sws_ctx) {
|
||||
av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
|
||||
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height,
|
||||
av_get_pix_fmt_name(frame->format),frame->width, frame->height);
|
||||
ret = AVERROR(EINVAL);
|
||||
goto err;
|
||||
}
|
||||
if (frame->format == AV_PIX_FMT_RGB24) {
|
||||
planar_data[0] = (uint8_t *)middle_data + plane_size;
|
||||
planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
|
||||
planar_data[2] = (uint8_t *)middle_data;
|
||||
} else if (frame->format == AV_PIX_FMT_BGR24) {
|
||||
planar_data[0] = (uint8_t *)middle_data + plane_size;
|
||||
planar_data[1] = (uint8_t *)middle_data;
|
||||
planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
|
||||
}
|
||||
sws_scale(sws_ctx, (const uint8_t * const *)planar_data,
|
||||
(const int [4]){frame->width * sizeof(uint8_t),
|
||||
frame->width * sizeof(uint8_t),
|
||||
frame->width * sizeof(uint8_t), 0},
|
||||
0, frame->height, frame->data, frame->linesize);
|
||||
sws_freeContext(sws_ctx);
|
||||
}
|
||||
break;
|
||||
case AV_PIX_FMT_GRAYF32:
|
||||
av_image_copy_plane(frame->data[0], frame->linesize[0],
|
||||
output->data, bytewidth,
|
||||
bytewidth, frame->height);
|
||||
return 0;
|
||||
break;
|
||||
case AV_PIX_FMT_YUV420P:
|
||||
case AV_PIX_FMT_YUV422P:
|
||||
case AV_PIX_FMT_YUV444P:
|
||||
@ -82,24 +133,34 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
|
||||
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width, frame->height,
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);
|
||||
return AVERROR(EINVAL);
|
||||
ret = AVERROR(EINVAL);
|
||||
goto err;
|
||||
}
|
||||
sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
|
||||
(const int[4]){frame->width * sizeof(float), 0, 0, 0}, 0, frame->height,
|
||||
(uint8_t * const*)frame->data, frame->linesize);
|
||||
sws_freeContext(sws_ctx);
|
||||
return 0;
|
||||
break;
|
||||
default:
|
||||
avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
|
||||
return AVERROR(ENOSYS);
|
||||
ret = AVERROR(ENOSYS);
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
av_free(middle_data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
|
||||
{
|
||||
struct SwsContext *sws_ctx;
|
||||
int ret = 0;
|
||||
int linesize[4] = { 0 };
|
||||
void **src_data = NULL;
|
||||
void *middle_data = NULL;
|
||||
uint8_t *planar_data[4] = { 0 };
|
||||
int plane_size = frame->width * frame->height * sizeof(uint8_t);
|
||||
int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
|
||||
if (bytewidth < 0) {
|
||||
return AVERROR(EINVAL);
|
||||
@ -109,9 +170,54 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
|
||||
return AVERROR(ENOSYS);
|
||||
}
|
||||
|
||||
src_data = (void **)frame->data;
|
||||
linesize[0] = frame->linesize[0];
|
||||
if (input->layout == DL_NCHW) {
|
||||
middle_data = av_malloc(plane_size * input->channels);
|
||||
if (!middle_data) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto err;
|
||||
}
|
||||
src_data = &middle_data;
|
||||
linesize[0] = frame->width * 3;
|
||||
}
|
||||
|
||||
switch (frame->format) {
|
||||
case AV_PIX_FMT_RGB24:
|
||||
case AV_PIX_FMT_BGR24:
|
||||
// convert data from planar to packed
|
||||
if (input->layout == DL_NCHW) {
|
||||
sws_ctx = sws_getContext(frame->width,
|
||||
frame->height,
|
||||
frame->format,
|
||||
frame->width,
|
||||
frame->height,
|
||||
AV_PIX_FMT_GBRP,
|
||||
0, NULL, NULL, NULL);
|
||||
if (!sws_ctx) {
|
||||
av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
|
||||
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
|
||||
av_get_pix_fmt_name(frame->format), frame->width, frame->height,
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height);
|
||||
ret = AVERROR(EINVAL);
|
||||
goto err;
|
||||
}
|
||||
if (frame->format == AV_PIX_FMT_RGB24) {
|
||||
planar_data[0] = (uint8_t *)middle_data + plane_size;
|
||||
planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
|
||||
planar_data[2] = (uint8_t *)middle_data;
|
||||
} else if (frame->format == AV_PIX_FMT_BGR24) {
|
||||
planar_data[0] = (uint8_t *)middle_data + plane_size;
|
||||
planar_data[1] = (uint8_t *)middle_data;
|
||||
planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
|
||||
}
|
||||
sws_scale(sws_ctx, (const uint8_t * const *)frame->data,
|
||||
frame->linesize, 0, frame->height, planar_data,
|
||||
(const int [4]){frame->width * sizeof(uint8_t),
|
||||
frame->width * sizeof(uint8_t),
|
||||
frame->width * sizeof(uint8_t), 0});
|
||||
sws_freeContext(sws_ctx);
|
||||
}
|
||||
sws_ctx = sws_getContext(frame->width * 3,
|
||||
frame->height,
|
||||
AV_PIX_FMT_GRAY8,
|
||||
@ -124,10 +230,11 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
|
||||
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width * 3, frame->height);
|
||||
return AVERROR(EINVAL);
|
||||
ret = AVERROR(EINVAL);
|
||||
goto err;
|
||||
}
|
||||
sws_scale(sws_ctx, (const uint8_t **)frame->data,
|
||||
frame->linesize, 0, frame->height,
|
||||
sws_scale(sws_ctx, (const uint8_t **)src_data,
|
||||
linesize, 0, frame->height,
|
||||
(uint8_t * const [4]){input->data, 0, 0, 0},
|
||||
(const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0});
|
||||
sws_freeContext(sws_ctx);
|
||||
@ -156,7 +263,8 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
|
||||
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height,
|
||||
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width, frame->height);
|
||||
return AVERROR(EINVAL);
|
||||
ret = AVERROR(EINVAL);
|
||||
goto err;
|
||||
}
|
||||
sws_scale(sws_ctx, (const uint8_t **)frame->data,
|
||||
frame->linesize, 0, frame->height,
|
||||
@ -166,10 +274,12 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
|
||||
break;
|
||||
default:
|
||||
avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
|
||||
return AVERROR(ENOSYS);
|
||||
ret = AVERROR(ENOSYS);
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
av_free(middle_data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static enum AVPixelFormat get_pixel_format(DNNData *data)
|
||||
@ -205,6 +315,11 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index
|
||||
AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
|
||||
av_assert0(sd);
|
||||
|
||||
if (input->layout == DL_NCHW) {
|
||||
av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");
|
||||
return AVERROR(ENOSYS);
|
||||
}
|
||||
|
||||
header = (const AVDetectionBBoxHeader *)sd->data;
|
||||
bbox = av_get_detection_bbox(header, bbox_index);
|
||||
|
||||
@ -257,6 +372,12 @@ int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
|
||||
int linesizes[4];
|
||||
int ret = 0;
|
||||
enum AVPixelFormat fmt = get_pixel_format(input);
|
||||
|
||||
if (input->layout == DL_NCHW) {
|
||||
av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");
|
||||
return AVERROR(ENOSYS);
|
||||
}
|
||||
|
||||
sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
|
||||
input->width, input->height, fmt,
|
||||
SWS_FAST_BILINEAR, NULL, NULL, NULL);
|
||||
|
@ -56,12 +56,19 @@ typedef enum {
|
||||
DFT_ANALYTICS_CLASSIFY, // classify for each bounding box
|
||||
}DNNFunctionType;
|
||||
|
||||
typedef enum {
|
||||
DL_NONE,
|
||||
DL_NCHW,
|
||||
DL_NHWC,
|
||||
} DNNLayout;
|
||||
|
||||
typedef struct DNNData{
|
||||
void *data;
|
||||
int width, height, channels;
|
||||
// dt and order together decide the color format
|
||||
DNNDataType dt;
|
||||
DNNColorOrder order;
|
||||
DNNLayout layout;
|
||||
} DNNData;
|
||||
|
||||
typedef struct DNNExecBaseParams {
|
||||
|
Loading…
Reference in New Issue
Block a user