avfilter/dnn: Refactor DNN parameter configuration system

This patch tries to resolve multiple issues related to parameter
configuration:

Firstly, each DNN filter duplicates DNN_COMMON_OPTIONS, which should
be the common options of the backend.

Secondly, backend options are hidden behind the scenes. The user only
sees a single AV_OPT_TYPE_STRING option, backend_configs, which is parsed
by each backend. The help message does not show which options each
backend supports.

Thirdly, DNN backends duplicate DNN_BACKEND_COMMON_OPTIONS.

Last but not least, passing backend options via AV_OPT_TYPE_STRING
makes it hard, if not impossible, to pass AV_OPT_TYPE_BINARY to a backend.

This patch puts the common backend options and each backend's options
inside DnnContext to reduce code duplication, make the options
user-friendly, and make them easy to extend for future use cases.

For example,

./ffmpeg -h filter=dnn_processing

dnn_processing AVOptions:
   dnn_backend       <int>        ..FV....... DNN backend (from INT_MIN to INT_MAX) (default tensorflow)
     tensorflow      1            ..FV....... tensorflow backend flag
     openvino        2            ..FV....... openvino backend flag
     torch           3            ..FV....... torch backend flag

dnn_base AVOptions:
   model             <string>     ..F........ path to model file
   input             <string>     ..F........ input name of the model
   output            <string>     ..F........ output name of the model
   backend_configs   <string>     ..F.......P backend configs (deprecated)
   options           <string>     ..F.......P backend configs (deprecated)
   nireq             <int>        ..F........ number of request (from 0 to INT_MAX) (default 0)
   async             <boolean>    ..F........ use DNN async inference (default true)
   device            <string>     ..F........ device to run model

dnn_tensorflow AVOptions:
   sess_config       <string>     ..F........ config for SessionOptions

dnn_openvino AVOptions:
   batch_size        <int>        ..F........ batch size per request (from 1 to 1000) (default 1)
   input_resizable   <boolean>    ..F........ can input be resizable or not (default false)
   layout            <int>        ..F........ input layout of model (from 0 to 2) (default none)
     none            0            ..F........ none
     nchw            1            ..F........ nchw
     nhwc            2            ..F........ nhwc
   scale             <float>      ..F........ Add scale preprocess operation. Divide each element of input by specified value. (from INT_MIN to INT_MAX) (default 0)
   mean              <float>      ..F........ Add mean preprocess operation. Subtract specified value from each element of input. (from INT_MIN to INT_MAX) (default 0)

dnn_th AVOptions:
   optimize          <int>        ..F........ turn on graph executor optimization (from 0 to 1) (default 0)

Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
Reviewed-by: Wenbin Chen <wenbin.chen@intel.com>
Reviewed-by: Guo Yejun <yejun.guo@intel.com>
Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
Zhao Zhili 2024-05-08 00:08:08 +08:00 committed by Paul B Mahol
parent 9b0e097abd
commit b62a678da8
13 changed files with 334 additions and 226 deletions

View File

@ -28,9 +28,16 @@
#include "../dnn_interface.h"
#include "libavutil/thread.h"
#define DNN_BACKEND_COMMON_OPTIONS \
{ "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
{ "async", "use DNN async inference", OFFSET(options.async), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
#define DNN_DEFINE_CLASS_EXT(name, desc, options) \
{ \
.class_name = desc, \
.item_name = av_default_item_name, \
.option = options, \
.version = LIBAVUTIL_VERSION_INT, \
.category = AV_CLASS_CATEGORY_FILTER, \
}
#define DNN_DEFINE_CLASS(fname) \
DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
// one task for one function call from dnn interface
typedef struct TaskItem {

View File

@ -40,24 +40,8 @@
#endif
#include "dnn_backend_common.h"
typedef struct OVOptions{
char *device_type;
int nireq;
uint8_t async;
int batch_size;
int input_resizable;
DNNLayout layout;
float scale;
float mean;
} OVOptions;
typedef struct OVContext {
const AVClass *class;
OVOptions options;
} OVContext;
typedef struct OVModel{
OVContext ctx;
DnnContext *ctx;
DNNModel *model;
#if HAVE_OPENVINO2
ov_core_t *core;
@ -98,24 +82,20 @@ typedef struct OVRequestItem {
generated_string = generated_string ? av_asprintf("%s %s", generated_string, iterate_string) : \
av_asprintf("%s", iterate_string);
#define OFFSET(x) offsetof(OVContext, x)
#define OFFSET(x) offsetof(OVOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_openvino_options[] = {
{ "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
DNN_BACKEND_COMMON_OPTIONS
{ "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
{ "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
{ "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" },
{ "batch_size", "batch size per request", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
{ "input_resizable", "can input be resizable or not", OFFSET(input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
{ "layout", "input layout of model", OFFSET(layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" },
{ "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, .unit = "layout"},
{ "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, .unit = "layout"},
{ "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, .unit = "layout"},
{ "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
{ "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(options.mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
{ "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
{ "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_openvino);
#if HAVE_OPENVINO2
static const struct {
ov_status_e status;
@ -199,7 +179,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
DNNData input;
LastLevelTaskItem *lltask;
TaskItem *task;
OVContext *ctx = &ov_model->ctx;
DnnContext *ctx = ov_model->ctx;
#if HAVE_OPENVINO2
int64_t* dims;
ov_status_e status;
@ -292,7 +272,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
input.scale = 1;
input.mean = 0;
for (int i = 0; i < ctx->options.batch_size; ++i) {
for (int i = 0; i < ctx->ov_option.batch_size; ++i) {
lltask = ff_queue_pop_front(ov_model->lltask_queue);
if (!lltask) {
break;
@ -360,7 +340,7 @@ static void infer_completion_callback(void *args)
OVModel *ov_model = task->model;
SafeQueue *requestq = ov_model->request_queue;
DNNData *outputs;
OVContext *ctx = &ov_model->ctx;
DnnContext *ctx = ov_model->ctx;
#if HAVE_OPENVINO2
size_t* dims;
ov_status_e status;
@ -410,9 +390,9 @@ static void infer_completion_callback(void *args)
outputs[i].dims[2] = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1;
outputs[i].dims[3] = output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1;
av_assert0(request->lltask_count <= dims[0]);
outputs[i].layout = ctx->options.layout;
outputs[i].scale = ctx->options.scale;
outputs[i].mean = ctx->options.mean;
outputs[i].layout = ctx->ov_option.layout;
outputs[i].scale = ctx->ov_option.scale;
outputs[i].mean = ctx->ov_option.mean;
ov_shape_free(&output_shape);
ov_tensor_free(output_tensor);
output_tensor = NULL;
@ -452,9 +432,9 @@ static void infer_completion_callback(void *args)
output.dims[i] = dims.dims[i];
av_assert0(request->lltask_count <= dims.dims[0]);
output.dt = precision_to_datatype(precision);
output.layout = ctx->options.layout;
output.scale = ctx->options.scale;
output.mean = ctx->options.mean;
output.layout = ctx->ov_option.layout;
output.scale = ctx->ov_option.scale;
output.mean = ctx->ov_option.mean;
outputs = &output;
#endif
@ -590,7 +570,6 @@ static void dnn_free_model_ov(DNNModel **model)
av_free(ov_model->all_output_names);
av_free(ov_model->all_input_names);
#endif
av_opt_free(&ov_model->ctx);
av_freep(&ov_model);
av_freep(model);
}
@ -599,7 +578,7 @@ static void dnn_free_model_ov(DNNModel **model)
static int init_model_ov(OVModel *ov_model, const char *input_name, const char **output_names, int nb_outputs)
{
int ret = 0;
OVContext *ctx = &ov_model->ctx;
DnnContext *ctx = ov_model->ctx;
#if HAVE_OPENVINO2
ov_status_e status;
ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
@ -610,7 +589,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ov_layout_t* NCHW_layout = NULL;
const char* NHWC_desc = "NHWC";
const char* NCHW_desc = "NCHW";
const char* device = ctx->options.device_type;
const char* device = ctx->device ? ctx->device : "CPU";
#else
IEStatusCode status;
ie_available_devices_t a_dev;
@ -618,17 +597,17 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
char *all_dev_names = NULL;
#endif
// We scale pixel by default when do frame processing.
if (fabsf(ctx->options.scale) < 1e-6f)
ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1;
if (fabsf(ctx->ov_option.scale) < 1e-6f)
ctx->ov_option.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1;
// batch size
if (ctx->options.batch_size <= 0) {
ctx->options.batch_size = 1;
if (ctx->ov_option.batch_size <= 0) {
ctx->ov_option.batch_size = 1;
}
#if HAVE_OPENVINO2
if (ctx->options.batch_size > 1) {
if (ctx->ov_option.batch_size > 1) {
avpriv_report_missing_feature(ctx, "Do not support batch_size > 1 for now,"
"change batch_size to 1.\n");
ctx->options.batch_size = 1;
ctx->ov_option.batch_size = 1;
}
status = ov_preprocess_prepostprocessor_create(ov_model->ov_model, &ov_model->preprocess);
@ -677,9 +656,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ret = ov2_map_error(status, NULL);
goto err;
}
if (ctx->options.layout == DL_NCHW)
if (ctx->ov_option.layout == DL_NCHW)
status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout);
else if (ctx->options.layout == DL_NHWC)
else if (ctx->ov_option.layout == DL_NHWC)
status = ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n");
@ -725,7 +704,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
}
if (ov_model->model->func_type != DFT_PROCESS_FRAME)
status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f)
else if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || fabsf(ctx->ov_option.mean) > 1e-6f)
status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
else
status |= ov_preprocess_output_set_element_type(output_tensor_info, U8);
@ -740,7 +719,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ov_model->output_info = NULL;
}
// set preprocess steps.
if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) {
if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || fabsf(ctx->ov_option.mean) > 1e-6f) {
ov_preprocess_preprocess_steps_t* input_process_steps = NULL;
status = ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, &input_process_steps);
if (status != OK) {
@ -749,8 +728,8 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
status = ov_preprocess_preprocess_steps_convert_element_type(input_process_steps, F32);
status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->options.mean);
status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->options.scale);
status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->ov_option.mean);
status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->ov_option.scale);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to set preprocess steps\n");
ov_preprocess_preprocess_steps_free(input_process_steps);
@ -824,7 +803,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ov_layout_free(NCHW_layout);
ov_layout_free(NHWC_layout);
#else
if (ctx->options.batch_size > 1) {
if (ctx->ov_option.batch_size > 1) {
input_shapes_t input_shapes;
status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
if (status != OK) {
@ -832,7 +811,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
for (int i = 0; i < input_shapes.shape_num; i++)
input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size;
input_shapes.shapes[i].shape.dims[0] = ctx->ov_option.batch_size;
status = ie_network_reshape(ov_model->network, input_shapes);
ie_network_input_shapes_free(&input_shapes);
if (status != OK) {
@ -882,7 +861,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
}
}
status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
status = ie_core_load_network(ov_model->core, ov_model->network, ctx->device, &config, &ov_model->exe_network);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
status = ie_core_get_available_devices(ov_model->core, &a_dev);
@ -895,15 +874,15 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
APPEND_STRING(all_dev_names, a_dev.devices[i])
}
av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n",
ctx->options.device_type, all_dev_names);
ctx->device, all_dev_names);
ret = AVERROR(ENODEV);
goto err;
}
#endif
// create infer_requests for async execution
if (ctx->options.nireq <= 0) {
if (ctx->nireq <= 0) {
// the default value is a rough estimation
ctx->options.nireq = av_cpu_count() / 2 + 1;
ctx->nireq = av_cpu_count() / 2 + 1;
}
ov_model->request_queue = ff_safe_queue_create();
@ -912,7 +891,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
for (int i = 0; i < ctx->options.nireq; i++) {
for (int i = 0; i < ctx->nireq; i++) {
OVRequestItem *item = av_mallocz(sizeof(*item));
if (!item) {
ret = AVERROR(ENOMEM);
@ -945,7 +924,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
}
#endif
item->lltasks = av_malloc_array(ctx->options.batch_size, sizeof(*item->lltasks));
item->lltasks = av_malloc_array(ctx->ov_option.batch_size, sizeof(*item->lltasks));
if (!item->lltasks) {
ret = AVERROR(ENOMEM);
goto err;
@ -994,7 +973,7 @@ static int execute_model_ov(OVRequestItem *request, Queue *inferenceq)
LastLevelTaskItem *lltask;
int ret = 0;
TaskItem *task;
OVContext *ctx;
DnnContext *ctx;
OVModel *ov_model;
if (ff_queue_size(inferenceq) == 0) {
@ -1010,7 +989,7 @@ static int execute_model_ov(OVRequestItem *request, Queue *inferenceq)
lltask = ff_queue_peek_front(inferenceq);
task = lltask->task;
ov_model = task->model;
ctx = &ov_model->ctx;
ctx = ov_model->ctx;
ret = fill_model_input_ov(ov_model, request);
if (ret != 0) {
@ -1084,8 +1063,8 @@ err:
static int get_input_ov(void *model, DNNData *input, const char *input_name)
{
OVModel *ov_model = model;
OVContext *ctx = &ov_model->ctx;
int input_resizable = ctx->options.input_resizable;
DnnContext *ctx = ov_model->ctx;
int input_resizable = ctx->ov_option.input_resizable;
#if HAVE_OPENVINO2
ov_shape_t input_shape = {0};
@ -1291,7 +1270,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
#endif
int ret;
OVModel *ov_model = model;
OVContext *ctx = &ov_model->ctx;
DnnContext *ctx = ov_model->ctx;
TaskItem task;
OVRequestItem *request;
DNNExecBaseParams exec_params = {
@ -1308,7 +1287,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
}
#if HAVE_OPENVINO2
if (ctx->options.input_resizable) {
if (ctx->ov_option.input_resizable) {
status = ov_partial_shape_create(4, dims, &partial_shape);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to create partial shape.\n");
@ -1339,7 +1318,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
if (!ov_model->compiled_model) {
#else
if (ctx->options.input_resizable) {
if (ctx->ov_option.input_resizable) {
status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
input_shapes.shapes->shape.dims[2] = input_height;
input_shapes.shapes->shape.dims[3] = input_width;
@ -1386,11 +1365,10 @@ err:
return ret;
}
static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
static DNNModel *dnn_load_model_ov(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
OVModel *ov_model = NULL;
OVContext *ctx = NULL;
#if HAVE_OPENVINO2
ov_core_t* core = NULL;
ov_model_t* ovmodel = NULL;
@ -1411,17 +1389,9 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
av_freep(&model);
return NULL;
}
ov_model->ctx = ctx;
model->model = ov_model;
ov_model->model = model;
ov_model->ctx.class = &dnn_openvino_class;
ctx = &ov_model->ctx;
//parse options
av_opt_set_defaults(ctx);
if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
goto err;
}
#if HAVE_OPENVINO2
status = ov_core_create(&core);
@ -1430,13 +1400,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
}
ov_model->core = core;
status = ov_core_read_model(core, model_filename, NULL, &ovmodel);
status = ov_core_read_model(core, ctx->model_filename, NULL, &ovmodel);
if (status != OK) {
ov_version_t ver;
status = ov_get_openvino_version(&ver);
av_log(NULL, AV_LOG_ERROR, "Failed to read the network from model file %s,\n"
"Please check if the model version matches the runtime OpenVINO Version:\n",
model_filename);
ctx->model_filename);
if (status == OK) {
av_log(NULL, AV_LOG_ERROR, "BuildNumber: %s\n", ver.buildNumber);
}
@ -1452,13 +1422,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
if (status != OK)
goto err;
status = ie_core_read_network(ov_model->core, model_filename, NULL, &ov_model->network);
status = ie_core_read_network(ov_model->core, ctx->model_filename, NULL, &ov_model->network);
if (status != OK) {
ie_version_t ver;
ver = ie_c_api_version();
av_log(ctx, AV_LOG_ERROR, "Failed to read the network from model file %s,\n"
"Please check if the model version matches the runtime OpenVINO %s\n",
model_filename, ver.api_version);
ctx->model_filename, ver.api_version);
ie_version_free(&ver);
goto err;
}
@ -1496,7 +1466,6 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_ov;
model->get_output = &get_output_ov;
model->options = options;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
@ -1510,7 +1479,7 @@ err:
static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_params)
{
OVModel *ov_model = model->model;
OVContext *ctx = &ov_model->ctx;
DnnContext *ctx = ov_model->ctx;
OVRequestItem *request;
TaskItem *task;
int ret;
@ -1539,7 +1508,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
return AVERROR(ENOMEM);
}
ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->options.async, 1);
ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->async, 1);
if (ret != 0) {
av_freep(&task);
return ret;
@ -1557,8 +1526,8 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
return ret;
}
if (ctx->options.async) {
while (ff_queue_size(ov_model->lltask_queue) >= ctx->options.batch_size) {
if (ctx->async) {
while (ff_queue_size(ov_model->lltask_queue) >= ctx->ov_option.batch_size) {
request = ff_safe_queue_pop_front(ov_model->request_queue);
if (!request) {
av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n");
@ -1581,7 +1550,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
return AVERROR(ENOSYS);
}
if (ctx->options.batch_size > 1) {
if (ctx->ov_option.batch_size > 1) {
avpriv_report_missing_feature(ctx, "batch mode for sync execution");
return AVERROR(ENOSYS);
}
@ -1604,7 +1573,7 @@ static DNNAsyncStatusType dnn_get_result_ov(const DNNModel *model, AVFrame **in,
static int dnn_flush_ov(const DNNModel *model)
{
OVModel *ov_model = model->model;
OVContext *ctx = &ov_model->ctx;
DnnContext *ctx = ov_model->ctx;
OVRequestItem *request;
#if HAVE_OPENVINO2
ov_status_e status;
@ -1652,6 +1621,7 @@ static int dnn_flush_ov(const DNNModel *model)
}
const DNNModule ff_dnn_backend_openvino = {
.clazz = DNN_DEFINE_CLASS(dnn_openvino),
.load_model = dnn_load_model_ov,
.execute_model = dnn_execute_model_ov,
.get_result = dnn_get_result_ov,

View File

@ -36,19 +36,8 @@
#include "safe_queue.h"
#include <tensorflow/c/c_api.h>
typedef struct TFOptions{
char *sess_config;
uint8_t async;
uint32_t nireq;
} TFOptions;
typedef struct TFContext {
const AVClass *class;
TFOptions options;
} TFContext;
typedef struct TFModel{
TFContext ctx;
typedef struct TFModel {
DnnContext *ctx;
DNNModel *model;
TF_Graph *graph;
TF_Session *session;
@ -76,15 +65,13 @@ typedef struct TFRequestItem {
DNNAsyncExecModule exec_module;
} TFRequestItem;
#define OFFSET(x) offsetof(TFContext, x)
#define OFFSET(x) offsetof(TFOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_tensorflow_options[] = {
{ "sess_config", "config for SessionOptions", OFFSET(options.sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
DNN_BACKEND_COMMON_OPTIONS
{ "sess_config", "config for SessionOptions", OFFSET(sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_tensorflow);
static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue);
static void infer_completion_callback(void *args);
@ -160,7 +147,7 @@ static int tf_start_inference(void *args)
TFModel *tf_model = task->model;
if (!request) {
av_log(&tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
av_log(tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
return AVERROR(EINVAL);
}
@ -170,7 +157,7 @@ static int tf_start_inference(void *args)
task->nb_output, NULL, 0, NULL,
request->status);
if (TF_GetCode(request->status) != TF_OK) {
av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
av_log(tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
return DNN_GENERIC_ERROR;
}
return 0;
@ -198,7 +185,7 @@ static inline void destroy_request_item(TFRequestItem **arg) {
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
TFModel *tf_model = task->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask));
if (!lltask) {
av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n");
@ -278,7 +265,7 @@ static TF_Tensor *allocate_input_tensor(const DNNData *input)
static int get_input_tf(void *model, DNNData *input, const char *input_name)
{
TFModel *tf_model = model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TF_Status *status;
TF_DataType dt;
int64_t dims[4];
@ -328,7 +315,7 @@ static int get_output_tf(void *model, const char *input_name, int input_width, i
{
int ret;
TFModel *tf_model = model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TaskItem task;
TFRequestItem *request;
DNNExecBaseParams exec_params = {
@ -399,7 +386,7 @@ static int hex_to_data(uint8_t *data, const char *p)
static int load_tf_model(TFModel *tf_model, const char *model_filename)
{
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TF_Buffer *graph_def;
TF_ImportGraphDefOptions *graph_opts;
TF_SessionOptions *sess_opts;
@ -408,7 +395,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
int sess_config_length = 0;
// prepare the sess config data
if (tf_model->ctx.options.sess_config != NULL) {
if (ctx->tf_option.sess_config != NULL) {
const char *config;
/*
tf_model->ctx.options.sess_config is hex to present the serialized proto
@ -416,11 +403,11 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
proto in a python script, tools/python/tf_sess_config.py is a script example
to generate the configs of sess_config.
*/
if (strncmp(tf_model->ctx.options.sess_config, "0x", 2) != 0) {
if (strncmp(ctx->tf_option.sess_config, "0x", 2) != 0) {
av_log(ctx, AV_LOG_ERROR, "sess_config should start with '0x'\n");
return AVERROR(EINVAL);
}
config = tf_model->ctx.options.sess_config + 2;
config = ctx->tf_option.sess_config + 2;
sess_config_length = hex_to_data(NULL, config);
sess_config = av_mallocz(sess_config_length + AV_INPUT_BUFFER_PADDING_SIZE);
@ -461,7 +448,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
if (TF_GetCode(tf_model->status) != TF_OK) {
TF_DeleteSessionOptions(sess_opts);
av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n",
tf_model->ctx.options.sess_config);
ctx->tf_option.sess_config);
return DNN_GENERIC_ERROR;
}
}
@ -529,15 +516,14 @@ static void dnn_free_model_tf(DNNModel **model)
TF_DeleteStatus(tf_model->status);
}
av_freep(&tf_model);
av_freep(model);
av_freep(&model);
}
}
static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
static DNNModel *dnn_load_model_tf(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
TFModel *tf_model = NULL;
TFContext *ctx = NULL;
model = av_mallocz(sizeof(DNNModel));
if (!model){
@ -551,23 +537,15 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
}
model->model = tf_model;
tf_model->model = model;
ctx = &tf_model->ctx;
ctx->class = &dnn_tensorflow_class;
tf_model->ctx = ctx;
//parse options
av_opt_set_defaults(ctx);
if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
if (load_tf_model(tf_model, ctx->model_filename) != 0){
av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", ctx->model_filename);
goto err;
}
if (load_tf_model(tf_model, model_filename) != 0){
av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", model_filename);
goto err;
}
if (ctx->options.nireq <= 0) {
ctx->options.nireq = av_cpu_count() / 2 + 1;
if (ctx->nireq <= 0) {
ctx->nireq = av_cpu_count() / 2 + 1;
}
#if !HAVE_PTHREAD_CANCEL
@ -582,7 +560,7 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
goto err;
}
for (int i = 0; i < ctx->options.nireq; i++) {
for (int i = 0; i < ctx->nireq; i++) {
TFRequestItem *item = av_mallocz(sizeof(*item));
if (!item) {
goto err;
@ -617,7 +595,6 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_tf;
model->get_output = &get_output_tf;
model->options = options;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
@ -632,7 +609,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) {
LastLevelTaskItem *lltask;
TaskItem *task;
TFInferRequest *infer_request = NULL;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
int ret = 0;
lltask = ff_queue_pop_front(tf_model->lltask_queue);
@ -728,7 +705,7 @@ static void infer_completion_callback(void *args) {
DNNData *outputs;
TFInferRequest *infer_request = request->infer_request;
TFModel *tf_model = task->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
outputs = av_calloc(task->nb_output, sizeof(*outputs));
if (!outputs) {
@ -787,7 +764,7 @@ err:
static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
{
TFModel *tf_model;
TFContext *ctx;
DnnContext *ctx;
LastLevelTaskItem *lltask;
TaskItem *task;
int ret = 0;
@ -800,7 +777,7 @@ static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
lltask = ff_queue_peek_front(lltask_queue);
task = lltask->task;
tf_model = task->model;
ctx = &tf_model->ctx;
ctx = tf_model->ctx;
ret = fill_model_input_tf(tf_model, request);
if (ret != 0) {
@ -833,7 +810,7 @@ err:
static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_params)
{
TFModel *tf_model = model->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TaskItem *task;
TFRequestItem *request;
int ret = 0;
@ -849,7 +826,7 @@ static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_p
return AVERROR(ENOMEM);
}
ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->options.async, 1);
ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->async, 1);
if (ret != 0) {
av_log(ctx, AV_LOG_ERROR, "Fill task with invalid parameter(s).\n");
av_freep(&task);
@ -887,7 +864,7 @@ static DNNAsyncStatusType dnn_get_result_tf(const DNNModel *model, AVFrame **in,
static int dnn_flush_tf(const DNNModel *model)
{
TFModel *tf_model = model->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TFRequestItem *request;
int ret;
@ -915,6 +892,7 @@ static int dnn_flush_tf(const DNNModel *model)
}
const DNNModule ff_dnn_backend_tf = {
.clazz = DNN_DEFINE_CLASS(dnn_tensorflow),
.load_model = dnn_load_model_tf,
.execute_model = dnn_execute_model_tf,
.get_result = dnn_get_result_tf,

View File

@ -36,18 +36,8 @@ extern "C" {
#include "safe_queue.h"
}
typedef struct THOptions{
char *device_name;
int optimize;
} THOptions;
typedef struct THContext {
const AVClass *c_class;
THOptions options;
} THContext;
typedef struct THModel {
THContext ctx;
DnnContext *ctx;
DNNModel *model;
torch::jit::Module *jit_model;
SafeQueue *request_queue;
@ -67,20 +57,17 @@ typedef struct THRequestItem {
} THRequestItem;
#define OFFSET(x) offsetof(THContext, x)
#define OFFSET(x) offsetof(THOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_th_options[] = {
{ "device", "device to run model", OFFSET(options.device_name), AV_OPT_TYPE_STRING, { .str = "cpu" }, 0, 0, FLAGS },
{ "optimize", "turn on graph executor optimization", OFFSET(options.optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
{ "optimize", "turn on graph executor optimization", OFFSET(optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_th);
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
THModel *th_model = (THModel *)task->model;
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
LastLevelTaskItem *lltask = (LastLevelTaskItem *)av_malloc(sizeof(*lltask));
if (!lltask) {
av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for LastLevelTaskItem\n");
@ -153,7 +140,6 @@ static void dnn_free_model_th(DNNModel **model)
}
ff_queue_destroy(th_model->task_queue);
delete th_model->jit_model;
av_opt_free(&th_model->ctx);
av_freep(&th_model);
av_freep(model);
}
@ -181,7 +167,7 @@ static int fill_model_input_th(THModel *th_model, THRequestItem *request)
TaskItem *task = NULL;
THInferRequest *infer_request = NULL;
DNNData input = { 0 };
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
int ret, width_idx, height_idx, channel_idx;
lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
@ -241,7 +227,7 @@ static int th_start_inference(void *args)
LastLevelTaskItem *lltask = NULL;
TaskItem *task = NULL;
THModel *th_model = NULL;
THContext *ctx = NULL;
DnnContext *ctx = NULL;
std::vector<torch::jit::IValue> inputs;
torch::NoGradGuard no_grad;
@ -253,9 +239,9 @@ static int th_start_inference(void *args)
lltask = request->lltask;
task = lltask->task;
th_model = (THModel *)task->model;
ctx = &th_model->ctx;
ctx = th_model->ctx;
if (ctx->options.optimize)
if (ctx->torch_option.optimize)
torch::jit::setGraphExecutorOptimize(true);
else
torch::jit::setGraphExecutorOptimize(false);
@ -292,7 +278,7 @@ static void infer_completion_callback(void *args) {
outputs.dims[2] = sizes.at(2); // H
outputs.dims[3] = sizes.at(3); // W
} else {
avpriv_report_missing_feature(&th_model->ctx, "Support of this kind of model");
avpriv_report_missing_feature(th_model->ctx, "Support of this kind of model");
goto err;
}
@ -304,7 +290,7 @@ static void infer_completion_callback(void *args) {
if (th_model->model->frame_post_proc != NULL) {
th_model->model->frame_post_proc(task->out_frame, &outputs, th_model->model->filter_ctx);
} else {
ff_proc_from_dnn_to_frame(task->out_frame, &outputs, &th_model->ctx);
ff_proc_from_dnn_to_frame(task->out_frame, &outputs, th_model->ctx);
}
} else {
task->out_frame->width = outputs.dims[dnn_get_width_idx_by_layout(outputs.layout)];
@ -312,7 +298,7 @@ static void infer_completion_callback(void *args) {
}
break;
default:
avpriv_report_missing_feature(&th_model->ctx, "model function type %d", th_model->model->func_type);
avpriv_report_missing_feature(th_model->ctx, "model function type %d", th_model->model->func_type);
goto err;
}
task->inference_done++;
@ -322,7 +308,7 @@ err:
if (ff_safe_queue_push_back(th_model->request_queue, request) < 0) {
destroy_request_item(&request);
av_log(&th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
av_log(th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
}
}
@ -352,7 +338,7 @@ static int execute_model_th(THRequestItem *request, Queue *lltask_queue)
goto err;
}
if (task->async) {
avpriv_report_missing_feature(&th_model->ctx, "LibTorch async");
avpriv_report_missing_feature(th_model->ctx, "LibTorch async");
} else {
ret = th_start_inference((void *)(request));
if (ret != 0) {
@ -375,7 +361,7 @@ static int get_output_th(void *model, const char *input_name, int input_width, i
{
int ret = 0;
THModel *th_model = (THModel*) model;
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
TaskItem task = { 0 };
THRequestItem *request = NULL;
DNNExecBaseParams exec_params = {
@ -424,12 +410,12 @@ static THInferRequest *th_create_inference_request(void)
return request;
}
static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
THModel *th_model = NULL;
THRequestItem *item = NULL;
THContext *ctx;
const char *device_name = ctx->device ? ctx->device : "cpu";
model = (DNNModel *)av_mallocz(sizeof(DNNModel));
if (!model) {
@ -443,24 +429,17 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
}
th_model->model = model;
model->model = th_model;
th_model->ctx.c_class = &dnn_th_class;
ctx = &th_model->ctx;
//parse options
av_opt_set_defaults(ctx);
if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
return NULL;
}
th_model->ctx = ctx;
c10::Device device = c10::Device(ctx->options.device_name);
c10::Device device = c10::Device(device_name);
if (!device.is_cpu()) {
av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", ctx->options.device_name);
av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", device_name);
goto fail;
}
try {
th_model->jit_model = new torch::jit::Module;
(*th_model->jit_model) = torch::jit::load(model_filename);
(*th_model->jit_model) = torch::jit::load(ctx->model_filename);
} catch (const c10::Error& e) {
av_log(ctx, AV_LOG_ERROR, "Failed to load torch model\n");
goto fail;
@ -502,7 +481,6 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_th;
model->get_output = &get_output_th;
model->options = NULL;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
return model;
@ -519,7 +497,7 @@ fail:
static int dnn_execute_model_th(const DNNModel *model, DNNExecBaseParams *exec_params)
{
THModel *th_model = (THModel *)model->model;
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
TaskItem *task;
THRequestItem *request;
int ret = 0;
@ -582,7 +560,7 @@ static int dnn_flush_th(const DNNModel *model)
request = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue);
if (!request) {
av_log(&th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
av_log(th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
return AVERROR(EINVAL);
}
@ -590,6 +568,7 @@ static int dnn_flush_th(const DNNModel *model)
}
extern const DNNModule ff_dnn_backend_torch = {
.clazz = DNN_DEFINE_CLASS(dnn_th),
.load_model = dnn_load_model_th,
.execute_model = dnn_execute_model_th,
.get_result = dnn_get_result_th,

View File

@ -24,12 +24,61 @@
*/
#include "../dnn_interface.h"
#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavfilter/internal.h"
extern const DNNModule ff_dnn_backend_openvino;
extern const DNNModule ff_dnn_backend_tf;
extern const DNNModule ff_dnn_backend_torch;
#define OFFSET(x) offsetof(DnnContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
/*
 * Options shared by every DNN backend. Each entry stores its value in a
 * DnnContext member through OFFSET().
 *
 * "backend_configs" and "options" both write to backend_options and both
 * carry AV_OPT_FLAG_DEPRECATED.
 */
static const AVOption dnn_base_options[] = {
{"model", "path to model file",
OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{"input", "input name of the model",
OFFSET(model_inputname), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{"output", "output name of the model",
OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{"backend_configs", "backend configs (deprecated)",
OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
{"options", "backend configs (deprecated)",
OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
// Defaults to 0; accepted range is 0..INT_MAX.
{"nireq", "number of request",
OFFSET(nireq), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS},
// Boolean, enabled by default.
{"async", "use DNN async inference",
OFFSET(async), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
// No default (NULL); the torch backend falls back to "cpu" when it is unset.
{"device", "device to run model",
OFFSET(device), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{NULL}
};
AVFILTER_DEFINE_CLASS(dnn_base);
/*
 * Describes one AVClass pointer slot embedded in DnnContext and the class
 * that belongs in that slot.
 */
typedef struct DnnBackendInfo {
// Byte offset, from the start of DnnContext, of the AVClass pointer slot.
const size_t offset;
// The base entry is initialised through .class, backend entries through
// .module. Users of the table read .class for every entry, which relies on
// the AVClass being the first member of struct DNNModule.
union {
const AVClass *class;
const DNNModule *module;
};
} DnnBackendInfo;
/*
 * Table of every AVClass slot inside DnnContext: the base options class at
 * offset 0, followed by one entry per backend enabled at configure time.
 */
static const DnnBackendInfo dnn_backend_info_list[] = {
{0, .class = &dnn_base_class},
// Must keep the same order as the backend option structs in DnnContext, so
// the offset values are in increasing order. ff_dnn_child_next() steps from
// the entry matching the previous child to the entry that follows it.
#if CONFIG_LIBTENSORFLOW
{offsetof(DnnContext, tf_option), .module = &ff_dnn_backend_tf},
#endif
#if CONFIG_LIBOPENVINO
{offsetof(DnnContext, ov_option), .module = &ff_dnn_backend_openvino},
#endif
#if CONFIG_LIBTORCH
{offsetof(DnnContext, torch_option), .module = &ff_dnn_backend_torch},
#endif
};
const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
{
switch(backend_type){
@ -52,3 +101,44 @@ const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
return NULL;
}
}
/**
 * Store the AVClass pointer of every dnn_backend_info_list entry into its
 * slot inside ctx.
 *
 * Each slot is located by adding the entry's byte offset to the address of
 * ctx; the value written is the entry's .class union member.
 *
 * @param ctx DNN context whose AVClass pointer slots are filled in
 */
void ff_dnn_init_child_class(DnnContext *ctx)
{
    char *base = (char *)ctx;
    const DnnBackendInfo *info = dnn_backend_info_list;
    const DnnBackendInfo *const end = info + FF_ARRAY_ELEMS(dnn_backend_info_list);

    for (; info < end; info++) {
        const AVClass **slot = (const AVClass **)(base + info->offset);

        *slot = info->class;
    }
}
/**
 * Iterate over the AVClass-enabled children stored inside a DnnContext.
 *
 * @param obj  DNN context that holds the children
 * @param prev child returned by the previous call, or NULL to start
 * @return obj itself when prev is NULL; otherwise the address of the slot
 *         that follows prev in dnn_backend_info_list, or NULL when prev is
 *         the last entry or does not match any entry
 */
void *ff_dnn_child_next(DnnContext *obj, void *prev)
{
    const size_t count = FF_ARRAY_ELEMS(dnn_backend_info_list);
    size_t prev_offset;
    size_t idx;

    // First call: the context itself is the first child.
    if (!prev) {
        av_assert0(obj->clazz);
        return obj;
    }

    // Identify prev by its byte offset within obj, then hand out the entry
    // that comes right after it in the table.
    prev_offset = (char *)prev - (char *)obj;
    for (idx = 0; idx + 1 < count; idx++) {
        const AVClass **next;

        if (dnn_backend_info_list[idx].offset != prev_offset)
            continue;

        next = (const AVClass **)((char *)obj + dnn_backend_info_list[idx + 1].offset);
        av_assert0(*next);
        return next;
    }

    return NULL;
}
/**
 * Iterate over the AVClasses listed in dnn_backend_info_list.
 *
 * @param iter opaque iteration state; the pointer value is used as the index
 *             of the next entry and is advanced by one on every successful
 *             call
 * @return the .class union member of the current entry, or NULL once the
 *         index is past the end of the table
 */
const AVClass *ff_dnn_child_class_iterate(void **iter)
{
    const uintptr_t idx = (uintptr_t)*iter;

    if (idx >= FF_ARRAY_ELEMS(dnn_backend_info_list))
        return NULL;

    *iter = (void *)(idx + 1);
    return dnn_backend_info_list[idx].class;
}

View File

@ -19,6 +19,7 @@
#include "dnn_filter_common.h"
#include "libavutil/avstring.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#define MAX_SUPPORTED_OUTPUTS_NB 4
@ -52,6 +53,23 @@ static char **separate_output_names(const char *expr, const char *val_sep, int *
return parsed_vals;
}
/*
 * Overlay used to reach the DnnContext inside a filter's private data:
 * filter->priv and the AVClass callback object are cast to this type.
 * NOTE(review): this assumes every DNN filter's private context begins with
 * an AVClass pointer immediately followed by its DnnContext — confirm for
 * each filter that uses these helpers.
 */
typedef struct DnnFilterBase {
const AVClass *class;
DnnContext dnnctx;
} DnnFilterBase;
/**
 * Fill in the AVClass pointer slots of the DnnContext embedded in a filter's
 * private data. Installed as the .preinit callback of the DNN filters.
 *
 * @param filter filter whose priv data is viewed as a DnnFilterBase
 * @return always 0
 */
int ff_dnn_filter_init_child_class(AVFilterContext *filter)
{
    DnnContext *dnnctx = &((DnnFilterBase *)filter->priv)->dnnctx;

    ff_dnn_init_child_class(dnnctx);

    return 0;
}
void *ff_dnn_filter_child_next(void *obj, void *prev)
{
DnnFilterBase *base = obj;
return ff_dnn_child_next(&base->dnnctx, prev);
}
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNBackendType backend = ctx->backend_type;
@ -91,7 +109,25 @@ int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *fil
return AVERROR(EINVAL);
}
ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, func_type, ctx->backend_options, filter_ctx);
if (ctx->backend_options) {
void *child = NULL;
av_log(filter_ctx, AV_LOG_WARNING,
"backend_configs is deprecated, please set backend options directly\n");
while (child = ff_dnn_child_next(ctx, child)) {
if (*(const AVClass **)child == &ctx->dnn_module->clazz) {
int ret = av_opt_set_from_string(child, ctx->backend_options,
NULL, "=", "&");
if (ret < 0) {
av_log(filter_ctx, AV_LOG_ERROR, "failed to parse options \"%s\"\n",
ctx->backend_options);
return ret;
}
}
}
}
ctx->model = (ctx->dnn_module->load_model)(ctx, func_type, filter_ctx);
if (!ctx->model) {
av_log(filter_ctx, AV_LOG_ERROR, "could not load DNN model\n");
return AVERROR(EINVAL);

View File

@ -26,28 +26,23 @@
#include "dnn_interface.h"
typedef struct DnnContext {
char *model_filename;
DNNBackendType backend_type;
char *model_inputname;
char *model_outputnames_string;
char *backend_options;
int async;
/*
 * Define the static AVClass `name##_class` for a DNN filter.
 * `desc` becomes the class name and `options` the option table. The class is
 * wired to ff_dnn_filter_child_next and ff_dnn_child_class_iterate for its
 * child_next and child_class_iterate callbacks.
 */
#define AVFILTER_DNN_DEFINE_CLASS_EXT(name, desc, options) \
static const AVClass name##_class = { \
.class_name = desc, \
.item_name = av_default_item_name, \
.option = options, \
.version = LIBAVUTIL_VERSION_INT, \
.category = AV_CLASS_CATEGORY_FILTER, \
.child_next = ff_dnn_filter_child_next, \
.child_class_iterate = ff_dnn_child_class_iterate, \
}
char **model_outputnames;
uint32_t nb_outputs;
const DNNModule *dnn_module;
DNNModel *model;
} DnnContext;
/*
 * Shorthand for AVFILTER_DNN_DEFINE_CLASS_EXT: defines `fname##_class` with
 * the stringified filter name as class name and `fname##_options` as the
 * option table.
 */
#define AVFILTER_DNN_DEFINE_CLASS(fname) \
AVFILTER_DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
#define DNN_COMMON_OPTIONS \
{ "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "input", "input name of the model", OFFSET(model_inputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "output", "output name of the model", OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "backend_configs", "backend configs", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "options", "backend configs (deprecated, use backend_configs)", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},\
{ "async", "use DNN async inference (ignored, use backend_configs='async=1')", OFFSET(async), AV_OPT_TYPE_BOOL, { .i64 = 1}, 0, 1, FLAGS},
void *ff_dnn_filter_child_next(void *obj, void *prev);
int ff_dnn_filter_init_child_class(AVFilterContext *filter);
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
int ff_dnn_set_frame_proc(DnnContext *ctx, FramePrePostProc pre_proc, FramePrePostProc post_proc);

View File

@ -93,8 +93,6 @@ typedef int (*ClassifyPostProc)(AVFrame *frame, DNNData *output, uint32_t bbox_i
typedef struct DNNModel{
// Stores model that can be different for different backends.
void *model;
// Stores options when the model is executed by the backend
const char *options;
// Stores FilterContext used for the interaction between AVFrame and DNNData
AVFilterContext *filter_ctx;
// Stores function type of the model
@ -117,10 +115,65 @@ typedef struct DNNModel{
ClassifyPostProc classify_post_proc;
} DNNModel;
// Options specific to the TensorFlow backend, embedded in DnnContext.
typedef struct TFOptions{
// AVClass pointer; kept as the first member because the struct's address
// is used as the address of this pointer.
const AVClass *clazz;
// NOTE(review): presumably the "sess_config" option (config for
// SessionOptions) — confirm against the TensorFlow backend option table.
char *sess_config;
} TFOptions;
// Options specific to the OpenVINO backend, embedded in DnnContext.
typedef struct OVOptions {
// AVClass pointer; kept as the first member because the struct's address
// is used as the address of this pointer.
const AVClass *clazz;
// NOTE(review): presumably the "batch_size" option (batch size per
// request) — confirm against the OpenVINO backend option table.
int batch_size;
// NOTE(review): the meaning of the remaining fields is defined by the
// OpenVINO backend option table, which is not visible here — confirm.
int input_resizable;
DNNLayout layout;
float scale;
float mean;
} OVOptions;
// Options specific to the LibTorch backend, embedded in DnnContext.
typedef struct THOptions {
// AVClass pointer; kept as the first member because the struct's address
// is used as the address of this pointer.
const AVClass *clazz;
// Non-zero enables the torch graph executor optimization before inference.
int optimize;
} THOptions;
typedef struct DNNModule DNNModule;
/*
 * Shared DNN state embedded in every DNN filter: the common options, the
 * selected backend module, the loaded model, and one option struct per
 * backend enabled at configure time.
 */
typedef struct DnnContext {
// AVClass of the common options; must stay the first member because the
// base class is registered at offset 0.
const AVClass *clazz;
// Model returned by the backend's load_model().
DNNModel *model;
// "model" option: path to the model file.
char *model_filename;
// Backend selected for this context.
DNNBackendType backend_type;
// "input" option: input name of the model.
char *model_inputname;
// "output" option: output name(s) of the model, as given by the user.
char *model_outputnames_string;
// Deprecated "backend_configs"/"options" string; when set it is parsed into
// the selected backend's option struct before the model is loaded.
char *backend_options;
// "async" option: use DNN async inference.
int async;
// NOTE(review): presumably the parsed form of model_outputnames_string and
// its element count — confirm against ff_dnn_init().
char **model_outputnames;
uint32_t nb_outputs;
// Backend module whose load_model() is called with this context.
const DNNModule *dnn_module;
// "nireq" option: number of requests.
int nireq;
// "device" option: device to run the model on; may be NULL.
char *device;
// Per-backend option structs. Each begins with an AVClass pointer, and their
// declaration order must match the backend table, which requires offsets in
// increasing order.
#if CONFIG_LIBTENSORFLOW
TFOptions tf_option;
#endif
#if CONFIG_LIBOPENVINO
OVOptions ov_option;
#endif
#if CONFIG_LIBTORCH
THOptions torch_option;
#endif
} DnnContext;
// Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
typedef struct DNNModule{
struct DNNModule {
const AVClass clazz;
// Loads model and parameters from given file. Returns NULL if it is not possible.
DNNModel *(*load_model)(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
DNNModel *(*load_model)(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
// Executes model with specified input and output. Returns the error code otherwise.
int (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params);
// Retrieve inference result.
@ -129,11 +182,15 @@ typedef struct DNNModule{
int (*flush)(const DNNModel *model);
// Frees memory allocated for model.
void (*free_model)(DNNModel **model);
} DNNModule;
};
// Initializes DNNModule depending on chosen backend.
const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx);
void ff_dnn_init_child_class(DnnContext *ctx);
void *ff_dnn_child_next(DnnContext *obj, void *prev);
const AVClass *ff_dnn_child_class_iterate(void **iter);
static inline int dnn_get_width_idx_by_layout(DNNLayout layout)
{
return layout == DL_NHWC ? 2 : 3;

View File

@ -46,13 +46,10 @@ static const AVOption derain_options[] = {
#if (CONFIG_LIBTENSORFLOW == 1)
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
#endif
{ "model", "path to model file", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
{ "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(derain);
AVFILTER_DNN_DEFINE_CLASS(derain);
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
@ -113,6 +110,7 @@ const AVFilter ff_vf_derain = {
.name = "derain",
.description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."),
.priv_size = sizeof(DRContext),
.preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(derain_inputs),

View File

@ -50,14 +50,13 @@ static const AVOption dnn_classify_options[] = {
#if (CONFIG_LIBOPENVINO == 1)
{ "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
#endif
DNN_COMMON_OPTIONS
{ "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
{ "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "target", "which one to be classified", OFFSET2(target), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_classify);
AVFILTER_DNN_DEFINE_CLASS(dnn_classify);
static int dnn_classify_post_proc(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx)
{
@ -299,6 +298,7 @@ const AVFilter ff_vf_dnn_classify = {
.name = "dnn_classify",
.description = NULL_IF_CONFIG_SMALL("Apply DNN classify filter to the input."),
.priv_size = sizeof(DnnClassifyContext),
.preinit = ff_dnn_filter_init_child_class,
.init = dnn_classify_init,
.uninit = dnn_classify_uninit,
FILTER_INPUTS(ff_video_default_filterpad),

View File

@ -70,7 +70,6 @@ static const AVOption dnn_detect_options[] = {
#if (CONFIG_LIBOPENVINO == 1)
{ "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
#endif
DNN_COMMON_OPTIONS
{ "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
{ "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
@ -85,7 +84,7 @@ static const AVOption dnn_detect_options[] = {
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_detect);
AVFILTER_DNN_DEFINE_CLASS(dnn_detect);
static inline float sigmoid(float x) {
return 1.f / (1.f + exp(-x));
@ -851,6 +850,7 @@ const AVFilter ff_vf_dnn_detect = {
.name = "dnn_detect",
.description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
.priv_size = sizeof(DnnDetectContext),
.preinit = ff_dnn_filter_init_child_class,
.init = dnn_detect_init,
.uninit = dnn_detect_uninit,
FILTER_INPUTS(dnn_detect_inputs),

View File

@ -54,11 +54,10 @@ static const AVOption dnn_processing_options[] = {
#if (CONFIG_LIBTORCH == 1)
{ "torch", "torch backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, "backend" },
#endif
DNN_COMMON_OPTIONS
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_processing);
AVFILTER_DNN_DEFINE_CLASS(dnn_processing);
static av_cold int init(AVFilterContext *context)
{
@ -373,6 +372,7 @@ const AVFilter ff_vf_dnn_processing = {
.name = "dnn_processing",
.description = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
.priv_size = sizeof(DnnProcessingContext),
.preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(dnn_processing_inputs),

View File

@ -50,13 +50,10 @@ static const AVOption sr_options[] = {
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
#endif
{ "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
{ "model", "path to model file specifying network architecture and its parameters", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
{ "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
{ "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(sr);
AVFILTER_DNN_DEFINE_CLASS(sr);
static av_cold int init(AVFilterContext *context)
{
@ -192,6 +189,7 @@ const AVFilter ff_vf_sr = {
.name = "sr",
.description = NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."),
.priv_size = sizeof(SRContext),
.preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(sr_inputs),