diff --git a/libavfilter/dnn/dnn_backend_common.h b/libavfilter/dnn/dnn_backend_common.h
index 42c67c7040..9f5d37b3e0 100644
--- a/libavfilter/dnn/dnn_backend_common.h
+++ b/libavfilter/dnn/dnn_backend_common.h
@@ -28,9 +28,16 @@
 #include "../dnn_interface.h"
 #include "libavutil/thread.h"
 
-#define DNN_BACKEND_COMMON_OPTIONS \
-    { "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
-    { "async", "use DNN async inference", OFFSET(options.async), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+#define DNN_DEFINE_CLASS_EXT(name, desc, options) \
+    { \
+        .class_name = desc, \
+        .item_name  = av_default_item_name, \
+        .option     = options, \
+        .version    = LIBAVUTIL_VERSION_INT, \
+        .category   = AV_CLASS_CATEGORY_FILTER, \
+    }
+#define DNN_DEFINE_CLASS(fname) \
+    DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
 
 // one task for one function call from dnn interface
 typedef struct TaskItem {
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 374f21b7a1..c4b0682f11 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -40,24 +40,8 @@
 #endif
 #include "dnn_backend_common.h"
 
-typedef struct OVOptions{
-    char *device_type;
-    int nireq;
-    uint8_t async;
-    int batch_size;
-    int input_resizable;
-    DNNLayout layout;
-    float scale;
-    float mean;
-} OVOptions;
-
-typedef struct OVContext {
-    const AVClass *class;
-    OVOptions options;
-} OVContext;
-
 typedef struct OVModel{
-    OVContext ctx;
+    DnnContext *ctx;
     DNNModel *model;
 #if HAVE_OPENVINO2
     ov_core_t *core;
@@ -98,24 +82,20 @@ typedef struct OVRequestItem {
     generated_string = generated_string ? av_asprintf("%s %s", generated_string, iterate_string) : \
                                           av_asprintf("%s", iterate_string);
 
-#define OFFSET(x) offsetof(OVContext, x)
+#define OFFSET(x) offsetof(OVOptions, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption dnn_openvino_options[] = {
-    { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
-    DNN_BACKEND_COMMON_OPTIONS
-    { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
-    { "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
-    { "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" },
+    { "batch_size", "batch size per request", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
+    { "input_resizable", "can input be resizable or not", OFFSET(input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+    { "layout", "input layout of model", OFFSET(layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" },
     { "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, .unit = "layout"},
     { "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, .unit = "layout"},
     { "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, .unit = "layout"},
-    { "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
-    { "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(options.mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
+    { "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
+    { "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(dnn_openvino);
-
 #if HAVE_OPENVINO2
 static const struct {
     ov_status_e status;
@@ -199,7 +179,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
     DNNData input;
     LastLevelTaskItem *lltask;
     TaskItem *task;
-    OVContext *ctx = &ov_model->ctx;
+    DnnContext *ctx = ov_model->ctx;
 #if HAVE_OPENVINO2
     int64_t* dims;
     ov_status_e status;
@@ -292,7 +272,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
     input.scale = 1;
     input.mean = 0;
 
-    for (int i = 0; i < ctx->options.batch_size; ++i) {
+    for (int i = 0; i < ctx->ov_option.batch_size; ++i) {
         lltask = ff_queue_pop_front(ov_model->lltask_queue);
         if (!lltask) {
             break;
@@ -360,7 +340,7 @@ static void infer_completion_callback(void *args)
     OVModel *ov_model = task->model;
     SafeQueue *requestq = ov_model->request_queue;
     DNNData *outputs;
-    OVContext *ctx = &ov_model->ctx;
+    DnnContext *ctx = ov_model->ctx;
 #if HAVE_OPENVINO2
     size_t* dims;
     ov_status_e status;
@@ -410,9 +390,9 @@ static void infer_completion_callback(void *args)
             outputs[i].dims[2] = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1;
             outputs[i].dims[3] = output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1;
             av_assert0(request->lltask_count <= dims[0]);
-            outputs[i].layout = ctx->options.layout;
-            outputs[i].scale = ctx->options.scale;
-            outputs[i].mean = ctx->options.mean;
+            outputs[i].layout = ctx->ov_option.layout;
+            outputs[i].scale = ctx->ov_option.scale;
+            outputs[i].mean = ctx->ov_option.mean;
             ov_shape_free(&output_shape);
             ov_tensor_free(output_tensor);
             output_tensor = NULL;
@@ -452,9 +432,9 @@ static void infer_completion_callback(void *args)
         output.dims[i] = dims.dims[i];
     av_assert0(request->lltask_count <= dims.dims[0]);
     output.dt = precision_to_datatype(precision);
-    output.layout = ctx->options.layout;
-    output.scale = ctx->options.scale;
-    output.mean = ctx->options.mean;
+    output.layout = ctx->ov_option.layout;
+    output.scale = ctx->ov_option.scale;
+    output.mean = ctx->ov_option.mean;
     outputs = &output;
 #endif
 
@@ -590,7 +570,6 @@ static void dnn_free_model_ov(DNNModel **model)
         av_free(ov_model->all_output_names);
         av_free(ov_model->all_input_names);
 #endif
-        av_opt_free(&ov_model->ctx);
         av_freep(&ov_model);
         av_freep(model);
     }
@@ -599,7 +578,7 @@ static void dnn_free_model_ov(DNNModel **model)
 static int init_model_ov(OVModel *ov_model, const char *input_name, const char **output_names, int nb_outputs)
 {
     int ret = 0;
-    OVContext *ctx = &ov_model->ctx;
+    DnnContext *ctx = ov_model->ctx;
 #if HAVE_OPENVINO2
     ov_status_e status;
     ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
@@ -610,7 +589,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     ov_layout_t* NCHW_layout = NULL;
     const char* NHWC_desc = "NHWC";
     const char* NCHW_desc = "NCHW";
-    const char* device = ctx->options.device_type;
+    const char* device = ctx->device ? ctx->device : "CPU";
 #else
     IEStatusCode status;
     ie_available_devices_t a_dev;
@@ -618,17 +597,17 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     char *all_dev_names = NULL;
 #endif
     // We scale pixel by default when do frame processing.
-    if (fabsf(ctx->options.scale) < 1e-6f)
-        ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1;
+    if (fabsf(ctx->ov_option.scale) < 1e-6f)
+        ctx->ov_option.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1;
     // batch size
-    if (ctx->options.batch_size <= 0) {
-        ctx->options.batch_size = 1;
+    if (ctx->ov_option.batch_size <= 0) {
+        ctx->ov_option.batch_size = 1;
     }
 #if HAVE_OPENVINO2
-    if (ctx->options.batch_size > 1) {
+    if (ctx->ov_option.batch_size > 1) {
         avpriv_report_missing_feature(ctx, "Do not support batch_size > 1 for now,"
                                            "change batch_size to 1.\n");
-        ctx->options.batch_size = 1;
+        ctx->ov_option.batch_size = 1;
     }
 
     status = ov_preprocess_prepostprocessor_create(ov_model->ov_model, &ov_model->preprocess);
@@ -677,9 +656,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
         ret = ov2_map_error(status, NULL);
         goto err;
     }
-    if (ctx->options.layout == DL_NCHW)
+    if (ctx->ov_option.layout == DL_NCHW)
         status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout);
-    else if (ctx->options.layout == DL_NHWC)
+    else if (ctx->ov_option.layout == DL_NHWC)
         status = ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout);
     if (status != OK) {
         av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n");
@@ -725,7 +704,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     }
     if (ov_model->model->func_type != DFT_PROCESS_FRAME)
         status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
-    else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f)
+    else if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || fabsf(ctx->ov_option.mean) > 1e-6f)
         status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
     else
         status |= ov_preprocess_output_set_element_type(output_tensor_info, U8);
@@ -740,7 +719,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
         ov_model->output_info = NULL;
     }
     // set preprocess steps.
-    if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) {
+    if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || fabsf(ctx->ov_option.mean) > 1e-6f) {
         ov_preprocess_preprocess_steps_t* input_process_steps = NULL;
         status = ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, &input_process_steps);
         if (status != OK) {
@@ -749,8 +728,8 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
             goto err;
         }
         status = ov_preprocess_preprocess_steps_convert_element_type(input_process_steps, F32);
-        status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->options.mean);
-        status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->options.scale);
+        status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->ov_option.mean);
+        status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->ov_option.scale);
         if (status != OK) {
             av_log(ctx, AV_LOG_ERROR, "Failed to set preprocess steps\n");
             ov_preprocess_preprocess_steps_free(input_process_steps);
@@ -824,7 +803,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     ov_layout_free(NCHW_layout);
     ov_layout_free(NHWC_layout);
 #else
-    if (ctx->options.batch_size > 1) {
+    if (ctx->ov_option.batch_size > 1) {
         input_shapes_t input_shapes;
         status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
         if (status != OK) {
@@ -832,7 +811,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
             goto err;
         }
         for (int i = 0; i < input_shapes.shape_num; i++)
-            input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size;
+            input_shapes.shapes[i].shape.dims[0] = ctx->ov_option.batch_size;
         status = ie_network_reshape(ov_model->network, input_shapes);
         ie_network_input_shapes_free(&input_shapes);
         if (status != OK) {
@@ -882,7 +861,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
         }
     }
 
-    status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
+    status = ie_core_load_network(ov_model->core, ov_model->network, ctx->device, &config, &ov_model->exe_network);
     if (status != OK) {
         av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
         status = ie_core_get_available_devices(ov_model->core, &a_dev);
@@ -895,15 +874,15 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
             APPEND_STRING(all_dev_names, a_dev.devices[i])
         }
         av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n",
-               ctx->options.device_type, all_dev_names);
+               ctx->device, all_dev_names);
         ret = AVERROR(ENODEV);
         goto err;
     }
 #endif
     // create infer_requests for async execution
-    if (ctx->options.nireq <= 0) {
+    if (ctx->nireq <= 0) {
         // the default value is a rough estimation
-        ctx->options.nireq = av_cpu_count() / 2 + 1;
+        ctx->nireq = av_cpu_count() / 2 + 1;
     }
 
     ov_model->request_queue = ff_safe_queue_create();
@@ -912,7 +891,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
     if (!ov_model->request_queue) {
         goto err;
     }
 
-    for (int i = 0; i < ctx->options.nireq; i++) {
+    for (int i = 0; i < ctx->nireq; i++) {
         OVRequestItem *item = av_mallocz(sizeof(*item));
         if (!item) {
             ret = AVERROR(ENOMEM);
             goto err;
         }
@@ -945,7 +924,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
         }
 #endif
 
-        item->lltasks = av_malloc_array(ctx->options.batch_size, sizeof(*item->lltasks));
+        item->lltasks = av_malloc_array(ctx->ov_option.batch_size, sizeof(*item->lltasks));
         if (!item->lltasks) {
             ret = AVERROR(ENOMEM);
             goto err;
         }
@@ -994,7 +973,7 @@ static int execute_model_ov(OVRequestItem *request, Queue *inferenceq)
     LastLevelTaskItem *lltask;
     int ret = 0;
     TaskItem *task;
-    OVContext *ctx;
+    DnnContext *ctx;
     OVModel *ov_model;
 
     if (ff_queue_size(inferenceq) == 0) {
@@ -1010,7 +989,7 @@ static int execute_model_ov(OVRequestItem *request, Queue *inferenceq)
     lltask = ff_queue_peek_front(inferenceq);
     task = lltask->task;
     ov_model = task->model;
-    ctx = &ov_model->ctx;
+    ctx = ov_model->ctx;
 
     ret = fill_model_input_ov(ov_model, request);
     if (ret != 0) {
@@ -1084,8 +1063,8 @@ err:
 static int get_input_ov(void *model, DNNData *input, const char *input_name)
 {
     OVModel *ov_model = model;
-    OVContext *ctx = &ov_model->ctx;
-    int input_resizable = ctx->options.input_resizable;
+    DnnContext *ctx = ov_model->ctx;
+    int input_resizable = ctx->ov_option.input_resizable;
 
 #if HAVE_OPENVINO2
     ov_shape_t input_shape = {0};
@@ -1291,7 +1270,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
 #endif
     int ret;
     OVModel *ov_model = model;
-    OVContext *ctx = &ov_model->ctx;
+    DnnContext *ctx = ov_model->ctx;
     TaskItem task;
     OVRequestItem *request;
     DNNExecBaseParams exec_params = {
@@ -1308,7 +1287,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
     }
 
 #if HAVE_OPENVINO2
-    if (ctx->options.input_resizable) {
+    if (ctx->ov_option.input_resizable) {
         status = ov_partial_shape_create(4, dims, &partial_shape);
         if (status != OK) {
             av_log(ctx, AV_LOG_ERROR, "Failed to create partial shape.\n");
@@ -1339,7 +1318,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
 
     if (!ov_model->compiled_model) {
 #else
-    if (ctx->options.input_resizable) {
+    if (ctx->ov_option.input_resizable) {
         status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
         input_shapes.shapes->shape.dims[2] = input_height;
         input_shapes.shapes->shape.dims[3] = input_width;
@@ -1386,11 +1365,10 @@ err:
     return ret;
 }
 
-static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
+static DNNModel *dnn_load_model_ov(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
 {
     DNNModel *model = NULL;
     OVModel *ov_model = NULL;
-    OVContext *ctx = NULL;
 #if HAVE_OPENVINO2
     ov_core_t* core = NULL;
     ov_model_t* ovmodel = NULL;
@@ -1411,17 +1389,9 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
         av_freep(&model);
         return NULL;
     }
+    ov_model->ctx = ctx;
     model->model = ov_model;
     ov_model->model = model;
-    ov_model->ctx.class = &dnn_openvino_class;
-    ctx = &ov_model->ctx;
-
-    //parse options
-    av_opt_set_defaults(ctx);
-    if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
-        goto err;
-    }
 
 #if HAVE_OPENVINO2
     status = ov_core_create(&core);
@@ -1430,13 +1400,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
     }
     ov_model->core = core;
 
-    status = ov_core_read_model(core, model_filename, NULL, &ovmodel);
+    status = ov_core_read_model(core, ctx->model_filename, NULL, &ovmodel);
     if (status != OK) {
         ov_version_t ver;
         status = ov_get_openvino_version(&ver);
         av_log(NULL, AV_LOG_ERROR, "Failed to read the network from model file %s,\n"
                                    "Please check if the model version matches the runtime OpenVINO Version:\n",
-                                   model_filename);
+                                   ctx->model_filename);
         if (status == OK) {
             av_log(NULL, AV_LOG_ERROR, "BuildNumber: %s\n", ver.buildNumber);
         }
@@ -1452,13 +1422,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
     if (status != OK)
         goto err;
 
-    status = ie_core_read_network(ov_model->core, model_filename, NULL, &ov_model->network);
+    status = ie_core_read_network(ov_model->core, ctx->model_filename, NULL, &ov_model->network);
     if (status != OK) {
         ie_version_t ver;
         ver = ie_c_api_version();
         av_log(ctx, AV_LOG_ERROR, "Failed to read the network from model file %s,\n"
                                   "Please check if the model version matches the runtime OpenVINO %s\n",
-               model_filename, ver.api_version);
+               ctx->model_filename, ver.api_version);
         ie_version_free(&ver);
         goto err;
     }
@@ -1496,7 +1466,6 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
 
     model->get_input = &get_input_ov;
     model->get_output = &get_output_ov;
-    model->options = options;
     model->filter_ctx = filter_ctx;
     model->func_type = func_type;
 
@@ -1510,7 +1479,7 @@ err:
 static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
     OVModel *ov_model = model->model;
-    OVContext *ctx = &ov_model->ctx;
+    DnnContext *ctx = ov_model->ctx;
     OVRequestItem *request;
     TaskItem *task;
     int ret;
@@ -1539,7 +1508,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
         return AVERROR(ENOMEM);
     }
 
-    ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->options.async, 1);
+    ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->async, 1);
     if (ret != 0) {
         av_freep(&task);
         return ret;
@@ -1557,8 +1526,8 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
         return ret;
     }
 
-    if (ctx->options.async) {
-        while (ff_queue_size(ov_model->lltask_queue) >= ctx->options.batch_size) {
+    if (ctx->async) {
+        while (ff_queue_size(ov_model->lltask_queue) >= ctx->ov_option.batch_size) {
             request = ff_safe_queue_pop_front(ov_model->request_queue);
             if (!request) {
                 av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n");
@@ -1581,7 +1550,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
             return AVERROR(ENOSYS);
         }
 
-        if (ctx->options.batch_size > 1) {
+        if (ctx->ov_option.batch_size > 1) {
             avpriv_report_missing_feature(ctx, "batch mode for sync execution");
             return AVERROR(ENOSYS);
         }
@@ -1604,7 +1573,7 @@ static DNNAsyncStatusType dnn_get_result_ov(const DNNModel *model, AVFrame **in,
 static int dnn_flush_ov(const DNNModel *model)
 {
     OVModel *ov_model = model->model;
-    OVContext *ctx = &ov_model->ctx;
+    DnnContext *ctx = ov_model->ctx;
     OVRequestItem *request;
 #if HAVE_OPENVINO2
     ov_status_e status;
@@ -1652,6 +1621,7 @@ static int dnn_flush_ov(const DNNModel *model)
 }
 
 const DNNModule ff_dnn_backend_openvino = {
+    .clazz          = DNN_DEFINE_CLASS(dnn_openvino),
     .load_model     = dnn_load_model_ov,
     .execute_model  = dnn_execute_model_ov,
     .get_result     = dnn_get_result_ov,
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 2ed17c3c87..d24591b90b 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -36,19 +36,8 @@
 #include "safe_queue.h"
 #include <tensorflow/c/c_api.h>
 
-typedef struct TFOptions{
-    char *sess_config;
-    uint8_t async;
-    uint32_t nireq;
-} TFOptions;
-
-typedef struct TFContext {
-    const AVClass *class;
-    TFOptions options;
-} TFContext;
-
-typedef struct TFModel{
-    TFContext ctx;
+typedef struct TFModel {
+    DnnContext *ctx;
     DNNModel *model;
     TF_Graph *graph;
     TF_Session *session;
@@ -76,15 +65,13 @@ typedef struct TFRequestItem {
     DNNAsyncExecModule exec_module;
 } TFRequestItem;
 
-#define OFFSET(x) offsetof(TFContext, x)
+#define OFFSET(x) offsetof(TFOptions, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption dnn_tensorflow_options[] = {
-    { "sess_config", "config for SessionOptions", OFFSET(options.sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
-    DNN_BACKEND_COMMON_OPTIONS
+    { "sess_config", "config for SessionOptions", OFFSET(sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(dnn_tensorflow);
 
 static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue);
 static void infer_completion_callback(void *args);
@@ -160,7 +147,7 @@ static int tf_start_inference(void *args)
     TFModel *tf_model = task->model;
 
     if (!request) {
-        av_log(&tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
+        av_log(tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
         return AVERROR(EINVAL);
     }
 
@@ -170,7 +157,7 @@ static int tf_start_inference(void *args)
                   task->nb_output, NULL, 0, NULL,
                   request->status);
     if (TF_GetCode(request->status) != TF_OK) {
-        av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
+        av_log(tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
        return DNN_GENERIC_ERROR;
     }
     return 0;
@@ -198,7 +185,7 @@ static inline void destroy_request_item(TFRequestItem **arg) {
 static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
 {
     TFModel *tf_model = task->model;
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
     LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask));
     if (!lltask) {
         av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n");
@@ -278,7 +265,7 @@ static TF_Tensor *allocate_input_tensor(const DNNData *input)
 static int get_input_tf(void *model, DNNData *input, const char *input_name)
 {
     TFModel *tf_model = model;
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
     TF_Status *status;
     TF_DataType dt;
     int64_t dims[4];
@@ -328,7 +315,7 @@ static int get_output_tf(void *model, const char *input_name, int input_width, i
 {
     int ret;
     TFModel *tf_model = model;
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
     TaskItem task;
     TFRequestItem *request;
     DNNExecBaseParams exec_params = {
@@ -399,7 +386,7 @@ static int hex_to_data(uint8_t *data, const char *p)
 
 static int load_tf_model(TFModel *tf_model, const char *model_filename)
 {
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
     TF_Buffer *graph_def;
     TF_ImportGraphDefOptions *graph_opts;
     TF_SessionOptions *sess_opts;
@@ -408,7 +395,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
     int sess_config_length = 0;
 
     // prepare the sess config data
-    if (tf_model->ctx.options.sess_config != NULL) {
+    if (ctx->tf_option.sess_config != NULL) {
         const char *config;
         /*
         tf_model->ctx.options.sess_config is hex to present the serialized proto
@@ -416,11 +403,11 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
         required in sess_config of TF_SetConfig, here is how to generate the serialized
         proto in a python script, tools/python/tf_sess_config.py is a script example
        to generate the configs of sess_config.
         */
-        if (strncmp(tf_model->ctx.options.sess_config, "0x", 2) != 0) {
+        if (strncmp(ctx->tf_option.sess_config, "0x", 2) != 0) {
             av_log(ctx, AV_LOG_ERROR, "sess_config should start with '0x'\n");
             return AVERROR(EINVAL);
         }
-        config = tf_model->ctx.options.sess_config + 2;
+        config = ctx->tf_option.sess_config + 2;
         sess_config_length = hex_to_data(NULL, config);
 
         sess_config = av_mallocz(sess_config_length + AV_INPUT_BUFFER_PADDING_SIZE);
@@ -461,7 +448,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
         if (TF_GetCode(tf_model->status) != TF_OK) {
             TF_DeleteSessionOptions(sess_opts);
             av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n",
-                   tf_model->ctx.options.sess_config);
+                   ctx->tf_option.sess_config);
             return DNN_GENERIC_ERROR;
         }
     }
@@ -529,15 +516,14 @@ static void dnn_free_model_tf(DNNModel **model)
             TF_DeleteStatus(tf_model->status);
         }
         av_freep(&tf_model);
-        av_freep(model);
+        av_freep(&model);
     }
 }
 
-static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
+static DNNModel *dnn_load_model_tf(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
 {
     DNNModel *model = NULL;
     TFModel *tf_model = NULL;
-    TFContext *ctx = NULL;
 
     model = av_mallocz(sizeof(DNNModel));
     if (!model){
@@ -551,23 +537,15 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
     }
     model->model = tf_model;
     tf_model->model = model;
-    ctx = &tf_model->ctx;
-    ctx->class = &dnn_tensorflow_class;
+    tf_model->ctx = ctx;
 
-    //parse options
-    av_opt_set_defaults(ctx);
-    if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
+    if (load_tf_model(tf_model, ctx->model_filename) != 0){
+        av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", ctx->model_filename);
         goto err;
     }
 
-    if (load_tf_model(tf_model, model_filename) != 0){
-        av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", model_filename);
-        goto err;
-    }
-
-    if (ctx->options.nireq <= 0) {
-        ctx->options.nireq = av_cpu_count() / 2 + 1;
+    if (ctx->nireq <= 0) {
+        ctx->nireq = av_cpu_count() / 2 + 1;
     }
 
 #if !HAVE_PTHREAD_CANCEL
@@ -582,7 +560,7 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
         goto err;
     }
 
-    for (int i = 0; i < ctx->options.nireq; i++) {
+    for (int i = 0; i < ctx->nireq; i++) {
         TFRequestItem *item = av_mallocz(sizeof(*item));
         if (!item) {
             goto err;
@@ -617,7 +595,6 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
 
     model->get_input = &get_input_tf;
     model->get_output = &get_output_tf;
-    model->options = options;
     model->filter_ctx = filter_ctx;
     model->func_type = func_type;
 
@@ -632,7 +609,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) {
     LastLevelTaskItem *lltask;
     TaskItem *task;
     TFInferRequest *infer_request = NULL;
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
     int ret = 0;
 
     lltask = ff_queue_pop_front(tf_model->lltask_queue);
@@ -728,7 +705,7 @@ static void infer_completion_callback(void *args) {
     DNNData *outputs;
     TFInferRequest *infer_request = request->infer_request;
     TFModel *tf_model = task->model;
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
 
     outputs = av_calloc(task->nb_output, sizeof(*outputs));
     if (!outputs) {
@@ -787,7 +764,7 @@ err:
 static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
 {
     TFModel *tf_model;
-    TFContext *ctx;
+    DnnContext *ctx;
     LastLevelTaskItem *lltask;
     TaskItem *task;
     int ret = 0;
@@ -800,7 +777,7 @@ static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
     lltask = ff_queue_peek_front(lltask_queue);
     task = lltask->task;
     tf_model = task->model;
-    ctx = &tf_model->ctx;
+    ctx = tf_model->ctx;
 
     ret = fill_model_input_tf(tf_model, request);
     if (ret != 0) {
@@ -833,7 +810,7 @@ err:
 static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
     TFModel *tf_model = model->model;
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
     TaskItem *task;
     TFRequestItem *request;
     int ret = 0;
@@ -849,7 +826,7 @@ static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_p
         return AVERROR(ENOMEM);
     }
 
-    ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->options.async, 1);
+    ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->async, 1);
     if (ret != 0) {
         av_log(ctx, AV_LOG_ERROR, "Fill task with invalid parameter(s).\n");
         av_freep(&task);
@@ -887,7 +864,7 @@ static DNNAsyncStatusType dnn_get_result_tf(const DNNModel *model, AVFrame **in,
 static int dnn_flush_tf(const DNNModel *model)
 {
     TFModel *tf_model = model->model;
-    TFContext *ctx = &tf_model->ctx;
+    DnnContext *ctx = tf_model->ctx;
     TFRequestItem *request;
     int ret;
 
@@ -915,6 +892,7 @@ static int dnn_flush_tf(const DNNModel *model)
 }
 
 const DNNModule ff_dnn_backend_tf = {
+    .clazz          = DNN_DEFINE_CLASS(dnn_tensorflow),
     .load_model     = dnn_load_model_tf,
     .execute_model  = dnn_execute_model_tf,
     .get_result     = dnn_get_result_tf,
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
index ae55893a50..abdef1f178 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -36,18 +36,8 @@ extern "C" {
 #include "safe_queue.h"
 }
 
-typedef struct THOptions{
-    char *device_name;
-    int optimize;
-} THOptions;
-
-typedef struct THContext {
-    const AVClass *c_class;
-    THOptions options;
-} THContext;
-
 typedef struct THModel {
-    THContext ctx;
+    DnnContext *ctx;
     DNNModel *model;
     torch::jit::Module *jit_model;
     SafeQueue *request_queue;
@@ -67,20 +57,17 @@ typedef struct THRequestItem {
 } THRequestItem;
 
-#define OFFSET(x) offsetof(THContext, x)
+#define OFFSET(x) offsetof(THOptions, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption dnn_th_options[] = {
-    { "device", "device to run model", OFFSET(options.device_name), AV_OPT_TYPE_STRING, { .str = "cpu" }, 0, 0, FLAGS },
-    { "optimize", "turn on graph executor optimization", OFFSET(options.optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
+    { "optimize", "turn on graph executor optimization", OFFSET(optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(dnn_th);
-
 static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
 {
     THModel *th_model = (THModel *)task->model;
-    THContext *ctx = &th_model->ctx;
+    DnnContext *ctx = th_model->ctx;
     LastLevelTaskItem *lltask = (LastLevelTaskItem *)av_malloc(sizeof(*lltask));
     if (!lltask) {
         av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for LastLevelTaskItem\n");
@@ -153,7 +140,6 @@ static void dnn_free_model_th(DNNModel **model)
     }
     ff_queue_destroy(th_model->task_queue);
     delete th_model->jit_model;
-    av_opt_free(&th_model->ctx);
     av_freep(&th_model);
     av_freep(model);
 }
@@ -181,7 +167,7 @@ static int fill_model_input_th(THModel *th_model, THRequestItem *request)
     TaskItem *task = NULL;
     THInferRequest *infer_request = NULL;
     DNNData input = { 0 };
-    THContext *ctx = &th_model->ctx;
+    DnnContext *ctx = th_model->ctx;
     int ret, width_idx, height_idx, channel_idx;
 
     lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
@@ -241,7 +227,7 @@ static int th_start_inference(void *args)
     LastLevelTaskItem *lltask = NULL;
     TaskItem *task = NULL;
     THModel *th_model = NULL;
-    THContext *ctx = NULL;
+    DnnContext *ctx = NULL;
     std::vector<torch::jit::IValue> inputs;
     torch::NoGradGuard no_grad;
 
@@ -253,9 +239,9 @@ static int th_start_inference(void *args)
     lltask = request->lltask;
     task = lltask->task;
     th_model = (THModel *)task->model;
-    ctx = &th_model->ctx;
+    ctx = th_model->ctx;
 
-    if (ctx->options.optimize)
+    if (ctx->torch_option.optimize)
         torch::jit::setGraphExecutorOptimize(true);
     else
         torch::jit::setGraphExecutorOptimize(false);
@@ -292,7 +278,7 @@ static void infer_completion_callback(void *args) {
         outputs.dims[2] = sizes.at(2); // H
         outputs.dims[3] = sizes.at(3); // W
     } else {
-        avpriv_report_missing_feature(&th_model->ctx, "Support of this kind of model");
+        avpriv_report_missing_feature(th_model->ctx, "Support of this kind of model");
         goto err;
     }
 
@@ -304,7 +290,7 @@ static void infer_completion_callback(void *args) {
             if (th_model->model->frame_post_proc != NULL) {
                 th_model->model->frame_post_proc(task->out_frame, &outputs, th_model->model->filter_ctx);
             } else {
-                ff_proc_from_dnn_to_frame(task->out_frame, &outputs, &th_model->ctx);
+                ff_proc_from_dnn_to_frame(task->out_frame, &outputs, th_model->ctx);
             }
         } else {
             task->out_frame->width = outputs.dims[dnn_get_width_idx_by_layout(outputs.layout)];
@@ -312,7 +298,7 @@ static void infer_completion_callback(void *args) {
         }
         break;
     default:
-        avpriv_report_missing_feature(&th_model->ctx, "model function type %d", th_model->model->func_type);
+        avpriv_report_missing_feature(th_model->ctx, "model function type %d", th_model->model->func_type);
         goto err;
     }
     task->inference_done++;
@@ -322,7 +308,7 @@ err:
 
     if (ff_safe_queue_push_back(th_model->request_queue, request) < 0) {
         destroy_request_item(&request);
-        av_log(&th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
+        av_log(th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
     }
 }
 
@@ -352,7 +338,7 @@ static int execute_model_th(THRequestItem *request, Queue *lltask_queue)
         goto err;
     }
     if (task->async) {
-        avpriv_report_missing_feature(&th_model->ctx, "LibTorch async");
+        avpriv_report_missing_feature(th_model->ctx, "LibTorch async");
     } else {
         ret = th_start_inference((void *)(request));
         if (ret != 0) {
@@ -375,7 +361,7 @@ static int get_output_th(void *model, const char *input_name, int input_width, i
 {
     int ret = 0;
     THModel *th_model = (THModel*) model;
-    THContext *ctx = &th_model->ctx;
+    DnnContext *ctx = th_model->ctx;
     TaskItem task = { 0 };
     THRequestItem *request = NULL;
     DNNExecBaseParams exec_params = {
@@ -424,12 +410,12 @@ static THInferRequest *th_create_inference_request(void)
     return request;
 }
 
-static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
+static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
 {
     DNNModel *model = NULL;
     THModel *th_model = NULL;
     THRequestItem *item = NULL;
-    THContext *ctx;
+    const char *device_name = ctx->device ? ctx->device : "cpu";
 
     model = (DNNModel *)av_mallocz(sizeof(DNNModel));
     if (!model) {
@@ -443,24 +429,17 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
     }
     th_model->model = model;
     model->model = th_model;
-    th_model->ctx.c_class = &dnn_th_class;
-    ctx = &th_model->ctx;
-    //parse options
-    av_opt_set_defaults(ctx);
-    if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
-        return NULL;
-    }
+    th_model->ctx = ctx;
 
-    c10::Device device = c10::Device(ctx->options.device_name);
+    c10::Device device = c10::Device(device_name);
     if (!device.is_cpu()) {
-        av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", ctx->options.device_name);
+        av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", device_name);
         goto fail;
     }
 
     try {
         th_model->jit_model = new torch::jit::Module;
-        (*th_model->jit_model) = torch::jit::load(model_filename);
+        (*th_model->jit_model) = torch::jit::load(ctx->model_filename);
     } catch (const c10::Error& e) {
         av_log(ctx, AV_LOG_ERROR, "Failed to load torch model\n");
         goto fail;
@@ -502,7 +481,6 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
 
     model->get_input = &get_input_th;
     model->get_output = &get_output_th;
-    model->options = NULL;
     model->filter_ctx = filter_ctx;
     model->func_type = func_type;
     return model;
@@ -519,7 +497,7 @@ fail:
 static int dnn_execute_model_th(const DNNModel *model, DNNExecBaseParams *exec_params)
 {
     THModel *th_model = (THModel *)model->model;
-    THContext *ctx = &th_model->ctx;
+    DnnContext *ctx = th_model->ctx;
     TaskItem *task;
     THRequestItem *request;
     int ret = 0;
@@ -582,7 +560,7 @@ static int dnn_flush_th(const DNNModel *model)
 
     request = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue);
     if (!request) {
-        av_log(&th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
+        av_log(th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
         return AVERROR(EINVAL);
     }
 
@@ -590,6 +568,7 @@ static int dnn_flush_th(const DNNModel *model)
 }
 
 extern const DNNModule ff_dnn_backend_torch = {
+    .clazz          = DNN_DEFINE_CLASS(dnn_th),
     .load_model     = dnn_load_model_th,
     .execute_model  = dnn_execute_model_th,
     .get_result     = dnn_get_result_th,
diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c
index b9f71aea53..e7453f1bb1 100644
--- a/libavfilter/dnn/dnn_interface.c
+++ b/libavfilter/dnn/dnn_interface.c
@@ -24,12 +24,61 @@
  */
 
 #include "../dnn_interface.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavfilter/internal.h"
 
 extern const DNNModule ff_dnn_backend_openvino;
 extern const DNNModule ff_dnn_backend_tf;
 extern const DNNModule ff_dnn_backend_torch;
 
+#define OFFSET(x) offsetof(DnnContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption dnn_base_options[] = {
+    {"model", "path to model file",
+        OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+    {"input", "input name of the model",
+        OFFSET(model_inputname), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+    {"output", "output name of the model",
+        OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+    {"backend_configs", "backend configs (deprecated)",
+        OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
+    {"options", "backend configs (deprecated)",
+        OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
+    {"nireq", "number of request",
+        OFFSET(nireq), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS},
+    {"async", "use DNN async inference",
+        OFFSET(async), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
+    {"device", "device to run model",
+        OFFSET(device), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(dnn_base);
+
+typedef struct DnnBackendInfo {
+    const size_t offset;
+    union {
+        const AVClass *class;
+        const DNNModule *module;
+    };
+} DnnBackendInfo;
+
+static const DnnBackendInfo dnn_backend_info_list[] = {
+    {0, .class = &dnn_base_class},
+    // Must keep the same order as in DNNOptions, so offset value in incremental order
+#if CONFIG_LIBTENSORFLOW
+    {offsetof(DnnContext, tf_option), .module = &ff_dnn_backend_tf},
+#endif
+#if CONFIG_LIBOPENVINO
+    {offsetof(DnnContext, ov_option), .module = &ff_dnn_backend_openvino},
+#endif
+#if CONFIG_LIBTORCH
+    {offsetof(DnnContext, torch_option), .module = &ff_dnn_backend_torch},
+#endif
+};
+
 const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
 {
     switch(backend_type){
@@ -52,3 +101,44 @@ const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
         return NULL;
     }
 }
+
+void ff_dnn_init_child_class(DnnContext *ctx)
+{
+    for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list); i++) {
+        const AVClass **ptr = (const AVClass **) ((char *) ctx + dnn_backend_info_list[i].offset);
+        *ptr = dnn_backend_info_list[i].class;
+    }
+}
+
+void *ff_dnn_child_next(DnnContext *obj, void *prev) {
+    size_t pre_offset;
+
+    if (!prev) {
+        av_assert0(obj->clazz);
+        return obj;
+    }
+
+    pre_offset = (char *)prev - (char *)obj;
+    for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list) - 1; i++) {
+        if (dnn_backend_info_list[i].offset == pre_offset) {
+            const AVClass **ptr = (const AVClass **) ((char *) obj + dnn_backend_info_list[i + 1].offset);
+            av_assert0(*ptr);
+            return ptr;
+        }
+    }
+
+    return NULL;
+}
+
+const AVClass *ff_dnn_child_class_iterate(void **iter)
+{
+    uintptr_t i = (uintptr_t) *iter;
+
+    if (i < FF_ARRAY_ELEMS(dnn_backend_info_list)) {
+        *iter = (void *)(i + 1);
+        return dnn_backend_info_list[i].class;
+    }
+
+    return NULL;
+}
+
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 5e76b9ba45..860ca7591f 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -19,6 +19,7 @@
 #include "dnn_filter_common.h"
 #include "libavutil/avstring.h"
 #include "libavutil/mem.h"
+#include "libavutil/opt.h"
 
 #define MAX_SUPPORTED_OUTPUTS_NB 4
 
@@ -52,6 +53,23 @@ static char **separate_output_names(const char *expr, const char *val_sep, int *
     return parsed_vals;
 }
 
+typedef struct DnnFilterBase {
+    const AVClass *class;
+    DnnContext dnnctx;
+} DnnFilterBase;
+
+int ff_dnn_filter_init_child_class(AVFilterContext *filter) {
+    DnnFilterBase *base = filter->priv;
+    ff_dnn_init_child_class(&base->dnnctx);
+    return 0;
+}
+
+void *ff_dnn_filter_child_next(void *obj, void *prev)
+{
+    DnnFilterBase *base = obj;
+    return ff_dnn_child_next(&base->dnnctx, prev);
+}
+
 int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
 {
     DNNBackendType backend = ctx->backend_type;
@@ -91,7 +109,25 @@ int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *fil
         return AVERROR(EINVAL);
     }
 
-    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, func_type, ctx->backend_options, filter_ctx);
+    if (ctx->backend_options) {
+        void *child = NULL;
+
+        av_log(filter_ctx, AV_LOG_WARNING,
+               "backend_configs is deprecated, please set backend options directly\n");
+        while (child = ff_dnn_child_next(ctx, child)) {
+            if (*(const AVClass **)child == &ctx->dnn_module->clazz) {
+                int ret = av_opt_set_from_string(child, ctx->backend_options,
+                                                 NULL, "=", "&");
+                if (ret < 0) {
+                    av_log(filter_ctx, AV_LOG_ERROR, "failed to parse options \"%s\"\n",
+                           ctx->backend_options);
+                    return ret;
+                }
+            }
+        }
+    }
+
+    ctx->model = (ctx->dnn_module->load_model)(ctx, func_type, filter_ctx);
     if (!ctx->model) {
         av_log(filter_ctx, AV_LOG_ERROR, "could not load DNN model\n");
         return AVERROR(EINVAL);
diff --git a/libavfilter/dnn_filter_common.h b/libavfilter/dnn_filter_common.h
index 30871ee381..b52b55a90d 100644
--- a/libavfilter/dnn_filter_common.h
+++ b/libavfilter/dnn_filter_common.h
@@ -26,28 +26,23 @@
 
 #include "dnn_interface.h"
 
-typedef struct DnnContext {
-    char *model_filename;
-    DNNBackendType backend_type;
-    char *model_inputname;
-    char *model_outputnames_string;
-    char *backend_options;
-    int async;
+#define AVFILTER_DNN_DEFINE_CLASS_EXT(name, desc, options) \
+    static const AVClass name##_class = { \
+        .class_name = desc, \
+        .item_name  = av_default_item_name, \
+        .option     = options, \
+        .version    = LIBAVUTIL_VERSION_INT, \
+        .category   = AV_CLASS_CATEGORY_FILTER, \
+        .child_next = ff_dnn_filter_child_next, \
+        .child_class_iterate = ff_dnn_child_class_iterate, \
+    }
 
-    char **model_outputnames;
-    uint32_t nb_outputs;
-    const DNNModule *dnn_module;
-    DNNModel *model;
-} DnnContext;
+#define AVFILTER_DNN_DEFINE_CLASS(fname) \
+    AVFILTER_DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
 
-#define DNN_COMMON_OPTIONS \
-    { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
-    { "input", "input name of the model", OFFSET(model_inputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
-    { "output", "output name of the model", OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
-    { "backend_configs", "backend configs", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
-    { "options", "backend configs (deprecated, use backend_configs)", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},\
-    { "async", "use DNN async inference (ignored, use backend_configs='async=1')", OFFSET(async), AV_OPT_TYPE_BOOL, { .i64 = 1}, 0, 1, FLAGS},
+void *ff_dnn_filter_child_next(void *obj, void *prev);
+int ff_dnn_filter_init_child_class(AVFilterContext *filter);
 
 int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
 int ff_dnn_set_frame_proc(DnnContext *ctx, FramePrePostProc pre_proc, FramePrePostProc post_proc);
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 63f492e690..4e544486cc 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -93,8 +93,6 @@ typedef int (*ClassifyPostProc)(AVFrame *frame, DNNData *output, uint32_t bbox_i
 typedef struct DNNModel{
     // Stores model that can be different for different backends.
     void *model;
-    // Stores options when the model is executed by the backend
-    const char *options;
     // Stores FilterContext used for the interaction between AVFrame and DNNData
     AVFilterContext *filter_ctx;
     // Stores function type of the model
@@ -117,10 +115,65 @@ typedef struct DNNModel{
     ClassifyPostProc classify_post_proc;
 } DNNModel;
 
+typedef struct TFOptions{
+    const AVClass *clazz;
+
+    char *sess_config;
+} TFOptions;
+
+typedef struct OVOptions {
+    const AVClass *clazz;
+
+    int batch_size;
+    int input_resizable;
+    DNNLayout layout;
+    float scale;
+    float mean;
+} OVOptions;
+
+typedef struct THOptions {
+    const AVClass *clazz;
+    int optimize;
+} THOptions;
+
+typedef struct DNNModule DNNModule;
+
+typedef struct DnnContext {
+    const AVClass *clazz;
+
+    DNNModel *model;
+
+    char *model_filename;
+    DNNBackendType backend_type;
+    char *model_inputname;
+    char *model_outputnames_string;
+    char *backend_options;
+    int async;
+
+    char **model_outputnames;
+    uint32_t nb_outputs;
+    const DNNModule *dnn_module;
+
+    int nireq;
+    char *device;
+
+#if CONFIG_LIBTENSORFLOW
+    TFOptions tf_option;
+#endif
+
+#if CONFIG_LIBOPENVINO
+    OVOptions ov_option;
+#endif
+#if CONFIG_LIBTORCH
+    THOptions torch_option;
+#endif
+} DnnContext;
+
 // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
-typedef struct DNNModule{
+struct DNNModule {
+    const AVClass clazz;
     // Loads model and parameters from given file. Returns NULL if it is not possible.
-    DNNModel *(*load_model)(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
+    DNNModel *(*load_model)(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
     // Executes model with specified input and output. Returns the error code otherwise.
     int (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params);
     // Retrieve inference result.
@@ -129,11 +182,15 @@ typedef struct DNNModule{
     int (*flush)(const DNNModel *model);
     // Frees memory allocated for model.
     void (*free_model)(DNNModel **model);
-} DNNModule;
+};
 
 // Initializes DNNModule depending on chosen backend.
 const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx);
 
+void ff_dnn_init_child_class(DnnContext *ctx);
+void *ff_dnn_child_next(DnnContext *obj, void *prev);
+const AVClass *ff_dnn_child_class_iterate(void **iter);
+
 static inline int dnn_get_width_idx_by_layout(DNNLayout layout)
 {
     return layout == DL_NHWC ? 2 : 3;
diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c
index c8848dd7ba..7f665b73ab 100644
--- a/libavfilter/vf_derain.c
+++ b/libavfilter/vf_derain.c
@@ -46,13 +46,10 @@ static const AVOption derain_options[] = {
 #if (CONFIG_LIBTENSORFLOW == 1)
     { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
 #endif
-    { "model", "path to model file", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
-    { "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
-    { "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(derain);
+AVFILTER_DNN_DEFINE_CLASS(derain);
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
@@ -113,6 +110,7 @@ const AVFilter ff_vf_derain = {
     .name          = "derain",
     .description   = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."),
     .priv_size     = sizeof(DRContext),
+    .preinit       = ff_dnn_filter_init_child_class,
     .init          = init,
     .uninit        = uninit,
     FILTER_INPUTS(derain_inputs),
diff --git a/libavfilter/vf_dnn_classify.c b/libavfilter/vf_dnn_classify.c
index 1f8f227e3a..965779a8ab 100644
--- a/libavfilter/vf_dnn_classify.c
+++ b/libavfilter/vf_dnn_classify.c
@@ -50,14 +50,13 @@ static const AVOption dnn_classify_options[] = {
 #if (CONFIG_LIBOPENVINO == 1)
     { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
 #endif
-    DNN_COMMON_OPTIONS
     { "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
     { "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
     { "target", "which one to be classified", OFFSET2(target), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(dnn_classify);
+AVFILTER_DNN_DEFINE_CLASS(dnn_classify);
 
 static int dnn_classify_post_proc(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx)
 {
@@ -299,6 +298,7 @@ const AVFilter ff_vf_dnn_classify = {
     .name          = "dnn_classify",
     .description   = NULL_IF_CONFIG_SMALL("Apply DNN classify filter to the input."),
     .priv_size     = sizeof(DnnClassifyContext),
+    .preinit       = ff_dnn_filter_init_child_class,
     .init          = dnn_classify_init,
     .uninit        = dnn_classify_uninit,
     FILTER_INPUTS(ff_video_default_filterpad),
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index bacea3ef29..926966368a 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -70,7 +70,6 @@ static const AVOption dnn_detect_options[] = {
 #if (CONFIG_LIBOPENVINO == 1)
     { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
 #endif
-    DNN_COMMON_OPTIONS
     { "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
     { "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
     { "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
@@ -85,7 +84,7 @@ static const AVOption dnn_detect_options[] = {
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(dnn_detect);
+AVFILTER_DNN_DEFINE_CLASS(dnn_detect);
 
 static inline float sigmoid(float x) {
     return 1.f / (1.f + exp(-x));
@@ -851,6 +850,7 @@ const AVFilter ff_vf_dnn_detect = {
     .name          = "dnn_detect",
     .description   = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
     .priv_size     = sizeof(DnnDetectContext),
+    .preinit       = ff_dnn_filter_init_child_class,
     .init          = dnn_detect_init,
     .uninit        = dnn_detect_uninit,
     FILTER_INPUTS(dnn_detect_inputs),
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index fdac31665e..9a1dd2a356 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -54,11 +54,10 @@ static const AVOption dnn_processing_options[] = {
 #if (CONFIG_LIBTORCH == 1)
     { "torch", "torch backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, "backend" },
 #endif
-    DNN_COMMON_OPTIONS
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(dnn_processing);
+AVFILTER_DNN_DEFINE_CLASS(dnn_processing);
 
 static av_cold int init(AVFilterContext *context)
 {
@@ -373,6 +372,7 @@ const AVFilter ff_vf_dnn_processing = {
     .name          = "dnn_processing",
     .description   = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
     .priv_size     = sizeof(DnnProcessingContext),
+    .preinit       = ff_dnn_filter_init_child_class,
     .init          = init,
     .uninit        = uninit,
     FILTER_INPUTS(dnn_processing_inputs),
diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
index 60683b5209..f14c0c0cd3 100644
--- a/libavfilter/vf_sr.c
+++ b/libavfilter/vf_sr.c
@@ -50,13 +50,10 @@ static const AVOption sr_options[] = {
     { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
 #endif
     { "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
-    { "model", "path to model file specifying network architecture and its parameters", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
-    { "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
-    { "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
     { NULL }
 };
 
-AVFILTER_DEFINE_CLASS(sr);
+AVFILTER_DNN_DEFINE_CLASS(sr);
 
 static av_cold int init(AVFilterContext *context)
 {
@@ -192,6 +189,7 @@ const AVFilter ff_vf_sr = {
     .name          = "sr",
     .description   = NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."),
     .priv_size     = sizeof(SRContext),
+    .preinit       = ff_dnn_filter_init_child_class,
     .init          = init,
     .uninit        = uninit,
     FILTER_INPUTS(sr_inputs),
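
---

Editor's note on the resulting option mechanism (an illustration, not part of the patch):

After this change all backends share one DnnContext. Each optional per-backend struct (TFOptions, OVOptions, THOptions) begins with an AVClass pointer; ff_dnn_init_child_class() fills those pointers from the offset table dnn_backend_info_list, and ff_dnn_child_next()/ff_dnn_child_class_iterate() walk the same table so the generic AVOption code can reach backend-specific options through the filter's class. The following self-contained C sketch mirrors that offset-walking pattern; every name in it is invented for illustration and nothing in it is FFmpeg API:

#include <stddef.h>
#include <stdio.h>

typedef struct FakeClass { const char *name; } FakeClass;
typedef struct TfOpts { const FakeClass *clazz; char *sess_config; } TfOpts;
typedef struct OvOpts { const FakeClass *clazz; int batch_size; } OvOpts;

typedef struct Ctx {
    const FakeClass *clazz;   /* first member, like DnnContext.clazz */
    TfOpts tf_option;         /* children must keep the table's order */
    OvOpts ov_option;
} Ctx;

static const FakeClass base_class = { "base" };
static const FakeClass tf_class   = { "tf" };
static const FakeClass ov_class   = { "ov" };

/* mirrors dnn_backend_info_list: entry 0 is the context itself,
 * later entries record where each embedded child struct lives */
static const struct { size_t offset; const FakeClass *cls; } info[] = {
    { 0,                        &base_class },
    { offsetof(Ctx, tf_option), &tf_class },
    { offsetof(Ctx, ov_option), &ov_class },
};

static void *child_next(Ctx *obj, void *prev)
{
    size_t prev_off;
    if (!prev)
        return obj;                          /* first child: the context */
    prev_off = (char *)prev - (char *)obj;   /* which child was prev? */
    for (size_t i = 0; i + 1 < sizeof(info) / sizeof(info[0]); i++)
        if (info[i].offset == prev_off)      /* hand back the next one */
            return (char *)obj + info[i + 1].offset;
    return NULL;
}

int main(void)
{
    /* here the class pointers are set by hand; the patch does the same
     * job in ff_dnn_init_child_class(), called from the filter preinit */
    Ctx ctx = { &base_class, { &tf_class, NULL }, { &ov_class, 1 } };
    for (void *c = child_next(&ctx, NULL); c; c = child_next(&ctx, c))
        printf("child: %s\n", (*(const FakeClass **)c)->name);
    return 0;
}

In user-facing terms this wiring should allow backend options to be given directly on a filter, e.g. dnn_processing=dnn_backend=openvino:model=foo.xml:device=GPU:nireq=2 (a hypothetical command line, shown only to illustrate the intent); the old form backend_configs='device=GPU&nireq=2' keeps working, since ff_dnn_init() now forwards it to the matching child via av_opt_set_from_string() and prints a deprecation warning.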