From 1954f5c832256e8bbca936e2aea4ab2a3ac3d421 Mon Sep 17 00:00:00 2001
From: Connor Worley <connorbworley@gmail.com>
Date: Tue, 23 Jan 2024 19:59:00 +0100
Subject: [PATCH] lavc/dxvenc: add DXV encoder with support for DXT1 texture
 format

Signed-off-by: Vittorio Giovara <vittorio.giovara@gmail.com>
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 Changelog                 |   1 +
 configure                 |   1 +
 doc/general_contents.texi |   3 +-
 libavcodec/Makefile       |   1 +
 libavcodec/allcodecs.c    |   1 +
 libavcodec/dxvenc.c       | 361 ++++++++++++++++++++++++++++++++++++++
 libavcodec/version.h      |   2 +-
 7 files changed, 368 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/dxvenc.c
diff --git a/Changelog b/Changelog
index 7ebf8e7cab..1df9dd71d7 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 version <next>:
+- DXV DXT1 encoder
 - LEAD MCMP decoder
 - EVC decoding using external library libxevd
 - EVC encoding using external library libxeve
diff --git a/configure b/configure
index c67f1e2543..d2a8edf12e 100755
--- a/configure
+++ b/configure
@@ -2845,6 +2845,7 @@ dvvideo_decoder_select="dvprofile idctdsp"
 dvvideo_encoder_select="dvprofile fdctdsp me_cmp pixblockdsp"
 dxa_decoder_deps="zlib"
 dxv_decoder_select="lzf texturedsp"
+dxv_encoder_select="texturedspenc"
 eac3_decoder_select="ac3_decoder"
 eac3_encoder_select="ac3_encoder"
 eamad_decoder_select="aandcttables blockdsp bswapdsp"
diff --git a/doc/general_contents.texi b/doc/general_contents.texi
index 90166e9b4d..ac6d37e677 100644
--- a/doc/general_contents.texi
+++ b/doc/general_contents.texi
@@ -670,7 +670,8 @@ library:
 @item Redirector                @tab   @tab X
 @item RedSpark                  @tab   @tab X
 @item Renderware TeXture Dictionary @tab   @tab X
-@item Resolume DXV              @tab   @tab X
+@item Resolume DXV              @tab X @tab X
+    @tab Encoding is only supported for the DXT1 (Normal Quality, No Alpha) texture format.
 @item RF64                      @tab   @tab X
 @item RL2                       @tab   @tab X
     @tab Audio and video format used in some games by Entertainment Software Partners.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index c4db9fd4fa..4065ca90ff 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -341,6 +341,7 @@ OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dvenc.o dv.o dvdata.o
 OBJS-$(CONFIG_DXA_DECODER)             += dxa.o
 OBJS-$(CONFIG_DXTORY_DECODER)          += dxtory.o
 OBJS-$(CONFIG_DXV_DECODER)             += dxv.o
+OBJS-$(CONFIG_DXV_ENCODER)             += dxvenc.o
 OBJS-$(CONFIG_EAC3_DECODER)            += eac3_data.o
 OBJS-$(CONFIG_EAC3_ENCODER)            += eac3enc.o eac3_data.o
 OBJS-$(CONFIG_EACMV_DECODER)           += eacmv.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 433ba6ab07..4613b8b6a8 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -106,6 +106,7 @@ extern const FFCodec ff_dvvideo_encoder;
 extern const FFCodec ff_dvvideo_decoder;
 extern const FFCodec ff_dxa_decoder;
 extern const FFCodec ff_dxtory_decoder;
+extern const FFCodec ff_dxv_encoder;
 extern const FFCodec ff_dxv_decoder;
 extern const FFCodec ff_eacmv_decoder;
 extern const FFCodec ff_eamad_decoder;
diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
new file mode 100644
index 0000000000..3a5b310c9b
--- /dev/null
+++ b/libavcodec/dxvenc.c
@@ -0,0 +1,361 @@
+/*
+ * Resolume DXV encoder
+ * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/crc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include "bytestream.h"
+#include "codec_internal.h"
+#include "encode.h"
+#include "texturedsp.h"
+
+#define DXV_HEADER_LENGTH 12
+
+/*
+ * DXV uses LZ-like back-references to avoid copying words that have already
+ * appeared in the decompressed stream. Using a simple hash table (HT)
+ * significantly speeds up the lookback process while encoding.
+ */
+#define LOOKBACK_HT_ELEMS 0x40000
+#define LOOKBACK_WORDS    0x20202
+
+enum DXVTextureFormat { /* texture formats selectable through the "format" option */
+    DXV_FMT_DXT1 = MKBETAG('D', 'X', 'T', '1'), /* written by dxv_encode() as the first 32-bit packet field */
+};
+
+typedef struct HTEntry { /* one slot of a linear-probing lookback hash table */
+    uint32_t key; /* 32-bit texture word the slot was stored under */
+    uint32_t pos; /* word index recorded for key; -1 marks an empty slot */
+} HTEntry;
+
+/* Reset a lookback table: a pos of -1 marks a slot as unused. */
+static void ht_init(HTEntry *ht)
+{
+    for (HTEntry *slot = ht; slot != ht + LOOKBACK_HT_ELEMS; slot++)
+        slot->pos = -1;
+}
+
+/* Store pos as the newest occurrence of key. Returns the position previously
+ * stored for key, or -1 if key had no entry (or no slot could be claimed). */
+static uint32_t ht_lookup_and_upsert(HTEntry *ht, AVCRC *hash_ctx,
+                                    uint32_t key, uint32_t pos)
+{
+    const size_t home = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
+    for (size_t probe = 0; probe < LOOKBACK_HT_ELEMS; probe++) {
+        HTEntry *slot = &ht[(home + probe) % LOOKBACK_HT_ELEMS];
+        const uint32_t previous = slot->pos;
+        if (previous != -1 && slot->key != key)
+            continue; /* slot belongs to a different key: probe the next one */
+        slot->key = key;
+        slot->pos = pos;
+        return previous;
+    }
+    return -1;
+}
+
+/* Remove key's entry unless it already records a position newer than pos, then
+ * move later entries of the probe run into the freed slot to keep them reachable. */
+static void ht_delete(HTEntry *ht, AVCRC *hash_ctx,
+                      uint32_t key, uint32_t pos)
+{
+    const size_t home = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
+    HTEntry *hole = NULL; /* slot emptied so far, NULL until key is found */
+    size_t hole_idx;      /* table index of hole, valid only once hole is set */
+
+    for (size_t step = 0; step < LOOKBACK_HT_ELEMS; step++) {
+        const size_t idx = (home + step) % LOOKBACK_HT_ELEMS;
+        HTEntry *cur = &ht[idx];
+        if (cur->pos == -1)
+            return; /* end of the probe run */
+        if (hole) {
+            const size_t wanted = av_crc(hash_ctx, 0, (uint8_t*)&cur->key, 4) % LOOKBACK_HT_ELEMS;
+            if ((idx > hole_idx && (wanted <= hole_idx || wanted > idx)) ||
+                (idx < hole_idx && (wanted <= hole_idx && wanted > idx))) {
+                *hole = *cur;
+                cur->pos = -1;
+                hole = cur;
+                hole_idx = idx;
+            }
+        } else if (cur->key == key) {
+            if (cur->pos > pos)
+                return; /* key was stored again later: keep that entry */
+            cur->pos = -1;
+            hole = cur;
+            hole_idx = idx;
+        }
+    }
+}
+
+typedef struct DXVEncContext {
+    AVClass *class;      // presumably set to dxvenc_class outside this file; verify
+
+    TextureDSPContext texdsp; // Block compressors, filled by ff_texturedspenc_init()
+    PutByteContext pbc;  // Writer over the output packet, set up in dxv_encode()
+
+    uint8_t *tex_data;   // Compressed texture, allocated in dxv_init()
+    int64_t tex_size;    // Texture size in bytes
+
+    /* Optimal number of slices for parallel decoding */
+    int slice_count;     // NOTE(review): never referenced in this file; enc.slice_count is used
+
+    TextureDSPThreadContext enc; // Job parameters passed to ff_texturedsp_compress_thread()
+
+    enum DXVTextureFormat tex_fmt;              // Value of the "format" option
+    int (*compress_tex)(AVCodecContext *avctx); // Packs tex_data into pbc; chosen in dxv_init()
+
+    AVCRC *crc_ctx;      // CRC table used to hash words for the lookback tables
+
+    HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS]; // Last recorded position of each color word
+    HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS];   // Last recorded position of each LUT word
+} DXVEncContext;
+
+/* execute2() job: compress one slice of the frame passed in arg into tex_data.
+ * Returns the texturedsp result, or an error when no block encoder is set. */
+static int compress_texture_thread(AVCodecContext *avctx, void *arg,
+                                   int slice, int thread_nb)
+{
+    DXVEncContext *ctx = avctx->priv_data;
+    AVFrame *frame = arg;
+
+    /* unimplemented: YCoCg formats */
+    if (!ctx->enc.tex_funct)
+        return AVERROR_INVALIDDATA;
+
+    /* NOTE(review): every job stores these same values into the shared context. */
+    ctx->enc.tex_data.out = ctx->tex_data;
+    ctx->enc.frame_data.in = frame->data[0];
+    ctx->enc.stride = frame->linesize[0];
+    return ff_texturedsp_compress_thread(avctx, &ctx->enc, slice, thread_nb);
+}
+
+/* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
+ * Opcodes fill a little-endian 32-bit word. Inverse of CHECKPOINT in dxv.c. */
+#define PUSH_OP(x)                                                            \
+    do {                                                                      \
+        if (state == 16) {                                                    \
+            if (bytestream2_get_bytes_left_p(pbc) < 4) {                      \
+                return AVERROR_INVALIDDATA;                                   \
+            }                                                                 \
+            value = (uint32_t*)pbc->buffer;                                   \
+            bytestream2_put_le32(pbc, 0);                                     \
+            state = 0;                                                        \
+        }                                                                     \
+        if (idx >= 0x102 * (x)) {                                             \
+            op = 3;                                                           \
+            bytestream2_put_le16(pbc, (idx / (x)) - 0x102);                   \
+        } else if (idx >= 2 * (x)) {                                          \
+            op = 2;                                                           \
+            bytestream2_put_byte(pbc, (idx / (x)) - 2);                       \
+        } else if (idx == (x)) {                                              \
+            op = 1;                                                           \
+        } else {                                                              \
+            op = 0;                                                           \
+        }                                                                     \
+        AV_WL32(value, AV_RL32(value) | (op << (state * 2)));                 \
+        state++;                                                              \
+    } while (0)
+
+static int dxv_compress_dxt1(AVCodecContext *avctx)
+{
+    DXVEncContext *ctx = avctx->priv_data;
+    PutByteContext *pbc = &ctx->pbc;
+    uint32_t *value; /* opcode word currently being filled by PUSH_OP */
+    uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0;
+    /* Packs ctx->tex_data into pbc; pos indexes 32-bit words of the texture. */
+    ht_init(ctx->color_lookback_ht);
+    ht_init(ctx->lut_lookback_ht);
+    /* The first two words are written as literals and seed the lookback tables. */
+    bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
+    bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4));
+
+    ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0);
+    ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1);
+
+    while (pos + 2 <= ctx->tex_size / 4) { /* each iteration consumes a color word and a LUT word */
+        idx = 0;
+
+        color = AV_RL32(ctx->tex_data + pos * 4);
+        prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos);
+        color_idx = prev_pos != -1 ? pos - prev_pos : 0; /* 0: no earlier occurrence recorded */
+        if (pos >= LOOKBACK_WORDS) { /* forget the word that is now LOOKBACK_WORDS behind */
+            uint32_t old_pos = pos - LOOKBACK_WORDS;
+            uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4);
+            ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos);
+        }
+        pos++;
+
+        lut = AV_RL32(ctx->tex_data + pos * 4);
+        if (color_idx && lut == AV_RL32(ctx->tex_data + (pos - color_idx) * 4)) { /* LUT after that earlier color matches too */
+            idx = color_idx;
+        } else {
+            idx = 0;
+            prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos);
+            lut_idx = prev_pos != -1 ? pos - prev_pos : 0;
+        }
+        if (pos >= LOOKBACK_WORDS) {
+            uint32_t old_pos = pos - LOOKBACK_WORDS;
+            uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4);
+            ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos);
+        }
+        pos++;
+
+        PUSH_OP(2); /* opcode for the pair as a whole; idx is 0 when the pair is not a repeat */
+
+        if (!idx) { /* pair not repeated: code color and LUT individually */
+            idx = color_idx;
+            PUSH_OP(2);
+            if (!idx)
+                bytestream2_put_le32(pbc,  color); /* literal word */
+
+            idx = lut_idx;
+            PUSH_OP(2);
+            if (!idx)
+                bytestream2_put_le32(pbc,  lut); /* literal word */
+        }
+    }
+
+    return 0;
+}
+
+static int dxv_encode(AVCodecContext *avctx, AVPacket *pkt,
+                      const AVFrame *frame, int *got_packet)
+{
+    DXVEncContext *ctx = avctx->priv_data;
+    PutByteContext *pbc = &ctx->pbc;
+    int ret;
+    /* Encodes one frame into pkt: a 12-byte header followed by the packed texture. */
+    /* unimplemented: needs to depend on compression ratio of tex format */
+    /* under DXT1, we need 3 words to encode load ops for 32 words.
+     * the first 2 words of the texture do not need load ops. */
+    ret = ff_alloc_packet(avctx, pkt, DXV_HEADER_LENGTH + ctx->tex_size + AV_CEIL_RSHIFT(ctx->tex_size - 8, 7) * 12);
+    if (ret < 0)
+        return ret;
+    /* Texture-compress the frame in slice jobs. NOTE(review): the result is not checked. */
+    avctx->execute2(avctx, compress_texture_thread, (void*)frame, NULL, ctx->enc.slice_count);
+
+    bytestream2_init_writer(pbc, pkt->data, pkt->size);
+    /* Header: format tag, the bytes 4, 0, 0, 0, then the payload size. */
+    bytestream2_put_le32(pbc, ctx->tex_fmt);
+    bytestream2_put_byte(pbc, 4);
+    bytestream2_put_byte(pbc, 0);
+    bytestream2_put_byte(pbc, 0);
+    bytestream2_put_byte(pbc, 0);
+    /* Fill in compressed size later */
+    bytestream2_skip_p(pbc, 4);
+
+    ret = ctx->compress_tex(avctx);
+    if (ret < 0)
+        return ret;
+
+    AV_WL32(pkt->data + 8, bytestream2_tell_p(pbc) - DXV_HEADER_LENGTH); /* payload size excludes the header */
+    av_shrink_packet(pkt, bytestream2_tell_p(pbc)); /* drop the unused tail of the allocation */
+
+    *got_packet = 1;
+    return 0;
+}
+
+/* Validate the frame size, select the texture compressor for the "format"
+ * option and allocate the texture buffer. Returns 0 or a negative AVERROR. */
+static av_cold int dxv_init(AVCodecContext *avctx)
+{
+    DXVEncContext *ctx = avctx->priv_data;
+    int ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
+
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid image size %dx%d.\n",
+               avctx->width, avctx->height);
+        return ret;
+    }
+    ff_texturedspenc_init(&ctx->texdsp);
+
+    switch (ctx->tex_fmt) {
+    case DXV_FMT_DXT1:
+        ctx->compress_tex = dxv_compress_dxt1;
+        ctx->enc.tex_funct = ctx->texdsp.dxt1_block;
+        ctx->enc.tex_ratio = 8;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Invalid format %08X\n", ctx->tex_fmt);
+        return AVERROR_INVALIDDATA;
+    }
+    ctx->enc.raw_ratio = 16;
+    ctx->tex_size = FFALIGN(avctx->width, 16) / TEXTURE_BLOCK_W *
+                    FFALIGN(avctx->height, 16) / TEXTURE_BLOCK_H *
+                    ctx->enc.tex_ratio;
+    ctx->enc.slice_count = av_clip(avctx->thread_count, 1, FFALIGN(avctx->height, 16) / TEXTURE_BLOCK_H);
+
+    /* tex_size covers dimensions rounded up to 16; zero-fill so any block the compressor leaves unwritten is deterministic. */
+    ctx->tex_data = av_mallocz(ctx->tex_size);
+    if (!ctx->tex_data)
+        return AVERROR(ENOMEM);
+
+    ctx->crc_ctx = (AVCRC*)av_crc_get_table(AV_CRC_32_IEEE);
+    if (!ctx->crc_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n");
+        return AVERROR_BUG;
+    }
+    return 0;
+}
+
+/* Free the intermediate texture buffer allocated by dxv_init(). Always returns 0. */
+static av_cold int dxv_close(AVCodecContext *avctx)
+{
+    DXVEncContext *const enc_ctx = avctx->priv_data;
+
+    av_freep(&enc_ctx->tex_data);
+    return 0;
+}
+
+#define OFFSET(x) offsetof(DXVEncContext, x)
+#define FLAGS     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+static const AVOption options[] = { /* private options; "format" accepts only DXT1 */
+    { "format", NULL, OFFSET(tex_fmt), AV_OPT_TYPE_INT, { .i64 = DXV_FMT_DXT1 }, DXV_FMT_DXT1, DXV_FMT_DXT1, FLAGS, "format" },
+        { "dxt1", "DXT1 (Normal Quality, No Alpha)", 0, AV_OPT_TYPE_CONST, { .i64 = DXV_FMT_DXT1   }, 0, 0, FLAGS, "format" },
+    { NULL }, /* terminator */
+};
+
+static const AVClass dxvenc_class = { /* class descriptor referenced by ff_dxv_encoder.p.priv_class */
+    .class_name = "DXV encoder",
+    .option     = options, /* the "format" option table defined in this file */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const FFCodec ff_dxv_encoder = { /* codec entry declared extern in allcodecs.c */
+    .p.name         = "dxv",
+    CODEC_LONG_NAME("Resolume DXV"),
+    .p.type         = AVMEDIA_TYPE_VIDEO,
+    .p.id           = AV_CODEC_ID_DXV,
+    .init           = dxv_init,
+    FF_CODEC_ENCODE_CB(dxv_encode),
+    .close          = dxv_close,
+    .priv_data_size = sizeof(DXVEncContext), /* includes both lookback hash tables */
+    .p.capabilities = AV_CODEC_CAP_DR1 |
+                      AV_CODEC_CAP_SLICE_THREADS |
+                      AV_CODEC_CAP_FRAME_THREADS,
+    .p.priv_class   = &dxvenc_class,
+    .p.pix_fmts     = (const enum AVPixelFormat[]) { /* RGBA is the only listed input format */
+        AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE,
+    },
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 376388c5bb..0fae3d06d3 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR  37
+#define LIBAVCODEC_VERSION_MINOR  38
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \