Update CUDA support and model versions (#78)

This commit is contained in:
Roy Shilkrot 2024-03-24 21:23:06 -04:00 committed by GitHub
parent 6791e5a5d3
commit 0c7d7234af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 43 additions and 17 deletions

View File

@@ -41,6 +41,7 @@ set(USE_SYSTEM_CURL
CACHE STRING "Use system cURL")
if(LOCALVOCAL_WITH_CUDA)
message(STATUS "Building with CUDA support")
add_compile_definitions("LOCALVOCAL_WITH_CUDA")
endif()

View File

@@ -45,7 +45,7 @@
}
},
"name": "obs-localvocal",
"version": "0.1.1",
"version": "0.2.0",
"author": "Roy Shilkrot",
"website": "https://github.com/occ-ai/obs-localvocal",
"email": "roy.shil@gmail.com",

View File

@@ -42,7 +42,7 @@ if(WIN32)
endif(NOT DEFINED CUDA_TOOLKIT_ROOT_DIR)
set(WHISPER_ADDITIONAL_ENV "CUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT_DIR}")
set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_CUBLAS=ON -DWHISPER_OPENBLAS=OFF
set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_BLAS=OFF -DWHISPER_CUBLAS=ON -DWHISPER_OPENBLAS=OFF
-DCMAKE_GENERATOR_TOOLSET=cuda=${CUDA_TOOLKIT_ROOT_DIR})
else()
# Build with OpenBLAS

View File

@@ -5,8 +5,7 @@
#include <filesystem>
const std::string MODEL_BASE_PATH = "https://huggingface.co/ggerganov/whisper.cpp";
const std::string MODEL_PREFIX = "resolve/main/";
const std::string MODEL_BASE_PATH = "https://ggml.ggerganov.com/";
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
@@ -143,7 +142,7 @@ void ModelDownloadWorker::download_model()
const std::string model_filename =
this->model_name.substr(this->model_name.find_last_of("/\\") + 1);
std::string model_url = MODEL_BASE_PATH + "/" + MODEL_PREFIX + model_filename;
std::string model_url = MODEL_BASE_PATH + model_filename;
obs_log(LOG_INFO, "Model URL: %s", model_url.c_str());
CURL *curl = curl_easy_init();

View File

@@ -654,7 +654,8 @@ void transcription_filter_defaults(obs_data_t *s)
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
obs_data_set_default_bool(s, "log_words", true);
obs_data_set_default_bool(s, "caption_to_stream", false);
obs_data_set_default_string(s, "whisper_model_path", "models/ggml-tiny.en.bin");
obs_data_set_default_string(s, "whisper_model_path",
"models/ggml-model-whisper-tiny.en.bin");
obs_data_set_default_string(s, "whisper_language_select", "en");
obs_data_set_default_string(s, "subtitle_sources", "none");
obs_data_set_default_bool(s, "step_by_step_processing", false);
@@ -754,15 +755,38 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_properties_add_list(ppts, "whisper_model_path", MT_("whisper_model"),
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
obs_property_list_add_string(whisper_models_list, "Tiny (Eng) 75Mb",
"models/ggml-tiny.en.bin");
obs_property_list_add_string(whisper_models_list, "Tiny 75Mb", "models/ggml-tiny.bin");
obs_property_list_add_string(whisper_models_list, "Base (Eng) 142Mb",
"models/ggml-base.en.bin");
obs_property_list_add_string(whisper_models_list, "Base 142Mb", "models/ggml-base.bin");
obs_property_list_add_string(whisper_models_list, "Small (Eng) 466Mb",
"models/ggml-small.en.bin");
obs_property_list_add_string(whisper_models_list, "Small 466Mb", "models/ggml-small.bin");
obs_property_list_add_string(whisper_models_list, "Base q5 57M",
"models/ggml-model-whisper-base-q5_1.bin");
obs_property_list_add_string(whisper_models_list, "Base 141M",
"models/ggml-model-whisper-base.bin");
obs_property_list_add_string(whisper_models_list, "Base (Eng) q5 57M",
"models/ggml-model-whisper-base.en-q5_1.bin");
obs_property_list_add_string(whisper_models_list, "Base (Eng) 141M",
"models/ggml-model-whisper-base.en.bin");
obs_property_list_add_string(whisper_models_list, "Large q5 1G",
"models/ggml-model-whisper-large-q5_0.bin");
obs_property_list_add_string(whisper_models_list, "Medium q5 514M",
"models/ggml-model-whisper-medium-q5_0.bin");
obs_property_list_add_string(whisper_models_list, "Medium (Eng) 514M",
"models/ggml-model-whisper-medium.en-q5_0.bin");
obs_property_list_add_string(whisper_models_list, "Small q5 181M",
"models/ggml-model-whisper-small-q5_1.bin");
obs_property_list_add_string(whisper_models_list, "Small 465M",
"models/ggml-model-whisper-small.bin");
obs_property_list_add_string(whisper_models_list, "Small (Eng) q5 181M",
"models/ggml-model-whisper-small.en-q5_1.bin");
obs_property_list_add_string(whisper_models_list, "Small (Eng) 465M",
"models/ggml-model-whisper-small.en.bin");
obs_property_list_add_string(whisper_models_list, "Tiny q5 31M",
"models/ggml-model-whisper-tiny-q5_1.bin");
obs_property_list_add_string(whisper_models_list, "Tiny 74M",
"models/ggml-model-whisper-tiny.bin");
obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q5 31M",
"models/ggml-model-whisper-tiny.en-q5_1.bin");
obs_property_list_add_string(whisper_models_list, "Tiny (Eng) q8 42M",
"models/ggml-model-whisper-tiny.en-q8_0.bin");
obs_property_list_add_string(whisper_models_list, "Tiny (Eng) 74M",
"models/ggml-model-whisper-tiny.en.bin");
obs_property_list_add_string(whisper_models_list, "Load external model file",
"!!!external!!!");

View File

@@ -112,11 +112,13 @@ struct whisper_context *init_whisper_context(const std::string &model_path)
{
obs_log(LOG_INFO, "Loading whisper model from %s", model_path.c_str());
struct whisper_context_params cparams;
struct whisper_context_params cparams = whisper_context_default_params();
#ifdef LOCALVOCAL_WITH_CUDA
cparams.use_gpu = true;
obs_log(LOG_INFO, "Using GPU for inference, device %d", cparams.gpu_device);
#else
cparams.use_gpu = false;
obs_log(LOG_INFO, "Using CPU for inference");
#endif
#ifdef _WIN32

2
vendor/curl vendored

@@ -1 +1 @@
Subproject commit 439ff2052e219162708faddedacdf6f1242bb8c8
Subproject commit 98044e81705dc24a56daaf3544f30c13f0fc3a31