Browse Source

obs-ffmpeg: Add native non-texture NVENC implementation

derrod 1 year ago
parent
commit
2f55f046fe

+ 1 - 1
plugins/obs-ffmpeg/cmake/dependencies.cmake

@@ -26,7 +26,7 @@ endif()
 
 if(OS_WINDOWS)
   find_package(AMF 1.4.29 REQUIRED)
-  find_package(FFnvcodec 12 REQUIRED)
+  find_package(FFnvcodec 12.0.0.0...<12.2.0.0 REQUIRED)
 
   add_library(obs-nvenc-version INTERFACE)
   add_library(OBS::obs-nvenc-version ALIAS obs-nvenc-version)

+ 8 - 0
plugins/obs-ffmpeg/cmake/legacy.cmake

@@ -2,6 +2,7 @@ project(obs-ffmpeg)
 
 option(ENABLE_FFMPEG_LOGGING "Enables obs-ffmpeg logging" OFF)
 option(ENABLE_NEW_MPEGTS_OUTPUT "Use native SRT/RIST mpegts output" ON)
+option(ENABLE_NATIVE_NVENC "Use native NVENC implementation" ON)
 
 find_package(
   FFmpeg REQUIRED
@@ -112,6 +113,13 @@ elseif(OS_POSIX AND NOT OS_MACOS)
   find_package(Libpci REQUIRED)
   target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h)
   target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm LIBPCI::LIBPCI)
+
+  # Optional native NVENC encoder: requires FFnvcodec in the given version
+  # range, adds the obs-nvenc sources, links FFnvcodec::FFnvcodec and defines
+  # NVCODEC_AVAILABLE for the obs-ffmpeg sources.
+  if(ENABLE_NATIVE_NVENC)
+    find_package(FFnvcodec 12.0.0.0...<12.2.0.0 REQUIRED)
+    target_sources(obs-ffmpeg PRIVATE obs-nvenc.c obs-nvenc.h obs-nvenc-helpers.c obs-nvenc-ver.h)
+    target_link_libraries(obs-ffmpeg PRIVATE FFnvcodec::FFnvcodec)
+    target_compile_definitions(obs-ffmpeg PRIVATE NVCODEC_AVAILABLE)
+  endif()
 endif()
 
 setup_plugin_target(obs-ffmpeg)

+ 2 - 2
plugins/obs-ffmpeg/obs-ffmpeg-nvenc.c

@@ -676,7 +676,7 @@ struct obs_encoder_info h264_nvenc_encoder_info = {
 	.get_extra_data = nvenc_extra_data,
 	.get_sei_data = nvenc_sei_data,
 	.get_video_info = nvenc_video_info,
-#ifdef _WIN32
+#if defined(_WIN32) || defined(NVCODEC_AVAILABLE)
 	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_INTERNAL,
 #else
 	.caps = OBS_ENCODER_CAP_DYN_BITRATE,
@@ -698,7 +698,7 @@ struct obs_encoder_info hevc_nvenc_encoder_info = {
 	.get_extra_data = nvenc_extra_data,
 	.get_sei_data = nvenc_sei_data,
 	.get_video_info = nvenc_video_info,
-#ifdef _WIN32
+#if defined(_WIN32) || defined(NVCODEC_AVAILABLE)
 	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_INTERNAL,
 #else
 	.caps = OBS_ENCODER_CAP_DYN_BITRATE,

+ 13 - 13
plugins/obs-ffmpeg/obs-ffmpeg.c

@@ -7,8 +7,12 @@
 #ifdef _WIN32
 #include <dxgi.h>
 #include <util/windows/win-version.h>
+#endif
 
+#if defined(_WIN32) || defined(NVCODEC_AVAILABLE)
 #include "obs-nvenc.h"
+
+#define OBS_NVENC_AVAILABLE
 #endif
 
 #if !defined(_WIN32) && !defined(__APPLE__)
@@ -236,7 +240,7 @@ static bool nvenc_device_available(void)
 }
 #endif
 
-#ifdef _WIN32
+#ifdef OBS_NVENC_AVAILABLE
 extern bool load_nvenc_lib(void);
 #endif
 
@@ -264,18 +268,9 @@ static bool nvenc_supported(bool *out_h264, bool *out_hevc, bool *out_av1)
 
 	bool success = h264 || hevc;
 	if (success) {
-#if defined(_WIN32)
+#ifdef OBS_NVENC_AVAILABLE
 		success = nvenc_device_available() && load_nvenc_lib();
 		av1 = success && (get_nvenc_ver() >= ((12 << 4) | 0));
-
-#elif defined(__linux__)
-		success = nvenc_device_available();
-		if (success) {
-			void *const lib = os_dlopen("libnvidia-encode.so.1");
-			success = lib != NULL;
-			if (success)
-				os_dlclose(lib);
-		}
 #else
 		void *const lib = os_dlopen("libnvidia-encode.so.1");
 		success = lib != NULL;
@@ -336,9 +331,12 @@ static bool hevc_vaapi_supported(void)
 #endif
 #endif
 
-#ifdef _WIN32
+#ifdef OBS_NVENC_AVAILABLE
 extern void obs_nvenc_load(bool h264, bool hevc, bool av1);
 extern void obs_nvenc_unload(void);
+#endif
+
+#ifdef _WIN32
 extern void amf_load(void);
 extern void amf_unload(void);
 #endif
@@ -381,7 +379,7 @@ bool obs_module_load(void)
 	if (nvenc_supported(&h264, &hevc, &av1)) {
 		blog(LOG_INFO, "NVENC supported");
 
-#ifdef _WIN32
+#ifdef OBS_NVENC_AVAILABLE
 		obs_nvenc_load(h264, hevc, av1);
 #endif
 		if (h264)
@@ -442,6 +440,8 @@ void obs_module_unload(void)
 
 #ifdef _WIN32
 	amf_unload();
+#endif
+#ifdef OBS_NVENC_AVAILABLE
 	obs_nvenc_unload();
 #endif
 }

+ 150 - 8
plugins/obs-ffmpeg/obs-nvenc-helpers.c

@@ -2,19 +2,26 @@
 #include <util/platform.h>
 #include <util/threading.h>
 #include <util/config-file.h>
-#include <util/windows/device-enum.h>
 #include <util/dstr.h>
 #include <util/pipe.h>
 
+#ifdef _WIN32
+#include <util/windows/device-enum.h>
+#endif
+
 static void *nvenc_lib = NULL;
+static void *cuda_lib = NULL;
 static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
 NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
 NV_CREATE_INSTANCE_FUNC nv_create_instance = NULL;
+CudaFunctions *cu = NULL;
 
 #define error(format, ...) blog(LOG_ERROR, "[obs-nvenc] " format, ##__VA_ARGS__)
 
 bool nv_fail2(obs_encoder_t *encoder, void *session, const char *format, ...)
 {
+	UNUSED_PARAMETER(session);
+
 	struct dstr message = {0};
 	struct dstr error_message = {0};
 
@@ -98,9 +105,11 @@ bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err,
 
 bool load_nvenc_lib(void)
 {
-	const char *const file = (sizeof(void *) == 8) ? "nvEncodeAPI64.dll"
-						       : "nvEncodeAPI.dll";
-	nvenc_lib = os_dlopen(file);
+#ifdef _WIN32
+	nvenc_lib = os_dlopen("nvEncodeAPI64.dll");
+#else
+	nvenc_lib = os_dlopen("libnvidia-encode.so.1");
+#endif
 	return nvenc_lib != NULL;
 }
 
@@ -113,6 +122,25 @@ static void *load_nv_func(const char *func)
 	return func_ptr;
 }
 
+/* Opens the platform's CUDA library and stores the handle in cuda_lib.
+ * Returns true when the library could be opened. */
+bool load_cuda_lib(void)
+{
+#ifdef _WIN32
+	const char *const lib_name = "nvcuda.dll";
+#else
+	const char *const lib_name = "libcuda.so.1";
+#endif
+
+	cuda_lib = os_dlopen(lib_name);
+	return cuda_lib != NULL;
+}
+
+/* Resolves `func` in the loaded CUDA library. Logs an error and returns NULL
+ * when the symbol cannot be found. */
+static void *load_cuda_func(const char *func)
+{
+	void *const sym = os_dlsym(cuda_lib, func);
+
+	if (sym)
+		return sym;
+
+	error("Could not load function: %s", func);
+	return NULL;
+}
+
 typedef NVENCSTATUS(NVENCAPI *NV_MAX_VER_FUNC)(uint32_t *);
 
 uint32_t get_nvenc_ver(void)
@@ -227,6 +255,81 @@ static inline bool init_nvenc_internal(obs_encoder_t *encoder)
 	return true;
 }
 
+/* Pairs a CudaFunctions member (identified by its byte offset) with the name
+ * of the symbol that is looked up in the CUDA library for it. */
+typedef struct cuda_function {
+	ptrdiff_t offset; /* offsetof() the target member in CudaFunctions */
+	const char *name; /* symbol name handed to load_cuda_func() */
+} cuda_function;
+
+/* Every entry point that init_cuda_internal() resolves. Several members are
+ * bound to the "_v2" flavour of the exported symbol. */
+static const cuda_function cuda_functions[] = {
+	{offsetof(CudaFunctions, cuInit), "cuInit"},
+
+	{offsetof(CudaFunctions, cuDeviceGetCount), "cuDeviceGetCount"},
+	{offsetof(CudaFunctions, cuDeviceGet), "cuDeviceGet"},
+	{offsetof(CudaFunctions, cuDeviceGetAttribute), "cuDeviceGetAttribute"},
+
+	{offsetof(CudaFunctions, cuCtxCreate), "cuCtxCreate_v2"},
+	{offsetof(CudaFunctions, cuCtxDestroy), "cuCtxDestroy_v2"},
+	{offsetof(CudaFunctions, cuCtxPushCurrent), "cuCtxPushCurrent_v2"},
+	{offsetof(CudaFunctions, cuCtxPopCurrent), "cuCtxPopCurrent_v2"},
+
+	{offsetof(CudaFunctions, cuArray3DCreate), "cuArray3DCreate_v2"},
+	{offsetof(CudaFunctions, cuArrayDestroy), "cuArrayDestroy"},
+	{offsetof(CudaFunctions, cuMemcpy2D), "cuMemcpy2D_v2"},
+
+	{offsetof(CudaFunctions, cuGetErrorName), "cuGetErrorName"},
+	{offsetof(CudaFunctions, cuGetErrorString), "cuGetErrorString"},
+
+	{offsetof(CudaFunctions, cuMemHostRegister), "cuMemHostRegister_v2"},
+	{offsetof(CudaFunctions, cuMemHostUnregister), "cuMemHostUnregister"},
+};
+
+/* Number of entries in cuda_functions. */
+static const size_t num_cuda_funcs =
+	sizeof(cuda_functions) / sizeof(cuda_function);
+
+/*
+ * Loads the CUDA library and resolves every entry of cuda_functions into a
+ * freshly allocated CudaFunctions table.
+ *
+ * The work is attempted only once; later calls return the cached outcome of
+ * the first attempt. init_cuda() calls this with init_mutex held.
+ *
+ * The table is published through the global `cu` only after every symbol has
+ * been resolved, so this function never leaves `cu` pointing at a partially
+ * filled table. On failure the table is freed, the library handle is closed
+ * and an error message is set on `encoder`.
+ *
+ * Returns true when CUDA is ready for use, false otherwise.
+ */
+static bool init_cuda_internal(obs_encoder_t *encoder)
+{
+	static bool initialized = false;
+	static bool success = false;
+
+	if (initialized)
+		return success;
+	initialized = true;
+
+	if (!load_cuda_lib()) {
+		obs_encoder_set_last_error(encoder,
+					   "Loading CUDA library failed.");
+		return false;
+	}
+
+	CudaFunctions *table = bzalloc(sizeof(CudaFunctions));
+
+	for (size_t idx = 0; idx < num_cuda_funcs; idx++) {
+		const cuda_function func = cuda_functions[idx];
+		void *fptr = load_cuda_func(func.name);
+
+		if (!fptr) {
+			error("Failed to find CUDA function: %s", func.name);
+			obs_encoder_set_last_error(
+				encoder, "Loading CUDA functions failed.");
+
+			/* Do not leave a half-filled table or an unused
+			 * library handle behind. */
+			bfree(table);
+			os_dlclose(cuda_lib);
+			cuda_lib = NULL;
+			return false;
+		}
+
+		/* Store the pointer into the member located at func.offset. */
+		*(uintptr_t *)((uintptr_t)table + func.offset) =
+			(uintptr_t)fptr;
+	}
+
+	cu = table;
+	success = true;
+	return true;
+}
+
+/*
+ * Looks up the name and the description of a CUDA result code through
+ * cuGetErrorName / cuGetErrorString.
+ *
+ * Returns false when the CUDA function table is not loaded or when either
+ * lookup does not report CUDA_SUCCESS; returns true otherwise.
+ */
+bool cuda_get_error_desc(CUresult res, const char **name, const char **desc)
+{
+	/* Guard against calls made without a successfully loaded table. */
+	if (!cu || !cu->cuGetErrorName || !cu->cuGetErrorString)
+		return false;
+
+	if (cu->cuGetErrorName(res, name) != CUDA_SUCCESS)
+		return false;
+
+	return cu->cuGetErrorString(res, desc) == CUDA_SUCCESS;
+}
+
 bool init_nvenc(obs_encoder_t *encoder)
 {
 	bool success;
@@ -238,12 +341,32 @@ bool init_nvenc(obs_encoder_t *encoder)
 	return success;
 }
 
+/* Runs init_cuda_internal() while holding init_mutex and returns its
+ * result. */
+bool init_cuda(obs_encoder_t *encoder)
+{
+	pthread_mutex_lock(&init_mutex);
+	const bool ok = init_cuda_internal(encoder);
+	pthread_mutex_unlock(&init_mutex);
+
+	return ok;
+}
+
+#ifdef _WIN32
 extern struct obs_encoder_info h264_nvenc_info;
 #ifdef ENABLE_HEVC
 extern struct obs_encoder_info hevc_nvenc_info;
 #endif
 extern struct obs_encoder_info av1_nvenc_info;
+#endif
 
+extern struct obs_encoder_info h264_nvenc_soft_info;
+#ifdef ENABLE_HEVC
+extern struct obs_encoder_info hevc_nvenc_soft_info;
+#endif
+extern struct obs_encoder_info av1_nvenc_soft_info;
+
+#ifdef _WIN32
 static bool enum_luids(void *param, uint32_t idx, uint64_t luid)
 {
 	struct dstr *cmd = param;
@@ -319,23 +442,42 @@ fail:
 
 	return av1_supported;
 }
+#else
+/* Non-Windows build: AV1 support is decided from the NVENC version alone. */
+bool av1_supported(void)
+{
+	/* NOTE(review): (12 << 4) | 0 presumably encodes API version 12.0;
+	 * confirm against get_nvenc_ver(). */
+	return get_nvenc_ver() >= ((12 << 4) | 0);
+}
+#endif
 
 void obs_nvenc_load(bool h264, bool hevc, bool av1)
 {
 	pthread_mutex_init(&init_mutex, NULL);
-	if (h264)
+	if (h264) {
+#ifdef _WIN32
 		obs_register_encoder(&h264_nvenc_info);
+#endif
+		obs_register_encoder(&h264_nvenc_soft_info);
+	}
 #ifdef ENABLE_HEVC
-	if (hevc)
+	if (hevc) {
+#ifdef _WIN32
 		obs_register_encoder(&hevc_nvenc_info);
 #endif
-	if (av1 && av1_supported())
+		obs_register_encoder(&hevc_nvenc_soft_info);
+	}
+#endif
+	if (av1 && av1_supported()) {
+#ifdef _WIN32
 		obs_register_encoder(&av1_nvenc_info);
-	else
+#endif
+		obs_register_encoder(&av1_nvenc_soft_info);
+	} else {
 		blog(LOG_WARNING, "[NVENC] AV1 is not supported");
+	}
 }
 
 void obs_nvenc_unload(void)
 {
+	bfree(cu);
 	pthread_mutex_destroy(&init_mutex);
 }

File diff suppressed because it is too large
+ 598 - 98
plugins/obs-ffmpeg/obs-nvenc.c


+ 35 - 0
plugins/obs-ffmpeg/obs-nvenc.h

@@ -1,20 +1,55 @@
 #pragma once
 
+#ifdef _WIN32
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
+#endif
 
 #include <obs-module.h>
 #include <ffnvcodec/nvEncodeAPI.h>
+#include <ffnvcodec/dynlink_cuda.h>
+
 #include "obs-nvenc-ver.h"
 
+/* Function types for cuMemHostRegister / cuMemHostUnregister, declared here
+ * because they are missing from the FFmpeg (ffnvcodec) headers. */
+typedef CUresult CUDAAPI tcuMemHostRegister(void *p, size_t bytesize,
+					    unsigned int Flags);
+typedef CUresult CUDAAPI tcuMemHostUnregister(void *p);
+
+/* Table of CUDA entry points, one function pointer per member. The global
+ * `cu` declared in this header points to an instance of this table. */
+typedef struct CudaFunctions {
+	/* Initialization */
+	tcuInit *cuInit;
+
+	/* Device queries */
+	tcuDeviceGetCount *cuDeviceGetCount;
+	tcuDeviceGet *cuDeviceGet;
+	tcuDeviceGetAttribute *cuDeviceGetAttribute;
+
+	/* Context handling */
+	tcuCtxCreate_v2 *cuCtxCreate;
+	tcuCtxDestroy_v2 *cuCtxDestroy;
+	tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
+	tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
+
+	/* Arrays and 2D copies */
+	tcuArray3DCreate *cuArray3DCreate;
+	tcuArrayDestroy *cuArrayDestroy;
+	tcuMemcpy2D_v2 *cuMemcpy2D;
+
+	/* Error reporting */
+	tcuGetErrorName *cuGetErrorName;
+	tcuGetErrorString *cuGetErrorString;
+
+	/* Host memory registration (types declared in this header) */
+	tcuMemHostRegister *cuMemHostRegister;
+	tcuMemHostUnregister *cuMemHostUnregister;
+} CudaFunctions;
+
 typedef NVENCSTATUS(NVENCAPI *NV_CREATE_INSTANCE_FUNC)(
 	NV_ENCODE_API_FUNCTION_LIST *);
 
 extern const char *nv_error_name(NVENCSTATUS err);
 extern NV_ENCODE_API_FUNCTION_LIST nv;
 extern NV_CREATE_INSTANCE_FUNC nv_create_instance;
+extern CudaFunctions *cu;
 extern uint32_t get_nvenc_ver(void);
 extern bool init_nvenc(obs_encoder_t *encoder);
+extern bool init_cuda(obs_encoder_t *encoder);
+bool cuda_get_error_desc(CUresult res, const char **name, const char **desc);
 bool nv_fail2(obs_encoder_t *encoder, void *session, const char *format, ...);
 bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err,
 		const char *func, const char *call);

Some files were not shown because too many files changed in this diff