|
@@ -2,19 +2,26 @@
|
|
|
#include <util/platform.h>
|
|
|
#include <util/threading.h>
|
|
|
#include <util/config-file.h>
|
|
|
-#include <util/windows/device-enum.h>
|
|
|
#include <util/dstr.h>
|
|
|
#include <util/pipe.h>
|
|
|
|
|
|
+#ifdef _WIN32
|
|
|
+#include <util/windows/device-enum.h>
|
|
|
+#endif
|
|
|
+
|
|
|
static void *nvenc_lib = NULL;
|
|
|
+static void *cuda_lib = NULL;
|
|
|
static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
|
|
|
NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
|
|
|
NV_CREATE_INSTANCE_FUNC nv_create_instance = NULL;
|
|
|
+CudaFunctions *cu = NULL;
|
|
|
|
|
|
#define error(format, ...) blog(LOG_ERROR, "[obs-nvenc] " format, ##__VA_ARGS__)
|
|
|
|
|
|
bool nv_fail2(obs_encoder_t *encoder, void *session, const char *format, ...)
|
|
|
{
|
|
|
+ UNUSED_PARAMETER(session);
|
|
|
+
|
|
|
struct dstr message = {0};
|
|
|
struct dstr error_message = {0};
|
|
|
|
|
@@ -98,9 +105,11 @@ bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err,
|
|
|
|
|
|
bool load_nvenc_lib(void)
|
|
|
{
|
|
|
- const char *const file = (sizeof(void *) == 8) ? "nvEncodeAPI64.dll"
|
|
|
- : "nvEncodeAPI.dll";
|
|
|
- nvenc_lib = os_dlopen(file);
|
|
|
+#ifdef _WIN32
|
|
|
+ nvenc_lib = os_dlopen("nvEncodeAPI64.dll");
|
|
|
+#else
|
|
|
+ nvenc_lib = os_dlopen("libnvidia-encode.so.1");
|
|
|
+#endif
|
|
|
return nvenc_lib != NULL;
|
|
|
}
|
|
|
|
|
@@ -113,6 +122,25 @@ static void *load_nv_func(const char *func)
|
|
|
return func_ptr;
|
|
|
}
|
|
|
|
|
|
+bool load_cuda_lib(void)
|
|
|
+{
|
|
|
+#ifdef _WIN32
|
|
|
+ cuda_lib = os_dlopen("nvcuda.dll");
|
|
|
+#else
|
|
|
+ cuda_lib = os_dlopen("libcuda.so.1");
|
|
|
+#endif
|
|
|
+ return cuda_lib != NULL;
|
|
|
+}
|
|
|
+
|
|
|
+static void *load_cuda_func(const char *func)
|
|
|
+{
|
|
|
+ void *func_ptr = os_dlsym(cuda_lib, func);
|
|
|
+ if (!func_ptr) {
|
|
|
+ error("Could not load function: %s", func);
|
|
|
+ }
|
|
|
+ return func_ptr;
|
|
|
+}
|
|
|
+
|
|
|
typedef NVENCSTATUS(NVENCAPI *NV_MAX_VER_FUNC)(uint32_t *);
|
|
|
|
|
|
uint32_t get_nvenc_ver(void)
|
|
@@ -227,6 +255,81 @@ static inline bool init_nvenc_internal(obs_encoder_t *encoder)
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
/* Associates one CudaFunctions member with the symbol name used to load it. */
struct cuda_function {
	ptrdiff_t offset; /* byte offset of the member within CudaFunctions */
	const char *name; /* symbol name handed to load_cuda_func() */
};
typedef struct cuda_function cuda_function;
|
|
|
+
|
|
|
+static const cuda_function cuda_functions[] = {
|
|
|
+ {offsetof(CudaFunctions, cuInit), "cuInit"},
|
|
|
+
|
|
|
+ {offsetof(CudaFunctions, cuDeviceGetCount), "cuDeviceGetCount"},
|
|
|
+ {offsetof(CudaFunctions, cuDeviceGet), "cuDeviceGet"},
|
|
|
+ {offsetof(CudaFunctions, cuDeviceGetAttribute), "cuDeviceGetAttribute"},
|
|
|
+
|
|
|
+ {offsetof(CudaFunctions, cuCtxCreate), "cuCtxCreate_v2"},
|
|
|
+ {offsetof(CudaFunctions, cuCtxDestroy), "cuCtxDestroy_v2"},
|
|
|
+ {offsetof(CudaFunctions, cuCtxPushCurrent), "cuCtxPushCurrent_v2"},
|
|
|
+ {offsetof(CudaFunctions, cuCtxPopCurrent), "cuCtxPopCurrent_v2"},
|
|
|
+
|
|
|
+ {offsetof(CudaFunctions, cuArray3DCreate), "cuArray3DCreate_v2"},
|
|
|
+ {offsetof(CudaFunctions, cuArrayDestroy), "cuArrayDestroy"},
|
|
|
+ {offsetof(CudaFunctions, cuMemcpy2D), "cuMemcpy2D_v2"},
|
|
|
+
|
|
|
+ {offsetof(CudaFunctions, cuGetErrorName), "cuGetErrorName"},
|
|
|
+ {offsetof(CudaFunctions, cuGetErrorString), "cuGetErrorString"},
|
|
|
+
|
|
|
+ {offsetof(CudaFunctions, cuMemHostRegister), "cuMemHostRegister_v2"},
|
|
|
+ {offsetof(CudaFunctions, cuMemHostUnregister), "cuMemHostUnregister"},
|
|
|
+};
|
|
|
+
|
|
|
+static const size_t num_cuda_funcs =
|
|
|
+ sizeof(cuda_functions) / sizeof(cuda_function);
|
|
|
+
|
|
|
+static bool init_cuda_internal(obs_encoder_t *encoder)
|
|
|
+{
|
|
|
+ static bool initialized = false;
|
|
|
+ static bool success = false;
|
|
|
+
|
|
|
+ if (initialized)
|
|
|
+ return success;
|
|
|
+ initialized = true;
|
|
|
+
|
|
|
+ if (!load_cuda_lib()) {
|
|
|
+ obs_encoder_set_last_error(encoder,
|
|
|
+ "Loading CUDA library failed.");
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ cu = bzalloc(sizeof(CudaFunctions));
|
|
|
+
|
|
|
+ for (size_t idx = 0; idx < num_cuda_funcs; idx++) {
|
|
|
+ const cuda_function func = cuda_functions[idx];
|
|
|
+ void *fptr = load_cuda_func(func.name);
|
|
|
+
|
|
|
+ if (!fptr) {
|
|
|
+ error("Failed to find CUDA function: %s", func.name);
|
|
|
+ obs_encoder_set_last_error(
|
|
|
+ encoder, "Loading CUDA functions failed.");
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ *(uintptr_t *)((uintptr_t)cu + func.offset) = (uintptr_t)fptr;
|
|
|
+ }
|
|
|
+
|
|
|
+ success = true;
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+bool cuda_get_error_desc(CUresult res, const char **name, const char **desc)
|
|
|
+{
|
|
|
+ if (cu->cuGetErrorName(res, name) != CUDA_SUCCESS ||
|
|
|
+ cu->cuGetErrorString(res, desc) != CUDA_SUCCESS)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
bool init_nvenc(obs_encoder_t *encoder)
|
|
|
{
|
|
|
bool success;
|
|
@@ -238,12 +341,32 @@ bool init_nvenc(obs_encoder_t *encoder)
|
|
|
return success;
|
|
|
}
|
|
|
|
|
|
+bool init_cuda(obs_encoder_t *encoder)
|
|
|
+{
|
|
|
+ bool success;
|
|
|
+
|
|
|
+ pthread_mutex_lock(&init_mutex);
|
|
|
+ success = init_cuda_internal(encoder);
|
|
|
+ pthread_mutex_unlock(&init_mutex);
|
|
|
+
|
|
|
+ return success;
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef _WIN32
|
|
|
extern struct obs_encoder_info h264_nvenc_info;
|
|
|
#ifdef ENABLE_HEVC
|
|
|
extern struct obs_encoder_info hevc_nvenc_info;
|
|
|
#endif
|
|
|
extern struct obs_encoder_info av1_nvenc_info;
|
|
|
+#endif
|
|
|
|
|
|
+extern struct obs_encoder_info h264_nvenc_soft_info;
|
|
|
+#ifdef ENABLE_HEVC
|
|
|
+extern struct obs_encoder_info hevc_nvenc_soft_info;
|
|
|
+#endif
|
|
|
+extern struct obs_encoder_info av1_nvenc_soft_info;
|
|
|
+
|
|
|
+#ifdef _WIN32
|
|
|
static bool enum_luids(void *param, uint32_t idx, uint64_t luid)
|
|
|
{
|
|
|
struct dstr *cmd = param;
|
|
@@ -319,23 +442,42 @@ fail:
|
|
|
|
|
|
return av1_supported;
|
|
|
}
|
|
|
+#else
|
|
|
/*
 * Non-Windows AV1 check: reports support when get_nvenc_ver() is at least
 * the packed value for version 12.0.
 */
bool av1_supported(void)
{
	/* NOTE(review): the threshold assumes get_nvenc_ver() packs the version
	 * as (major << 4) | minor - confirm against get_nvenc_ver(). */
	const uint32_t min_av1_ver = (12u << 4) | 0u;

	return get_nvenc_ver() >= min_av1_ver;
}
|
|
|
+#endif
|
|
|
|
|
|
void obs_nvenc_load(bool h264, bool hevc, bool av1)
|
|
|
{
|
|
|
pthread_mutex_init(&init_mutex, NULL);
|
|
|
- if (h264)
|
|
|
+ if (h264) {
|
|
|
+#ifdef _WIN32
|
|
|
obs_register_encoder(&h264_nvenc_info);
|
|
|
+#endif
|
|
|
+ obs_register_encoder(&h264_nvenc_soft_info);
|
|
|
+ }
|
|
|
#ifdef ENABLE_HEVC
|
|
|
- if (hevc)
|
|
|
+ if (hevc) {
|
|
|
+#ifdef _WIN32
|
|
|
obs_register_encoder(&hevc_nvenc_info);
|
|
|
#endif
|
|
|
- if (av1 && av1_supported())
|
|
|
+ obs_register_encoder(&hevc_nvenc_soft_info);
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ if (av1 && av1_supported()) {
|
|
|
+#ifdef _WIN32
|
|
|
obs_register_encoder(&av1_nvenc_info);
|
|
|
- else
|
|
|
+#endif
|
|
|
+ obs_register_encoder(&av1_nvenc_soft_info);
|
|
|
+ } else {
|
|
|
blog(LOG_WARNING, "[NVENC] AV1 is not supported");
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
void obs_nvenc_unload(void)
|
|
|
{
|
|
|
+ bfree(cu);
|
|
|
pthread_mutex_destroy(&init_mutex);
|
|
|
}
|