Browse Source

obs-nvenc: Add new NVENC plugin

derrod 1 year ago
parent
commit
911a49070b

+ 1 - 0
plugins/CMakeLists.txt

@@ -62,6 +62,7 @@ if(OBS_CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.0)
   add_obs_plugin(obs-ffmpeg)
   add_obs_plugin(obs-filters)
   add_obs_plugin(obs-libfdk)
+  add_obs_plugin(obs-nvenc PLATFORMS WINDOWS LINUX)
   add_obs_plugin(obs-outputs)
   add_obs_plugin(
     obs-qsv11

+ 58 - 0
plugins/obs-nvenc/CMakeLists.txt

@@ -0,0 +1,58 @@
+cmake_minimum_required(VERSION 3.22...3.25)
+
+# Build switches. ENABLE_NVENC gates the whole plugin; ENABLE_NVENC_FFMPEG_IDS controls whether the
+# REGISTER_FFMPEG_IDS compile definition is added to the target further down.
+option(ENABLE_NVENC "Build NVIDIA Hardware Encoder Plugin" ON)
+option(ENABLE_NVENC_FFMPEG_IDS "Register FFmpeg encoder IDs" ON)
+mark_as_advanced(ENABLE_NVENC_FFMPEG_IDS)
+
+# Plugin switched off: hand the target name to the project-provided disable helpers (defined outside
+# this file) and skip the rest of this directory.
+if(NOT ENABLE_NVENC)
+  target_disable_feature(obs-nvenc "NVIDIA Hardware Encoder")
+  target_disable(obs-nvenc)
+  return()
+endif()
+
+# Pull in the shared option parser only if no other directory has already created its target.
+if(NOT TARGET OBS::opts-parser)
+  add_subdirectory("${CMAKE_SOURCE_DIR}/shared/opts-parser" "${CMAKE_BINARY_DIR}/shared/opts-parser")
+endif()
+
+# glad is only linked on Linux (see target_link_libraries below), so it is only added there.
+if(OS_LINUX AND NOT TARGET OBS::glad)
+  add_subdirectory("${CMAKE_SOURCE_DIR}/deps/glad" "${CMAKE_BINARY_DIR}/deps/glad")
+endif()
+
+# FFnvcodec headers, version 12 or newer, are mandatory once the plugin is enabled.
+find_package(FFnvcodec 12 REQUIRED)
+
+add_library(obs-nvenc MODULE)
+add_library(OBS::nvenc ALIAS obs-nvenc)
+
+add_subdirectory(obs-nvenc-test)
+
+# Platform-specific sources are selected with generator expressions: the OpenGL backend on Linux
+# and the D3D11 backend on Windows.
+target_sources(
+  obs-nvenc
+  PRIVATE # cmake-format: sortable
+          $<$<PLATFORM_ID:Linux>:nvenc-opengl.c>
+          $<$<PLATFORM_ID:Windows>:nvenc-d3d11.c>
+          cuda-helpers.c
+          cuda-helpers.h
+          nvenc-compat.c
+          nvenc-cuda.c
+          nvenc-helpers.c
+          nvenc-helpers.h
+          nvenc-internal.h
+          nvenc-opts-parser.c
+          nvenc-properties.c
+          nvenc.c
+          obs-nvenc.c
+          obs-nvenc.h)
+
+target_link_libraries(obs-nvenc PRIVATE OBS::libobs OBS::opts-parser FFnvcodec::FFnvcodec
+                                        $<$<PLATFORM_ID:Linux>:OBS::glad>)
+
+# REGISTER_FFMPEG_IDS is defined for the plugin sources only when ENABLE_NVENC_FFMPEG_IDS is truthy.
+target_compile_definitions(obs-nvenc PRIVATE $<$<BOOL:${ENABLE_NVENC_FFMPEG_IDS}>:REGISTER_FFMPEG_IDS>)
+
+# Windows: generate the version resource from the template and compile it into the module.
+if(OS_WINDOWS)
+  configure_file(cmake/windows/obs-module.rc.in obs-nvenc.rc)
+  target_sources(obs-nvenc PRIVATE obs-nvenc.rc)
+endif()
+
+# set_target_properties_obs is a project helper defined elsewhere; PREFIX "" drops any "lib" prefix
+# from the produced module file name.
+# cmake-format: off
+set_target_properties_obs(obs-nvenc PROPERTIES FOLDER plugins/obs-nvenc PREFIX "")
+# cmake-format: on

+ 24 - 0
plugins/obs-nvenc/cmake/windows/obs-module.rc.in

@@ -0,0 +1,24 @@
+1 VERSIONINFO
+FILEVERSION ${OBS_VERSION_MAJOR},${OBS_VERSION_MINOR},${OBS_VERSION_PATCH},0
+BEGIN
+  BLOCK "StringFileInfo"
+  BEGIN
+    BLOCK "040904B0"
+    BEGIN
+      VALUE "CompanyName", "${OBS_COMPANY_NAME}"
+      VALUE "FileDescription", "OBS NVENC module"
+      VALUE "FileVersion", "${OBS_VERSION_CANONICAL}"
+      VALUE "ProductName", "${OBS_PRODUCT_NAME}"
+      VALUE "ProductVersion", "${OBS_VERSION_CANONICAL}"
+      VALUE "Comments", "${OBS_COMMENTS}"
+      VALUE "LegalCopyright", "${OBS_LEGAL_COPYRIGHT}"
+      VALUE "InternalName", "obs-nvenc"
+      VALUE "OriginalFilename", "obs-nvenc"
+    END
+  END
+
+  BLOCK "VarFileInfo"
+  BEGIN
+    VALUE "Translation", 0x0409, 0x04B0
+  END
+END

+ 172 - 0
plugins/obs-nvenc/cuda-helpers.c

@@ -0,0 +1,172 @@
+#include "obs-nvenc.h"
+
+#include "nvenc-internal.h"
+#include "cuda-helpers.h"
+
+#include <util/platform.h>
+#include <util/threading.h>
+#include <util/config-file.h>
+#include <util/dstr.h>
+#include <util/pipe.h>
+
+static void *cuda_lib = NULL;
+static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+CudaFunctions *cu = NULL;
+
+/* Opens the platform's CUDA driver library and stores the handle in
+ * cuda_lib. Returns true when the library could be opened. */
+bool load_cuda_lib(void)
+{
+#ifdef _WIN32
+	const char *lib_name = "nvcuda.dll";
+#else
+	const char *lib_name = "libcuda.so.1";
+#endif
+
+	cuda_lib = os_dlopen(lib_name);
+	return !!cuda_lib;
+}
+
+static void *load_cuda_func(const char *func)
+{
+	void *func_ptr = os_dlsym(cuda_lib, func);
+	if (!func_ptr) {
+		blog(LOG_ERROR, "[obs-nvenc] Could not load function: %s",
+		     func);
+	}
+	return func_ptr;
+}
+
+/* One entry of the symbol table below: where inside CudaFunctions the
+ * resolved pointer is stored, and the exported symbol name to look up. */
+typedef struct cuda_function {
+	ptrdiff_t offset;
+	const char *name;
+} cuda_function;
+
+/* Every CUDA driver entry point this plugin resolves at runtime. Several
+ * members are bound to the "_v2" flavour of the exported symbol. */
+static const cuda_function cuda_functions[] = {
+	{offsetof(CudaFunctions, cuInit), "cuInit"},
+
+	{offsetof(CudaFunctions, cuDeviceGetCount), "cuDeviceGetCount"},
+	{offsetof(CudaFunctions, cuDeviceGet), "cuDeviceGet"},
+	{offsetof(CudaFunctions, cuDeviceGetAttribute), "cuDeviceGetAttribute"},
+
+	{offsetof(CudaFunctions, cuCtxCreate), "cuCtxCreate_v2"},
+	{offsetof(CudaFunctions, cuCtxDestroy), "cuCtxDestroy_v2"},
+	{offsetof(CudaFunctions, cuCtxPushCurrent), "cuCtxPushCurrent_v2"},
+	{offsetof(CudaFunctions, cuCtxPopCurrent), "cuCtxPopCurrent_v2"},
+
+	{offsetof(CudaFunctions, cuArray3DCreate), "cuArray3DCreate_v2"},
+	{offsetof(CudaFunctions, cuArrayDestroy), "cuArrayDestroy"},
+	{offsetof(CudaFunctions, cuMemcpy2D), "cuMemcpy2D_v2"},
+
+	{offsetof(CudaFunctions, cuGetErrorName), "cuGetErrorName"},
+	{offsetof(CudaFunctions, cuGetErrorString), "cuGetErrorString"},
+
+	{offsetof(CudaFunctions, cuMemHostRegister), "cuMemHostRegister_v2"},
+	{offsetof(CudaFunctions, cuMemHostUnregister), "cuMemHostUnregister"},
+
+	/* OpenGL interop entry points are only resolved on non-Windows
+	 * builds; the matching struct members are guarded the same way. */
+#ifndef _WIN32
+	{offsetof(CudaFunctions, cuGLGetDevices), "cuGLGetDevices_v2"},
+	{offsetof(CudaFunctions, cuGraphicsGLRegisterImage),
+	 "cuGraphicsGLRegisterImage"},
+	{offsetof(CudaFunctions, cuGraphicsUnregisterResource),
+	 "cuGraphicsUnregisterResource"},
+	{offsetof(CudaFunctions, cuGraphicsMapResources),
+	 "cuGraphicsMapResources"},
+	{offsetof(CudaFunctions, cuGraphicsUnmapResources),
+	 "cuGraphicsUnmapResources"},
+	{offsetof(CudaFunctions, cuGraphicsSubResourceGetMappedArray),
+	 "cuGraphicsSubResourceGetMappedArray"},
+#endif
+};
+
+/* Number of entries in cuda_functions. */
+static const size_t num_cuda_funcs =
+	sizeof(cuda_functions) / sizeof(cuda_function);
+
+/* Loads the CUDA driver library and fills the global `cu` function table.
+ *
+ * The work is attempted only once per process: the outcome is cached in
+ * function-local statics and returned on every later call. The statics are
+ * not protected here; init_cuda() calls this function with init_mutex held.
+ *
+ * On failure a message is stored on `encoder` via
+ * obs_encoder_set_last_error(), and anything acquired so far (function table
+ * and library handle) is released so that no half-initialised state remains.
+ *
+ * Returns true when every entry of cuda_functions was resolved. */
+static bool init_cuda_internal(obs_encoder_t *encoder)
+{
+	static bool initialized = false;
+	static bool success = false;
+
+	if (initialized)
+		return success;
+	initialized = true;
+
+	if (!load_cuda_lib()) {
+		obs_encoder_set_last_error(encoder,
+					   "Loading CUDA library failed.");
+		return false;
+	}
+
+	cu = bzalloc(sizeof(CudaFunctions));
+
+	for (size_t idx = 0; idx < num_cuda_funcs; idx++) {
+		const cuda_function *func = &cuda_functions[idx];
+		void *fptr = load_cuda_func(func->name);
+
+		if (!fptr) {
+			blog(LOG_ERROR,
+			     "[obs-nvenc] Failed to find CUDA function: %s",
+			     func->name);
+			obs_encoder_set_last_error(
+				encoder, "Loading CUDA functions failed.");
+
+			/* Do not leave a partially filled table or an open
+			 * library handle behind: initialisation is never
+			 * retried, so nothing would ever use them. */
+			bfree(cu);
+			cu = NULL;
+			os_dlclose(cuda_lib);
+			cuda_lib = NULL;
+			return false;
+		}
+
+		/* Store the pointer into the member located at the recorded
+		 * byte offset inside the table. */
+		*(uintptr_t *)((uintptr_t)cu + func->offset) = (uintptr_t)fptr;
+	}
+
+	success = true;
+	return true;
+}
+
+/* Fetches the symbolic name and the description for `res` through the loaded
+ * CUDA function table. Returns true only when both lookups succeed; the
+ * description is not queried if the name lookup already failed. */
+bool cuda_get_error_desc(CUresult res, const char **name, const char **desc)
+{
+	return cu->cuGetErrorName(res, name) == CUDA_SUCCESS &&
+	       cu->cuGetErrorString(res, desc) == CUDA_SUCCESS;
+}
+
+/* Turns a CUDA result code into a boolean.
+ *
+ * Returns true for CUDA_SUCCESS. For any other code a message naming the
+ * calling function (`func`) and the failed expression (`call`) is built,
+ * logged through error(), stored as the encoder's last error, and false is
+ * returned. */
+bool cuda_error_check(struct nvenc_data *enc, CUresult res, const char *func,
+		      const char *call)
+{
+	struct dstr message = {0};
+	const char *name, *desc;
+
+	if (res == CUDA_SUCCESS)
+		return true;
+
+	if (!cuda_get_error_desc(res, &name, &desc)) {
+		/* No readable description available: report the raw code. */
+		dstr_printf(&message, "%s: CUDA call \"%s\" failed with %d",
+			    func, call, res);
+	} else {
+		dstr_printf(&message,
+			    "%s: CUDA call \"%s\" failed with %s (%d): %s",
+			    func, call, name, res, desc);
+	}
+
+	error("%s", message.array);
+	obs_encoder_set_last_error(enc->encoder, message.array);
+	dstr_free(&message);
+
+	return false;
+}
+
+/* Public entry point for CUDA initialisation: runs init_cuda_internal()
+ * while holding init_mutex and passes its result through. */
+bool init_cuda(obs_encoder_t *encoder)
+{
+	pthread_mutex_lock(&init_mutex);
+	const bool ok = init_cuda_internal(encoder);
+	pthread_mutex_unlock(&init_mutex);
+
+	return ok;
+}
+
+/* Initialises init_mutex with default attributes.
+ * NOTE(review): init_mutex is also statically initialised with
+ * PTHREAD_MUTEX_INITIALIZER at its definition; presumably this call exists
+ * to pair with the destroy in obs_cuda_unload() - confirm the intent. */
+void obs_cuda_load(void)
+{
+	pthread_mutex_init(&init_mutex, NULL);
+}
+
+/* Releases everything the CUDA helper acquired: the function table, the
+ * driver library handle and the init mutex. The globals are reset so that no
+ * dangling pointer to the freed table or closed library remains visible. */
+void obs_cuda_unload(void)
+{
+	bfree(cu);
+	cu = NULL;
+
+	/* The handle is NULL when the library was never opened successfully. */
+	if (cuda_lib) {
+		os_dlclose(cuda_lib);
+		cuda_lib = NULL;
+	}
+
+	pthread_mutex_destroy(&init_mutex);
+}

+ 66 - 0
plugins/obs-nvenc/cuda-helpers.h

@@ -0,0 +1,66 @@
+#pragma once
+
+#include <obs-module.h>
+
+#include <ffnvcodec/dynlink_cuda.h>
+
+/* Missing from FFmpeg headers: function types and constants that
+ * <ffnvcodec/dynlink_cuda.h> does not provide, declared locally so they can
+ * be used in the table below and by callers. */
+typedef CUresult CUDAAPI tcuMemHostRegister(void *p, size_t bytesize,
+					    unsigned int Flags);
+typedef CUresult CUDAAPI tcuMemHostUnregister(void *p);
+
+#define CUDA_ERROR_INVALID_GRAPHICS_CONTEXT 219
+#define CUDA_ARRAY3D_SURFACE_LDST 0x02
+
+/* Table of CUDA driver entry points resolved at runtime. The members are
+ * filled in by cuda-helpers.c from its cuda_functions symbol list; several
+ * of them use the "_v2" function types. */
+typedef struct CudaFunctions {
+	tcuInit *cuInit;
+
+	tcuDeviceGetCount *cuDeviceGetCount;
+	tcuDeviceGet *cuDeviceGet;
+	tcuDeviceGetAttribute *cuDeviceGetAttribute;
+
+	tcuCtxCreate_v2 *cuCtxCreate;
+	tcuCtxDestroy_v2 *cuCtxDestroy;
+	tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
+	tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
+
+	tcuArray3DCreate *cuArray3DCreate;
+	tcuArrayDestroy *cuArrayDestroy;
+	tcuMemcpy2D_v2 *cuMemcpy2D;
+
+	tcuGetErrorName *cuGetErrorName;
+	tcuGetErrorString *cuGetErrorString;
+
+	tcuMemHostRegister *cuMemHostRegister;
+	tcuMemHostUnregister *cuMemHostUnregister;
+
+	/* OpenGL interop members exist only on non-Windows builds. */
+#ifndef _WIN32
+	tcuGLGetDevices_v2 *cuGLGetDevices;
+	tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
+	tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
+	tcuGraphicsMapResources *cuGraphicsMapResources;
+	tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
+	tcuGraphicsSubResourceGetMappedArray
+		*cuGraphicsSubResourceGetMappedArray;
+#endif
+} CudaFunctions;
+
+extern CudaFunctions *cu;
+
+/* Loads the CUDA library and function table (once); errors are reported on
+ * `encoder`. Returns true on success. */
+bool init_cuda(obs_encoder_t *encoder);
+/* Retrieves the name and description strings for `res`; returns false when
+ * either lookup fails. */
+bool cuda_get_error_desc(CUresult res, const char **name, const char **desc);
+
+struct nvenc_data;
+/* Returns true when `res` is CUDA_SUCCESS; otherwise logs the failure of
+ * `call` inside `func`, records it on the encoder and returns false. */
+bool cuda_error_check(struct nvenc_data *enc, CUresult res, const char *func,
+		      const char *call);
+
+/* CUDA error handling helpers.
+ *
+ * Both macros pass the result of `call` to cuda_error_check() together with
+ * the enclosing function name and the stringified call. They expect a
+ * variable named `enc` to be in scope at the expansion site.
+ *
+ * CU_FAILED: returns false from the enclosing function when the check fails.
+ * CU_CHECK:  sets a local named `success` to false and jumps to a label
+ *            named `unmap`; both must exist in the enclosing function.
+ *
+ * NOTE(review): the expansions are bare `if` statements (CU_FAILED even
+ * carries its own trailing semicolon), so do not use them as the body of an
+ * unbraced if/else. */
+#define CU_FAILED(call)                                        \
+	if (!cuda_error_check(enc, call, __FUNCTION__, #call)) \
+		return false;
+
+#define CU_CHECK(call)                                           \
+	if (!cuda_error_check(enc, call, __FUNCTION__, #call)) { \
+		success = false;                                 \
+		goto unmap;                                      \
+	}

+ 71 - 0
plugins/obs-nvenc/data/locale/en-US.ini

@@ -0,0 +1,71 @@
+RateControl="Rate Control"
+CBR="Constant Bitrate"
+VBR="Variable Bitrate"
+CQVBR="Variable Bitrate with Target Quality"
+CQP="Constant QP"
+Lossless="Lossless"
+
+Bitrate="Bitrate"
+MaxBitrate="Maximum Bitrate (0 = Level Limit)"
+TargetQuality="Target Quality"
+
+KeyframeIntervalSec="Keyframe interval (seconds, 0 = auto)"
+
+LookAhead="Look-ahead"
+LookAhead.ToolTip="Enables Lookahead.\n\nIf enabled, it will increase visual quality by determining a better bitrate distribution through analysis of future frames,\nat the cost of increased GPU utilization and latency."
+
+AdaptiveQuantization="Adaptive Quantization"
+AdaptiveQuantization.ToolTip="Enables Temporal/Spatial Adaptive Quantization which optimizes the use of bitrate for increased perceived visual quality,\nespecially in situations with high motion, at the cost of increased GPU utilization.\n\nFormerly known as \"Psycho-Visual Tuning\"."
+
+Preset="Preset"
+Preset.p1="P1: Fastest (Lowest Quality)"
+Preset.p2="P2: Faster (Lower Quality)"
+Preset.p3="P3: Fast (Low Quality)"
+Preset.p4="P4: Medium (Medium Quality)"
+Preset.p5="P5: Slow (Good Quality)"
+Preset.p6="P6: Slower (Better Quality)"
+Preset.p7="P7: Slowest (Best Quality)"
+
+Tuning.uhq="Ultra High Quality (slow, not recommended)"
+Tuning.hq="High Quality"
+Tuning.ll="Low Latency"
+Tuning.ull="Ultra Low Latency"
+
+Multipass="Multipass Mode"
+Multipass.disabled="Single Pass"
+Multipass.qres="Two Passes (Quarter Resolution)"
+Multipass.fullres="Two Passes (Full Resolution)"
+
+BFrames="B-Frames"
+BFrameRefMode="B-Frame as Reference"
+BframeRefMode.Disabled="Disabled"
+BframeRefMode.Each="Each"
+BframeRefMode.Middle="Middle b-frame only"
+
+SplitEncode="Split Encode"
+SplitEncode.Auto="Auto"
+SplitEncode.Disabled="Disabled"
+SplitEncode.Enabled="Two-way split"
+SplitEncode.ThreeWay="Three-way split"
+
+Opts="Custom Encoder Options"
+Opts.TT="Space-separated list of options to apply to the rate control and codec settings,\nbased on their names in the nvEncodeAPI header.\ne.g. \"lookaheadDepth=16 aqStrength=4\""
+
+Error="Failed to open NVENC codec: %1"
+GenericError="Try installing the latest <a href=\"https://obsproject.com/go/nvidia-drivers\">NVIDIA driver</a> and closing other recording software that might be using NVENC such as NVIDIA ShadowPlay or Windows Game DVR."
+BadGPUIndex="You have selected GPU %1 in your output encoder settings. Set this back to 0 and try again."
+OutdatedDriver="The installed NVIDIA driver does not support this NVENC version, try <a href=\"https://obsproject.com/go/nvidia-drivers\">updating the driver</a>."
+UnsupportedDevice="NVENC Error: Unsupported device. Check that your video card <a href=\"https://obsproject.com/go/nvenc-matrix\">supports NVENC</a> and try <a href=\"https://obsproject.com/go/nvidia-drivers\">updating the driver</a>."
+TooManySessions="NVENC Error: Too many concurrent sessions. Try closing other recording software that might be using NVENC such as NVIDIA ShadowPlay or Windows Game DVR."
+CheckDrivers="Try installing the latest <a href=\"https://obsproject.com/go/nvidia-drivers\">NVIDIA driver</a>."
+
+8bitUnsupportedHdr="OBS does not support 8-bit output of Rec. 2100."
+I010Unsupported="NVENC does not support I010. Use P010 instead."
+10bitUnsupported="Cannot perform 10-bit encode on this encoder."
+16bitUnsupported="Cannot perform 16-bit encode on this encoder."
+444Unsupported="Cannot perform 4:4:4 encode on this encoder."
+
+# Legacy strings, to be removed once compat encoders are removed
+CQLevel="CQ Level"
+PsychoVisualTuning="Psycho Visual Tuning"
+PsychoVisualTuning.ToolTip="Enables encoder settings that optimize the use of bitrate for increased perceived visual quality,\nespecially in situations with high motion, at the cost of increased GPU utilization."

+ 457 - 0
plugins/obs-nvenc/nvenc-compat.c

@@ -0,0 +1,457 @@
+#include "nvenc-helpers.h"
+
+#include <util/dstr.h>
+
+/*
+ * Compatibility encoder objects for pre-31.0 encoder compatibility.
+ *
+ * All they do is update the settings object, and then reroute to one of the
+ * new encoder implementations.
+ *
+ * This should be removed once NVENC settings are migrated directly and
+ * backwards-compatibility is no longer required.
+ */
+
+/* ------------------------------------------------------------------------- */
+/* Actual redirector implementation.                                         */
+
+/* Rewrites a legacy settings object in place so that it uses the keys read
+ * by the new encoder implementations:
+ *   "preset2"   is copied to "preset",
+ *   "psycho_aq" is copied to "adaptive_quantization",
+ *   "gpu"       is copied to "device" (only when set by the user and more
+ *               than one encoder device is reported),
+ * and an unsupported "lossless" rate control is replaced by CQP 0. */
+static void migrate_settings(obs_data_t *settings, enum codec_type codec)
+{
+	struct encoder_caps *caps = get_encoder_caps(codec);
+
+	obs_data_set_string(settings, "preset",
+			    obs_data_get_string(settings, "preset2"));
+
+	/* Old NVENC allowed lossless even if unsupported,
+	 * and just emulated it via CQP 0, do the same here. */
+	const char *rate_control =
+		obs_data_get_string(settings, "rate_control");
+	const bool wants_lossless = strcmp(rate_control, "lossless") == 0;
+	if (wants_lossless && !caps->lossless) {
+		obs_data_set_string(settings, "rate_control", "CQP");
+		obs_data_set_int(settings, "cqp", 0);
+	}
+
+	const bool psycho_aq = obs_data_get_bool(settings, "psycho_aq");
+	obs_data_set_bool(settings, "adaptive_quantization", psycho_aq);
+
+	if (!obs_data_has_user_value(settings, "gpu") ||
+	    num_encoder_devices() <= 1)
+		return;
+
+	obs_data_set_int(settings, "device",
+			 obs_data_get_int(settings, "gpu"));
+}
+
+/* Migrates `settings` to the new layout and then creates the replacement
+ * encoder for `codec` via obs_encoder_create_rerouted(). `texture` selects
+ * between the "_tex" and "_soft" encoder ids. Returns NULL for a codec value
+ * that is not handled. */
+static void *nvenc_reroute(enum codec_type codec, obs_data_t *settings,
+			   obs_encoder_t *encoder, bool texture)
+{
+	const char *target_id = NULL;
+
+	/* Update settings object to v2 encoder configuration */
+	migrate_settings(settings, codec);
+
+	switch (codec) {
+	case CODEC_H264:
+		target_id = texture ? "obs_nvenc_h264_tex"
+				    : "obs_nvenc_h264_soft";
+		break;
+	case CODEC_HEVC:
+		target_id = texture ? "obs_nvenc_hevc_tex"
+				    : "obs_nvenc_hevc_soft";
+		break;
+	case CODEC_AV1:
+		target_id = texture ? "obs_nvenc_av1_tex"
+				    : "obs_nvenc_av1_soft";
+		break;
+	}
+
+	if (!target_id)
+		return NULL;
+
+	return obs_encoder_create_rerouted(encoder, target_id);
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* Display name of the deprecated H.264 compatibility encoders. */
+static const char *h264_nvenc_get_name(void *type_data)
+{
+	UNUSED_PARAMETER(type_data);
+	return "NVIDIA NVENC H.264 (deprecated)";
+}
+
+#ifdef ENABLE_HEVC
+/* Display name of the deprecated HEVC compatibility encoders. */
+static const char *hevc_nvenc_get_name(void *type_data)
+{
+	UNUSED_PARAMETER(type_data);
+	return "NVIDIA NVENC HEVC (deprecated)";
+}
+#endif
+
+/* Display name of the deprecated AV1 compatibility encoders. */
+static const char *av1_nvenc_get_name(void *type_data)
+{
+	UNUSED_PARAMETER(type_data);
+	return "NVIDIA NVENC AV1 (deprecated)";
+}
+
+/* Create callbacks of the compatibility encoders. Each one forwards to
+ * nvenc_reroute() with its codec; the plain variants request the texture
+ * encoder (texture = true), the "_soft" variants the non-texture one. */
+static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	return nvenc_reroute(CODEC_H264, settings, encoder, true);
+}
+
+#ifdef ENABLE_HEVC
+static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	return nvenc_reroute(CODEC_HEVC, settings, encoder, true);
+}
+#endif
+
+static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	return nvenc_reroute(CODEC_AV1, settings, encoder, true);
+}
+
+/* Non-texture ("soft") variants. */
+static void *h264_nvenc_soft_create(obs_data_t *settings,
+				    obs_encoder_t *encoder)
+{
+	return nvenc_reroute(CODEC_H264, settings, encoder, false);
+}
+
+#ifdef ENABLE_HEVC
+static void *hevc_nvenc_soft_create(obs_data_t *settings,
+				    obs_encoder_t *encoder)
+{
+	return nvenc_reroute(CODEC_HEVC, settings, encoder, false);
+}
+#endif
+
+static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	return nvenc_reroute(CODEC_AV1, settings, encoder, false);
+}
+
+/* Installs the default values of the legacy settings keys on `settings`.
+ * Only the default profile depends on `codec`: "high" for H.264 and "main"
+ * for every other codec. */
+static void nvenc_defaults_base(enum codec_type codec, obs_data_t *settings)
+{
+	const char *default_profile = (codec == CODEC_H264) ? "high" : "main";
+
+	/* Defaults from legacy FFmpeg encoder */
+
+	/* Integer settings */
+	obs_data_set_default_int(settings, "bitrate", 2500);
+	obs_data_set_default_int(settings, "max_bitrate", 5000);
+	obs_data_set_default_int(settings, "keyint_sec", 0);
+	obs_data_set_default_int(settings, "cqp", 20);
+	obs_data_set_default_int(settings, "gpu", 0);
+	obs_data_set_default_int(settings, "bf", 2);
+
+	/* String settings */
+	obs_data_set_default_string(settings, "rate_control", "CBR");
+	obs_data_set_default_string(settings, "preset2", "p5");
+	obs_data_set_default_string(settings, "multipass", "qres");
+	obs_data_set_default_string(settings, "tune", "hq");
+	obs_data_set_default_string(settings, "profile", default_profile);
+
+	/* Boolean settings */
+	obs_data_set_default_bool(settings, "psycho_aq", true);
+	obs_data_set_default_bool(settings, "repeat_headers", false);
+}
+
+/* Per-codec get_defaults callbacks; each forwards to nvenc_defaults_base()
+ * with its codec. */
+static void h264_nvenc_defaults(obs_data_t *settings)
+{
+	nvenc_defaults_base(CODEC_H264, settings);
+}
+
+#ifdef ENABLE_HEVC
+static void hevc_nvenc_defaults(obs_data_t *settings)
+{
+	nvenc_defaults_base(CODEC_HEVC, settings);
+}
+#endif
+
+static void av1_nvenc_defaults(obs_data_t *settings)
+{
+	nvenc_defaults_base(CODEC_AV1, settings);
+}
+
+/* Modified-callback of the "rate_control" list: shows or hides the
+ * dependent properties according to the selected mode (compared without
+ * regard to case). Always returns true. */
+static bool rate_control_modified(obs_properties_t *ppts, obs_property_t *p,
+				  obs_data_t *settings)
+{
+	UNUSED_PARAMETER(p);
+
+	const char *mode = obs_data_get_string(settings, "rate_control");
+	const bool is_cqp = astrcmpi(mode, "CQP") == 0;
+	const bool is_vbr = astrcmpi(mode, "VBR") == 0;
+	const bool is_lossless = astrcmpi(mode, "lossless") == 0;
+
+	/* Bitrate is meaningless for CQP and lossless. */
+	obs_property_set_visible(obs_properties_get(ppts, "bitrate"),
+				 !(is_cqp || is_lossless));
+	obs_property_set_visible(obs_properties_get(ppts, "max_bitrate"),
+				 is_vbr);
+	obs_property_set_visible(obs_properties_get(ppts, "cqp"), is_cqp);
+
+	/* Preset and tuning are hidden in lossless mode. */
+	obs_property_set_visible(obs_properties_get(ppts, "preset2"),
+				 !is_lossless);
+	obs_property_set_visible(obs_properties_get(ppts, "tune"),
+				 !is_lossless);
+
+	return true;
+}
+
+/* Builds the property sheet of the legacy (compat) encoders for `codec`.
+ * The keys created here are the legacy ones ("preset2", "psycho_aq", "gpu",
+ * ...) that migrate_settings() later translates. `codec` only affects the
+ * upper bound of "cqp" and the entries of the "profile" list.
+ * Returns the newly created properties object. */
+static obs_properties_t *nvenc_properties_internal(enum codec_type codec)
+{
+	obs_properties_t *props = obs_properties_create();
+	obs_property_t *p;
+
+	/* Rate control; changing it toggles visibility of dependent
+	 * properties through rate_control_modified(). */
+	p = obs_properties_add_list(props, "rate_control",
+				    obs_module_text("RateControl"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+	obs_property_list_add_string(p, "CBR", "CBR");
+	obs_property_list_add_string(p, "CQP", "CQP");
+	obs_property_list_add_string(p, "VBR", "VBR");
+	obs_property_list_add_string(p, obs_module_text("Lossless"),
+				     "lossless");
+
+	obs_property_set_modified_callback(p, rate_control_modified);
+
+	p = obs_properties_add_int(props, "bitrate", obs_module_text("Bitrate"),
+				   50, 300000, 50);
+	obs_property_int_set_suffix(p, " Kbps");
+	p = obs_properties_add_int(props, "max_bitrate",
+				   obs_module_text("MaxBitrate"), 50, 300000,
+				   50);
+	obs_property_int_set_suffix(p, " Kbps");
+
+	/* Upper bound is 63 for AV1 and 51 for the other codecs. */
+	obs_properties_add_int(props, "cqp", obs_module_text("CQLevel"), 1,
+			       codec == CODEC_AV1 ? 63 : 51, 1);
+
+	p = obs_properties_add_int(props, "keyint_sec",
+				   obs_module_text("KeyframeIntervalSec"), 0,
+				   10, 1);
+	obs_property_int_set_suffix(p, " s");
+
+	p = obs_properties_add_list(props, "preset2", obs_module_text("Preset"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+	/* Each helper macro below adds one entry whose label is looked up
+	 * under "<Group>.<val>" and whose stored value is `val`. */
+#define add_preset(val) \
+	obs_property_list_add_string(p, obs_module_text("Preset." val), val)
+
+	add_preset("p1");
+	add_preset("p2");
+	add_preset("p3");
+	add_preset("p4");
+	add_preset("p5");
+	add_preset("p6");
+	add_preset("p7");
+#undef add_preset
+
+	/* NOTE(review): the en-US locale in this change defines Tuning.* but
+	 * no plain "Tuning" key (same for "Profile" and "GPU" below) -
+	 * confirm what obs_module_text() returns for a missing key. */
+	p = obs_properties_add_list(props, "tune", obs_module_text("Tuning"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+#define add_tune(val) \
+	obs_property_list_add_string(p, obs_module_text("Tuning." val), val)
+	add_tune("hq");
+	add_tune("ll");
+	add_tune("ull");
+#undef add_tune
+
+	p = obs_properties_add_list(props, "multipass",
+				    obs_module_text("Multipass"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+#define add_multipass(val) \
+	obs_property_list_add_string(p, obs_module_text("Multipass." val), val)
+	add_multipass("disabled");
+	add_multipass("qres");
+	add_multipass("fullres");
+#undef add_multipass
+
+	p = obs_properties_add_list(props, "profile",
+				    obs_module_text("Profile"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+	/* Profile names are used verbatim as both label and value. */
+#define add_profile(val) obs_property_list_add_string(p, val, val)
+	if (codec == CODEC_HEVC) {
+		add_profile("main10");
+		add_profile("main");
+	} else if (codec == CODEC_AV1) {
+		add_profile("main");
+	} else {
+		add_profile("high");
+		add_profile("main");
+		add_profile("baseline");
+	}
+#undef add_profile
+
+	p = obs_properties_add_bool(props, "lookahead",
+				    obs_module_text("LookAhead"));
+	obs_property_set_long_description(p,
+					  obs_module_text("LookAhead.ToolTip"));
+	/* Present in the property sheet but never shown to the user. */
+	p = obs_properties_add_bool(props, "repeat_headers", "repeat_headers");
+	obs_property_set_visible(p, false);
+
+	p = obs_properties_add_bool(props, "psycho_aq",
+				    obs_module_text("PsychoVisualTuning"));
+	obs_property_set_long_description(
+		p, obs_module_text("PsychoVisualTuning.ToolTip"));
+
+	obs_properties_add_int(props, "gpu", obs_module_text("GPU"), 0, 8, 1);
+
+	obs_properties_add_int(props, "bf", obs_module_text("BFrames"), 0, 4,
+			       1);
+
+	return props;
+}
+
+/* Per-codec get_properties callbacks; each ignores its argument and
+ * forwards to nvenc_properties_internal() with its codec. */
+static obs_properties_t *h264_nvenc_properties(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+	return nvenc_properties_internal(CODEC_H264);
+}
+
+#ifdef ENABLE_HEVC
+static obs_properties_t *hevc_nvenc_properties(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+	return nvenc_properties_internal(CODEC_HEVC);
+}
+#endif
+
+static obs_properties_t *av1_nvenc_properties(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+	return nvenc_properties_internal(CODEC_AV1);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Stubs for required - but unused - functions.                              */
+
+/* Destroy stub: does nothing. */
+static void fake_nvenc_destroy(void *p)
+{
+	UNUSED_PARAMETER(p);
+}
+
+/* Encode stub for the non-texture compat encoders: ignores every argument
+ * (received_packet is not written) and reports success. */
+static bool fake_encode(void *data, struct encoder_frame *frame,
+			struct encoder_packet *packet, bool *received_packet)
+{
+	UNUSED_PARAMETER(data);
+	UNUSED_PARAMETER(frame);
+	UNUSED_PARAMETER(packet);
+	UNUSED_PARAMETER(received_packet);
+
+	return true;
+}
+
+/* Texture-encode stub for the texture compat encoders: ignores every
+ * argument (neither next_key nor received_packet is written) and reports
+ * success. */
+static bool fake_encode_tex2(void *data, struct encoder_texture *texture,
+			     int64_t pts, uint64_t lock_key, uint64_t *next_key,
+			     struct encoder_packet *packet,
+			     bool *received_packet)
+{
+	UNUSED_PARAMETER(data);
+	UNUSED_PARAMETER(texture);
+	UNUSED_PARAMETER(pts);
+	UNUSED_PARAMETER(lock_key);
+	UNUSED_PARAMETER(next_key);
+	UNUSED_PARAMETER(packet);
+	UNUSED_PARAMETER(received_packet);
+
+	return true;
+}
+
+/* Encoder descriptors of the compatibility encoders. All of them carry
+ * OBS_ENCODER_CAP_DEPRECATED, use the stub destroy/encode callbacks above,
+ * and their create callbacks reroute to the new encoders. */
+
+/* Texture-based H.264 compat encoder, id "jim_nvenc". */
+struct obs_encoder_info compat_h264_nvenc_info = {
+	.id = "jim_nvenc",
+	.codec = "h264",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
+		OBS_ENCODER_CAP_ROI | OBS_ENCODER_CAP_DEPRECATED,
+	.get_name = h264_nvenc_get_name,
+	.create = h264_nvenc_create,
+	.destroy = fake_nvenc_destroy,
+	.encode_texture2 = fake_encode_tex2,
+	.get_defaults = h264_nvenc_defaults,
+	.get_properties = h264_nvenc_properties,
+};
+
+#ifdef ENABLE_HEVC
+/* Texture-based HEVC compat encoder, id "jim_hevc_nvenc". */
+struct obs_encoder_info compat_hevc_nvenc_info = {
+	.id = "jim_hevc_nvenc",
+	.codec = "hevc",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
+		OBS_ENCODER_CAP_ROI | OBS_ENCODER_CAP_DEPRECATED,
+	.get_name = hevc_nvenc_get_name,
+	.create = hevc_nvenc_create,
+	.destroy = fake_nvenc_destroy,
+	.encode_texture2 = fake_encode_tex2,
+	.get_defaults = hevc_nvenc_defaults,
+	.get_properties = hevc_nvenc_properties,
+};
+#endif
+
+/* Texture-based AV1 compat encoder, id "jim_av1_nvenc". */
+struct obs_encoder_info compat_av1_nvenc_info = {
+	.id = "jim_av1_nvenc",
+	.codec = "av1",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
+		OBS_ENCODER_CAP_ROI | OBS_ENCODER_CAP_DEPRECATED,
+	.get_name = av1_nvenc_get_name,
+	.create = av1_nvenc_create,
+	.destroy = fake_nvenc_destroy,
+	.encode_texture2 = fake_encode_tex2,
+	.get_defaults = av1_nvenc_defaults,
+	.get_properties = av1_nvenc_properties,
+};
+
+/* Non-texture H.264 compat encoder, id "obs_nvenc_h264_cuda". Not const:
+ * register_compat_encoders() rewrites .id to register it a second time. */
+struct obs_encoder_info compat_h264_nvenc_soft_info = {
+	.id = "obs_nvenc_h264_cuda",
+	.codec = "h264",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
+		OBS_ENCODER_CAP_DEPRECATED,
+	.get_name = h264_nvenc_get_name,
+	.create = h264_nvenc_soft_create,
+	.destroy = fake_nvenc_destroy,
+	.encode = fake_encode,
+	.get_defaults = h264_nvenc_defaults,
+	.get_properties = h264_nvenc_properties,
+};
+
+#ifdef ENABLE_HEVC
+/* Non-texture HEVC compat encoder, id "obs_nvenc_hevc_cuda". Not const for
+ * the same reason as the H.264 variant. */
+struct obs_encoder_info compat_hevc_nvenc_soft_info = {
+	.id = "obs_nvenc_hevc_cuda",
+	.codec = "hevc",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
+		OBS_ENCODER_CAP_DEPRECATED,
+	.get_name = hevc_nvenc_get_name,
+	.create = hevc_nvenc_soft_create,
+	.destroy = fake_nvenc_destroy,
+	.encode = fake_encode,
+	.get_defaults = hevc_nvenc_defaults,
+	.get_properties = hevc_nvenc_properties,
+};
+#endif
+
+/* Non-texture AV1 compat encoder, id "obs_nvenc_av1_cuda". */
+struct obs_encoder_info compat_av1_nvenc_soft_info = {
+	.id = "obs_nvenc_av1_cuda",
+	.codec = "av1",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
+		OBS_ENCODER_CAP_DEPRECATED,
+	.get_name = av1_nvenc_get_name,
+	.create = av1_nvenc_soft_create,
+	.destroy = fake_nvenc_destroy,
+	.encode = fake_encode,
+	.get_defaults = av1_nvenc_defaults,
+	.get_properties = av1_nvenc_properties,
+};
+
+/* Registers every compatibility encoder with libobs.
+ *
+ * H.264 is always registered, HEVC only when ENABLE_HEVC is defined, and AV1
+ * only when is_codec_supported(CODEC_AV1) reports support.
+ *
+ * With REGISTER_FFMPEG_IDS defined, the non-texture H.264/HEVC descriptors
+ * are registered a second time under the ids "ffmpeg_nvenc" and
+ * "ffmpeg_hevc_nvenc" by overwriting their .id field in place.
+ * NOTE(review): this relies on obs_register_encoder() not keeping a
+ * reference to the descriptor passed in the first call - confirm. */
+void register_compat_encoders(void)
+{
+	obs_register_encoder(&compat_h264_nvenc_info);
+	obs_register_encoder(&compat_h264_nvenc_soft_info);
+#ifdef ENABLE_HEVC
+	obs_register_encoder(&compat_hevc_nvenc_info);
+	obs_register_encoder(&compat_hevc_nvenc_soft_info);
+#endif
+	if (is_codec_supported(CODEC_AV1)) {
+		obs_register_encoder(&compat_av1_nvenc_info);
+		obs_register_encoder(&compat_av1_nvenc_soft_info);
+	}
+
+#ifdef REGISTER_FFMPEG_IDS
+	compat_h264_nvenc_soft_info.id = "ffmpeg_nvenc";
+	obs_register_encoder(&compat_h264_nvenc_soft_info);
+#ifdef ENABLE_HEVC
+	compat_hevc_nvenc_soft_info.id = "ffmpeg_hevc_nvenc";
+	obs_register_encoder(&compat_hevc_nvenc_soft_info);
+#endif
+#endif
+}

+ 345 - 0
plugins/obs-nvenc/nvenc-cuda.c

@@ -0,0 +1,345 @@
+#include "nvenc-internal.h"
+#include "nvenc-helpers.h"
+
+/*
+ * NVENC implementation using CUDA context and arrays
+ */
+
+/* ------------------------------------------------------------------------- */
+/* CUDA Context management                                                   */
+
+/* Creates the CUDA context stored in enc->cu_ctx.
+ *
+ * enc:      encoder instance that receives the context.
+ * settings: read for "device" (GPU index) and, on non-Windows builds,
+ *           "force_cuda_tex".
+ * texture:  whether the texture-based encoder is being set up.
+ *
+ * Returns true on success. On Windows the function returns true right away
+ * when `texture` is set, without creating a context.
+ * NOTE(review): CU_FAILED is defined outside this file; it presumably returns
+ * false from this function when the wrapped call fails - confirm. */
+bool cuda_ctx_init(struct nvenc_data *enc, obs_data_t *settings,
+		   const bool texture)
+{
+#ifdef _WIN32
+	if (texture)
+		return true;
+#endif
+
+	int count;
+	CUdevice device;
+
+	int gpu = (int)obs_data_get_int(settings, "device");
+#ifndef _WIN32
+	/* CUDA can do fairly efficient cross-GPU OpenGL mappings, allow it as
+	 * a hidden option for experimentation. */
+	bool force_cuda_tex = obs_data_get_bool(settings, "force_cuda_tex");
+#endif
+
+	/* A device setting of -1 is mapped to device index 0. */
+	if (gpu == -1)
+		gpu = 0;
+
+	CU_FAILED(cu->cuInit(0))
+	CU_FAILED(cu->cuDeviceGetCount(&count))
+	if (!count) {
+		NV_FAIL("No CUDA devices found");
+		return false;
+	}
+#ifdef _WIN32
+	CU_FAILED(cu->cuDeviceGet(&device, gpu))
+#else
+	if (!texture || force_cuda_tex) {
+		CU_FAILED(cu->cuDeviceGet(&device, gpu))
+	} else {
+		/* Texture mode: use the CUDA device that belongs to the
+		 * current OpenGL context instead of the configured index. */
+		unsigned int ctx_count = 0;
+		CUdevice devices[2];
+
+		obs_enter_graphics();
+		CUresult res = cu->cuGLGetDevices(&ctx_count, devices, 2,
+						  CU_GL_DEVICE_LIST_ALL);
+		obs_leave_graphics();
+
+		if (res != CUDA_SUCCESS || !ctx_count) {
+			/* Probably running on iGPU, should just fall back to
+			 * non-texture encoder. */
+			if (res == CUDA_ERROR_INVALID_GRAPHICS_CONTEXT) {
+				info("Not running on NVIDIA GPU, falling back "
+				     "to non-texture encoder");
+			} else {
+				const char *name, *desc;
+				if (cuda_get_error_desc(res, &name, &desc)) {
+					error("Failed to get a CUDA device for "
+					      "the current OpenGL context: "
+					      "%s: %s",
+					      name, desc);
+				} else {
+					error("Failed to get a CUDA device for "
+					      "the current OpenGL context: %d",
+					      res);
+				}
+			}
+			return false;
+		}
+
+		/* Documentation indicates this should only ever happen with
+		 * SLI, i.e. never for OBS. */
+		if (ctx_count > 1) {
+			warn("Got more than one CUDA devices for OpenGL context,"
+			     " this is untested.");
+		}
+
+		device = devices[0];
+		debug("Loading up CUDA on device %u", device);
+	}
+#endif
+	/* The new context is popped again right after creation; functions
+	 * that need it push enc->cu_ctx themselves. */
+	CU_FAILED(cu->cuCtxCreate(&enc->cu_ctx, 0, device))
+	CU_FAILED(cu->cuCtxPopCurrent(NULL))
+
+	return true;
+}
+
+/* Destroys the encoder's CUDA context, if one was created. */
+void cuda_ctx_free(struct nvenc_data *enc)
+{
+	if (!enc->cu_ctx)
+		return;
+
+	cu->cuCtxPopCurrent(NULL);
+	cu->cuCtxDestroy(enc->cu_ctx);
+}
+
+/* ------------------------------------------------------------------------- */
+/* CUDA Surface management                                                   */
+
+/* Allocates one CUDA array sized for an input frame and registers it with
+ * the NVENC session.
+ *
+ * enc:    encoder instance; its cx/cy, non_texture and surface_format fields
+ *         select the array layout.
+ * nvsurf: output; on success tex, res and mapped_res (NULL) are filled in.
+ *
+ * Returns true on success. If NVENC registration fails, the array allocated
+ * here is destroyed again before returning false, so the caller has nothing
+ * to clean up. The caller is expected to have enc->cu_ctx current. */
+static bool cuda_surface_init(struct nvenc_data *enc,
+			      struct nv_cuda_surface *nvsurf)
+{
+	const bool p010 = obs_p010_tex_active();
+	CUDA_ARRAY3D_DESCRIPTOR desc;
+	desc.Width = enc->cx;
+	desc.Height = enc->cy;
+	desc.Depth = 0;
+	desc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
+	desc.NumChannels = 1;
+
+	if (!enc->non_texture) {
+		desc.Format = p010 ? CU_AD_FORMAT_UNSIGNED_INT16
+				   : CU_AD_FORMAT_UNSIGNED_INT8;
+		/* Luma plane plus a half-height plane for UV data */
+		desc.Height = enc->cy + enc->cy / 2;
+	} else {
+		switch (enc->surface_format) {
+		case NV_ENC_BUFFER_FORMAT_NV12:
+			desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
+			// Additional half-height plane for UV data
+			desc.Height += enc->cy / 2;
+			break;
+		case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+			desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
+			desc.Height += enc->cy / 2;
+			desc.NumChannels = 2; // number of bytes per element
+			break;
+		case NV_ENC_BUFFER_FORMAT_YUV444:
+			desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
+			desc.Height *= 3; // 3 full-size planes
+			break;
+		default:
+			error("Unknown input format: %d", enc->surface_format);
+			return false;
+		}
+	}
+
+	CU_FAILED(cu->cuArray3DCreate(&nvsurf->tex, &desc))
+
+	NV_ENC_REGISTER_RESOURCE res = {0};
+	res.version = NV_ENC_REGISTER_RESOURCE_VER;
+	res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY;
+	res.resourceToRegister = (void *)nvsurf->tex;
+	res.width = enc->cx;
+	res.height = enc->cy;
+	res.pitch = (uint32_t)(desc.Width * desc.NumChannels);
+	if (!enc->non_texture) {
+		res.bufferFormat = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
+					: NV_ENC_BUFFER_FORMAT_NV12;
+	} else {
+		res.bufferFormat = enc->surface_format;
+	}
+
+	if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) {
+		/* The surface never reaches enc->surfaces on failure, so the
+		 * array has to be released here or it would leak. */
+		cu->cuArrayDestroy(nvsurf->tex);
+		return false;
+	}
+
+	nvsurf->res = res.registeredResource;
+	nvsurf->mapped_res = NULL;
+	return true;
+}
+
+/* Picks the NVENC surface format from enc->in_format and allocates
+ * enc->buf_count CUDA surfaces into enc->surfaces.
+ *
+ * Returns true when every surface was created. On failure the surfaces that
+ * were already created stay in enc->surfaces (cuda_free_surfaces() releases
+ * them), and the CUDA context pushed here is popped again so the thread's
+ * context stack stays balanced. */
+bool cuda_init_surfaces(struct nvenc_data *enc)
+{
+	switch (enc->in_format) {
+	case VIDEO_FORMAT_P010:
+		enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+		break;
+	case VIDEO_FORMAT_I444:
+		enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV444;
+		break;
+	default:
+		enc->surface_format = NV_ENC_BUFFER_FORMAT_NV12;
+	}
+
+	da_reserve(enc->surfaces, enc->buf_count);
+
+	CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
+	for (uint32_t i = 0; i < enc->buf_count; i++) {
+		struct nv_cuda_surface buf;
+		if (!cuda_surface_init(enc, &buf)) {
+			/* Undo the push above before bailing out. */
+			cu->cuCtxPopCurrent(NULL);
+			return false;
+		}
+
+		da_push_back(enc->surfaces, &buf);
+	}
+	CU_FAILED(cu->cuCtxPopCurrent(NULL))
+
+	return true;
+}
+
+/* Releases the NVENC registration and the CUDA array of a single surface.
+ * Surfaces without a registered resource are left untouched. */
+static void cuda_surface_free(struct nvenc_data *enc,
+			      struct nv_cuda_surface *nvsurf)
+{
+	if (!nvsurf->res)
+		return;
+
+	/* A still-mapped input has to be unmapped before it is unregistered. */
+	if (nvsurf->mapped_res)
+		nv.nvEncUnmapInputResource(enc->session, nvsurf->mapped_res);
+
+	nv.nvEncUnregisterResource(enc->session, nvsurf->res);
+	cu->cuArrayDestroy(nvsurf->tex);
+}
+
+/* Frees every surface in enc->surfaces with the encoder's CUDA context made
+ * current. Does nothing when no context exists. */
+void cuda_free_surfaces(struct nvenc_data *enc)
+{
+	if (enc->cu_ctx) {
+		size_t idx = 0;
+
+		cu->cuCtxPushCurrent(enc->cu_ctx);
+		while (idx < enc->surfaces.num) {
+			cuda_surface_free(enc, &enc->surfaces.array[idx]);
+			idx++;
+		}
+		cu->cuCtxPopCurrent(NULL);
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Actual encoding stuff                                                     */
+
+/* Uploads one raw frame from host memory into the CUDA array of `surf`.
+ *
+ * enc:   encoder instance; cx/cy and surface_format select the plane layout.
+ * frame: source planes (data[]) and their strides (linesize[]).
+ * surf:  destination surface; planes are stacked vertically in surf->tex.
+ *
+ * Returns true when every plane was copied and the context was popped.
+ *
+ * Every failure after the context push goes through CU_CHECK so that the
+ * cleanup at `unmap` always runs: the page-locked host registrations are
+ * dropped and the CUDA context is popped. Previously a failed cuMemcpy2D
+ * returned straight away and skipped both.
+ * NOTE(review): CU_FAILED/CU_CHECK are defined outside this file; this code
+ * assumes CU_FAILED returns false and CU_CHECK sets `success = false` and
+ * jumps to `unmap` - confirm. */
+static inline bool copy_frame(struct nvenc_data *enc,
+			      struct encoder_frame *frame,
+			      struct nv_cuda_surface *surf)
+{
+	bool success = true;
+	size_t height = enc->cy;
+	size_t width = enc->cx;
+	CUDA_MEMCPY2D m = {0};
+
+	m.srcMemoryType = CU_MEMORYTYPE_HOST;
+	m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+	m.dstArray = surf->tex;
+	m.WidthInBytes = width;
+	m.Height = height;
+
+	CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
+
+	if (enc->surface_format == NV_ENC_BUFFER_FORMAT_NV12 ||
+	    enc->surface_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) {
+		/* Page-locks the host memory so that it can be DMAd directly
+		 * rather than CUDA doing an internal copy to page-locked
+		 * memory before actually DMA-ing to the GPU. */
+		CU_CHECK(cu->cuMemHostRegister(frame->data[0],
+					       frame->linesize[0] * height, 0))
+		CU_CHECK(cu->cuMemHostRegister(
+			frame->data[1], frame->linesize[1] * height / 2, 0))
+
+		// P010 lines are double the size (16 bit per pixel)
+		if (enc->surface_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT)
+			m.WidthInBytes *= 2;
+
+		/* Full-height luma plane */
+		m.srcPitch = frame->linesize[0];
+		m.srcHost = frame->data[0];
+		CU_CHECK(cu->cuMemcpy2D(&m))
+
+		/* Half-height interleaved chroma plane, stored below luma */
+		m.srcPitch = frame->linesize[1];
+		m.srcHost = frame->data[1];
+		m.dstY += height;
+		m.Height /= 2;
+		CU_CHECK(cu->cuMemcpy2D(&m))
+	} else { // I444
+		CU_CHECK(cu->cuMemHostRegister(frame->data[0],
+					       frame->linesize[0] * height, 0))
+		CU_CHECK(cu->cuMemHostRegister(frame->data[1],
+					       frame->linesize[1] * height, 0))
+		CU_CHECK(cu->cuMemHostRegister(frame->data[2],
+					       frame->linesize[2] * height, 0))
+
+		/* Three full-size planes stacked on top of each other */
+		m.srcPitch = frame->linesize[0];
+		m.srcHost = frame->data[0];
+		CU_CHECK(cu->cuMemcpy2D(&m))
+
+		m.srcPitch = frame->linesize[1];
+		m.srcHost = frame->data[1];
+		m.dstY += height;
+		CU_CHECK(cu->cuMemcpy2D(&m))
+
+		m.srcPitch = frame->linesize[2];
+		m.srcHost = frame->data[2];
+		m.dstY += height;
+		CU_CHECK(cu->cuMemcpy2D(&m))
+	}
+
+unmap:
+	if (frame->data[0])
+		cu->cuMemHostUnregister(frame->data[0]);
+	if (frame->data[1])
+		cu->cuMemHostUnregister(frame->data[1]);
+	if (frame->data[2])
+		cu->cuMemHostUnregister(frame->data[2]);
+
+	CU_FAILED(cu->cuCtxPopCurrent(NULL))
+
+	return success;
+}
+
+/* Encode callback for raw (non-texture) frames: uploads the frame into the
+ * CUDA surface at index enc->next_bitstream, maps that surface for NVENC and
+ * hands it to nvenc_encode_base(). Returns false if the upload or the
+ * mapping fails, otherwise the result of nvenc_encode_base(). */
+bool cuda_encode(void *data, struct encoder_frame *frame,
+		 struct encoder_packet *packet, bool *received_packet)
+{
+	struct nvenc_data *enc = data;
+	const size_t slot = enc->next_bitstream;
+	struct nv_bitstream *bs = &enc->bitstreams.array[slot];
+	struct nv_cuda_surface *surf = &enc->surfaces.array[slot];
+
+	deque_push_back(&enc->dts_list, &frame->pts, sizeof(frame->pts));
+
+	/* Step 1: copy the frame into the CUDA surface. */
+	if (!copy_frame(enc, frame, surf))
+		return false;
+
+	/* Step 2: map the surface so NVENC can read from it. */
+	NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
+	map.registeredResource = surf->res;
+	map.mappedBufferFmt = enc->surface_format;
+
+	if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map)))
+		return false;
+
+	surf->mapped_res = map.mappedResource;
+
+	/* Step 3: submit the mapped surface for encoding. */
+	return nvenc_encode_base(enc, bs, surf->mapped_res, frame->pts, packet,
+				 received_packet);
+}

+ 278 - 0
plugins/obs-nvenc/nvenc-d3d11.c

@@ -0,0 +1,278 @@
+#include "nvenc-internal.h"
+#include "nvenc-helpers.h"
+
+/*
+ * NVENC implementation using Direct3D 11 context and textures
+ */
+
+/* ------------------------------------------------------------------------- */
+/* D3D11 Context/Device management                                           */
+
+/* Returns a module handle for `lib`, loading the library only when it is not
+ * already loaded into the process. Logs an error and returns NULL if the
+ * library cannot be loaded. */
+static HANDLE get_lib(struct nvenc_data *enc, const char *lib)
+{
+	HMODULE mod = GetModuleHandleA(lib);
+
+	if (!mod) {
+		mod = LoadLibraryA(lib);
+		if (!mod)
+			error("Failed to load %s", lib);
+	}
+
+	return mod;
+}
+
+/* Signature of CreateDXGIFactory1, resolved at runtime from DXGI.dll. */
+typedef HRESULT(WINAPI *CREATEDXGIFACTORY1PROC)(REFIID, void **);
+
+/* Creates the D3D11 device and immediate context used by the encoder and
+ * stores them in enc->device / enc->context.
+ *
+ * The device is created on the adapter returned by EnumAdapters index 0.
+ * `settings` is not read by this function.
+ *
+ * Returns true on success; every failure is logged and returns false. */
+bool d3d11_init(struct nvenc_data *enc, obs_data_t *settings)
+{
+	HMODULE dxgi = get_lib(enc, "DXGI.dll");
+	HMODULE d3d11 = get_lib(enc, "D3D11.dll");
+	CREATEDXGIFACTORY1PROC create_dxgi;
+	PFN_D3D11_CREATE_DEVICE create_device;
+	IDXGIFactory1 *factory;
+	IDXGIAdapter *adapter;
+	ID3D11Device *device;
+	ID3D11DeviceContext *context;
+	HRESULT hr;
+
+	if (!dxgi || !d3d11) {
+		return false;
+	}
+
+	create_dxgi = (CREATEDXGIFACTORY1PROC)GetProcAddress(
+		dxgi, "CreateDXGIFactory1");
+	create_device = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(
+		d3d11, "D3D11CreateDevice");
+
+	if (!create_dxgi || !create_device) {
+		error("Failed to load D3D11/DXGI procedures");
+		return false;
+	}
+
+	hr = create_dxgi(&IID_IDXGIFactory1, &factory);
+	if (FAILED(hr)) {
+		error_hr("CreateDXGIFactory1 failed");
+		return false;
+	}
+
+	/* The factory is only needed to look up the adapter, so it is
+	 * released before the result is checked. */
+	hr = factory->lpVtbl->EnumAdapters(factory, 0, &adapter);
+	factory->lpVtbl->Release(factory);
+	if (FAILED(hr)) {
+		error_hr("EnumAdapters failed");
+		return false;
+	}
+
+	/* Likewise the adapter is released as soon as device creation has
+	 * been attempted. */
+	hr = create_device(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0,
+			   D3D11_SDK_VERSION, &device, NULL, &context);
+	adapter->lpVtbl->Release(adapter);
+	if (FAILED(hr)) {
+		error_hr("D3D11CreateDevice failed");
+		return false;
+	}
+
+	enc->device = device;
+	enc->context = context;
+	return true;
+}
+
+/* Releases all cached shared input textures together with their keyed
+ * mutexes, then the D3D11 context and device if they exist. */
+void d3d11_free(struct nvenc_data *enc)
+{
+	for (size_t idx = 0; idx < enc->input_textures.num; idx++) {
+		struct handle_tex *ht = &enc->input_textures.array[idx];
+
+		ht->tex->lpVtbl->Release(ht->tex);
+		ht->km->lpVtbl->Release(ht->km);
+	}
+
+	if (enc->context)
+		enc->context->lpVtbl->Release(enc->context);
+
+	if (enc->device)
+		enc->device->lpVtbl->Release(enc->device);
+}
+
+/* ------------------------------------------------------------------------- */
+/* D3D11 Surface management                                                  */
+
+/* Creates one NV12 or P010 texture (depending on obs_p010_tex_active()) of
+ * the encoder's dimensions and registers it with the NVENC session.
+ *
+ * On success nvtex->tex, nvtex->res and nvtex->mapped_res (NULL) are set and
+ * true is returned. If registration fails the texture is released again. */
+static bool d3d11_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex)
+{
+	const bool p010 = obs_p010_tex_active();
+	ID3D11Device *const device = enc->device;
+	ID3D11Texture2D *tex;
+
+	D3D11_TEXTURE2D_DESC desc = {
+		.Width = enc->cx,
+		.Height = enc->cy,
+		.MipLevels = 1,
+		.ArraySize = 1,
+		.Format = p010 ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12,
+		.SampleDesc = {.Count = 1},
+		.BindFlags = D3D11_BIND_RENDER_TARGET,
+	};
+
+	HRESULT hr = device->lpVtbl->CreateTexture2D(device, &desc, NULL, &tex);
+	if (FAILED(hr)) {
+		error_hr("Failed to create texture");
+		return false;
+	}
+
+	tex->lpVtbl->SetEvictionPriority(tex, DXGI_RESOURCE_PRIORITY_MAXIMUM);
+
+	NV_ENC_REGISTER_RESOURCE res = {NV_ENC_REGISTER_RESOURCE_VER};
+	res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
+	res.resourceToRegister = tex;
+	res.width = enc->cx;
+	res.height = enc->cy;
+	if (p010)
+		res.bufferFormat = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+	else
+		res.bufferFormat = NV_ENC_BUFFER_FORMAT_NV12;
+
+	if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) {
+		tex->lpVtbl->Release(tex);
+		return false;
+	}
+
+	nvtex->tex = tex;
+	nvtex->res = res.registeredResource;
+	nvtex->mapped_res = NULL;
+	return true;
+}
+
+/* Creates enc->buf_count encoder textures and appends them to enc->textures.
+ * Stops and returns false at the first texture that fails to initialise. */
+bool d3d11_init_textures(struct nvenc_data *enc)
+{
+	da_reserve(enc->textures, enc->buf_count);
+
+	for (uint32_t created = 0; created < enc->buf_count; created++) {
+		struct nv_texture texture;
+
+		if (!d3d11_texture_init(enc, &texture))
+			return false;
+
+		da_push_back(enc->textures, &texture);
+	}
+
+	return true;
+}
+
+/* Unmaps (if mapped) and unregisters one encoder texture from NVENC, then
+ * releases the D3D11 texture. Entries without a registered resource are
+ * left untouched. */
+static void d3d11_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex)
+{
+	if (!nvtex->res)
+		return;
+
+	if (nvtex->mapped_res)
+		nv.nvEncUnmapInputResource(enc->session, nvtex->mapped_res);
+
+	nv.nvEncUnregisterResource(enc->session, nvtex->res);
+	nvtex->tex->lpVtbl->Release(nvtex->tex);
+}
+
+/* Frees every texture stored in enc->textures. */
+void d3d11_free_textures(struct nvenc_data *enc)
+{
+	size_t idx = 0;
+
+	while (idx < enc->textures.num) {
+		d3d11_texture_free(enc, &enc->textures.array[idx]);
+		idx++;
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Actual encoding stuff                                                     */
+
+/* Resolves a shared texture handle to a texture on the encoder's device.
+ *
+ * Handles that were opened before are served from enc->input_textures;
+ * otherwise the shared resource is opened, its keyed mutex is queried and
+ * the pair is added to the cache.
+ *
+ * Returns the texture and stores its keyed mutex in *km_out, or returns NULL
+ * (leaving *km_out untouched) when opening or querying fails. */
+static ID3D11Texture2D *get_tex_from_handle(struct nvenc_data *enc,
+					    uint32_t handle,
+					    IDXGIKeyedMutex **km_out)
+{
+	/* Cache lookup */
+	for (size_t idx = 0; idx < enc->input_textures.num; idx++) {
+		struct handle_tex *cached = &enc->input_textures.array[idx];
+
+		if (cached->handle != handle)
+			continue;
+
+		*km_out = cached->km;
+		return cached->tex;
+	}
+
+	/* Cache miss: open the shared resource on our own device */
+	ID3D11Device *device = enc->device;
+	ID3D11Texture2D *input_tex;
+	IDXGIKeyedMutex *km;
+
+	HRESULT hr = device->lpVtbl->OpenSharedResource(
+		device, (HANDLE)(uintptr_t)handle, &IID_ID3D11Texture2D,
+		&input_tex);
+	if (FAILED(hr)) {
+		error_hr("OpenSharedResource failed");
+		return NULL;
+	}
+
+	hr = input_tex->lpVtbl->QueryInterface(input_tex, &IID_IDXGIKeyedMutex,
+					       &km);
+	if (FAILED(hr)) {
+		error_hr("QueryInterface(IDXGIKeyedMutex) failed");
+		input_tex->lpVtbl->Release(input_tex);
+		return NULL;
+	}
+
+	input_tex->lpVtbl->SetEvictionPriority(input_tex,
+					       DXGI_RESOURCE_PRIORITY_MAXIMUM);
+
+	struct handle_tex new_ht = {handle, input_tex, km};
+	da_push_back(enc->input_textures, &new_ht);
+
+	*km_out = km;
+	return input_tex;
+}
+
+/* Texture encode callback for the D3D11 path.
+ *
+ * data:            the struct nvenc_data instance.
+ * texture:         shared texture to encode; its handle is resolved through
+ *                  get_tex_from_handle().
+ * pts:             presentation timestamp, queued on enc->dts_list.
+ * lock_key:        keyed-mutex key to acquire before copying.
+ * next_key:        in/out; its value is used to release the keyed mutex. On
+ *                  the early failure paths it is set to lock_key.
+ * packet,
+ * received_packet: forwarded to nvenc_encode_base().
+ *
+ * Returns false on a bad handle, a texture lookup failure or a mapping
+ * failure; otherwise the result of nvenc_encode_base(). */
+bool d3d11_encode(void *data, struct encoder_texture *texture, int64_t pts,
+		  uint64_t lock_key, uint64_t *next_key,
+		  struct encoder_packet *packet, bool *received_packet)
+{
+	struct nvenc_data *enc = data;
+	ID3D11DeviceContext *context = enc->context;
+	ID3D11Texture2D *input_tex;
+	ID3D11Texture2D *output_tex;
+	IDXGIKeyedMutex *km;
+	struct nv_texture *nvtex;
+	struct nv_bitstream *bs;
+
+	if (texture->handle == GS_INVALID_HANDLE) {
+		error("Encode failed: bad texture handle");
+		*next_key = lock_key;
+		return false;
+	}
+
+	/* The bitstream and texture arrays share the same slot index. */
+	bs = &enc->bitstreams.array[enc->next_bitstream];
+	nvtex = &enc->textures.array[enc->next_bitstream];
+
+	input_tex = get_tex_from_handle(enc, texture->handle, &km);
+	output_tex = nvtex->tex;
+
+	if (!input_tex) {
+		*next_key = lock_key;
+		return false;
+	}
+
+	deque_push_back(&enc->dts_list, &pts, sizeof(pts));
+
+	/* ------------------------------------ */
+	/* copy to output tex                   */
+
+	/* The shared texture is only accessed while its keyed mutex is held.
+	 * NOTE(review): the AcquireSync result is not checked. */
+	km->lpVtbl->AcquireSync(km, lock_key, INFINITE);
+
+	context->lpVtbl->CopyResource(context, (ID3D11Resource *)output_tex,
+				      (ID3D11Resource *)input_tex);
+
+	km->lpVtbl->ReleaseSync(km, *next_key);
+
+	/* ------------------------------------ */
+	/* map output tex so nvenc can use it   */
+
+	NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
+	map.registeredResource = nvtex->res;
+	if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) {
+		return false;
+	}
+
+	nvtex->mapped_res = map.mappedResource;
+
+	/* ------------------------------------ */
+	/* do actual encode call                */
+
+	return nvenc_encode_base(enc, bs, nvtex->mapped_res, pts, packet,
+				 received_packet);
+}

+ 409 - 0
plugins/obs-nvenc/nvenc-helpers.c

@@ -0,0 +1,409 @@
+#include "obs-nvenc.h"
+#include "nvenc-helpers.h"
+
+#include <util/platform.h>
+#include <util/threading.h>
+#include <util/config-file.h>
+#include <util/dstr.h>
+#include <util/pipe.h>
+
+/* Handle of the NVENC runtime library, set by load_nvenc_lib(). */
+static void *nvenc_lib = NULL;
+/* Serialises init_nvenc_internal(); see init_nvenc(). */
+static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* NVENC API function table, filled in through nv_create_instance(). */
+NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
+NV_CREATE_INSTANCE_FUNC nv_create_instance = NULL;
+
+/* Will be populated with results from obs-nvenc-test */
+/* Both arrays are indexed by enum codec_type. */
+static struct encoder_caps encoder_capabilities[3];
+static bool codec_supported[3];
+static int num_devices;
+static int driver_version_major;
+static int driver_version_minor;
+
+/* File-local logging helper; prefixes messages with "[obs-nvenc] ". */
+#define error(format, ...) blog(LOG_ERROR, "[obs-nvenc] " format, ##__VA_ARGS__)
+
+bool nv_fail2(obs_encoder_t *encoder, void *session, const char *format, ...)
+{
+	UNUSED_PARAMETER(session);
+
+	struct dstr message = {0};
+	struct dstr error_message = {0};
+
+	va_list args;
+	va_start(args, format);
+	dstr_vprintf(&message, format, args);
+	va_end(args);
+
+	dstr_printf(&error_message, "NVENC Error: %s", message.array);
+	obs_encoder_set_last_error(encoder, error_message.array);
+	error("%s", error_message.array);
+
+	dstr_free(&error_message);
+	dstr_free(&message);
+
+	return true;
+}
+
+/* Checks an NVENC status code.
+ *
+ * Returns false for NV_ENC_SUCCESS. For any other status it stores a
+ * user-facing message as the encoder's last error, logs the failure
+ * (including the session's last error string when one is available) and
+ * returns true.
+ *
+ * session: optional NVENC session used to fetch the last error string.
+ * func:    name of the calling function, for the log message.
+ * call:    source text of the failed call, for the log message. */
+bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err,
+		const char *func, const char *call)
+{
+	if (err == NV_ENC_SUCCESS)
+		return false;
+
+	const char *nvenc_error = NULL;
+	if (session) {
+		nvenc_error = nv.nvEncGetLastErrorString(session);
+		// Some NVENC errors begin with :: which looks
+		// odd to users. Strip it off.
+		while (nvenc_error && *nvenc_error == ':')
+			nvenc_error++;
+	}
+
+	const bool have_detail = nvenc_error && *nvenc_error;
+	const char *err_name = nv_error_name(err);
+
+	switch (err) {
+	case NV_ENC_ERR_OUT_OF_MEMORY:
+		obs_encoder_set_last_error(encoder,
+					   obs_module_text("TooManySessions"));
+		break;
+
+	case NV_ENC_ERR_NO_ENCODE_DEVICE:
+	case NV_ENC_ERR_UNSUPPORTED_DEVICE:
+		obs_encoder_set_last_error(
+			encoder, obs_module_text("UnsupportedDevice"));
+		break;
+
+	case NV_ENC_ERR_INVALID_VERSION:
+		obs_encoder_set_last_error(encoder,
+					   obs_module_text("OutdatedDriver"));
+		break;
+
+	default: {
+		/* No translated text for this status: build a generic one. */
+		struct dstr error_message = {0};
+
+		if (have_detail) {
+			dstr_printf(&error_message, "NVENC Error: %s (%s)",
+				    nvenc_error, err_name);
+		} else {
+			dstr_printf(&error_message,
+				    "NVENC Error: %s: %s failed: %d (%s)", func,
+				    call, (int)err, err_name);
+		}
+		obs_encoder_set_last_error(encoder, error_message.array);
+		dstr_free(&error_message);
+		break;
+	}
+	}
+
+	if (have_detail) {
+		error("%s: %s failed: %d (%s): %s", func, call, (int)err,
+		      err_name, nvenc_error);
+	} else {
+		error("%s: %s failed: %d (%s)", func, call, (int)err, err_name);
+	}
+	return true;
+}
+
+/* File-local variant that checks a call without an NVENC session. */
+#define NV_FAILED(e, x) nv_failed2(e, NULL, x, __FUNCTION__, #x)
+
+/* Opens the platform's NVENC runtime library and stores the handle in
+ * nvenc_lib. Returns true when the library could be opened. */
+bool load_nvenc_lib(void)
+{
+#ifdef _WIN32
+	const char *lib_name = "nvEncodeAPI64.dll";
+#else
+	const char *lib_name = "libnvidia-encode.so.1";
+#endif
+
+	nvenc_lib = os_dlopen(lib_name);
+	return nvenc_lib != NULL;
+}
+
+static void *load_nv_func(const char *func)
+{
+	void *func_ptr = os_dlsym(nvenc_lib, func);
+	if (!func_ptr) {
+		error("Could not load function: %s", func);
+	}
+	return func_ptr;
+}
+
+/* Signature of NvEncodeAPIGetMaxSupportedVersion. */
+typedef NVENCSTATUS(NVENCAPI *NV_MAX_VER_FUNC)(uint32_t *);
+
+/* Returns the version value reported by NvEncodeAPIGetMaxSupportedVersion,
+ * or 0 when the function is missing or the query fails.
+ *
+ * A non-zero result is cached, and a failed symbol lookup is remembered so
+ * that it is not retried. */
+static uint32_t get_nvenc_ver(void)
+{
+	static NV_MAX_VER_FUNC query_fn = NULL;
+	static bool lookup_failed = false;
+	static uint32_t ver = 0;
+
+	if (lookup_failed)
+		return 0;
+
+	if (ver)
+		return ver;
+
+	if (!query_fn) {
+		query_fn = (NV_MAX_VER_FUNC)load_nv_func(
+			"NvEncodeAPIGetMaxSupportedVersion");
+		if (!query_fn) {
+			lookup_failed = true;
+			return 0;
+		}
+	}
+
+	return query_fn(&ver) == NV_ENC_SUCCESS ? ver : 0;
+}
+
+/* Returns the enumerator name of an NVENC status code as a string literal,
+ * or "Unknown Error" for values not listed below. */
+const char *nv_error_name(NVENCSTATUS err)
+{
+/* Expands to a case label that returns the enumerator's own spelling. */
+#define RETURN_CASE(x) \
+	case x:        \
+		return #x
+
+	switch (err) {
+		RETURN_CASE(NV_ENC_SUCCESS);
+		RETURN_CASE(NV_ENC_ERR_NO_ENCODE_DEVICE);
+		RETURN_CASE(NV_ENC_ERR_UNSUPPORTED_DEVICE);
+		RETURN_CASE(NV_ENC_ERR_INVALID_ENCODERDEVICE);
+		RETURN_CASE(NV_ENC_ERR_INVALID_DEVICE);
+		RETURN_CASE(NV_ENC_ERR_DEVICE_NOT_EXIST);
+		RETURN_CASE(NV_ENC_ERR_INVALID_PTR);
+		RETURN_CASE(NV_ENC_ERR_INVALID_EVENT);
+		RETURN_CASE(NV_ENC_ERR_INVALID_PARAM);
+		RETURN_CASE(NV_ENC_ERR_INVALID_CALL);
+		RETURN_CASE(NV_ENC_ERR_OUT_OF_MEMORY);
+		RETURN_CASE(NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
+		RETURN_CASE(NV_ENC_ERR_UNSUPPORTED_PARAM);
+		RETURN_CASE(NV_ENC_ERR_LOCK_BUSY);
+		RETURN_CASE(NV_ENC_ERR_NOT_ENOUGH_BUFFER);
+		RETURN_CASE(NV_ENC_ERR_INVALID_VERSION);
+		RETURN_CASE(NV_ENC_ERR_MAP_FAILED);
+		RETURN_CASE(NV_ENC_ERR_NEED_MORE_INPUT);
+		RETURN_CASE(NV_ENC_ERR_ENCODER_BUSY);
+		RETURN_CASE(NV_ENC_ERR_EVENT_NOT_REGISTERD);
+		RETURN_CASE(NV_ENC_ERR_GENERIC);
+		RETURN_CASE(NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY);
+		RETURN_CASE(NV_ENC_ERR_UNIMPLEMENTED);
+		RETURN_CASE(NV_ENC_ERR_RESOURCE_REGISTER_FAILED);
+		RETURN_CASE(NV_ENC_ERR_RESOURCE_NOT_REGISTERED);
+		RETURN_CASE(NV_ENC_ERR_RESOURCE_NOT_MAPPED);
+	}
+#undef RETURN_CASE
+
+	return "Unknown Error";
+}
+
+/* One-time NVENC API initialisation: checks the driver's supported API
+ * version against the version this plugin was compiled for, resolves
+ * NvEncodeAPICreateInstance and fills the global `nv` function table.
+ *
+ * The outcome of the first call is cached and returned by later calls. Error
+ * text is only attached to the `encoder` passed on that first call. Not
+ * thread-safe by itself; init_nvenc() wraps it in init_mutex. */
+static inline bool init_nvenc_internal(obs_encoder_t *encoder)
+{
+	static bool attempted = false;
+	static bool result = false;
+
+	if (attempted)
+		return result;
+	attempted = true;
+
+	const uint32_t driver_api_ver = get_nvenc_ver();
+	if (!driver_api_ver) {
+		obs_encoder_set_last_error(
+			encoder,
+			"Missing NvEncodeAPIGetMaxSupportedVersion, check "
+			"your video card drivers are up to date.");
+		return false;
+	}
+
+	if (driver_api_ver < NVCODEC_CONFIGURED_VERSION) {
+		obs_encoder_set_last_error(encoder,
+					   obs_module_text("OutdatedDriver"));
+
+		error("Current driver version does not support this NVENC "
+		      "version, please upgrade your driver");
+		return false;
+	}
+
+	nv_create_instance = (NV_CREATE_INSTANCE_FUNC)load_nv_func(
+		"NvEncodeAPICreateInstance");
+	if (nv_create_instance == NULL) {
+		obs_encoder_set_last_error(
+			encoder, "Missing NvEncodeAPICreateInstance, check "
+				 "your video card drivers are up to date.");
+		return false;
+	}
+
+	if (NV_FAILED(encoder, nv_create_instance(&nv)))
+		return false;
+
+	result = true;
+	return result;
+}
+
+/* Mutex-protected entry point for the one-time NVENC initialisation.
+ * Returns whether the NVENC API is ready for use. */
+bool init_nvenc(obs_encoder_t *encoder)
+{
+	pthread_mutex_lock(&init_mutex);
+	const bool ready = init_nvenc_internal(encoder);
+	pthread_mutex_unlock(&init_mutex);
+
+	return ready;
+}
+
+/* Returns the capability record stored for `codec`. */
+struct encoder_caps *get_encoder_caps(enum codec_type codec)
+{
+	return &encoder_capabilities[codec];
+}
+
+/* Returns the device count read from the test process output
+ * ("nvenc_devices"); 0 until nvenc_check() has stored a value. */
+int num_encoder_devices(void)
+{
+	return num_devices;
+}
+
+/* Returns the "codec_supported" flag recorded for `codec` by
+ * read_codec_caps(); false until that has run. */
+bool is_codec_supported(enum codec_type codec)
+{
+	return codec_supported[codec];
+}
+
+/* Reports whether the detected driver predates the split-encoding fixes. */
+bool has_broken_split_encoding(void)
+{
+	/* CBR padding and tearing artifacts with split encoding are fixed in
+	 * driver versions 555+, previous ones should be considered broken. */
+	const int first_fixed_major = 555;
+
+	return driver_version_major < first_fixed_major;
+}
+
+/* Reads one codec's section of the test process output into
+ * codec_supported[] and encoder_capabilities[].
+ *
+ * config:  parsed test process output.
+ * codec:   index of the codec to fill in.
+ * section: name of the config section for that codec.
+ *
+ * When the section reports the codec as unsupported, the capability record
+ * is left unchanged. */
+static void read_codec_caps(config_t *config, enum codec_type codec,
+			    const char *section)
+{
+	const bool supported =
+		config_get_bool(config, section, "codec_supported");
+
+	codec_supported[codec] = supported;
+	if (!supported)
+		return;
+
+	struct encoder_caps *caps = &encoder_capabilities[codec];
+
+	/* Integer capabilities */
+	caps->bframes = (int)config_get_int(config, section, "bframes");
+	caps->bref_modes = (int)config_get_int(config, section, "bref");
+	caps->engines = (int)config_get_int(config, section, "engines");
+	caps->max_width = (int)config_get_int(config, section, "max_width");
+	caps->max_height = (int)config_get_int(config, section, "max_height");
+	caps->temporal_filter =
+		(int)config_get_int(config, section, "temporal_filter");
+	caps->lookahead_level =
+		(int)config_get_int(config, section, "lookahead_level");
+
+	/* Boolean capabilities */
+	caps->dyn_bitrate = config_get_bool(config, section, "dynamic_bitrate");
+	caps->lookahead = config_get_bool(config, section, "lookahead");
+	caps->lossless = config_get_bool(config, section, "lossless");
+	caps->temporal_aq = config_get_bool(config, section, "temporal_aq");
+	caps->ten_bit = config_get_bool(config, section, "10bit");
+	caps->four_four_four = config_get_bool(config, section, "yuv_444");
+}
+
+/* Runs the obs-nvenc-test helper process and imports its report.
+ *
+ * The helper's output is read from a pipe and parsed as a config string. On
+ * success the device count, the per-codec capabilities and the driver
+ * version are stored in this file's static state.
+ *
+ * Returns true only when the helper ran, its output could be parsed and it
+ * reported "nvenc_supported". Every failure path returns false. */
+static bool nvenc_check(void)
+{
+#ifdef _WIN32
+	char *test_exe = os_get_executable_path_ptr("obs-nvenc-test.exe");
+#else
+	char *test_exe = os_get_executable_path_ptr("obs-nvenc-test");
+#endif
+	os_process_args_t *args;
+	struct dstr caps_str = {0};
+	config_t *config = NULL;
+	/* Declared before the first `goto fail` so it is always initialised
+	 * when the cleanup code returns it. */
+	bool success = false;
+
+	args = os_process_args_create(test_exe);
+
+	os_process_pipe_t *pp = os_process_pipe_create2(args, "r");
+	if (!pp) {
+		blog(LOG_WARNING, "[NVENC] Failed to launch the NVENC "
+				  "test process I guess");
+		goto fail;
+	}
+
+	/* Collect everything the helper prints until the pipe is drained. */
+	for (;;) {
+		char data[2048];
+		size_t len =
+			os_process_pipe_read(pp, (uint8_t *)data, sizeof(data));
+		if (!len)
+			break;
+
+		dstr_ncat(&caps_str, data, len);
+	}
+
+	os_process_pipe_destroy(pp);
+
+	if (dstr_is_empty(&caps_str)) {
+		blog(LOG_WARNING,
+		     "[NVENC] Seems the NVENC test subprocess crashed. "
+		     "Better there than here I guess. ");
+		goto fail;
+	}
+
+	if (config_open_string(&config, caps_str.array) != 0) {
+		blog(LOG_WARNING, "[NVENC] Failed to open config string");
+		goto fail;
+	}
+
+	success = config_get_bool(config, "general", "nvenc_supported");
+	if (!success) {
+		const char *reason =
+			config_get_string(config, "general", "reason");
+		blog(LOG_WARNING, "[NVENC] Test process failed: %s",
+		     reason ? reason : "unknown");
+		goto fail;
+	}
+
+	num_devices = (int)config_get_int(config, "general", "nvenc_devices");
+	read_codec_caps(config, CODEC_H264, "h264");
+	read_codec_caps(config, CODEC_HEVC, "hevc");
+	read_codec_caps(config, CODEC_AV1, "av1");
+
+	const char *nvenc_ver =
+		config_get_string(config, "general", "nvenc_ver");
+	const char *cuda_ver = config_get_string(config, "general", "cuda_ver");
+	const char *driver_ver =
+		config_get_string(config, "general", "driver_ver");
+	/* Parse out major/minor for some brokenness checks. The value may be
+	 * missing from the report, in which case the versions stay at 0. */
+	if (driver_ver) {
+		sscanf(driver_ver, "%d.%d", &driver_version_major,
+		       &driver_version_minor);
+	}
+
+	blog(LOG_INFO,
+	     "[obs-nvenc] NVENC version: %d.%d (compiled) / %s (driver), "
+	     "CUDA driver version: %s, AV1 supported: %s",
+	     NVCODEC_CONFIGURED_VERSION >> 4, NVCODEC_CONFIGURED_VERSION & 0xf,
+	     nvenc_ver ? nvenc_ver : "unknown", cuda_ver ? cuda_ver : "unknown",
+	     codec_supported[CODEC_AV1] ? "true" : "false");
+
+fail:
+	if (config)
+		config_close(config);
+
+	bfree(test_exe);
+	dstr_free(&caps_str);
+	os_process_args_destroy(args);
+
+	return success;
+}
+
+/* Profiler label for the support check below. */
+static const char *nvenc_check_name = "nvenc_check";
+
+/* Returns true when the NVENC library could be loaded and nvenc_check()
+ * succeeded. nvenc_check() is not run if the library fails to load. */
+bool nvenc_supported(void)
+{
+	bool success = false;
+
+	profile_start(nvenc_check_name);
+	if (load_nvenc_lib())
+		success = nvenc_check();
+	profile_end(nvenc_check_name);
+
+	return success;
+}
+
+/* Initialises init_mutex and registers the regular and the compatibility
+ * encoders. */
+void obs_nvenc_load(void)
+{
+	/* NOTE(review): init_mutex is also initialised statically with
+	 * PTHREAD_MUTEX_INITIALIZER at its definition. */
+	pthread_mutex_init(&init_mutex, NULL);
+	register_encoders();
+	register_compat_encoders();
+}
+
+/* Counterpart of obs_nvenc_load(): destroys init_mutex. */
+void obs_nvenc_unload(void)
+{
+	pthread_mutex_destroy(&init_mutex);
+}

+ 89 - 0
plugins/obs-nvenc/nvenc-helpers.h

@@ -0,0 +1,89 @@
+#pragma once
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+#include <obs-module.h>
+#include <ffnvcodec/nvEncodeAPI.h>
+
+/* NVENC API version the plugin is compiled against, packed as
+ * (major << 4) | minor. */
+#define NVCODEC_CONFIGURED_VERSION \
+	((NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION)
+
+/* Feature gates for SDK 12.1+ / 12.2+. The minor version is only compared
+ * when the major version is exactly 12, so an older major with a high minor
+ * is not mistaken for 12.x. */
+#if NVENCAPI_MAJOR_VERSION > 12 || \
+	(NVENCAPI_MAJOR_VERSION == 12 && NVENCAPI_MINOR_VERSION >= 1)
+#define NVENC_12_1_OR_LATER
+#endif
+
+#if NVENCAPI_MAJOR_VERSION > 12 || \
+	(NVENCAPI_MAJOR_VERSION == 12 && NVENCAPI_MINOR_VERSION >= 2)
+#define NVENC_12_2_OR_LATER
+#endif
+
+/* Codecs handled by the plugin. The values are also used as array indices
+ * (see encoder_capabilities / codec_supported in nvenc-helpers.c), so they
+ * must stay contiguous and start at 0. */
+enum codec_type {
+	CODEC_H264,
+	CODEC_HEVC,
+	CODEC_AV1,
+};
+
+/* Returns a short display name for `type`, or "Unknown" for values outside
+ * the enum. Declared `static inline` because it lives in a header: a plain
+ * `static` function would trigger unused-function warnings in every source
+ * file that includes this header without calling it. */
+static inline const char *get_codec_name(enum codec_type type)
+{
+	switch (type) {
+	case CODEC_H264:
+		return "H264";
+	case CODEC_HEVC:
+		return "HEVC";
+	case CODEC_AV1:
+		return "AV1";
+	}
+
+	return "Unknown";
+}
+
+/* Per-codec capability record. In nvenc-helpers.c it is filled from the
+ * obs-nvenc-test report by read_codec_caps(). */
+struct encoder_caps {
+	int bframes;
+	int bref_modes;
+	int engines;
+
+	int max_width;
+	int max_height;
+
+	/* These don't seem to work correctly, thanks NVIDIA. */
+	int temporal_filter;
+	int lookahead_level;
+
+	bool dyn_bitrate;
+	bool lookahead;
+	bool lossless;
+	bool temporal_aq;
+
+	/* Read from the report keys "10bit" and "yuv_444". */
+	bool ten_bit;
+	bool four_four_four;
+};
+
+/* Signature of NvEncodeAPICreateInstance. */
+typedef NVENCSTATUS(NVENCAPI *NV_CREATE_INSTANCE_FUNC)(
+	NV_ENCODE_API_FUNCTION_LIST *);
+
+/* Global NVENC function table and its loader entry point; both are defined
+ * in nvenc-helpers.c. */
+extern NV_ENCODE_API_FUNCTION_LIST nv;
+extern NV_CREATE_INSTANCE_FUNC nv_create_instance;
+
+/* Maps an NVENC status code to its enumerator name. */
+const char *nv_error_name(NVENCSTATUS err);
+
+/* Error reporting helpers; both return true when an error was reported. */
+bool init_nvenc(obs_encoder_t *encoder);
+bool nv_fail2(obs_encoder_t *encoder, void *session, const char *format, ...);
+bool nv_failed2(obs_encoder_t *encoder, void *session, NVENCSTATUS err,
+		const char *func, const char *call);
+
+/* Accessors for the data gathered from the obs-nvenc-test process. */
+struct encoder_caps *get_encoder_caps(enum codec_type codec);
+int num_encoder_devices(void);
+bool is_codec_supported(enum codec_type codec);
+bool has_broken_split_encoding(void);
+
+void register_encoders(void);
+void register_compat_encoders(void);
+
+/* Convenience wrappers that pass enc->session; they require a variable named
+ * `enc` to be in scope at the point of use. */
+#define nv_fail(encoder, format, ...) \
+	nv_fail2(encoder, enc->session, format, ##__VA_ARGS__)
+
+#define nv_failed(encoder, err, func, call) \
+	nv_failed2(encoder, enc->session, err, func, call)

+ 211 - 0
plugins/obs-nvenc/nvenc-internal.h

@@ -0,0 +1,211 @@
+#pragma once
+
+#include "cuda-helpers.h"
+#include "nvenc-helpers.h"
+
+#include <util/deque.h>
+#include <opts-parser.h>
+
+#ifdef _WIN32
+#define INITGUID
+#include <dxgi.h>
+#include <d3d11.h>
+#include <d3d11_1.h>
+#else
+#include <glad/glad.h>
+#endif
+
+#define do_log(level, format, ...)               \
+	blog(level, "[obs-nvenc: '%s'] " format, \
+	     obs_encoder_get_name(enc->encoder), ##__VA_ARGS__)
+
+#define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__)
+#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
+#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
+#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)
+
+/* Logs `msg` together with the failing HRESULT. Expects a variable named
+ * `hr` in the calling scope. The value is first narrowed to 32 bits and then
+ * widened to unsigned long so that the argument type matches the %lX
+ * conversion. The trailing semicolon is kept for existing call sites. */
+#define error_hr(msg)                                 \
+	error("%s: %s: 0x%08lX", __FUNCTION__, msg, \
+	      (unsigned long)(uint32_t)hr);
+
+#define NV_FAIL(format, ...) nv_fail(enc->encoder, format, ##__VA_ARGS__)
+#define NV_FAILED(x) nv_failed(enc->encoder, x, __FUNCTION__, #x)
+
+/* ------------------------------------------------------------------------- */
+/* Main Implementation Structure                                             */
+
+struct nvenc_properties {
+	int64_t bitrate;
+	int64_t max_bitrate;
+	int64_t keyint_sec;
+	int64_t cqp;
+	int64_t device;
+	int64_t bf;
+	int64_t bframe_ref_mode;
+	int64_t split_encode;
+	int64_t target_quality;
+
+	const char *rate_control;
+	const char *preset;
+	const char *profile;
+	const char *tune;
+	const char *multipass;
+	const char *opts_str;
+
+	bool adaptive_quantization;
+	bool lookahead;
+	bool disable_scenecut;
+	bool repeat_headers;
+	bool force_cuda_tex;
+
+	struct obs_options opts;
+	obs_data_t *data;
+};
+
+struct nvenc_data {
+	obs_encoder_t *encoder;
+	enum codec_type codec;
+	GUID codec_guid;
+
+	void *session;
+	NV_ENC_INITIALIZE_PARAMS params;
+	NV_ENC_CONFIG config;
+	uint32_t buf_count;
+	int output_delay;
+	int buffers_queued;
+	size_t next_bitstream;
+	size_t cur_bitstream;
+	bool encode_started;
+	bool first_packet;
+	bool can_change_bitrate;
+	bool non_texture;
+
+	DARRAY(struct handle_tex) input_textures;
+	DARRAY(struct nv_bitstream) bitstreams;
+	DARRAY(struct nv_cuda_surface) surfaces;
+	NV_ENC_BUFFER_FORMAT surface_format;
+	struct deque dts_list;
+
+	DARRAY(uint8_t) packet_data;
+	int64_t packet_pts;
+	bool packet_keyframe;
+
+#ifdef _WIN32
+	DARRAY(struct nv_texture) textures;
+	ID3D11Device *device;
+	ID3D11DeviceContext *context;
+#endif
+
+	uint32_t cx;
+	uint32_t cy;
+	enum video_format in_format;
+
+	uint8_t *header;
+	size_t header_size;
+
+	uint8_t *sei;
+	size_t sei_size;
+
+	int8_t *roi_map;
+	size_t roi_map_size;
+	uint32_t roi_increment;
+
+	struct nvenc_properties props;
+
+	CUcontext cu_ctx;
+};
+
+/* ------------------------------------------------------------------------- */
+/* Resource data structures                                                  */
+
+/* Input texture handles */
+struct handle_tex {
+#ifdef _WIN32
+	uint32_t handle;
+	ID3D11Texture2D *tex;
+	IDXGIKeyedMutex *km;
+#else
+	GLuint tex_id; /* GL id of the Y texture; the only key used for lookup */
+	/* CUDA mappings */
+	CUgraphicsResource res_y; /* CUDA registration of the Y texture */
+	CUgraphicsResource res_uv; /* CUDA registration of the UV texture */
+#endif
+};
+
+/* Bitstream buffer */
+struct nv_bitstream {
+	void *ptr;
+};
+
+/** Mapped resources **/
+/* CUDA Arrays */
+/* One CUDA-backed NVENC input surface.
+ * NV_ENC_INPUT_PTR is itself an opaque handle, and the value returned in
+ * NV_ENC_MAP_INPUT_RESOURCE.mappedResource is stored here directly, so the
+ * member holds the handle by value (as nv_texture.mapped_res does) instead
+ * of being declared as a pointer to a handle. */
+struct nv_cuda_surface {
+	CUarray tex; /* destination array of the plane copies */
+	NV_ENC_REGISTERED_PTR res; /* passed as registeredResource when mapping */
+	NV_ENC_INPUT_PTR mapped_res; /* handle obtained from mapping `res` */
+};
+
+#ifdef _WIN32
+/* DX11 textures */
+struct nv_texture {
+	void *res;
+	ID3D11Texture2D *tex;
+	void *mapped_res;
+};
+#endif
+
+/* ------------------------------------------------------------------------- */
+/* Shared functions                                                          */
+
+bool nvenc_encode_base(struct nvenc_data *enc, struct nv_bitstream *bs,
+		       void *pic, int64_t pts, struct encoder_packet *packet,
+		       bool *received_packet);
+
+/* ------------------------------------------------------------------------- */
+/* Backend-specific functions                                                */
+
+#ifdef _WIN32
+/** D3D11 **/
+bool d3d11_init(struct nvenc_data *enc, obs_data_t *settings);
+void d3d11_free(struct nvenc_data *enc);
+
+bool d3d11_init_textures(struct nvenc_data *enc);
+void d3d11_free_textures(struct nvenc_data *enc);
+
+bool d3d11_encode(void *data, struct encoder_texture *texture, int64_t pts,
+		  uint64_t lock_key, uint64_t *next_key,
+		  struct encoder_packet *packet, bool *received_packet);
+#endif
+
+/** CUDA **/
+bool cuda_ctx_init(struct nvenc_data *enc, obs_data_t *settings, bool texture);
+void cuda_ctx_free(struct nvenc_data *enc);
+
+bool cuda_init_surfaces(struct nvenc_data *enc);
+void cuda_free_surfaces(struct nvenc_data *enc);
+
+bool cuda_encode(void *data, struct encoder_frame *frame,
+		 struct encoder_packet *packet, bool *received_packet);
+
+#ifndef _WIN32
+/** CUDA OpenGL **/
+void cuda_opengl_free(struct nvenc_data *enc);
+bool cuda_opengl_encode(void *data, struct encoder_texture *tex, int64_t pts,
+			uint64_t lock_key, uint64_t *next_key,
+			struct encoder_packet *packet, bool *received_packet);
+#endif
+
+/* ------------------------------------------------------------------------- */
+/* Properties crap                                                           */
+
+void nvenc_properties_read(struct nvenc_properties *enc, obs_data_t *settings);
+
+void h264_nvenc_defaults(obs_data_t *settings);
+void hevc_nvenc_defaults(obs_data_t *settings);
+void av1_nvenc_defaults(obs_data_t *settings);
+
+obs_properties_t *h264_nvenc_properties(void *);
+obs_properties_t *hevc_nvenc_properties(void *);
+obs_properties_t *av1_nvenc_properties(void *);
+
+/* Custom argument parsing */
+void apply_user_args(struct nvenc_data *enc);
+bool get_user_arg_int(struct nvenc_data *enc, const char *name, int *val);

+ 158 - 0
plugins/obs-nvenc/nvenc-opengl.c

@@ -0,0 +1,158 @@
+#include "nvenc-internal.h"
+#include "nvenc-helpers.h"
+
+/*
+ * NVENC implementation using CUDA context and OpenGL textures
+ */
+
+/* Unregisters the Y and UV CUDA graphics resources of every cached input
+ * texture while the encoder's CUDA context is pushed. Does nothing when the
+ * encoder has no CUDA context. */
+void cuda_opengl_free(struct nvenc_data *enc)
+{
+	if (enc->cu_ctx) {
+		struct handle_tex *entry = enc->input_textures.array;
+		size_t remaining = enc->input_textures.num;
+
+		cu->cuCtxPushCurrent(enc->cu_ctx);
+
+		while (remaining > 0) {
+			cu->cuGraphicsUnregisterResource(entry->res_y);
+			cu->cuGraphicsUnregisterResource(entry->res_uv);
+			entry++;
+			remaining--;
+		}
+
+		cu->cuCtxPopCurrent(NULL);
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Actual encoding stuff                                                     */
+
+/* Resolves the CUDA graphics resources for a Y/UV GL texture pair.
+ *
+ * The cache in enc->input_textures is searched by the Y texture id only. On
+ * a hit the cached resources are returned. On a miss both textures are
+ * registered with CUDA as read-only GL_TEXTURE_2D images and the new pair is
+ * appended to the cache.
+ *
+ * NOTE(review): CU_CHECK is defined outside this file; it presumably sets
+ * `success` to false and jumps to `unmap` on error, so both names are part
+ * of the macro contract and must not be renamed. */
+static inline bool get_res_for_tex_ids(struct nvenc_data *enc, GLuint tex_id_y,
+				       GLuint tex_id_uv,
+				       CUgraphicsResource *tex_y,
+				       CUgraphicsResource *tex_uv)
+{
+	bool success = true;
+	struct handle_tex *cached = enc->input_textures.array;
+
+	for (size_t remaining = enc->input_textures.num; remaining > 0;
+	     remaining--, cached++) {
+		if (cached->tex_id == tex_id_y) {
+			*tex_y = cached->res_y;
+			*tex_uv = cached->res_uv;
+			return true;
+		}
+	}
+
+	/* Not cached yet: register both planes. */
+	CU_CHECK(cu->cuGraphicsGLRegisterImage(
+		tex_y, tex_id_y, GL_TEXTURE_2D,
+		CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY))
+	CU_CHECK(cu->cuGraphicsGLRegisterImage(
+		tex_uv, tex_id_uv, GL_TEXTURE_2D,
+		CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY))
+
+	struct handle_tex new_entry = {
+		.tex_id = tex_id_y,
+		.res_y = *tex_y,
+		.res_uv = *tex_uv,
+	};
+	da_push_back(enc->input_textures, &new_entry);
+
+unmap:
+	if (success)
+		return true;
+
+	/* Registration failed part-way: drop whatever was registered. */
+	cu->cuGraphicsUnregisterResource(*tex_y);
+	cu->cuGraphicsUnregisterResource(*tex_uv);
+	return false;
+}
+
+/* Copies the Y plane (tex[0]) and the UV plane (tex[1]) of a GL texture pair
+ * into the CUDA array of `surf`. The UV plane is written at row offset
+ * enc->cy with half the height. Rows are enc->cx bytes wide, doubled when
+ * `p010` is set.
+ *
+ * NOTE(review): CU_CHECK is defined outside this file; it presumably sets
+ * `success` to false and jumps to `unmap` on error. Its arguments are left
+ * untouched in case the macro also logs them. */
+static inline bool copy_tex_data(struct nvenc_data *enc, const bool p010,
+				 GLuint tex[2], struct nv_cuda_surface *surf)
+{
+	CUgraphicsResource mapped_tex[2] = {0};
+	CUarray mapped_cuda;
+	bool success = true;
+
+	const bool have_resources = get_res_for_tex_ids(
+		enc, tex[0], tex[1], &mapped_tex[0], &mapped_tex[1]);
+	if (!have_resources)
+		return false;
+
+	CU_CHECK(cu->cuGraphicsMapResources(2, mapped_tex, 0))
+
+	/* Array-to-array copy descriptor, shared by both planes. */
+	CUDA_MEMCPY2D m = {0};
+	m.srcMemoryType = CU_MEMORYTYPE_ARRAY;
+	m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+	m.dstArray = surf->tex;
+	m.Height = enc->cy;
+	m.WidthInBytes = enc->cx * (p010 ? 2 : 1);
+
+	/* Y plane */
+	CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda,
+							 mapped_tex[0], 0, 0))
+	m.srcArray = mapped_cuda;
+	CU_CHECK(cu->cuMemcpy2D(&m))
+
+	/* UV plane, placed directly below the Y plane */
+	CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda,
+							 mapped_tex[1], 0, 0))
+	m.srcArray = mapped_cuda;
+	m.Height = enc->cy / 2;
+	m.dstY += enc->cy;
+
+	CU_CHECK(cu->cuMemcpy2D(&m))
+
+unmap:
+	cu->cuGraphicsUnmapResources(2, mapped_tex, 0);
+
+	return success;
+}
+
+/*
+ * Encodes one frame supplied as a pair of OpenGL textures.
+ *
+ * The Y and UV planes are copied into the next CUDA surface, that surface
+ * is mapped as an NVENC input resource and the frame is handed to
+ * nvenc_encode_base(), whose result is returned.
+ *
+ * Returns false when the texture handle is unusable (in that case *next_key
+ * is set to lock_key), when the plane copy fails or when mapping the input
+ * resource fails.
+ *
+ * NOTE(review): CU_FAILED is defined outside this file; presumably it
+ * returns false from this function when the wrapped CUDA call fails.
+ */
+bool cuda_opengl_encode(void *data, struct encoder_texture *tex, int64_t pts,
+			uint64_t lock_key, uint64_t *next_key,
+			struct encoder_packet *packet, bool *received_packet)
+{
+	struct nvenc_data *enc = data;
+	struct nv_cuda_surface *surf;
+	struct nv_bitstream *bs;
+	const bool p010 = obs_p010_tex_active();
+	GLuint input_tex[2];
+
+	/* Both planes are dereferenced below, so a missing UV texture must be
+	 * rejected just like a missing Y texture. */
+	if (tex == NULL || tex->tex[0] == NULL || tex->tex[1] == NULL) {
+		error("Encode failed: bad texture handle");
+		*next_key = lock_key;
+		return false;
+	}
+
+	bs = &enc->bitstreams.array[enc->next_bitstream];
+	surf = &enc->surfaces.array[enc->next_bitstream];
+
+	deque_push_back(&enc->dts_list, &pts, sizeof(pts));
+
+	/* ------------------------------------ */
+	/* copy to CUDA data                    */
+
+	CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
+	obs_enter_graphics();
+	input_tex[0] = *(GLuint *)gs_texture_get_obj(tex->tex[0]);
+	input_tex[1] = *(GLuint *)gs_texture_get_obj(tex->tex[1]);
+
+	bool success = copy_tex_data(enc, p010, input_tex, surf);
+
+	obs_leave_graphics();
+	CU_FAILED(cu->cuCtxPopCurrent(NULL))
+
+	if (!success)
+		return false;
+
+	/* ------------------------------------ */
+	/* map output tex so nvenc can use it   */
+
+	NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
+	map.registeredResource = surf->res;
+	map.mappedBufferFmt = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
+				   : NV_ENC_BUFFER_FORMAT_NV12;
+
+	if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map)))
+		return false;
+
+	surf->mapped_res = map.mappedResource;
+
+	/* ------------------------------------ */
+	/* do actual encode call                */
+
+	return nvenc_encode_base(enc, bs, surf->mapped_res, pts, packet,
+				 received_packet);
+}

+ 220 - 0
plugins/obs-nvenc/nvenc-opts-parser.c

@@ -0,0 +1,220 @@
+#include "nvenc-internal.h"
+
+#include <stdio.h>
+
+/* NVIDIA uses bitfields for a variety of options. As it is not possible to
+ * use offsetof() or similar with those we resort to macros here to avoid too
+ * much boilerplate. */
+
+#define APPLY_BIT_OPT(opt_name, bits)                                     \
+	if (strcmp(opt->name, #opt_name) == 0) {                          \
+		uint32_t old_val = nv_conf->opt_name;                     \
+		nv_conf->opt_name = strtol(opt->value, NULL, 10);         \
+		blog(LOG_DEBUG, "[obs-nvenc] Changing: \"%s\": %u -> %u", \
+		     #opt_name, old_val, nv_conf->opt_name);              \
+		return true;                                              \
+	}
+
+#define APPLY_INT_OPT(opt_name, type)                                         \
+	if (strcmp(opt->name, #opt_name) == 0) {                              \
+		blog(LOG_DEBUG, "[obs-nvenc] Changing \"%s\": %d -> %s (%s)", \
+		     #opt_name, nv_conf->opt_name, opt->value, #type);        \
+		nv_conf->opt_name = (type)strtol(opt->value, NULL, 10);       \
+		return true;                                                  \
+	}
+
+/* Parses a QP option value into `qp_opt`.
+ *
+ * Accepted forms are "P:B:I" (three integers separated by ':') or a single
+ * integer that is applied to all three fields. `name` is only used for the
+ * debug log line. */
+static void parse_qp_opt(const char *name, const char *val, NV_ENC_QP *qp_opt)
+{
+	int32_t qp_p, qp_b, qp_i;
+	const int fields = sscanf(val, "%d:%d:%d", &qp_p, &qp_b, &qp_i);
+
+	if (fields != 3) {
+		/* Not a full triple: treat the text as one value for all. */
+		const int32_t single = atoi(val);
+		qp_p = single;
+		qp_b = single;
+		qp_i = single;
+	}
+
+	blog(LOG_DEBUG,
+	     "[obs-nvenc] Applying custom %s = %d / %d / %d (P / B / I)", name,
+	     qp_p, qp_b, qp_i);
+
+	/* The API declares these fields as uint32_t for legacy reasons even
+	 * though they carry signed values (see nvEncodeAPI.h), hence the
+	 * explicit casts. */
+	qp_opt->qpIntra = (uint32_t)qp_i;
+	qp_opt->qpInterB = (uint32_t)qp_b;
+	qp_opt->qpInterP = (uint32_t)qp_p;
+}
+
+#define APPLY_QP_OPT(opt_name)                                           \
+	if (strcmp(opt->name, #opt_name) == 0) {                         \
+		parse_qp_opt(#opt_name, opt->value, &nv_conf->opt_name); \
+		return true;                                             \
+	}
+
+static bool apply_rc_opt(const struct obs_option *opt,
+			 NV_ENC_RC_PARAMS *nv_conf)
+{
+	APPLY_QP_OPT(constQP)
+	APPLY_QP_OPT(minQP)
+	APPLY_QP_OPT(maxQP)
+	APPLY_QP_OPT(initialRCQP)
+
+	APPLY_INT_OPT(averageBitRate, uint32_t)
+	APPLY_INT_OPT(maxBitRate, uint32_t)
+	APPLY_INT_OPT(vbvBufferSize, uint32_t)
+	APPLY_INT_OPT(vbvInitialDelay, uint32_t)
+
+	APPLY_INT_OPT(targetQuality, uint8_t)
+	APPLY_INT_OPT(targetQualityLSB, uint8_t)
+
+	APPLY_INT_OPT(cbQPIndexOffset, int8_t)
+	APPLY_INT_OPT(crQPIndexOffset, int8_t)
+
+	APPLY_BIT_OPT(enableMinQP, 1)
+	APPLY_BIT_OPT(enableMaxQP, 1)
+	APPLY_BIT_OPT(enableInitialRCQP, 1)
+	APPLY_BIT_OPT(enableAQ, 1)
+	APPLY_BIT_OPT(enableLookahead, 1)
+	APPLY_BIT_OPT(disableIadapt, 1)
+	APPLY_BIT_OPT(disableBadapt, 1)
+	APPLY_BIT_OPT(enableTemporalAQ, 1)
+	APPLY_BIT_OPT(aqStrength, 4)
+
+#ifdef NVENC_12_2_OR_LATER
+	APPLY_INT_OPT(lookaheadLevel, NV_ENC_LOOKAHEAD_LEVEL)
+#endif
+
+	/* Each macro above returns true as soon as its option name matches.
+	 * Reaching this point means the option is unknown or unsupported. */
+	return false;
+}
+
+static bool apply_conf_opt(const struct obs_option *opt, NV_ENC_CONFIG *nv_conf)
+{
+	APPLY_INT_OPT(gopLength, uint32_t) /* returns true on a name match */
+	APPLY_INT_OPT(frameIntervalP, int32_t)
+
+	return false; /* not a top-level NV_ENC_CONFIG option handled here */
+}
+
+/* Parses a codec level string and stores the result in `*level`.
+ *
+ * The text may be a plain integer ("42") or dotted ("4.2", read as
+ * major * 10 + minor). A result of zero, including a dotted value that does
+ * not parse, leaves `*level` untouched. For HEVC the value is multiplied by
+ * three before being stored. */
+static void parse_level_opt(const char *val, uint32_t *level, bool hevc)
+{
+	uint32_t parsed = 0;
+
+	if (strchr(val, '.') == NULL) {
+		parsed = strtol(val, NULL, 10);
+	} else {
+		uint32_t major, minor;
+		if (sscanf(val, "%u.%u", &major, &minor) == 2)
+			parsed = major * 10 + minor;
+	}
+
+	if (parsed == 0)
+		return;
+
+	const uint32_t scaled = hevc ? parsed * 3 : parsed;
+
+	blog(LOG_DEBUG, "[obs-nvenc] Applying custom level = %s (%u)", val,
+	     scaled);
+	*level = scaled;
+}
+
+static bool apply_h264_opt(struct obs_option *opt, NV_ENC_CONFIG_H264 *nv_conf)
+{
+	if (strcmp(opt->name, "level") == 0) {
+		parse_level_opt(opt->value, &nv_conf->level, false); /* no x3 scaling */
+		return true; /* consumed even if the value was rejected */
+	}
+
+	APPLY_INT_OPT(idrPeriod, uint32_t)
+	APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE)
+
+	APPLY_BIT_OPT(enableFillerDataInsertion, 1)
+
+	return false; /* not an H.264 option handled here */
+}
+
+static bool apply_hevc_opt(struct obs_option *opt, NV_ENC_CONFIG_HEVC *nv_conf)
+{
+	if (strcmp(opt->name, "level") == 0) {
+		parse_level_opt(opt->value, &nv_conf->level, true); /* value is tripled */
+		return true; /* consumed even if the value was rejected */
+	}
+
+	APPLY_INT_OPT(tier, uint32_t)
+	APPLY_INT_OPT(idrPeriod, uint32_t)
+	APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE)
+#ifdef NVENC_12_2_OR_LATER
+	APPLY_INT_OPT(tfLevel, NV_ENC_TEMPORAL_FILTER_LEVEL)
+#endif
+
+	APPLY_BIT_OPT(enableFillerDataInsertion, 1)
+
+	return false; /* not an HEVC option handled here */
+}
+
+static bool apply_av1_opt(struct obs_option *opt, NV_ENC_CONFIG_AV1 *nv_conf)
+{
+	APPLY_INT_OPT(level, uint32_t) /* raw integer; parse_level_opt is not used */
+	APPLY_INT_OPT(tier, uint32_t)
+	APPLY_INT_OPT(numTileColumns, uint32_t)
+	APPLY_INT_OPT(numTileRows, uint32_t)
+	APPLY_INT_OPT(idrPeriod, uint32_t)
+	APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE)
+
+	APPLY_BIT_OPT(enableBitstreamPadding, 1)
+
+	return false; /* not an AV1 option handled here */
+}
+
+/* Forwards `opt` to the option table of the active codec, passing the
+ * matching member of the codec config union. Returns that table's result,
+ * or false for a codec value that has no table. */
+static bool apply_codec_opt(enum codec_type codec, struct obs_option *opt,
+			    NV_ENC_CODEC_CONFIG *enc_config)
+{
+	switch (codec) {
+	case CODEC_H264:
+		return apply_h264_opt(opt, &enc_config->h264Config);
+	case CODEC_HEVC:
+		return apply_hevc_opt(opt, &enc_config->hevcConfig);
+	case CODEC_AV1:
+		return apply_av1_opt(opt, &enc_config->av1Config);
+	}
+
+	return false;
+}
+
+/* Applies every parsed custom option to the encoder configuration.
+ *
+ * Each option is tried against the rate-control table, then the general
+ * config table, then the table of the active codec; the first match wins.
+ * "lookaheadDepth" and "keyint" are skipped here, and any option that no
+ * table accepts is reported with a warning. */
+void apply_user_args(struct nvenc_data *enc)
+{
+	const size_t count = enc->props.opts.count;
+
+	for (size_t idx = 0; idx < count; idx++) {
+		struct obs_option *opt = &enc->props.opts.options[idx];
+
+		/* Special options handled elsewhere */
+		const bool handled_elsewhere =
+			!strcmp(opt->name, "lookaheadDepth") ||
+			!strcmp(opt->name, "keyint");
+		if (handled_elsewhere)
+			continue;
+
+		/* Short-circuit evaluation keeps the lookup order. */
+		const bool applied =
+			apply_rc_opt(opt, &enc->config.rcParams) ||
+			apply_conf_opt(opt, &enc->config) ||
+			apply_codec_opt(enc->codec, opt,
+					&enc->config.encodeCodecConfig);
+
+		if (!applied)
+			warn("Unknown custom option: \"%s\"", opt->name);
+	}
+}
+
+/* Finds the first custom option called `name` and stores its value, parsed
+ * as a base-10 integer, in `*val`. Returns true when such an option exists;
+ * otherwise returns false and leaves `*val` unchanged. */
+bool get_user_arg_int(struct nvenc_data *enc, const char *name, int *val)
+{
+	struct obs_option *opt = enc->props.opts.options;
+
+	for (size_t left = enc->props.opts.count; left > 0; left--, opt++) {
+		if (strcmp(opt->name, name) == 0) {
+			*val = strtol(opt->value, NULL, 10);
+			return true;
+		}
+	}
+
+	return false;
+}

+ 324 - 0
plugins/obs-nvenc/nvenc-properties.c

@@ -0,0 +1,324 @@
+#include "nvenc-internal.h"
+
+/* Copies the encoder settings from `settings` into `props`.
+ *
+ * String members point into the settings object, so a reference to it is
+ * taken and stored in props->data. The custom option string is only read
+ * and parsed when the user has set "opts"; otherwise opts_str and opts are
+ * left as they were. */
+void nvenc_properties_read(struct nvenc_properties *props, obs_data_t *settings)
+{
+	/* Retain settings object until destroyed since we use its strings. */
+	obs_data_addref(settings);
+	props->data = settings;
+
+	/* Integers */
+	props->bitrate = obs_data_get_int(settings, "bitrate");
+	props->max_bitrate = obs_data_get_int(settings, "max_bitrate");
+	props->keyint_sec = obs_data_get_int(settings, "keyint_sec");
+	props->cqp = obs_data_get_int(settings, "cqp");
+	props->device = obs_data_get_int(settings, "device");
+	props->bf = obs_data_get_int(settings, "bf");
+	props->bframe_ref_mode = obs_data_get_int(settings, "bframe_ref_mode");
+	props->split_encode = obs_data_get_int(settings, "split_encode");
+	props->target_quality = obs_data_get_int(settings, "target_quality");
+
+	/* Strings */
+	props->rate_control = obs_data_get_string(settings, "rate_control");
+	props->preset = obs_data_get_string(settings, "preset");
+	props->profile = obs_data_get_string(settings, "profile");
+	props->tune = obs_data_get_string(settings, "tune");
+	props->multipass = obs_data_get_string(settings, "multipass");
+
+	/* Booleans */
+	props->adaptive_quantization =
+		obs_data_get_bool(settings, "adaptive_quantization");
+	props->lookahead = obs_data_get_bool(settings, "lookahead");
+	props->disable_scenecut =
+		obs_data_get_bool(settings, "disable_scenecut");
+	props->repeat_headers = obs_data_get_bool(settings, "repeat_headers");
+	props->force_cuda_tex = obs_data_get_bool(settings, "force_cuda_tex");
+
+	/* Custom options, only when explicitly provided */
+	if (!obs_data_has_user_value(settings, "opts"))
+		return;
+
+	props->opts_str = obs_data_get_string(settings, "opts");
+	props->opts = obs_parse_options(props->opts_str);
+}
+
+/* Installs the default settings shared by all three codecs.
+ *
+ * Capability-dependent defaults: "bf" is 2 when the codec reports B-frame
+ * support and 0 otherwise, and "lookahead" follows the lookahead capability.
+ * The default profile is "high" for H.264 and "main" for the other codecs. */
+static void nvenc_defaults_base(enum codec_type codec, obs_data_t *settings)
+{
+	struct encoder_caps *caps = get_encoder_caps(codec);
+
+	obs_data_set_default_int(settings, "bitrate", 10000);
+	obs_data_set_default_int(settings, "max_bitrate", 10000);
+	obs_data_set_default_int(settings, "target_quality", 20);
+	obs_data_set_default_int(settings, "keyint_sec", 0);
+	obs_data_set_default_int(settings, "cqp", 20);
+	obs_data_set_default_int(settings, "device", -1);
+	obs_data_set_default_int(settings, "bf", caps->bframes > 0 ? 2 : 0);
+
+	/* Must match the value strings of the "rate_control" list property
+	 * exactly: the list entries and the visibility callback use the
+	 * upper-case spelling and compare case-sensitively. */
+	obs_data_set_default_string(settings, "rate_control", "CBR");
+	obs_data_set_default_string(settings, "preset", "p5");
+	obs_data_set_default_string(settings, "multipass", "qres");
+	obs_data_set_default_string(settings, "tune", "hq");
+	obs_data_set_default_string(settings, "profile",
+				    codec != CODEC_H264 ? "main" : "high");
+
+	obs_data_set_default_bool(settings, "adaptive_quantization", true);
+	obs_data_set_default_bool(settings, "lookahead", caps->lookahead);
+
+	/* Hidden options */
+	obs_data_set_default_bool(settings, "repeat_headers", false);
+	obs_data_set_default_bool(settings, "force_cuda_tex", false);
+	obs_data_set_default_bool(settings, "disable_scenecut", false);
+}
+
+void h264_nvenc_defaults(obs_data_t *settings)
+{
+	nvenc_defaults_base(CODEC_H264, settings);
+}
+
+#ifdef ENABLE_HEVC
+void hevc_nvenc_defaults(obs_data_t *settings)
+{
+	nvenc_defaults_base(CODEC_HEVC, settings);
+}
+#endif
+
+void av1_nvenc_defaults(obs_data_t *settings)
+{
+	nvenc_defaults_base(CODEC_AV1, settings);
+}
+
+/* Modified-callback of the "rate_control" list: shows or hides the dependent
+ * properties according to the selected mode. The mode string is compared
+ * case-sensitively. Always returns true. */
+static bool rate_control_modified(obs_properties_t *ppts, obs_property_t *p,
+				  obs_data_t *settings)
+{
+	const char *mode = obs_data_get_string(settings, "rate_control");
+	const bool is_cqp = !strcmp(mode, "CQP");
+	const bool is_vbr = !strcmp(mode, "VBR");
+	const bool is_cqvbr = !strcmp(mode, "CQVBR");
+	const bool is_lossless = !strcmp(mode, "lossless");
+	const bool uses_bitrate = !(is_cqp || is_lossless || is_cqvbr);
+
+	UNUSED_PARAMETER(p);
+
+	obs_property_set_visible(obs_properties_get(ppts, "bitrate"),
+				 uses_bitrate);
+	obs_property_set_visible(obs_properties_get(ppts, "max_bitrate"),
+				 is_vbr || is_cqvbr);
+	obs_property_set_visible(obs_properties_get(ppts, "target_quality"),
+				 is_cqvbr);
+	obs_property_set_visible(obs_properties_get(ppts, "cqp"), is_cqp);
+
+	/* Hidden in lossless mode only. */
+	obs_property_set_visible(obs_properties_get(ppts, "preset"),
+				 !is_lossless);
+	obs_property_set_visible(obs_properties_get(ppts, "tune"),
+				 !is_lossless);
+	obs_property_set_visible(
+		obs_properties_get(ppts, "adaptive_quantization"),
+		!is_lossless);
+
+	return true;
+}
+
+/* Builds the property list for one codec.
+ *
+ * Several entries depend on the encoder capabilities of `codec`: the
+ * "lossless" rate control, the "main10" HEVC profile, the B-frame count and
+ * reference-mode properties and the split-encode list. The GPU selector is
+ * only added when more than one encoder device is present. Returns a newly
+ * created obs_properties_t. */
+obs_properties_t *nvenc_properties_internal(enum codec_type codec)
+{
+	obs_properties_t *props = obs_properties_create();
+	obs_property_t *p;
+
+	struct encoder_caps *caps = get_encoder_caps(codec);
+
+	p = obs_properties_add_list(props, "rate_control",
+				    obs_module_text("RateControl"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+	obs_property_list_add_string(p, obs_module_text("CBR"), "CBR");
+	obs_property_list_add_string(p, obs_module_text("CQP"), "CQP");
+	obs_property_list_add_string(p, obs_module_text("VBR"), "VBR");
+	obs_property_list_add_string(p, obs_module_text("CQVBR"), "CQVBR");
+	if (caps->lossless) {
+		obs_property_list_add_string(p, obs_module_text("Lossless"),
+					     "lossless");
+	}
+
+	obs_property_set_modified_callback(p, rate_control_modified);
+
+	p = obs_properties_add_int(props, "bitrate", obs_module_text("Bitrate"),
+				   50, UINT32_MAX / 1000, 50);
+	obs_property_int_set_suffix(p, " Kbps");
+
+	obs_properties_add_int(props, "target_quality",
+			       obs_module_text("TargetQuality"), 1, 51, 1);
+
+	p = obs_properties_add_int(props, "max_bitrate",
+				   obs_module_text("MaxBitrate"), 0,
+				   UINT32_MAX / 1000, 50);
+	obs_property_int_set_suffix(p, " Kbps");
+
+	/* AV1 uses 0-255 instead of 0-51 for QP, and most implementations just
+	 * multiply the value by 4 to keep the range smaller. */
+	obs_properties_add_int(props, "cqp", obs_module_text("CQP"), 1,
+			       codec == CODEC_AV1 ? 63 : 51, 1);
+
+	p = obs_properties_add_int(props, "keyint_sec",
+				   obs_module_text("KeyframeIntervalSec"), 0,
+				   10, 1);
+	obs_property_int_set_suffix(p, " s");
+
+	p = obs_properties_add_list(props, "preset", obs_module_text("Preset"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+#define add_preset(val) \
+	obs_property_list_add_string(p, obs_module_text("Preset." val), val)
+
+	add_preset("p1");
+	add_preset("p2");
+	add_preset("p3");
+	add_preset("p4");
+	add_preset("p5");
+	add_preset("p6");
+	add_preset("p7");
+#undef add_preset
+
+	p = obs_properties_add_list(props, "tune", obs_module_text("Tuning"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+#define add_tune(val) \
+	obs_property_list_add_string(p, obs_module_text("Tuning." val), val)
+#ifdef NVENC_12_2_OR_LATER
+	if (codec == CODEC_HEVC)
+		add_tune("uhq");
+#endif
+	add_tune("hq");
+	add_tune("ll");
+	add_tune("ull");
+#undef add_tune
+
+	p = obs_properties_add_list(props, "multipass",
+				    obs_module_text("Multipass"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+#define add_multipass(val) \
+	obs_property_list_add_string(p, obs_module_text("Multipass." val), val)
+	add_multipass("disabled");
+	add_multipass("qres");
+	add_multipass("fullres");
+#undef add_multipass
+
+	p = obs_properties_add_list(props, "profile",
+				    obs_module_text("Profile"),
+				    OBS_COMBO_TYPE_LIST,
+				    OBS_COMBO_FORMAT_STRING);
+
+#define add_profile(val) obs_property_list_add_string(p, val, val)
+	if (codec == CODEC_HEVC) {
+		if (caps->ten_bit)
+			add_profile("main10");
+		add_profile("main");
+	} else if (codec == CODEC_AV1) {
+		add_profile("main");
+	} else {
+		add_profile("high");
+		add_profile("main");
+		add_profile("baseline");
+	}
+#undef add_profile
+
+	p = obs_properties_add_bool(props, "lookahead",
+				    obs_module_text("LookAhead"));
+	obs_property_set_long_description(p,
+					  obs_module_text("LookAhead.ToolTip"));
+
+	p = obs_properties_add_bool(props, "adaptive_quantization",
+				    obs_module_text("AdaptiveQuantization"));
+	obs_property_set_long_description(
+		p, obs_module_text("AdaptiveQuantization.ToolTip"));
+
+	/* The lower bound -1 is the default value of "device". The upper
+	 * bound is the last device index rather than the device count, which
+	 * would allow selecting an index one past the end.
+	 * NOTE(review): assumes device indices are zero-based. */
+	const int num_devices = num_encoder_devices();
+	if (num_devices > 1) {
+		obs_properties_add_int(props, "device", obs_module_text("GPU"),
+				       -1, num_devices - 1, 1);
+	}
+
+	if (caps->bframes > 0) {
+		obs_properties_add_int(props, "bf", obs_module_text("BFrames"),
+				       0, caps->bframes, 1);
+	}
+
+	/* H.264 supports this, but seems to cause issues with some decoders,
+	 * so restrict it to the custom options field for now. */
+	if (caps->bref_modes && codec != CODEC_H264) {
+		p = obs_properties_add_list(props, "bframe_ref_mode",
+					    obs_module_text("BFrameRefMode"),
+					    OBS_COMBO_TYPE_LIST,
+					    OBS_COMBO_FORMAT_INT);
+
+		obs_property_list_add_int(
+			p, obs_module_text("BframeRefMode.Disabled"),
+			NV_ENC_BFRAME_REF_MODE_DISABLED);
+
+		if (caps->bref_modes & 1) {
+			obs_property_list_add_int(
+				p, obs_module_text("BframeRefMode.Each"),
+				NV_ENC_BFRAME_REF_MODE_EACH);
+		}
+		if (caps->bref_modes & 2) {
+			obs_property_list_add_int(
+				p, obs_module_text("BframeRefMode.Middle"),
+				NV_ENC_BFRAME_REF_MODE_MIDDLE);
+		}
+	}
+
+#ifdef NVENC_12_1_OR_LATER
+	/* Some older GPUs such as the 1080 Ti have 2 NVENC chips, but do not
+	 * support split encoding. Therefore, we check for AV1 support here to
+	 * make sure this option is only presented on 40-series and later. */
+	if (is_codec_supported(CODEC_AV1) && caps->engines > 1 &&
+	    !has_broken_split_encoding() &&
+	    (codec == CODEC_HEVC || codec == CODEC_AV1)) {
+		p = obs_properties_add_list(props, "split_encode",
+					    obs_module_text("SplitEncode"),
+					    OBS_COMBO_TYPE_LIST,
+					    OBS_COMBO_FORMAT_INT);
+
+		obs_property_list_add_int(p,
+					  obs_module_text("SplitEncode.Auto"),
+					  NV_ENC_SPLIT_AUTO_MODE);
+		obs_property_list_add_int(
+			p, obs_module_text("SplitEncode.Disabled"),
+			NV_ENC_SPLIT_DISABLE_MODE);
+		obs_property_list_add_int(
+			p, obs_module_text("SplitEncode.Enabled"),
+			NV_ENC_SPLIT_TWO_FORCED_MODE);
+		if (caps->engines > 2) {
+			obs_property_list_add_int(
+				p, obs_module_text("SplitEncode.ThreeWay"),
+				NV_ENC_SPLIT_THREE_FORCED_MODE);
+		}
+	}
+#endif
+
+	p = obs_properties_add_text(props, "opts", obs_module_text("Opts"),
+				    OBS_TEXT_DEFAULT);
+	obs_property_set_long_description(p, obs_module_text("Opts.TT"));
+
+	/* Invisible properties */
+	p = obs_properties_add_bool(props, "repeat_headers", "repeat_headers");
+	obs_property_set_visible(p, false);
+	p = obs_properties_add_bool(props, "force_cuda_tex", "force_cuda_tex");
+	obs_property_set_visible(p, false);
+	p = obs_properties_add_bool(props, "disable_scenecut",
+				    "disable_scenecut");
+	obs_property_set_visible(p, false);
+
+	return props;
+}
+
+obs_properties_t *h264_nvenc_properties(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+	return nvenc_properties_internal(CODEC_H264);
+}
+
+#ifdef ENABLE_HEVC
+obs_properties_t *hevc_nvenc_properties(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+	return nvenc_properties_internal(CODEC_HEVC);
+}
+#endif
+
+obs_properties_t *av1_nvenc_properties(void *unused)
+{
+	UNUSED_PARAMETER(unused);
+	return nvenc_properties_internal(CODEC_AV1);
+}

+ 1444 - 0
plugins/obs-nvenc/nvenc.c

@@ -0,0 +1,1444 @@
+#include "nvenc-internal.h"
+
+#include <util/darray.h>
+#include <util/dstr.h>
+
+/* ========================================================================= */
+
+#define EXTRA_BUFFERS 5 /* headroom added to the buffer count when lookahead is enabled */
+/* NOTE(review): min()/max() are only defined here for non-Windows builds;
+ * presumably the Windows headers already supply them -- confirm. Both macros
+ * may evaluate an argument more than once, so avoid arguments with side
+ * effects. */
+#ifndef _WIN32
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+/* ------------------------------------------------------------------------- */
+/* Bitstream Buffer                                                          */
+
+/* Creates an output bitstream buffer on the encoder session and stores its
+ * handle in bs->ptr. On failure bs is left untouched and false is returned. */
+static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs)
+{
+	NV_ENC_CREATE_BITSTREAM_BUFFER buf = {
+		NV_ENC_CREATE_BITSTREAM_BUFFER_VER};
+	bool created = true;
+
+	if (NV_FAILED(nv.nvEncCreateBitstreamBuffer(enc->session, &buf)))
+		created = false;
+	else
+		bs->ptr = buf.bitstreamBuffer;
+
+	return created;
+}
+
+/* Destroys the bitstream buffer held by bs, if one was ever created. */
+static void nv_bitstream_free(struct nvenc_data *enc, struct nv_bitstream *bs)
+{
+	if (!bs->ptr)
+		return;
+
+	nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Implementation                                                            */
+
+/* Returns the display name of the H.264 NVENC encoder. */
+static const char *h264_nvenc_get_name(void *type_data)
+{
+	const char *const name = "NVIDIA NVENC H.264";
+
+	UNUSED_PARAMETER(type_data);
+	return name;
+}
+
+/* Returns the display name of the H.264 NVENC fallback encoder. */
+static const char *h264_nvenc_soft_get_name(void *type_data)
+{
+	const char *const name = "NVIDIA NVENC H.264 (Fallback)";
+
+	UNUSED_PARAMETER(type_data);
+	return name;
+}
+
+#ifdef ENABLE_HEVC
+/* Returns the display name of the HEVC NVENC encoder. */
+static const char *hevc_nvenc_get_name(void *type_data)
+{
+	const char *const name = "NVIDIA NVENC HEVC";
+
+	UNUSED_PARAMETER(type_data);
+	return name;
+}
+
+/* Returns the display name of the HEVC NVENC fallback encoder. */
+static const char *hevc_nvenc_soft_get_name(void *type_data)
+{
+	const char *const name = "NVIDIA NVENC HEVC (Fallback)";
+
+	UNUSED_PARAMETER(type_data);
+	return name;
+}
+#endif
+
+/* Returns the display name of the AV1 NVENC encoder. */
+static const char *av1_nvenc_get_name(void *type_data)
+{
+	const char *const name = "NVIDIA NVENC AV1";
+
+	UNUSED_PARAMETER(type_data);
+	return name;
+}
+
+/* Returns the display name of the AV1 NVENC fallback encoder. */
+static const char *av1_nvenc_soft_get_name(void *type_data)
+{
+	const char *const name = "NVIDIA NVENC AV1 (Fallback)";
+
+	UNUSED_PARAMETER(type_data);
+	return name;
+}
+
+/* Queries a single NVENC capability value for the encoder's codec.
+ *
+ * Returns 0 when no session has been opened yet. The result is also
+ * pre-initialized to 0: the status of the query is not checked here, so
+ * without the initializer a query that fails without writing its output
+ * would hand an indeterminate value back to the caller. */
+static inline int nv_get_cap(struct nvenc_data *enc, NV_ENC_CAPS cap)
+{
+	if (!enc->session)
+		return 0;
+
+	NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
+	int v = 0;
+
+	param.capsToQuery = cap;
+	nv.nvEncGetEncodeCaps(enc->session, enc->codec_guid, &param, &v);
+	return v;
+}
+
+/* Applies a bitrate change to a running encoder.
+ *
+ * Reports success without doing anything unless the encoder was flagged as
+ * able to change its bitrate. Otherwise "bitrate" and "max_bitrate" are
+ * re-read from the settings, the rate-control config is updated and NVENC is
+ * asked to reconfigure with an encoder reset and a forced IDR frame. */
+static bool nvenc_update(void *data, obs_data_t *settings)
+{
+	struct nvenc_data *enc = data;
+
+	/* Only support reconfiguration of CBR bitrate */
+	if (!enc->can_change_bitrate)
+		return true;
+
+	enc->props.bitrate = obs_data_get_int(settings, "bitrate");
+	enc->props.max_bitrate = obs_data_get_int(settings, "max_bitrate");
+
+	const uint32_t avg_bps = (uint32_t)enc->props.bitrate * 1000;
+	const uint32_t peak_bps = (uint32_t)enc->props.max_bitrate * 1000;
+
+	enc->config.rcParams.averageBitRate = avg_bps;
+
+	/* A separate maximum is only used while in VBR mode. */
+	if (enc->config.rcParams.rateControlMode == NV_ENC_PARAMS_RC_VBR)
+		enc->config.rcParams.maxBitRate = peak_bps;
+	else
+		enc->config.rcParams.maxBitRate = avg_bps;
+
+	NV_ENC_RECONFIGURE_PARAMS params = {0};
+	params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
+	params.resetEncoder = 1;
+	params.forceIDR = 1;
+	params.reInitEncodeParams = enc->params;
+
+	if (NV_FAILED(nv.nvEncReconfigureEncoder(enc->session, &params)))
+		return false;
+
+	return true;
+}
+
+/* Opens the NVENC encode session and stores it in enc->session.
+ *
+ * The session is bound to the CUDA context, except on Windows when the
+ * texture path is in use, where the DirectX device is used instead. */
+static bool init_session(struct nvenc_data *enc)
+{
+	NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {
+		NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
+
+	params.apiVersion = NVENCAPI_VERSION;
+	params.device = enc->cu_ctx;
+	params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+#ifdef _WIN32
+	if (!enc->non_texture) {
+		params.device = enc->device;
+		params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
+	}
+#endif
+
+	if (NV_FAILED(nv.nvEncOpenEncodeSessionEx(&params, &enc->session)))
+		return false;
+
+	return true;
+}
+
+/* Resets enc->params and fills it in for the given preset, tuning,
+ * resolution and frame rate. The display aspect ratio is set to the encode
+ * size, and the encode config pointer is aimed at enc->config. */
+static void initialize_params(struct nvenc_data *enc, const GUID *nv_preset,
+			      NV_ENC_TUNING_INFO nv_tuning, uint32_t width,
+			      uint32_t height, uint32_t fps_num,
+			      uint32_t fps_den)
+{
+	NV_ENC_INITIALIZE_PARAMS *init = &enc->params;
+
+	memset(init, 0, sizeof(*init));
+	init->version = NV_ENC_INITIALIZE_PARAMS_VER;
+
+	/* codec / preset / tuning */
+	init->encodeGUID = enc->codec_guid;
+	init->presetGUID = *nv_preset;
+	init->tuningInfo = nv_tuning;
+	init->encodeConfig = &enc->config;
+
+	/* geometry */
+	init->encodeWidth = width;
+	init->encodeHeight = height;
+	init->darWidth = width;
+	init->darHeight = height;
+
+	/* timing */
+	init->frameRateNum = fps_num;
+	init->frameRateDen = fps_den;
+
+	/* operating mode */
+	init->enableEncodeAsync = 0;
+	init->enablePTD = 1;
+#ifdef NVENC_12_1_OR_LATER
+	init->splitEncodeMode =
+		(NV_ENC_SPLIT_ENCODE_MODE)enc->props.split_encode;
+#endif
+}
+
+/* Maps a preset name ("p1".."p7", compared with astrcmpi) to its NVENC
+ * preset GUID. Any other value, including "p5", yields the P5 preset. */
+static inline GUID get_nv_preset(const char *preset2)
+{
+	if (astrcmpi(preset2, "p1") == 0)
+		return NV_ENC_PRESET_P1_GUID;
+	if (astrcmpi(preset2, "p2") == 0)
+		return NV_ENC_PRESET_P2_GUID;
+	if (astrcmpi(preset2, "p3") == 0)
+		return NV_ENC_PRESET_P3_GUID;
+	if (astrcmpi(preset2, "p4") == 0)
+		return NV_ENC_PRESET_P4_GUID;
+	if (astrcmpi(preset2, "p6") == 0)
+		return NV_ENC_PRESET_P6_GUID;
+	if (astrcmpi(preset2, "p7") == 0)
+		return NV_ENC_PRESET_P7_GUID;
+
+	return NV_ENC_PRESET_P5_GUID;
+}
+
+/* Maps a tuning name ("ll", "ull" and, when built against NVENC 12.2 or
+ * later, "uhq") to the NVENC tuning info value. Anything else selects high
+ * quality. */
+static inline NV_ENC_TUNING_INFO get_nv_tuning(const char *tuning)
+{
+	if (astrcmpi(tuning, "ll") == 0)
+		return NV_ENC_TUNING_INFO_LOW_LATENCY;
+	if (astrcmpi(tuning, "ull") == 0)
+		return NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
+#ifdef NVENC_12_2_OR_LATER
+	if (astrcmpi(tuning, "uhq") == 0)
+		return NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY;
+#endif
+
+	return NV_ENC_TUNING_INFO_HIGH_QUALITY;
+}
+
+/* Maps a multipass name ("qres" or "fullres") to the NVENC multi-pass mode.
+ * Anything else disables multi-pass. */
+static inline NV_ENC_MULTI_PASS get_nv_multipass(const char *multipass)
+{
+	if (astrcmpi(multipass, "qres") == 0)
+		return NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
+	if (astrcmpi(multipass, "fullres") == 0)
+		return NV_ENC_TWO_PASS_FULL_RESOLUTION;
+
+	return NV_ENC_MULTI_PASS_DISABLED;
+}
+
+/* Tells whether the encoder input is 10-bit: the non-texture path checks the
+ * chosen input format for P010, the texture path asks OBS whether P010
+ * textures are active. */
+static bool is_10_bit(const struct nvenc_data *enc)
+{
+	if (enc->non_texture)
+		return enc->in_format == VIDEO_FORMAT_P010;
+
+	return obs_p010_tex_active();
+}
+
+/* Codec-independent encoder configuration.
+ *
+ * Fetches the preset's default config, then sets GOP length, B-frame
+ * interval, lookahead, buffer count / output delay, adaptive quantization
+ * and rate control in enc->config, fills enc->params, and logs the resulting
+ * settings. The encoder itself is not initialized here; the codec-specific
+ * callers do that after applying their own settings.
+ *
+ * Returns false if the preset configuration cannot be retrieved. */
+static bool init_encoder_base(struct nvenc_data *enc, obs_data_t *settings)
+{
+	UNUSED_PARAMETER(settings);
+
+	int bitrate = (int)enc->props.bitrate;
+	int max_bitrate = (int)enc->props.max_bitrate;
+	int rc_lookahead = 0;
+
+	bool cqvbr = astrcmpi(enc->props.rate_control, "CQVBR") == 0;
+	bool vbr = cqvbr || astrcmpi(enc->props.rate_control, "VBR") == 0;
+	bool lossless = strcmp(enc->props.rate_control, "lossless") == 0;
+
+	NVENCSTATUS err;
+
+	video_t *video = obs_encoder_video(enc->encoder);
+	const struct video_output_info *voi = video_output_get_info(video);
+
+	enc->cx = obs_encoder_get_width(enc->encoder);
+	enc->cy = obs_encoder_get_height(enc->encoder);
+
+	/* -------------------------- */
+	/* get preset                 */
+
+	GUID nv_preset = get_nv_preset(enc->props.preset);
+	NV_ENC_TUNING_INFO nv_tuning = get_nv_tuning(enc->props.tune);
+	NV_ENC_MULTI_PASS nv_multipass = get_nv_multipass(enc->props.multipass);
+
+	/* Lossless overrides tuning and disables multipass, AQ and CQP. */
+	if (lossless) {
+		nv_tuning = NV_ENC_TUNING_INFO_LOSSLESS;
+		nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
+		enc->props.adaptive_quantization = false;
+		enc->props.cqp = 0;
+	}
+
+	/* -------------------------- */
+	/* get preset default config  */
+
+	NV_ENC_PRESET_CONFIG preset_config = {0};
+	preset_config.version = NV_ENC_PRESET_CONFIG_VER;
+	preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
+
+	err = nv.nvEncGetEncodePresetConfigEx(enc->session, enc->codec_guid,
+					      nv_preset, nv_tuning,
+					      &preset_config);
+	if (nv_failed(enc->encoder, err, __FUNCTION__,
+		      "nvEncGetEncodePresetConfigEx")) {
+		return false;
+	}
+
+	/* -------------------------- */
+	/* main configuration         */
+
+	enc->config = preset_config.presetCfg;
+
+	int keyint = (int)enc->props.keyint_sec * voi->fps_num / voi->fps_den;
+	get_user_arg_int(enc, "keyint", &keyint);
+
+	/* A non-positive keyframe interval falls back to 250 frames. */
+	uint32_t gop_size = keyint > 0 ? keyint : 250;
+
+	NV_ENC_CONFIG *config = &enc->config;
+
+	initialize_params(enc, &nv_preset, nv_tuning, voi->width, voi->height,
+			  voi->fps_num, voi->fps_den);
+
+	config->gopLength = gop_size;
+	config->frameIntervalP = gop_size == 1 ? 0 : (int32_t)enc->props.bf + 1;
+
+	/* lookahead */
+
+	const bool use_profile_lookahead = config->rcParams.enableLookahead;
+	bool lookahead = nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOOKAHEAD) &&
+			 (enc->props.lookahead || use_profile_lookahead);
+
+	if (lookahead) {
+		rc_lookahead = use_profile_lookahead
+				       ? config->rcParams.lookaheadDepth
+				       : 8;
+
+		/* Due to the additional calculations required to handle lookahead,
+		 * get the user override here (if any). */
+		get_user_arg_int(enc, "lookaheadDepth", &rc_lookahead);
+	}
+
+	/* Buffer count: at least 4, grown for lookahead, capped at 64. */
+	int buf_count = max(4, config->frameIntervalP * 2 * 2);
+	if (lookahead) {
+		buf_count =
+			max(buf_count, config->frameIntervalP + rc_lookahead +
+					       EXTRA_BUFFERS);
+	}
+
+	buf_count = min(64, buf_count);
+	enc->buf_count = buf_count;
+
+	const int output_delay = buf_count - 1;
+	enc->output_delay = output_delay;
+
+	if (lookahead) {
+		/* Limit the depth to what the output delay leaves room for;
+		 * lookahead is dropped if there is no room at all. */
+		const int lkd_bound = output_delay - config->frameIntervalP - 4;
+		if (lkd_bound >= 0) {
+			config->rcParams.enableLookahead = 1;
+			config->rcParams.lookaheadDepth =
+				min(rc_lookahead, lkd_bound);
+			config->rcParams.disableIadapt = 0;
+			config->rcParams.disableBadapt = 0;
+		} else {
+			lookahead = false;
+		}
+	}
+
+	enc->config.rcParams.disableIadapt = enc->props.disable_scenecut;
+
+	/* psycho aq */
+
+	if (enc->props.adaptive_quantization) {
+		config->rcParams.enableAQ = 1;
+		config->rcParams.aqStrength = 8;
+		config->rcParams.enableTemporalAQ =
+			nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ);
+	}
+
+	/* -------------------------- */
+	/* rate control               */
+
+	enc->can_change_bitrate =
+		nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
+
+	config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
+	config->rcParams.averageBitRate = bitrate * 1000;
+	config->rcParams.maxBitRate = vbr ? max_bitrate * 1000 : bitrate * 1000;
+	config->rcParams.vbvBufferSize = bitrate * 1000;
+
+	if (strcmp(enc->props.rate_control, "CQP") == 0 || lossless) {
+		/* AV1 uses a QP scale four times that of the other codecs here. */
+		int cqp_val = enc->codec == CODEC_AV1 ? (int)enc->props.cqp * 4
+						      : (int)enc->props.cqp;
+
+		config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+		config->rcParams.constQP.qpInterP = cqp_val;
+		config->rcParams.constQP.qpInterB = cqp_val;
+		config->rcParams.constQP.qpIntra = cqp_val;
+		enc->can_change_bitrate = false;
+
+		/* Only affects the values logged below. */
+		bitrate = 0;
+		max_bitrate = 0;
+
+	} else if (!vbr) { /* CBR by default */
+		config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+	} else if (cqvbr) {
+		config->rcParams.targetQuality =
+			(uint8_t)enc->props.target_quality;
+		config->rcParams.averageBitRate = 0;
+		config->rcParams.vbvBufferSize = 0;
+	}
+
+	config->rcParams.multiPass = nv_multipass;
+	config->rcParams.qpMapMode = NV_ENC_QP_MAP_DELTA;
+
+	/* -------------------------- */
+	/* initialize                 */
+
+	/* The property values are cast to long long and printed with %lld so
+	 * the format matches the argument width on every platform (long is
+	 * only 32 bits wide on Windows). */
+	info("settings:\n"
+	     "\tcodec:        %s\n"
+	     "\trate_control: %s\n"
+	     "\tbitrate:      %d\n"
+	     "\tmax_bitrate:  %d\n"
+	     "\tcq/cqp:       %lld\n"
+	     "\tkeyint:       %d\n"
+	     "\tpreset:       %s\n"
+	     "\ttuning:       %s\n"
+	     "\tmultipass:    %s\n"
+	     "\tprofile:      %s\n"
+	     "\twidth:        %d\n"
+	     "\theight:       %d\n"
+	     "\tb-frames:     %lld\n"
+	     "\tb-ref-mode:   %lld\n"
+	     "\tlookahead:    %s (%d)\n"
+	     "\taq:           %s\n"
+	     "\tsplit encode: %lld\n"
+	     "\tuser opts:    %s\n",
+	     get_codec_name(enc->codec), enc->props.rate_control, bitrate,
+	     max_bitrate,
+	     (long long)(vbr ? enc->props.target_quality : enc->props.cqp),
+	     gop_size, enc->props.preset, enc->props.tune, enc->props.multipass,
+	     enc->props.profile, enc->cx, enc->cy, (long long)enc->props.bf,
+	     (long long)enc->props.bframe_ref_mode,
+	     lookahead ? "true" : "false", rc_lookahead,
+	     enc->props.adaptive_quantization ? "true" : "false",
+	     (long long)enc->props.split_encode, enc->props.opts_str);
+
+	return true;
+}
+
+/* Applies the H.264-specific settings on top of init_encoder_base(), lets
+ * the user-supplied options override them, and initializes the encoder.
+ * Returns false if the base configuration or the initialization fails. */
+static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings)
+{
+	const bool lossless = strcmp(enc->props.rate_control, "lossless") == 0;
+
+	if (!init_encoder_base(enc, settings))
+		return false;
+
+	NV_ENC_CONFIG *config = &enc->config;
+	NV_ENC_CONFIG_H264 *h264 = &config->encodeCodecConfig.h264Config;
+	NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
+
+	const struct video_output_info *voi =
+		video_output_get_info(obs_encoder_video(enc->encoder));
+
+	/* -------------------------- */
+	/* stream structure           */
+
+	if (enc->props.repeat_headers) {
+		h264->repeatSPSPPS = 1;
+		h264->disableSPSPPS = 0;
+		h264->outputAUD = 1;
+	}
+
+	h264->idrPeriod = config->gopLength;
+
+	h264->sliceMode = 3;
+	h264->sliceModeData = 1;
+
+	h264->useBFramesAsRef =
+		(NV_ENC_BFRAME_REF_MODE)enc->props.bframe_ref_mode;
+
+	/* Enable CBR padding */
+	if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
+		h264->enableFillerDataInsertion = 1;
+
+	/* -------------------------- */
+	/* color signalling           */
+
+	vui->videoSignalTypePresentFlag = 1;
+	vui->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL);
+	vui->colourDescriptionPresentFlag = 1;
+
+	switch (voi->colorspace) {
+	case VIDEO_CS_601:
+		vui->colourPrimaries = 6;
+		vui->transferCharacteristics = 6;
+		vui->colourMatrix = 6;
+		break;
+	case VIDEO_CS_DEFAULT:
+	case VIDEO_CS_709:
+		vui->colourPrimaries = 1;
+		vui->transferCharacteristics = 1;
+		vui->colourMatrix = 1;
+		break;
+	case VIDEO_CS_SRGB:
+		vui->colourPrimaries = 1;
+		vui->transferCharacteristics = 13;
+		vui->colourMatrix = 1;
+		break;
+	default:
+		break;
+	}
+
+	/* -------------------------- */
+	/* SEI / lossless             */
+
+	if (lossless) {
+		h264->qpPrimeYZeroTransformBypassFlag = 1;
+	} else if (strcmp(enc->props.rate_control, "CBR") == 0) { /* CBR */
+		h264->outputBufferingPeriodSEI = 1;
+	}
+
+	h264->outputPictureTimingSEI = 1;
+
+	/* -------------------------- */
+	/* profile                    */
+
+	/* 4:4:4 input forces the High 4:4:4 profile. When lossless is active
+	 * and no explicit main/baseline profile was requested, the profile
+	 * from the preset config is left as is. */
+	if (enc->in_format == VIDEO_FORMAT_I444) {
+		config->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
+		h264->chromaFormatIDC = 3;
+	} else if (astrcmpi(enc->props.profile, "main") == 0) {
+		config->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
+	} else if (astrcmpi(enc->props.profile, "baseline") == 0) {
+		config->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
+	} else if (!lossless) {
+		config->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+	}
+
+	apply_user_args(enc);
+
+	if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params)))
+		return false;
+
+	return true;
+}
+
+/* Applies the HEVC-specific settings on top of init_encoder_base(), lets the
+ * user-supplied options override them, and initializes the encoder.
+ * Returns false if the base configuration or the initialization fails. */
+static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings)
+{
+	if (!init_encoder_base(enc, settings))
+		return false;
+
+	NV_ENC_CONFIG *config = &enc->config;
+	NV_ENC_CONFIG_HEVC *hevc = &config->encodeCodecConfig.hevcConfig;
+	NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;
+
+	const struct video_output_info *voi =
+		video_output_get_info(obs_encoder_video(enc->encoder));
+
+	/* -------------------------- */
+	/* stream structure           */
+
+	if (enc->props.repeat_headers) {
+		hevc->repeatSPSPPS = 1;
+		hevc->disableSPSPPS = 0;
+		hevc->outputAUD = 1;
+	}
+
+	hevc->idrPeriod = config->gopLength;
+
+	hevc->sliceMode = 3;
+	hevc->sliceModeData = 1;
+
+	hevc->useBFramesAsRef =
+		(NV_ENC_BFRAME_REF_MODE)enc->props.bframe_ref_mode;
+
+	/* Enable CBR padding */
+	if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
+		hevc->enableFillerDataInsertion = 1;
+
+	/* -------------------------- */
+	/* color signalling           */
+
+	vui->videoSignalTypePresentFlag = 1;
+	vui->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL);
+	vui->colourDescriptionPresentFlag = 1;
+
+	switch (voi->colorspace) {
+	case VIDEO_CS_601:
+		vui->colourPrimaries = 6;
+		vui->transferCharacteristics = 6;
+		vui->colourMatrix = 6;
+		break;
+	case VIDEO_CS_DEFAULT:
+	case VIDEO_CS_709:
+		vui->colourPrimaries = 1;
+		vui->transferCharacteristics = 1;
+		vui->colourMatrix = 1;
+		break;
+	case VIDEO_CS_SRGB:
+		vui->colourPrimaries = 1;
+		vui->transferCharacteristics = 13;
+		vui->colourMatrix = 1;
+		break;
+	case VIDEO_CS_2100_PQ:
+		vui->colourPrimaries = 9;
+		vui->transferCharacteristics = 16;
+		vui->colourMatrix = 9;
+		vui->chromaSampleLocationFlag = 1;
+		vui->chromaSampleLocationTop = 2;
+		vui->chromaSampleLocationBot = 2;
+		break;
+	case VIDEO_CS_2100_HLG:
+		vui->colourPrimaries = 9;
+		vui->transferCharacteristics = 18;
+		vui->colourMatrix = 9;
+		vui->chromaSampleLocationFlag = 1;
+		vui->chromaSampleLocationTop = 2;
+		vui->chromaSampleLocationBot = 2;
+		break;
+	}
+
+	/* -------------------------- */
+	/* SEI                        */
+
+	if (astrcmpi(enc->props.rate_control, "cbr") == 0)
+		hevc->outputBufferingPeriodSEI = 1;
+
+	hevc->outputPictureTimingSEI = 1;
+
+	/* -------------------------- */
+	/* profile                    */
+
+	bool profile_is_10bpc = false;
+
+	if (enc->in_format == VIDEO_FORMAT_I444) {
+		config->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
+		hevc->chromaFormatIDC = 3;
+	} else if (astrcmpi(enc->props.profile, "main10") == 0) {
+		config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+		profile_is_10bpc = true;
+	} else if (is_10_bit(enc)) {
+		/* 10-bit input with a non-main10 profile: override it. */
+		blog(LOG_WARNING, "[obs-nvenc] Forcing main10 for P010");
+		config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+		profile_is_10bpc = true;
+	} else {
+		config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
+	}
+
+	/* Bit depth fields differ between NVENC API versions. */
+#ifdef NVENC_12_2_OR_LATER
+	hevc->inputBitDepth = is_10_bit(enc) ? NV_ENC_BIT_DEPTH_10
+					     : NV_ENC_BIT_DEPTH_8;
+	hevc->outputBitDepth = profile_is_10bpc ? NV_ENC_BIT_DEPTH_10
+						: NV_ENC_BIT_DEPTH_8;
+#else
+	hevc->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0;
+#endif
+
+	apply_user_args(enc);
+
+	if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params)))
+		return false;
+
+	return true;
+}
+
+/* Applies the AV1-specific settings on top of init_encoder_base(), lets the
+ * user-supplied options override them, and initializes the encoder.
+ * Returns false if the base configuration or the initialization fails. */
+static bool init_encoder_av1(struct nvenc_data *enc, obs_data_t *settings)
+{
+	if (!init_encoder_base(enc, settings))
+		return false;
+
+	NV_ENC_CONFIG *config = &enc->config;
+	NV_ENC_CONFIG_AV1 *av1 = &config->encodeCodecConfig.av1Config;
+
+	const struct video_output_info *voi =
+		video_output_get_info(obs_encoder_video(enc->encoder));
+
+	av1->idrPeriod = config->gopLength;
+
+	av1->useBFramesAsRef =
+		(NV_ENC_BFRAME_REF_MODE)enc->props.bframe_ref_mode;
+
+	av1->colorRange = (voi->range == VIDEO_RANGE_FULL);
+
+	/* Enable CBR padding */
+	if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
+		av1->enableBitstreamPadding = 1;
+
+#define PIXELCOUNT_4K (3840 * 2160)
+
+	/* If size is 4K+, set tiles to 2 uniform columns. */
+	if ((voi->width * voi->height) >= PIXELCOUNT_4K)
+		av1->numTileColumns = 2;
+
+	/* -------------------------- */
+	/* color signalling           */
+
+	switch (voi->colorspace) {
+	case VIDEO_CS_601:
+		av1->colorPrimaries = 6;
+		av1->transferCharacteristics = 6;
+		av1->matrixCoefficients = 6;
+		break;
+	case VIDEO_CS_DEFAULT:
+	case VIDEO_CS_709:
+		av1->colorPrimaries = 1;
+		av1->transferCharacteristics = 1;
+		av1->matrixCoefficients = 1;
+		break;
+	case VIDEO_CS_SRGB:
+		av1->colorPrimaries = 1;
+		av1->transferCharacteristics = 13;
+		av1->matrixCoefficients = 1;
+		break;
+	case VIDEO_CS_2100_PQ:
+		av1->colorPrimaries = 9;
+		av1->transferCharacteristics = 16;
+		av1->matrixCoefficients = 9;
+		break;
+	case VIDEO_CS_2100_HLG:
+		av1->colorPrimaries = 9;
+		av1->transferCharacteristics = 18;
+		av1->matrixCoefficients = 9;
+		break;
+	}
+
+	/* -------------------------- */
+	/* profile                    */
+
+	config->profileGUID = NV_ENC_AV1_PROFILE_MAIN_GUID;
+	av1->tier = NV_ENC_TIER_AV1_0;
+
+	av1->level = NV_ENC_LEVEL_AV1_AUTOSELECT;
+	av1->chromaFormatIDC = 1;
+
+	/* Bit depth fields differ between NVENC API versions; input and
+	 * output depth are always kept equal for AV1. */
+#ifdef NVENC_12_2_OR_LATER
+	av1->inputBitDepth = is_10_bit(enc) ? NV_ENC_BIT_DEPTH_10
+					    : NV_ENC_BIT_DEPTH_8;
+	av1->outputBitDepth = av1->inputBitDepth;
+#else
+	av1->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0;
+	av1->inputPixelBitDepthMinus8 = av1->pixelBitDepthMinus8;
+#endif
+	av1->numFwdRefs = 1;
+	av1->numBwdRefs = 1;
+	av1->repeatSeqHdr = 1;
+
+	apply_user_args(enc);
+
+	if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params)))
+		return false;
+
+	return true;
+}
+
+/* Creates enc->buf_count output bitstream buffers and appends them to
+ * enc->bitstreams. Stops and returns false at the first buffer that cannot
+ * be created; buffers appended before that stay in the array. */
+static bool init_bitstreams(struct nvenc_data *enc)
+{
+	uint32_t created = 0;
+
+	da_reserve(enc->bitstreams, enc->buf_count);
+
+	while (created < enc->buf_count) {
+		struct nv_bitstream bitstream;
+
+		if (!nv_bitstream_init(enc, &bitstream))
+			return false;
+
+		da_push_back(enc->bitstreams, &bitstream);
+		created++;
+	}
+
+	return true;
+}
+
+/* Picks the encoder input format for a given video format: the 10-bit
+ * formats map to P010, the RGB formats and I444 map to I444, and everything
+ * else maps to NV12. */
+static enum video_format get_preferred_format(enum video_format format)
+{
+	enum video_format preferred = VIDEO_FORMAT_NV12;
+
+	switch (format) {
+	case VIDEO_FORMAT_I010:
+	case VIDEO_FORMAT_P010:
+		preferred = VIDEO_FORMAT_P010;
+		break;
+	case VIDEO_FORMAT_RGBA:
+	case VIDEO_FORMAT_BGRA:
+	case VIDEO_FORMAT_BGRX:
+	case VIDEO_FORMAT_I444:
+		preferred = VIDEO_FORMAT_I444;
+		break;
+	default:
+		break;
+	}
+
+	return preferred;
+}
+
+static void nvenc_destroy(void *data);
+
+/* Validates the input format against the hardware capabilities and then
+ * dispatches to the codec-specific initializer.
+ *
+ * Fails when 4:4:4 or 10-bit input is requested but unsupported, or when an
+ * HDR colorspace is combined with a video format other than I010/P010. The
+ * codec and encoder parameters are unused; enc->codec and enc->encoder are
+ * read instead. */
+static bool init_encoder(struct nvenc_data *enc, enum codec_type codec,
+			 obs_data_t *settings, obs_encoder_t *encoder)
+{
+	UNUSED_PARAMETER(codec);
+	UNUSED_PARAMETER(encoder);
+
+	const bool support_10bit =
+		nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
+	const bool support_444 =
+		nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
+
+	const struct video_output_info *voi =
+		video_output_get_info(obs_encoder_video(enc->encoder));
+
+	/* Fall back to the output's format if the encoder has no preference. */
+	enum video_format pref_format =
+		obs_encoder_get_preferred_video_format(enc->encoder);
+	if (pref_format == VIDEO_FORMAT_NONE)
+		pref_format = voi->format;
+
+	enc->in_format = get_preferred_format(pref_format);
+
+	if (enc->in_format == VIDEO_FORMAT_I444 && !support_444) {
+		NV_FAIL(obs_module_text("NVENC.444Unsupported"));
+		return false;
+	}
+
+	if (is_10_bit(enc) && !support_10bit) {
+		NV_FAIL(obs_module_text("10bitUnsupported"));
+		return false;
+	}
+
+	const bool output_10bit = voi->format == VIDEO_FORMAT_I010 ||
+				  voi->format == VIDEO_FORMAT_P010;
+	const bool hdr_colorspace = voi->colorspace == VIDEO_CS_2100_PQ ||
+				    voi->colorspace == VIDEO_CS_2100_HLG;
+
+	if (hdr_colorspace && !output_10bit) {
+		NV_FAIL(obs_module_text("8bitUnsupportedHdr"));
+		return false;
+	}
+
+	if (enc->codec == CODEC_HEVC)
+		return init_encoder_hevc(enc, settings);
+	if (enc->codec == CODEC_H264)
+		return init_encoder_h264(enc, settings);
+	if (enc->codec == CODEC_AV1)
+		return init_encoder_av1(enc, settings);
+
+	return false;
+}
+
+/* Allocates and fully initializes an encoder instance.
+ *
+ * Runs, in order: property parsing, NVENC and device (D3D11 or CUDA) setup,
+ * API instance creation, CUDA context, session, encoder, bitstream buffers
+ * and finally the input textures/surfaces. If any step fails, the partially
+ * built instance is torn down with nvenc_destroy() and NULL is returned. */
+static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings,
+				   obs_encoder_t *encoder, bool texture)
+{
+	NV_ENCODE_API_FUNCTION_LIST init = {NV_ENCODE_API_FUNCTION_LIST_VER};
+	struct nvenc_data *enc = bzalloc(sizeof(*enc));
+	bool ok;
+
+	enc->encoder = encoder;
+	enc->codec = codec;
+	enc->first_packet = true;
+	enc->non_texture = !texture;
+
+	nvenc_properties_read(&enc->props, settings);
+
+	switch (enc->codec) {
+	case CODEC_H264:
+		enc->codec_guid = NV_ENC_CODEC_H264_GUID;
+		break;
+	case CODEC_HEVC:
+		enc->codec_guid = NV_ENC_CODEC_HEVC_GUID;
+		break;
+	case CODEC_AV1:
+		enc->codec_guid = NV_ENC_CODEC_AV1_GUID;
+		break;
+	}
+
+	if (!init_nvenc(encoder))
+		goto fail;
+
+	/* Device setup: D3D11 is only used for the texture path on Windows. */
+#ifdef _WIN32
+	if (texture)
+		ok = d3d11_init(enc, settings);
+	else
+		ok = init_cuda(encoder);
+#else
+	ok = init_cuda(encoder);
+#endif
+	if (!ok)
+		goto fail;
+
+	if (NV_FAILED(nv_create_instance(&init)))
+		goto fail;
+
+	if (!cuda_ctx_init(enc, settings, texture))
+		goto fail;
+
+	if (!init_session(enc) ||
+	    !init_encoder(enc, codec, settings, encoder) ||
+	    !init_bitstreams(enc))
+		goto fail;
+
+	/* Input resources: D3D11 textures or CUDA surfaces. */
+#ifdef _WIN32
+	if (texture)
+		ok = d3d11_init_textures(enc);
+	else
+		ok = cuda_init_surfaces(enc);
+#else
+	ok = cuda_init_surfaces(enc);
+#endif
+	if (!ok)
+		goto fail;
+
+	enc->codec = codec;
+
+	return enc;
+
+fail:
+	nvenc_destroy(enc);
+	return NULL;
+}
+
+/* Creates an encoder instance, rerouting a texture encoder to its
+ * "*_soft" counterpart when the texture path cannot be used or when
+ * creation fails. A non-texture encoder has nothing left to fall back to
+ * and yields NULL in that case. */
+static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings,
+			       obs_encoder_t *encoder, bool texture)
+{
+	/* This encoder requires shared textures, this cannot be used on a
+	 * gpu other than the one OBS is currently running on.
+	 *
+	 * 2024 Amendment: On Linux when using CUDA<->OpenGL interop we can
+	 * in fact use shared textures even when using a different GPU, this
+	 * will still copy data through the CPU, but much more efficiently than
+	 * our native non-texture encoder. For now allow this via a hidden
+	 * option as it may cause issues for people.
+	 */
+	const int gpu = (int)obs_data_get_int(settings, "device");
+#ifdef _WIN32
+	const bool force_tex = false;
+#else
+	const bool force_tex = obs_data_get_bool(settings, "force_cuda_tex");
+#endif
+	const char *soft_id = NULL;
+
+	if (texture && gpu != -1 && !force_tex) {
+		blog(LOG_INFO,
+		     "[obs-nvenc] different GPU selected by user, falling back "
+		     "to non-texture encoder");
+		goto reroute;
+	}
+
+	if (obs_encoder_scaling_enabled(encoder)) {
+		const bool gpu_scaling =
+			obs_encoder_gpu_scaling_enabled(encoder);
+
+		if (gpu_scaling)
+			blog(LOG_INFO, "[obs-nvenc] GPU scaling enabled");
+
+		if (!gpu_scaling && texture) {
+			blog(LOG_INFO,
+			     "[obs-nvenc] CPU scaling enabled, falling back to"
+			     " non-texture encoder");
+			goto reroute;
+		}
+	}
+
+	if (texture && !(obs_p010_tex_active() || obs_nv12_tex_active())) {
+		blog(LOG_INFO,
+		     "[obs-nvenc] nv12/p010 not active, falling back to "
+		     "non-texture encoder");
+		goto reroute;
+	}
+
+	struct nvenc_data *enc =
+		nvenc_create_internal(codec, settings, encoder, texture);
+
+	if (enc)
+		return enc;
+
+reroute:
+	if (!texture) {
+		blog(LOG_ERROR,
+		     "Already in non_texture encoder, can't fall back further!");
+		return NULL;
+	}
+
+	switch (codec) {
+	case CODEC_H264:
+		soft_id = "obs_nvenc_h264_soft";
+		break;
+	case CODEC_HEVC:
+		soft_id = "obs_nvenc_hevc_soft";
+		break;
+	case CODEC_AV1:
+		soft_id = "obs_nvenc_av1_soft";
+		break;
+	}
+
+	if (!soft_id)
+		return NULL;
+
+	return obs_encoder_create_rerouted(encoder, soft_id);
+}
+
+/* Creates the H.264 encoder using the texture path. */
+static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	const bool texture = true;
+
+	return nvenc_create_base(CODEC_H264, settings, encoder, texture);
+}
+
+#ifdef ENABLE_HEVC
+/* Creates the HEVC encoder using the texture path. */
+static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	const bool texture = true;
+
+	return nvenc_create_base(CODEC_HEVC, settings, encoder, texture);
+}
+#endif
+
+/* Creates the AV1 encoder using the texture path. */
+static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	const bool texture = true;
+
+	return nvenc_create_base(CODEC_AV1, settings, encoder, texture);
+}
+
+/* Creates the H.264 encoder using the non-texture path. */
+static void *h264_nvenc_soft_create(obs_data_t *settings,
+				    obs_encoder_t *encoder)
+{
+	const bool texture = false;
+
+	return nvenc_create_base(CODEC_H264, settings, encoder, texture);
+}
+
+#ifdef ENABLE_HEVC
+/* Creates the HEVC encoder using the non-texture path. */
+static void *hevc_nvenc_soft_create(obs_data_t *settings,
+				    obs_encoder_t *encoder)
+{
+	const bool texture = false;
+
+	return nvenc_create_base(CODEC_HEVC, settings, encoder, texture);
+}
+#endif
+
+/* Creates the AV1 encoder using the non-texture path. */
+static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	const bool texture = false;
+
+	return nvenc_create_base(CODEC_AV1, settings, encoder, texture);
+}
+
+static bool get_encoded_packet(struct nvenc_data *enc, bool finalize);
+
+/* Tears down an encoder instance and frees it.
+ *
+ * If encoding had started, an end-of-stream picture is submitted and the
+ * queued output is drained first. Afterwards the bitstream buffers, the
+ * session, the platform textures/surfaces, the CUDA context and all
+ * remaining allocations are released, in that order. Also used to clean up
+ * partially initialized instances. */
+static void nvenc_destroy(void *data)
+{
+	struct nvenc_data *enc = data;
+
+	if (enc->encode_started) {
+		NV_ENC_PIC_PARAMS eos = {NV_ENC_PIC_PARAMS_VER};
+
+		eos.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+		nv.nvEncEncodePicture(enc->session, &eos);
+		get_encoded_packet(enc, true);
+	}
+
+	/* NVENC objects */
+	struct nv_bitstream *bs = enc->bitstreams.array;
+	for (size_t left = enc->bitstreams.num; left > 0; left--, bs++)
+		nv_bitstream_free(enc, bs);
+
+	if (enc->session)
+		nv.nvEncDestroyEncoder(enc->session);
+
+	/* Platform and CUDA resources */
+#ifdef _WIN32
+	d3d11_free_textures(enc);
+	d3d11_free(enc);
+#else
+	cuda_opengl_free(enc);
+#endif
+	cuda_free_surfaces(enc);
+	cuda_ctx_free(enc);
+
+	/* Heap buffers and containers */
+	bfree(enc->header);
+	bfree(enc->sei);
+	bfree(enc->roi_map);
+
+	deque_free(&enc->dts_list);
+
+	da_free(enc->surfaces);
+	da_free(enc->input_textures);
+	da_free(enc->bitstreams);
+#ifdef _WIN32
+	da_free(enc->textures);
+#endif
+	da_free(enc->packet_data);
+
+	/* Parsed properties */
+	obs_free_options(enc->props.opts);
+	obs_data_release(enc->props.data);
+
+	bfree(enc);
+}
+/* Fetches encoded output from the current bitstream buffer into
+ * enc->packet_data / packet_pts / packet_keyframe.
+ *
+ * Without finalize, one packet is fetched, and only once output_delay
+ * buffers are queued. With finalize, every queued buffer is processed.
+ * Returns true on success or when there is nothing to fetch, false when
+ * locking/unlocking the bitstream or unmapping an input resource fails. */
+static bool get_encoded_packet(struct nvenc_data *enc, bool finalize)
+{
+	void *s = enc->session;
+	/* Start from an empty packet buffer. */
+	da_resize(enc->packet_data, 0);
+
+	if (!enc->buffers_queued)
+		return true;
+	if (!finalize && enc->buffers_queued < enc->output_delay)
+		return true;
+	/* Finalizing walks every queued buffer; otherwise just one. */
+	size_t count = finalize ? enc->buffers_queued : 1;
+
+	for (size_t i = 0; i < count; i++) {
+		size_t cur_bs_idx = enc->cur_bitstream;
+		struct nv_bitstream *bs = &enc->bitstreams.array[cur_bs_idx];
+#ifdef _WIN32
+		struct nv_texture *nvtex =
+			enc->non_texture ? NULL
+					 : &enc->textures.array[cur_bs_idx];
+		struct nv_cuda_surface *surf =
+			enc->non_texture ? &enc->surfaces.array[cur_bs_idx]
+					 : NULL;
+#else
+		struct nv_cuda_surface *surf = &enc->surfaces.array[cur_bs_idx];
+#endif
+
+		/* ---------------- */
+		/* Lock the current output bitstream (doNotWait is off). */
+		NV_ENC_LOCK_BITSTREAM lock = {NV_ENC_LOCK_BITSTREAM_VER};
+		lock.outputBitstream = bs->ptr;
+		lock.doNotWait = false;
+
+		if (NV_FAILED(nv.nvEncLockBitstream(s, &lock))) {
+			return false;
+		}
+		/* First packet only: also store the sequence parameters in
+		 * enc->header. The call's status is not checked. */
+		if (enc->first_packet) {
+			NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {0};
+			uint8_t buf[256];
+			uint32_t size = 0;
+
+			payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
+			payload.spsppsBuffer = buf;
+			payload.inBufferSize = sizeof(buf);
+			payload.outSPSPPSPayloadSize = &size;
+
+			nv.nvEncGetSequenceParams(s, &payload);
+			enc->header = bmemdup(buf, size);
+			enc->header_size = size;
+			enc->first_packet = false;
+		}
+		/* NOTE(review): da_copy_array presumably replaces the array
+		 * contents, so after a finalize run only the last packet
+		 * would remain -- confirm. */
+		da_copy_array(enc->packet_data, lock.bitstreamBufferPtr,
+			      lock.bitstreamSizeInBytes);
+
+		enc->packet_pts = (int64_t)lock.outputTimeStamp;
+		enc->packet_keyframe = lock.pictureType == NV_ENC_PIC_TYPE_IDR;
+
+		if (NV_FAILED(nv.nvEncUnlockBitstream(s, bs->ptr))) {
+			return false;
+		}
+
+		/* ---------------- */
+#ifdef _WIN32
+		if (nvtex && nvtex->mapped_res) {
+			NVENCSTATUS err;
+			err = nv.nvEncUnmapInputResource(s, nvtex->mapped_res);
+			if (nv_failed(enc->encoder, err, __FUNCTION__,
+				      "unmap")) {
+				return false;
+			}
+			nvtex->mapped_res = NULL;
+		}
+#endif
+		/* ---------------- */
+		/* Unmap the CUDA surface paired with this buffer, if mapped. */
+		if (surf && surf->mapped_res) {
+			NVENCSTATUS err;
+			err = nv.nvEncUnmapInputResource(s, surf->mapped_res);
+			if (nv_failed(enc->encoder, err, __FUNCTION__,
+				      "unmap")) {
+				return false;
+			}
+			surf->mapped_res = NULL;
+		}
+
+		/* ---------------- */
+		/* Advance to the next buffer, wrapping at buf_count. */
+		if (++enc->cur_bitstream == enc->buf_count)
+			enc->cur_bitstream = 0;
+
+		enc->buffers_queued--;
+	}
+
+	return true;
+}
+/* Context handed to roi_cb() while the QP delta map is being filled. */
+struct roi_params {
+	uint32_t mb_width;  /* map width, in blocks */
+	uint32_t mb_height; /* map height, in blocks */
+	uint32_t mb_size;   /* divisor turning ROI coordinates into block indices */
+	bool av1;           /* selects the larger (AV1) QP delta scale */
+	int8_t *map;        /* mb_width * mb_height entries, indexed y * mb_width + x */
+};
+
+/* ROI enumeration callback: writes the QP delta for one region of interest
+ * into every map block the region touches.
+ *
+ * param is a struct roi_params. The delta is the region priority scaled by
+ * -128 (AV1) or -51 (other codecs), so a positive priority lowers the QP.
+ * The scaled value is clamped to the int8_t range before conversion: a
+ * priority of -1.0 with AV1 gives +128, which does not fit in int8_t, and
+ * converting it unclamped is undefined behaviour. */
+static void roi_cb(void *param, struct obs_encoder_roi *roi)
+{
+	const struct roi_params *rp = param;
+
+	/* AV1 has a larger QP range than HEVC/H.264 */
+	float qp = (rp->av1 ? -128.0f : -51.0f) * roi->priority;
+
+	if (qp > 127.0f)
+		qp = 127.0f;
+	else if (qp < -128.0f)
+		qp = -128.0f;
+
+	const int8_t qp_val = (int8_t)qp;
+
+	/* Region edges as block indices; right/bottom are inclusive. */
+	const uint32_t roi_left = roi->left / rp->mb_size;
+	const uint32_t roi_top = roi->top / rp->mb_size;
+	const uint32_t roi_right = (roi->right - 1) / rp->mb_size;
+	const uint32_t roi_bottom = (roi->bottom - 1) / rp->mb_size;
+
+	for (uint32_t mb_y = 0; mb_y < rp->mb_height; mb_y++) {
+		if (mb_y < roi_top || mb_y > roi_bottom)
+			continue;
+
+		for (uint32_t mb_x = 0; mb_x < rp->mb_width; mb_x++) {
+			if (mb_x < roi_left || mb_x > roi_right)
+				continue;
+
+			rp->map[mb_y * rp->mb_width + mb_x] = qp_val;
+		}
+	}
+}
+
+/* Attaches the QP delta map for the encoder's regions of interest to the
+ * picture parameters.
+ *
+ * The map is cached in enc->roi_map and only rebuilt when none exists yet or
+ * when the encoder's ROI increment differs from the one it was built for. */
+static void add_roi(struct nvenc_data *enc, NV_ENC_PIC_PARAMS *params)
+{
+	const uint32_t increment = obs_encoder_get_roi_increment(enc->encoder);
+	const bool map_is_current = enc->roi_map &&
+				    enc->roi_increment == increment;
+
+	if (!map_is_current) {
+		uint32_t mb_size = 0;
+
+		/* Block size of the QP map for each codec */
+		switch (enc->codec) {
+		case CODEC_H264:
+			/* H.264 is always 16x16 */
+			mb_size = 16;
+			break;
+		case CODEC_HEVC:
+			/* HEVC can be 16x16, 32x32, or 64x64, but NVENC is always 32x32 */
+			mb_size = 32;
+			break;
+		case CODEC_AV1:
+			/* AV1 can be 64x64 or 128x128, but NVENC is always 64x64 */
+			mb_size = 64;
+			break;
+		}
+
+		/* Round the frame size up to whole blocks. */
+		const uint32_t mb_width = (enc->cx + mb_size - 1) / mb_size;
+		const uint32_t mb_height = (enc->cy + mb_size - 1) / mb_size;
+		const size_t map_size = mb_width * mb_height * sizeof(int8_t);
+
+		if (map_size != enc->roi_map_size) {
+			enc->roi_map = brealloc(enc->roi_map, map_size);
+			enc->roi_map_size = map_size;
+		}
+
+		memset(enc->roi_map, 0, enc->roi_map_size);
+
+		struct roi_params ctx = {
+			.mb_width = mb_width,
+			.mb_height = mb_height,
+			.mb_size = mb_size,
+			.av1 = enc->codec == CODEC_AV1,
+			.map = enc->roi_map,
+		};
+
+		obs_encoder_enum_roi(enc->encoder, roi_cb, &ctx);
+
+		enc->roi_increment = increment;
+	}
+
+	params->qpDeltaMap = enc->roi_map;
+	params->qpDeltaMapSize = (uint32_t)enc->roi_map_size;
+}
+
+/*
+ * Submits one input picture to the NVENC session and, when an encoded
+ * bitstream is available afterwards, describes it in `packet`.
+ *
+ * enc              Encoder state.
+ * bs               Output bitstream buffer to associate with this picture.
+ * pic              Input buffer handle passed to NVENC as inputBuffer.
+ * pts              Presentation timestamp of the picture.
+ * packet           Filled in when a packet was produced.
+ * received_packet  Set to true/false depending on whether `packet` is valid;
+ *                  left untouched when the function fails.
+ *
+ * Returns false if submitting the picture or fetching the packet failed.
+ */
+bool nvenc_encode_base(struct nvenc_data *enc, struct nv_bitstream *bs,
+		       void *pic, int64_t pts, struct encoder_packet *packet,
+		       bool *received_packet)
+{
+	NV_ENC_PIC_PARAMS pic_params = {0};
+
+	pic_params.version = NV_ENC_PIC_PARAMS_VER;
+	pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
+	pic_params.inputBuffer = pic;
+	pic_params.outputBitstream = bs->ptr;
+	pic_params.inputTimeStamp = (uint64_t)pts;
+	pic_params.frameIdx = (uint32_t)pts;
+	pic_params.inputWidth = enc->cx;
+	pic_params.inputHeight = enc->cy;
+	pic_params.inputPitch = enc->cx;
+
+	/* Non-texture input uses the configured surface format, textures are P010 or NV12 */
+	if (enc->non_texture)
+		pic_params.bufferFmt = enc->surface_format;
+	else if (obs_p010_tex_active())
+		pic_params.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+	else
+		pic_params.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12;
+
+	/* Add ROI map if enabled */
+	if (obs_encoder_has_roi(enc->encoder))
+		add_roi(enc, &pic_params);
+
+	/* NEED_MORE_INPUT is not an error: the picture was accepted without output yet */
+	const NVENCSTATUS status =
+		nv.nvEncEncodePicture(enc->session, &pic_params);
+	const bool accepted = status == NV_ENC_SUCCESS ||
+			      status == NV_ENC_ERR_NEED_MORE_INPUT;
+	if (!accepted) {
+		nv_failed(enc->encoder, status, __FUNCTION__,
+			  "nvEncEncodePicture");
+		return false;
+	}
+
+	enc->encode_started = true;
+	enc->buffers_queued++;
+
+	/* Advance the ring index of the next bitstream buffer, wrapping at buf_count */
+	enc->next_bitstream++;
+	if (enc->next_bitstream == enc->buf_count)
+		enc->next_bitstream = 0;
+
+	/* ------------------------------------ */
+	/* check for encoded packet and parse   */
+
+	if (!get_encoded_packet(enc, false))
+		return false;
+
+	/* ------------------------------------ */
+	/* output encoded packet                */
+
+	if (!enc->packet_data.num) {
+		*received_packet = false;
+		return true;
+	}
+
+	int64_t packet_dts;
+	deque_pop_front(&enc->dts_list, &packet_dts, sizeof(packet_dts));
+
+	/* subtract bframe delay from dts for H.264/HEVC */
+	if (enc->codec != CODEC_AV1)
+		packet_dts -= enc->props.bf * packet->timebase_num;
+
+	packet->type = OBS_ENCODER_VIDEO;
+	packet->data = enc->packet_data.array;
+	packet->size = enc->packet_data.num;
+	packet->pts = enc->packet_pts;
+	packet->dts = packet_dts;
+	packet->keyframe = enc->packet_keyframe;
+	*received_packet = true;
+
+	return true;
+}
+
+/* get_video_info callback: reports the encoder's input format (in_format) through `info`. */
+static void nvenc_soft_video_info(void *data, struct video_scale_info *info)
+{
+	struct nvenc_data *nvenc = data;
+
+	info->format = nvenc->in_format;
+}
+
+/*
+ * get_extra_data callback: hands out the stored header.
+ * Returns false, leaving the outputs untouched, when no header is stored.
+ */
+static bool nvenc_extra_data(void *data, uint8_t **header, size_t *size)
+{
+	struct nvenc_data *nvenc = data;
+	const bool available = nvenc->header != NULL;
+
+	if (available) {
+		*header = nvenc->header;
+		*size = nvenc->header_size;
+	}
+
+	return available;
+}
+
+/*
+ * get_sei_data callback: hands out the stored SEI payload.
+ * Returns false, leaving the outputs untouched, when no SEI is stored.
+ */
+static bool nvenc_sei_data(void *data, uint8_t **sei, size_t *size)
+{
+	struct nvenc_data *nvenc = data;
+	const bool available = nvenc->sei != NULL;
+
+	if (available) {
+		*sei = nvenc->sei;
+		*size = nvenc->sei_size;
+	}
+
+	return available;
+}
+
+/*
+ * H.264 encoder registration that receives frames as textures
+ * (OBS_ENCODER_CAP_PASS_TEXTURE) through the encode_texture2 callback.
+ */
+struct obs_encoder_info h264_nvenc_info = {
+	.id = "obs_nvenc_h264_tex",
+	.codec = "h264",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
+		OBS_ENCODER_CAP_ROI,
+	.get_name = h264_nvenc_get_name,
+	.create = h264_nvenc_create,
+	.destroy = nvenc_destroy,
+	.update = nvenc_update,
+	/* Texture path: D3D11 on Windows, CUDA/OpenGL on every other platform */
+#ifdef _WIN32
+	.encode_texture2 = d3d11_encode,
+#else
+	.encode_texture2 = cuda_opengl_encode,
+#endif
+	.get_defaults = h264_nvenc_defaults,
+	.get_properties = h264_nvenc_properties,
+	.get_extra_data = nvenc_extra_data,
+	.get_sei_data = nvenc_sei_data,
+};
+
+/*
+ * HEVC encoder registration that receives frames as textures
+ * (OBS_ENCODER_CAP_PASS_TEXTURE); only compiled when ENABLE_HEVC is defined.
+ */
+#ifdef ENABLE_HEVC
+struct obs_encoder_info hevc_nvenc_info = {
+	.id = "obs_nvenc_hevc_tex",
+	.codec = "hevc",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
+		OBS_ENCODER_CAP_ROI,
+	.get_name = hevc_nvenc_get_name,
+	.create = hevc_nvenc_create,
+	.destroy = nvenc_destroy,
+	.update = nvenc_update,
+	/* Texture path: D3D11 on Windows, CUDA/OpenGL on every other platform */
+#ifdef _WIN32
+	.encode_texture2 = d3d11_encode,
+#else
+	.encode_texture2 = cuda_opengl_encode,
+#endif
+	.get_defaults = hevc_nvenc_defaults,
+	.get_properties = hevc_nvenc_properties,
+	.get_extra_data = nvenc_extra_data,
+	.get_sei_data = nvenc_sei_data,
+};
+#endif
+
+/*
+ * AV1 encoder registration that receives frames as textures
+ * (OBS_ENCODER_CAP_PASS_TEXTURE). Unlike the H.264/HEVC tables it sets no
+ * get_sei_data callback.
+ */
+struct obs_encoder_info av1_nvenc_info = {
+	.id = "obs_nvenc_av1_tex",
+	.codec = "av1",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
+		OBS_ENCODER_CAP_ROI,
+	.get_name = av1_nvenc_get_name,
+	.create = av1_nvenc_create,
+	.destroy = nvenc_destroy,
+	.update = nvenc_update,
+	/* Texture path: D3D11 on Windows, CUDA/OpenGL on every other platform */
+#ifdef _WIN32
+	.encode_texture2 = d3d11_encode,
+#else
+	.encode_texture2 = cuda_opengl_encode,
+#endif
+	.get_defaults = av1_nvenc_defaults,
+	.get_properties = av1_nvenc_properties,
+	.get_extra_data = nvenc_extra_data,
+};
+
+/*
+ * H.264 encoder registration without OBS_ENCODER_CAP_PASS_TEXTURE: frames are
+ * delivered to the .encode callback (cuda_encode) and the expected input
+ * format is reported through get_video_info. Flagged OBS_ENCODER_CAP_INTERNAL.
+ */
+struct obs_encoder_info h264_nvenc_soft_info = {
+	.id = "obs_nvenc_h264_soft",
+	.codec = "h264",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
+		OBS_ENCODER_CAP_INTERNAL,
+	.get_name = h264_nvenc_soft_get_name,
+	.create = h264_nvenc_soft_create,
+	.destroy = nvenc_destroy,
+	.update = nvenc_update,
+	.encode = cuda_encode,
+	.get_defaults = h264_nvenc_defaults,
+	.get_properties = h264_nvenc_properties,
+	.get_extra_data = nvenc_extra_data,
+	.get_sei_data = nvenc_sei_data,
+	.get_video_info = nvenc_soft_video_info,
+};
+
+/*
+ * HEVC encoder registration without OBS_ENCODER_CAP_PASS_TEXTURE: frames are
+ * delivered to the .encode callback (cuda_encode). Only compiled when
+ * ENABLE_HEVC is defined. Flagged OBS_ENCODER_CAP_INTERNAL.
+ */
+#ifdef ENABLE_HEVC
+struct obs_encoder_info hevc_nvenc_soft_info = {
+	.id = "obs_nvenc_hevc_soft",
+	.codec = "hevc",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
+		OBS_ENCODER_CAP_INTERNAL,
+	.get_name = hevc_nvenc_soft_get_name,
+	.create = hevc_nvenc_soft_create,
+	.destroy = nvenc_destroy,
+	.update = nvenc_update,
+	.encode = cuda_encode,
+	.get_defaults = hevc_nvenc_defaults,
+	.get_properties = hevc_nvenc_properties,
+	.get_extra_data = nvenc_extra_data,
+	.get_sei_data = nvenc_sei_data,
+	.get_video_info = nvenc_soft_video_info,
+};
+#endif
+
+/*
+ * AV1 encoder registration without OBS_ENCODER_CAP_PASS_TEXTURE: frames are
+ * delivered to the .encode callback (cuda_encode). Like the AV1 texture table
+ * it sets no get_sei_data callback. Flagged OBS_ENCODER_CAP_INTERNAL.
+ */
+struct obs_encoder_info av1_nvenc_soft_info = {
+	.id = "obs_nvenc_av1_soft",
+	.codec = "av1",
+	.type = OBS_ENCODER_VIDEO,
+	.caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
+		OBS_ENCODER_CAP_INTERNAL,
+	.get_name = av1_nvenc_soft_get_name,
+	.create = av1_nvenc_soft_create,
+	.destroy = nvenc_destroy,
+	.update = nvenc_update,
+	.encode = cuda_encode,
+	.get_defaults = av1_nvenc_defaults,
+	.get_properties = av1_nvenc_properties,
+	.get_extra_data = nvenc_extra_data,
+	.get_video_info = nvenc_soft_video_info,
+};
+
+/*
+ * Registers the encoder tables with libobs. H.264 is always registered, HEVC
+ * only when built with ENABLE_HEVC, and AV1 only when is_codec_supported()
+ * reports it.
+ */
+void register_encoders(void)
+{
+	obs_register_encoder(&h264_nvenc_info);
+	obs_register_encoder(&h264_nvenc_soft_info);
+
+#ifdef ENABLE_HEVC
+	obs_register_encoder(&hevc_nvenc_info);
+	obs_register_encoder(&hevc_nvenc_soft_info);
+#endif
+
+	/* AV1 is the last group, so bail out early when it is unavailable */
+	if (!is_codec_supported(CODEC_AV1))
+		return;
+
+	obs_register_encoder(&av1_nvenc_info);
+	obs_register_encoder(&av1_nvenc_soft_info);
+}

+ 12 - 0
plugins/obs-nvenc/obs-nvenc-test/CMakeLists.txt

@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.24...3.25)
+
+# Helper executable that probes NVENC support and capabilities; it links only against the FFnvcodec headers target.
+find_package(FFnvcodec 12 REQUIRED)
+
+add_executable(obs-nvenc-test)
+
+target_sources(obs-nvenc-test PRIVATE obs-nvenc-test.cpp)
+# Keyword signature: the keyword-less form has legacy semantics and cannot be mixed with keyword calls on this target.
+target_link_libraries(obs-nvenc-test PRIVATE FFnvcodec::FFnvcodec)
+
+# cmake-format: off
+set_target_properties_obs(obs-nvenc-test PROPERTIES FOLDER plugins/obs-nvenc)
+# cmake-format: on

+ 532 - 0
plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp

@@ -0,0 +1,532 @@
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+#include <chrono>
+#include <future>
+#include <cstring>
+
+#include <ffnvcodec/nvEncodeAPI.h>
+#include <ffnvcodec/dynlink_loader.h>
+
+/*
+ * Utility to check for NVENC support and capabilities.
+ * Will check all GPUs and return INI-formatted results based on highest capability of all devices.
+ */
+
+using namespace std;
+using namespace std::chrono_literals;
+
+/* Function tables filled in by cuda_load_functions() / nvenc_load_functions() (see init_cuda / init_nvenc) */
+static CudaFunctions *cu = nullptr;
+static NvencFunctions *nvenc = nullptr;
+
+/* NVENC API entry points, populated by NvEncodeAPICreateInstance() in init_nvenc() */
+NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
+/*
+ * API version of the headers this tool was compiled against, packed as
+ * (major << 4) | minor so it can be compared with the value returned by
+ * NvEncodeAPIGetMaxSupportedVersion().
+ */
+static constexpr uint32_t NVENC_CONFIGURED_VERSION =
+	(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
+
+/*
+ * NVML stuff
+ *
+ * The NVML library is loaded at runtime, so the few constants, types and
+ * function signatures that are needed are declared locally here.
+ */
+#define NVML_SUCCESS 0
+#define NVML_DEVICE_UUID_V2_BUFFER_SIZE 96
+#define NVML_DEVICE_NAME_V2_BUFFER_SIZE 96
+#define NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE 80
+
+typedef int nvmlReturn_t;
+typedef struct nvmlDevice *nvmlDevice_t;
+
+typedef enum nvmlEncoderType {
+	NVML_ENCODER_QUERY_H264,
+	NVML_ENCODER_QUERY_HEVC,
+	NVML_ENCODER_QUERY_AV1,
+	NVML_ENCODER_QUERY_UNKNOWN
+} nvmlEncoderType_t;
+
+/* Signatures of the NVML entry points resolved in NVML::Init() */
+typedef nvmlReturn_t (*NVML_GET_DRIVER_VER_FUNC)(char *, unsigned int);
+typedef nvmlReturn_t (*NVML_INIT_V2)();
+typedef nvmlReturn_t (*NVML_SHUTDOWN)();
+typedef nvmlReturn_t (*NVML_GET_HANDLE_BY_BUS_ID)(const char *, nvmlDevice_t *);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_UUID)(nvmlDevice_t, char *, unsigned);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_NAME)(nvmlDevice_t, char *, unsigned);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_GEN)(nvmlDevice_t, unsigned *);
+typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_WIDTH)(nvmlDevice_t, unsigned *);
+typedef nvmlReturn_t (*NVML_GET_ENCODER_SESSIONS)(nvmlDevice_t, unsigned *,
+						  void *);
+typedef nvmlReturn_t (*NVML_GET_ENCODER_CAPACITY)(nvmlDevice_t, nvmlEncoderType,
+						  unsigned *);
+typedef nvmlReturn_t (*NVML_GET_ENCODER_UTILISATION)(nvmlDevice_t, unsigned *,
+						     unsigned *);
+/*
+ * List of capabilities to be queried per codec.
+ * Each entry pairs the NVENC capability with the key it is printed under.
+ */
+static const vector<pair<NV_ENC_CAPS, string>> capabilities = {
+	{NV_ENC_CAPS_NUM_MAX_BFRAMES, "bframes"},
+	{NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE, "lossless"},
+	{NV_ENC_CAPS_SUPPORT_LOOKAHEAD, "lookahead"},
+	{NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ, "temporal_aq"},
+	{NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE, "dynamic_bitrate"},
+	{NV_ENC_CAPS_SUPPORT_10BIT_ENCODE, "10bit"},
+	{NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE, "bref"},
+	{NV_ENC_CAPS_NUM_ENCODER_ENGINES, "engines"},
+	{NV_ENC_CAPS_SUPPORT_YUV444_ENCODE, "yuv_444"},
+	{NV_ENC_CAPS_WIDTH_MAX, "max_width"},
+	{NV_ENC_CAPS_HEIGHT_MAX, "max_height"},
+/* The minor version only matters within major version 12; any later major qualifies */
+#if NVENCAPI_MAJOR_VERSION > 12 || \
+	(NVENCAPI_MAJOR_VERSION == 12 && NVENCAPI_MINOR_VERSION >= 2)
+	/* SDK 12.2+ features */
+	{NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER, "temporal_filter"},
+	{NV_ENC_CAPS_SUPPORT_LOOKAHEAD_LEVEL, "lookahead_level"},
+#endif
+};
+
+/* Codec GUIDs this tool can name; GUIDs not listed here are reported as "unknown" by get_adapter_caps() */
+static const vector<pair<string_view, GUID>> codecs = {
+	{"h264", NV_ENC_CODEC_H264_GUID},
+	{"hevc", NV_ENC_CODEC_HEVC_GUID},
+	{"av1", NV_ENC_CODEC_AV1_GUID}};
+
+/* codec name -> (capability key -> value) */
+typedef unordered_map<string, unordered_map<string, int>> codec_caps_map;
+
+/*
+ * Per-device information that is printed in the [device.N] sections.
+ *
+ * The numeric members carry explicit zero initialisers so that a device whose
+ * NVML queries are skipped or fail never reports indeterminate values, no
+ * matter how the object was created.
+ */
+struct device_info {
+	string pci_id;
+	string nvml_uuid;
+	string cuda_uuid;
+	string name;
+
+	uint32_t pcie_gen = 0;
+	uint32_t pcie_width = 0;
+
+	uint32_t encoder_sessions = 0;
+	uint32_t utilisation = 0;
+	uint32_t sample_period = 0;
+	uint32_t capacity_h264 = 0;
+	uint32_t capacity_hevc = 0;
+	uint32_t capacity_av1 = 0;
+
+	/* Capabilities of this device only, keyed by codec name */
+	codec_caps_map caps;
+};
+
+/* RAII wrappers to make my life a little easier. */
+
+/*
+ * Loads the NVML library at runtime, resolves the entry points used by this
+ * tool and calls nvmlShutdown on destruction if Init() succeeded.
+ */
+struct NVML {
+	/* Entry points resolved by Init(); they stay null until then */
+	NVML_INIT_V2 init = nullptr;
+	NVML_SHUTDOWN shutdown = nullptr;
+	NVML_GET_DRIVER_VER_FUNC getDriverVersion = nullptr;
+	NVML_GET_HANDLE_BY_BUS_ID getDeviceHandleByPCIBusId = nullptr;
+	NVML_GET_DEVICE_UUID getDeviceUUID = nullptr;
+	NVML_GET_DEVICE_NAME getDeviceName = nullptr;
+	NVML_GET_DEVICE_PCIE_GEN getDevicePCIeGen = nullptr;
+	NVML_GET_DEVICE_PCIE_WIDTH getDevicePCIeWidth = nullptr;
+	NVML_GET_ENCODER_SESSIONS getEncoderSessions = nullptr;
+	NVML_GET_ENCODER_CAPACITY getEncoderCapacity = nullptr;
+	NVML_GET_ENCODER_UTILISATION getEncoderUtilisation = nullptr;
+
+	NVML() = default;
+
+	/* The destructor shuts NVML down, so a copy would shut it down twice */
+	NVML(const NVML &) = delete;
+	NVML &operator=(const NVML &) = delete;
+
+	~NVML()
+	{
+		if (initialised && shutdown)
+			shutdown();
+	}
+
+	/*
+	 * Loads the library, resolves all entry points and initialises NVML.
+	 * Returns false if the library, any entry point or nvmlInit_v2 fails;
+	 * library and init failures additionally print a "reason=" line.
+	 */
+	bool Init()
+	{
+		if (!load_nvml_lib()) {
+			printf("reason=nvml_lib\n");
+			return false;
+		}
+
+		init = (NVML_INIT_V2)load_nvml_func("nvmlInit_v2");
+		shutdown = (NVML_SHUTDOWN)load_nvml_func("nvmlShutdown");
+		getDriverVersion = (NVML_GET_DRIVER_VER_FUNC)load_nvml_func(
+			"nvmlSystemGetDriverVersion");
+		getDeviceHandleByPCIBusId =
+			(NVML_GET_HANDLE_BY_BUS_ID)load_nvml_func(
+				"nvmlDeviceGetHandleByPciBusId_v2");
+		getDeviceUUID = (NVML_GET_DEVICE_UUID)load_nvml_func(
+			"nvmlDeviceGetUUID");
+		getDeviceName = (NVML_GET_DEVICE_NAME)load_nvml_func(
+			"nvmlDeviceGetName");
+		getDevicePCIeGen = (NVML_GET_DEVICE_PCIE_GEN)load_nvml_func(
+			"nvmlDeviceGetCurrPcieLinkGeneration");
+		getDevicePCIeWidth = (NVML_GET_DEVICE_PCIE_WIDTH)load_nvml_func(
+			"nvmlDeviceGetCurrPcieLinkWidth");
+		getEncoderSessions = (NVML_GET_ENCODER_SESSIONS)load_nvml_func(
+			"nvmlDeviceGetEncoderSessions");
+		getEncoderCapacity = (NVML_GET_ENCODER_CAPACITY)load_nvml_func(
+			"nvmlDeviceGetEncoderCapacity");
+		getEncoderUtilisation =
+			(NVML_GET_ENCODER_UTILISATION)load_nvml_func(
+				"nvmlDeviceGetEncoderUtilization");
+
+		/* Every entry point is required; callers invoke them unchecked */
+		if (!init || !shutdown || !getDriverVersion ||
+		    !getDeviceHandleByPCIBusId || !getDeviceUUID ||
+		    !getDeviceName || !getDevicePCIeGen ||
+		    !getDevicePCIeWidth || !getEncoderSessions ||
+		    !getEncoderCapacity || !getEncoderUtilisation) {
+			return false;
+		}
+
+		nvmlReturn_t res = init();
+		if (res != NVML_SUCCESS) {
+			printf("reason=nvml_init_%d\n", res);
+			return false;
+		}
+
+		initialised = true;
+		return true;
+	}
+
+private:
+	/* True once nvmlInit_v2 succeeded, i.e. shutdown is owed */
+	bool initialised = false;
+	/* Library handle shared by all instances; it is not unloaded by this wrapper */
+	static inline void *nvml_lib = nullptr;
+
+	bool load_nvml_lib()
+	{
+#ifdef _WIN32
+		nvml_lib = LoadLibraryA("nvml.dll");
+#else
+		nvml_lib = dlopen("libnvidia-ml.so.1", RTLD_LAZY);
+#endif
+		return nvml_lib != nullptr;
+	}
+
+	/* Resolves one symbol from the loaded library; returns null if it is missing */
+	static void *load_nvml_func(const char *func)
+	{
+#ifdef _WIN32
+		void *func_ptr =
+			(void *)GetProcAddress((HMODULE)nvml_lib, func);
+#else
+		void *func_ptr = dlsym(nvml_lib, func);
+#endif
+		return func_ptr;
+	}
+};
+
+/*
+ * Owns a CUDA context created on one device and destroys it on scope exit.
+ */
+struct CUDACtx {
+	/* Null until Init() succeeds, so the destructor knows whether there is anything to destroy */
+	CUcontext ctx = nullptr;
+
+	CUDACtx() = default;
+
+	/* The context is destroyed in the destructor; copying would destroy it twice */
+	CUDACtx(const CUDACtx &) = delete;
+	CUDACtx &operator=(const CUDACtx &) = delete;
+
+	~CUDACtx()
+	{
+		/* Init() may never have run or may have failed */
+		if (ctx)
+			cu->cuCtxDestroy(ctx);
+	}
+
+	/* Creates a context on the device with the given index; returns false on failure */
+	bool Init(int adapter_idx)
+	{
+		CUdevice dev;
+		if (cu->cuDeviceGet(&dev, adapter_idx) != CUDA_SUCCESS)
+			return false;
+
+		if (cu->cuCtxCreate(&ctx, 0, dev) != CUDA_SUCCESS) {
+			ctx = nullptr;
+			return false;
+		}
+
+		return true;
+	}
+
+	/* Returns the PCI bus id string of the current context's device, or an empty string on failure */
+	string GetPCIBusId()
+	{
+		CUdevice dev = 0;
+		string bus_id(16, '\0');
+
+		cu->cuCtxGetDevice(&dev);
+		/* Pass the size actually owned by the string, not its capacity */
+		cu->cuDeviceGetPCIBusId(bus_id.data(), (int)bus_id.size(), dev);
+		/* Drop the unused zero padding so the string holds just the id */
+		bus_id.resize(strlen(bus_id.c_str()));
+		return bus_id;
+	}
+
+	/* Returns the device UUID as 32 lowercase hex digits (all zeros if the query fails) */
+	string GetUUID()
+	{
+		CUdevice dev = 0;
+		CUuuid uuid = {};
+		string uuid_str;
+
+		cu->cuCtxGetDevice(&dev);
+		cu->cuDeviceGetUuid_v2(&uuid, dev);
+
+		uuid_str.resize(32);
+		for (size_t idx = 0; idx < 16; idx++) {
+			sprintf(uuid_str.data() + idx * 2, "%02x",
+				uuid.bytes[idx] & 0xFF);
+		}
+
+		return uuid_str;
+	}
+};
+
+/*
+ * Owns an NVENC encode session handle and destroys it on scope exit.
+ */
+struct NVSession {
+	void *ptr = nullptr;
+
+	NVSession() = default;
+
+	/* The session is destroyed in the destructor; copying would destroy it twice */
+	NVSession(const NVSession &) = delete;
+	NVSession &operator=(const NVSession &) = delete;
+
+	~NVSession()
+	{
+		/* Nothing to destroy if OpenSession() was never called or produced no handle */
+		if (ptr)
+			nv.nvEncDestroyEncoder(ptr);
+	}
+
+	/* Opens an encode session on the given CUDA context; returns false on failure */
+	bool OpenSession(const CUDACtx &ctx)
+	{
+		NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {};
+		params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+		params.apiVersion = NVENCAPI_VERSION;
+		params.device = ctx.ctx;
+		params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+
+		return nv.nvEncOpenEncodeSessionEx(&params, &ptr) ==
+		       NV_ENC_SUCCESS;
+	}
+};
+
+/*
+ * Loads the NVENC library and fills the global `nv` function list.
+ * Prints a "reason=" line and returns false on failure.
+ */
+static bool init_nvenc()
+{
+	/* The loader signals failure with a non-zero result */
+	if (nvenc_load_functions(&nvenc, nullptr) != 0) {
+		printf("reason=nvenc_lib\n");
+		return false;
+	}
+
+	const NVENCSTATUS status = nvenc->NvEncodeAPICreateInstance(&nv);
+	if (status == NV_ENC_SUCCESS)
+		return true;
+
+	printf("reason=nvenc_init_%d\n", status);
+	return false;
+}
+
+/*
+ * Loads the CUDA driver library into the global `cu` table and calls cuInit.
+ * Prints a "reason=" line and returns false on failure.
+ */
+static bool init_cuda()
+{
+	/* The loader signals failure with a non-zero result */
+	if (cuda_load_functions(&cu, nullptr) != 0) {
+		printf("reason=cuda_lib\n");
+		return false;
+	}
+
+	const CUresult status = cu->cuInit(0);
+	if (status == CUDA_SUCCESS)
+		return true;
+
+	printf("reason=cuda_init_%d\n", status);
+	return false;
+}
+
+/*
+ * Queries one CUDA device: collects NVML details into `device_info`, opens an
+ * NVENC session and records the capabilities of every codec it reports.
+ *
+ * adapter_idx  CUDA device index.
+ * caps         Global map; each capability keeps the highest value seen so far.
+ * device_info  Receives the details and capabilities of this device only.
+ * nvml         Initialised NVML wrapper.
+ *
+ * Returns false if no CUDA context or NVENC session could be created or the
+ * codec list could not be read. NVML lookups are best-effort and never fail
+ * the function.
+ */
+static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps,
+			     device_info &device_info, NVML &nvml)
+{
+	/* Declared in this order so the session is destroyed before its context */
+	CUDACtx cudaCtx;
+	NVSession nvSession;
+
+	if (!cudaCtx.Init(adapter_idx))
+		return false;
+
+	device_info.pci_id = cudaCtx.GetPCIBusId();
+	device_info.cuda_uuid = cudaCtx.GetUUID();
+
+	nvmlDevice_t dev;
+	if (nvml.getDeviceHandleByPCIBusId(device_info.pci_id.data(), &dev) ==
+	    NVML_SUCCESS) {
+		/* Zero-filled so a failed query yields an empty string instead of garbage */
+		char uuid[NVML_DEVICE_UUID_V2_BUFFER_SIZE] = {};
+		nvml.getDeviceUUID(dev, uuid, sizeof(uuid));
+		device_info.nvml_uuid = uuid;
+
+		char name[NVML_DEVICE_NAME_V2_BUFFER_SIZE] = {};
+		nvml.getDeviceName(dev, name, sizeof(name));
+		device_info.name = name;
+
+		nvml.getDevicePCIeGen(dev, &device_info.pcie_gen);
+		nvml.getDevicePCIeWidth(dev, &device_info.pcie_width);
+
+		/*
+		 * The count is an in/out parameter: on input it is the size of
+		 * the session buffer, which is null here, so it must be zero.
+		 */
+		device_info.encoder_sessions = 0;
+		nvml.getEncoderSessions(dev, &device_info.encoder_sessions,
+					nullptr);
+		nvml.getEncoderUtilisation(dev, &device_info.utilisation,
+					   &device_info.sample_period);
+		nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_H264,
+					&device_info.capacity_h264);
+		nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_HEVC,
+					&device_info.capacity_hevc);
+		nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_AV1,
+					&device_info.capacity_av1);
+	}
+
+	if (!nvSession.OpenSession(cudaCtx))
+		return false;
+
+	uint32_t guid_count = 0;
+	if (nv.nvEncGetEncodeGUIDCount(nvSession.ptr, &guid_count) !=
+	    NV_ENC_SUCCESS)
+		return false;
+
+	vector<GUID> guids;
+	guids.resize(guid_count);
+	NVENCSTATUS stat = nv.nvEncGetEncodeGUIDs(nvSession.ptr, guids.data(),
+						  guid_count, &guid_count);
+	if (stat != NV_ENC_SUCCESS)
+		return false;
+
+	NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
+
+	for (uint32_t i = 0; i < guid_count; i++) {
+		GUID *guid = &guids[i];
+
+		/* Map the GUID to a printable codec name */
+		std::string codec_name = "unknown";
+		for (const auto &[name, codec_guid] : codecs) {
+			if (memcmp(&codec_guid, guid, sizeof(GUID)) == 0) {
+				codec_name = name;
+				break;
+			}
+		}
+
+		caps[codec_name]["codec_supported"] = 1;
+		device_info.caps[codec_name]["codec_supported"] = 1;
+
+		for (const auto &[cap, name] : capabilities) {
+			int v = 0;
+			param.capsToQuery = cap;
+			/* Capabilities that cannot be queried are left out */
+			if (nv.nvEncGetEncodeCaps(nvSession.ptr, *guid, &param,
+						  &v) != NV_ENC_SUCCESS)
+				continue;
+
+			device_info.caps[codec_name][name] = v;
+			/* The global map keeps the maximum across all devices */
+			if (v > caps[codec_name][name])
+				caps[codec_name][name] = v;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Initialises NVENC, CUDA and NVML, prints the version information of the
+ * [general] section and collects the capabilities of every CUDA device.
+ *
+ * caps          Receives the highest capability values across all devices.
+ * device_infos  Resized to the CUDA device count and filled per device.
+ *
+ * Returns false when initialisation fails, the driver's NVENC version is
+ * older than the one this tool was built against, or no device supports
+ * NVENC. Most failure paths print a "reason=" line first.
+ */
+bool nvenc_checks(codec_caps_map &caps, vector<device_info> &device_infos)
+{
+	/* NVENC API init */
+	if (!init_nvenc())
+		return false;
+
+	/* CUDA init */
+	if (!init_cuda())
+		return false;
+
+	NVML nvml;
+	if (!nvml.Init())
+		return false;
+
+	/* --------------------------------------------------------- */
+	/* obtain adapter compatibility information                  */
+
+	uint32_t nvenc_ver;
+	int cuda_driver_ver;
+	int cuda_devices = 0;
+	int nvenc_devices = 0;
+	char driver_ver[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
+
+	/* NVIDIA driver version */
+	if (nvml.getDriverVersion(driver_ver, sizeof(driver_ver)) ==
+	    NVML_SUCCESS) {
+		printf("driver_ver=%s\n", driver_ver);
+	} else {
+		// Treat this as a non-fatal failure
+		printf("driver_ver=0.0\n");
+	}
+
+	/* CUDA driver version and devices */
+	if (cu->cuDriverGetVersion(&cuda_driver_ver) == CUDA_SUCCESS) {
+		/*
+		 * The version is encoded as 1000 * major + 10 * minor, so the
+		 * remainder has to be divided by 10 to get the minor number
+		 * (12020 is 12.2, not 12.20).
+		 */
+		printf("cuda_ver=%d.%d\n", cuda_driver_ver / 1000,
+		       (cuda_driver_ver % 1000) / 10);
+	} else {
+		printf("reason=no_cuda_version\n");
+		return false;
+	}
+
+	if (cu->cuDeviceGetCount(&cuda_devices) == CUDA_SUCCESS &&
+	    cuda_devices) {
+		printf("cuda_devices=%d\n", cuda_devices);
+	} else {
+		printf("reason=no_devices\n");
+		return false;
+	}
+
+	/* NVENC API version, packed as (major << 4) | minor */
+	if (nvenc->NvEncodeAPIGetMaxSupportedVersion(&nvenc_ver) ==
+	    NV_ENC_SUCCESS) {
+		printf("nvenc_ver=%u.%u\n", nvenc_ver >> 4, nvenc_ver & 0xf);
+	} else {
+		printf("reason=no_nvenc_version\n");
+		return false;
+	}
+
+	/* The driver must support at least the API version of the headers used at build time */
+	if (nvenc_ver < NVENC_CONFIGURED_VERSION) {
+		printf("reason=outdated_driver\n");
+		return false;
+	}
+
+	device_infos.resize(cuda_devices);
+	for (int idx = 0; idx < cuda_devices; idx++) {
+		if (get_adapter_caps(idx, caps, device_infos[idx], nvml))
+			nvenc_devices++;
+	}
+
+	printf("nvenc_devices=%d\n", nvenc_devices);
+	if (!nvenc_devices) {
+		printf("reason=no_supported_devices\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Runs all checks and prints the INI-formatted report to stdout: a [general]
+ * section, one section per codec with the highest capabilities across all
+ * devices, and per-device sections.
+ *
+ * Returns 0 when NVENC is supported and 1 otherwise; the report is printed in
+ * both cases.
+ */
+int check_thread()
+{
+	int ret = 0;
+	codec_caps_map caps;
+	vector<device_info> device_infos;
+
+	/* Seed the known codecs so their sections exist even when unsupported */
+	caps["h264"]["codec_supported"] = 0;
+	caps["hevc"]["codec_supported"] = 0;
+	caps["av1"]["codec_supported"] = 0;
+
+	printf("[general]\n");
+
+	if (nvenc_checks(caps, device_infos)) {
+		printf("nvenc_supported=true\n");
+	} else {
+		printf("nvenc_supported=false\n");
+		ret = 1;
+	}
+
+	/* Global capabilities, based on highest supported across all devices */
+	for (const auto &[codec, codec_caps] : caps) {
+		printf("\n[%s]\n", codec.c_str());
+
+		for (const auto &[name, value] : codec_caps) {
+			printf("%s=%d\n", name.c_str(), value);
+		}
+	}
+
+	/* Per-device info (mostly for debugging) */
+	for (size_t idx = 0; idx < device_infos.size(); idx++) {
+		const auto &info = device_infos[idx];
+
+		/* All numeric fields are uint32_t, hence %u throughout */
+		printf("\n[device.%zu]\n"
+		       "pci_id=%s\n"
+		       "nvml_uuid=%s\n"
+		       "cuda_uuid=%s\n"
+		       "name=%s\n"
+		       "pcie_link_width=%u\n"
+		       "pcie_link_gen=%u\n"
+		       "encoder_sessions=%u\n"
+		       "utilisation=%u\n"
+		       "sample_period=%u\n"
+		       "capacity_h264=%u\n"
+		       "capacity_hevc=%u\n"
+		       "capacity_av1=%u\n",
+		       idx, info.pci_id.c_str(), info.nvml_uuid.c_str(),
+		       info.cuda_uuid.c_str(), info.name.c_str(),
+		       info.pcie_width, info.pcie_gen, info.encoder_sessions,
+		       info.utilisation, info.sample_period, info.capacity_h264,
+		       info.capacity_hevc, info.capacity_av1);
+
+		for (const auto &[codec, codec_caps] : info.caps) {
+			printf("\n[device.%zu.%s]\n", idx, codec.c_str());
+
+			for (const auto &[name, value] : codec_caps) {
+				printf("%s=%d\n", name.c_str(), value);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Runs the checks on a worker thread and gives them 2.5 seconds to finish.
+ * On timeout the process exits with status 1 without waiting for the worker;
+ * otherwise the worker's result becomes the exit status.
+ */
+int main(int, char **)
+{
+	future<int> worker = async(launch::async, check_thread);
+
+	if (worker.wait_for(2.5s) != future_status::timeout)
+		return worker.get();
+
+	exit(1);
+}

+ 30 - 0
plugins/obs-nvenc/obs-nvenc.c

@@ -0,0 +1,30 @@
+#include <obs-module.h>
+
+#include "obs-nvenc.h"
+
+OBS_DECLARE_MODULE()
+OBS_MODULE_USE_DEFAULT_LOCALE("obs-nvenc", "en-US")
+
+/* Returns the human-readable description of this module. */
+MODULE_EXPORT const char *obs_module_description(void)
+{
+	static const char description[] = "NVIDIA Encoder (NVENC) Plugin";
+
+	return description;
+}
+
+/*
+ * Module entry point. Loads the NVENC and CUDA parts only when
+ * nvenc_supported() reports support; otherwise logs a message and returns
+ * false.
+ */
+bool obs_module_load(void)
+{
+	const bool supported = nvenc_supported();
+
+	if (supported) {
+		obs_nvenc_load();
+		obs_cuda_load();
+	} else {
+		blog(LOG_INFO, "NVENC not supported");
+	}
+
+	return supported;
+}
+
+/* Module exit point: unloads the CUDA and NVENC parts in the reverse order of obs_module_load(). */
+void obs_module_unload(void)
+{
+	obs_cuda_unload();
+	obs_nvenc_unload();
+}

+ 11 - 0
plugins/obs-nvenc/obs-nvenc.h

@@ -0,0 +1,11 @@
+#pragma once
+
+#include <util/platform.h>
+
+/* Reports whether NVENC can be used; obs_module_load() returns false without loading anything when this returns false. */
+bool nvenc_supported(void);
+
+/* Called from obs_module_load() / obs_module_unload(); presumably set up and tear down the NVENC side of the plugin (implementation not in this header). */
+void obs_nvenc_load(void);
+void obs_nvenc_unload(void);
+
+/* Called from obs_module_load() / obs_module_unload(); presumably set up and tear down the CUDA side of the plugin (implementation not in this header). */
+void obs_cuda_load(void);
+void obs_cuda_unload(void);