瀏覽代碼

obs-filter: Add RTX Background Removal filter

This adds support for RTX video Effects SDK.
The Background Removal filter relies on NVIDIA RTX AI Greenscreen FX.
A front speaker is separated from a background by applying
an alpha mask computed by AI.

Signed-off-by: pkv <[email protected]>
pkv 4 年之前
父節點
當前提交
b555dcb8e2

+ 25 - 3
plugins/obs-filters/CMakeLists.txt

@@ -9,6 +9,10 @@ if(OS_WINDOWS)
     ENABLE_NVAFX
     "Enable building with NVIDIA Audio Effects SDK (requires redistributable to be installed)"
     ON)
+  option(
+    ENABLE_NVVFX
+    "Enable building with NVIDIA Video Effects SDK (requires redistributable to be installed)"
+    ON)
 endif()
 
 add_library(obs-filters MODULE)
@@ -86,16 +90,25 @@ else()
 endif()
 
 if(NOT ENABLE_NVAFX)
-  obs_status(DISABLED "NVidia Audio FX support")
+  obs_status(DISABLED "NVIDIA Audio FX support")
   set(LIBNVAFX_FOUND OFF)
 else()
-  obs_status(ENABLED "NVidia Audio FX support")
+  obs_status(ENABLED "NVIDIA Audio FX support")
 
   target_compile_definitions(obs-filters PRIVATE LIBNVAFX_ENABLED)
 
   set(LIBNVAFX_FOUND ON)
 endif()
 
+# Mirror the ENABLE_NVVFX option into LIBNVVFX_FOUND and, when it is on, define
+# LIBNVVFX_ENABLED for the obs-filters sources.
+if(ENABLE_NVVFX)
+  set(LIBNVVFX_FOUND ON)
+  target_compile_definitions(obs-filters PRIVATE LIBNVVFX_ENABLED)
+  obs_status(ENABLED "NVIDIA Video FX support")
+else()
+  set(LIBNVVFX_FOUND OFF)
+  obs_status(DISABLED "NVIDIA Video FX support")
+endif()
+
 if(TARGET Librnnoise::Librnnoise
    OR TARGET LibspeexDSP::LibspeexDSP
    OR LIBNVAFX_FOUND)
@@ -141,7 +154,8 @@ if(NOT OS_MACOS)
             data/luma_key_filter_v2.effect
             data/mask_alpha_filter.effect
             data/mask_color_filter.effect
-            data/sharpness.effect)
+            data/sharpness.effect
+            data/rtx_greenscreen.effect)
 
   get_target_property(_SOURCES obs-filters SOURCES)
   set(_FILTERS ${_SOURCES})
@@ -153,6 +167,14 @@ if(NOT OS_MACOS)
     FILES ${_FILTERS})
 endif()
 
+# Compile the RTX background removal filter only when Video FX support is
+# enabled; the status line reminds packagers that the SDK redistributable is a
+# runtime requirement on the end-user machine.
+if(LIBNVVFX_FOUND)
+  target_sources(obs-filters PRIVATE nvidia-greenscreen-filter.c)
+  obs_status(
+    STATUS
+    "NVIDIA Video FX support enabled; requires redist to be installed by end-user"
+  )
+endif()
+
 target_include_directories(
   obs-filters PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/config>)
 

+ 5 - 0
plugins/obs-filters/data/locale/en-US.ini

@@ -110,3 +110,8 @@ Luma.LumaMax="Luma Max"
 Luma.LumaMin="Luma Min"
 Luma.LumaMaxSmooth="Luma Max Smooth"
 Luma.LumaMinSmooth="Luma Min Smooth"
+NvidiaGreenscreenFilter="NVIDIA Background Removal"
+Greenscreen.Mode="Mode"
+Greenscreen.Quality="Quality (higher GPU usage, better quality)"
+Greenscreen.Performance="Performance (lower GPU usage, good quality)"
+Greenscreen.Threshold="Threshold"

+ 41 - 0
plugins/obs-filters/data/rtx_greenscreen.effect

@@ -0,0 +1,41 @@
+uniform float4x4 ViewProj;
+uniform texture2d image;
+
+uniform texture2d mask;
+uniform float threshold;
+
+sampler_state texSampler {
+	Filter    = Linear;
+	AddressU  = Clamp;
+	AddressV  = Clamp;
+};
+
+struct VertData {
+	float4 pos : POSITION;
+	float2 uv  : TEXCOORD0;
+};
+
+// Vertex shader: transforms the vertex position by ViewProj (forcing w = 1.0)
+// and passes the texture coordinates through unchanged.
+VertData VSDefault(VertData v_in)
+{
+	VertData v_out;
+	v_out.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj);
+	v_out.uv = v_in.uv;
+	return v_out;
+}
+
+// Pixel shader: takes the colour from `image` and derives the output alpha
+// from the alpha channel of `mask`, ramped from 0 to 1 over the interval
+// [threshold - 0.1, threshold].
+float4 PSMask(VertData v2_in) : TARGET
+{
+	float mask_alpha = mask.Sample(texSampler, v2_in.uv).a;
+	float3 color = image.Sample(texSampler, v2_in.uv).rgb;
+	return float4(color, smoothstep(threshold - 0.1, threshold, mask_alpha));
+}
+
+technique Draw
+{
+	pass
+	{
+		vertex_shader = VSDefault(v_in);
+		pixel_shader  = PSMask(v2_in);
+	}
+}

+ 739 - 0
plugins/obs-filters/nvidia-greenscreen-filter.c

@@ -0,0 +1,739 @@
+#include <obs-module.h>
+#include <util/threading.h>
+#include <dxgi.h>
+#include <d3d11.h>
+#include <d3d11_1.h>
+#include "nvvfx-load.h"
+/* -------------------------------------------------------- */
+
+#define do_log(level, format, ...)                                             \
+	blog(level,                                                            \
+	     "[NVIDIA RTX AI Greenscreen (Background removal): '%s'] " format, \
+	     obs_source_get_name(filter->context), ##__VA_ARGS__)
+
+#define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
+#define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
+#define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__)
+
+#ifdef _DEBUG
+#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)
+#else
+#define debug(format, ...)
+#endif
+
+/* -------------------------------------------------------- */
+#define S_MODE "mode"
+#define S_MODE_QUALITY 0
+#define S_MODE_PERF 1
+#define S_THRESHOLDFX "threshold"
+#define S_THRESHOLDFX_DEFAULT 1.0
+
+#define MT_ obs_module_text
+#define TEXT_MODE MT_("Greenscreen.Mode")
+#define TEXT_MODE_QUALITY MT_("Greenscreen.Quality")
+#define TEXT_MODE_PERF MT_("Greenscreen.Performance")
+#define TEXT_MODE_THRESHOLD MT_("Greenscreen.Threshold")
+
+bool nvvfx_loaded = false;
+/* Per-instance state of the NVIDIA background removal filter. */
+struct nv_greenscreen_data {
+	obs_source_t *context;
+	/* set once init_images_greenscreen() has succeeded for the current size */
+	bool images_allocated;
+	/* set once src_img has been bound to the texrender texture */
+	bool initial_render;
+	/* set on any failure; tick and render return early while it is set */
+	bool processing_stop;
+	/* cleared by tick, set by render after the frame has been drawn */
+	bool processed_frame;
+	/* set by tick when the target size is at least 512x288 */
+	bool target_valid;
+	/* set by filter_video, cleared by render once the FX has been run */
+	volatile bool got_new_frame;
+
+	/* RTX SDK vars */
+	NvVFX_Handle handle;
+	CUstream stream;        // CUDA stream
+	int mode;               // 0 = quality, 1 = performance
+	NvCVImage *src_img;     // src img in obs format (RGBA ?) on GPU
+	NvCVImage *BGR_src_img; // src img in BGR on GPU
+	NvCVImage *A_dst_img;   // mask img on GPU
+	NvCVImage *dst_img;     // mask texture
+	NvCVImage *stage;       // planar stage img used for transfer to texture
+
+	/* alpha mask effect */
+	gs_effect_t *effect;
+	gs_texrender_t *render;
+	gs_texture_t *alpha_texture;
+	uint32_t width;  // width of texture
+	uint32_t height; // height of texture
+	gs_eparam_t *mask_param;
+	gs_texture_t *src_texture;
+	gs_eparam_t *src_param;
+	gs_eparam_t *threshold_param;
+	double threshold;
+};
+
+/* get_name callback: returns the localized display name of the filter. */
+static const char *nv_greenscreen_filter_name(void *unused)
+{
+	const char *display_name = obs_module_text("NvidiaGreenscreenFilter");
+
+	UNUSED_PARAMETER(unused);
+	return display_name;
+}
+
+/*
+ * update callback: applies the user settings to the filter.
+ *
+ * When the mode setting differs from the cached one, the new mode is pushed
+ * to the FX and the effect is (re)loaded. The threshold is always refreshed.
+ *
+ * data:     the struct nv_greenscreen_data of this filter instance
+ * settings: the obs settings holding S_MODE and S_THRESHOLDFX
+ */
+static void nv_greenscreen_filter_update(void *data, obs_data_t *settings)
+{
+	struct nv_greenscreen_data *filter = (struct nv_greenscreen_data *)data;
+	NvCV_Status vfxErr;
+	int mode = (int)obs_data_get_int(settings, S_MODE);
+	if (filter->mode != mode) {
+		filter->mode = mode;
+		/* Report a failed mode change instead of letting the status of
+		 * NvVFX_Load() silently overwrite it. */
+		vfxErr = NvVFX_SetU32(filter->handle, NVVFX_MODE, mode);
+		if (NVCV_SUCCESS != vfxErr)
+			error("Error setting AI Greenscreen FX mode %i", vfxErr);
+		/* The effect is still loaded so that the filter keeps working
+		 * with whatever mode the FX currently holds. */
+		vfxErr = NvVFX_Load(filter->handle);
+		if (NVCV_SUCCESS != vfxErr)
+			error("Error loading AI Greenscreen FX %i", vfxErr);
+	}
+	filter->threshold =
+		(double)obs_data_get_double(settings, S_THRESHOLDFX);
+}
+
+/*
+ * Releases every resource owned by the filter and frees the filter itself.
+ *
+ * Each resource is released when its pointer is set rather than only when
+ * images_allocated is true, so that objects created before a partial
+ * initialization failure are not leaked and NULL images are never passed to
+ * NvCVImage_Destroy().
+ *
+ * data: the struct nv_greenscreen_data to destroy
+ */
+static void nv_greenscreen_filter_actual_destroy(void *data)
+{
+	struct nv_greenscreen_data *filter = (struct nv_greenscreen_data *)data;
+	if (!nvvfx_loaded) {
+		/* No SDK symbol may be called; only the struct was allocated. */
+		bfree(filter);
+		return;
+	}
+
+	filter->processing_stop = true;
+
+	if (filter->alpha_texture || filter->render) {
+		obs_enter_graphics();
+		if (filter->alpha_texture)
+			gs_texture_destroy(filter->alpha_texture);
+		if (filter->render)
+			gs_texrender_destroy(filter->render);
+		obs_leave_graphics();
+	}
+	if (filter->src_img)
+		NvCVImage_Destroy(filter->src_img);
+	if (filter->BGR_src_img)
+		NvCVImage_Destroy(filter->BGR_src_img);
+	if (filter->A_dst_img)
+		NvCVImage_Destroy(filter->A_dst_img);
+	if (filter->dst_img)
+		NvCVImage_Destroy(filter->dst_img);
+	if (filter->stage)
+		NvCVImage_Destroy(filter->stage);
+	if (filter->stream) {
+		NvVFX_CudaStreamDestroy(filter->stream);
+	}
+	if (filter->handle) {
+		NvVFX_DestroyEffect(filter->handle);
+	}
+	if (filter->effect) {
+		obs_enter_graphics();
+		gs_effect_destroy(filter->effect);
+		obs_leave_graphics();
+	}
+
+	bfree(filter);
+}
+
+/*
+ * destroy callback: queues the actual teardown as an OBS_TASK_GRAPHICS task
+ * without waiting for it to complete.
+ */
+static void nv_greenscreen_filter_destroy(void *data)
+{
+	obs_queue_task(OBS_TASK_GRAPHICS, nv_greenscreen_filter_actual_destroy,
+		       data, false);
+}
+
+/*
+ * (Re)creates the alpha texture, the texrender and every NvCVImage for the
+ * current filter->width x filter->height, then binds the input and output
+ * images of the FX.
+ *
+ * On success images_allocated is set to true; on any failure
+ * processing_stop is set to true and an error is logged.
+ */
+static void init_images_greenscreen(struct nv_greenscreen_data *filter)
+{
+	NvCV_Status vfxErr;
+	uint32_t width = filter->width;
+	uint32_t height = filter->height;
+	struct ID3D11Texture2D *d11texture = NULL;
+
+	/* 0. Release the images of a previous size so a resize doesn't leak. */
+	if (filter->dst_img) {
+		NvCVImage_Destroy(filter->dst_img);
+		filter->dst_img = NULL;
+	}
+	if (filter->BGR_src_img) {
+		NvCVImage_Destroy(filter->BGR_src_img);
+		filter->BGR_src_img = NULL;
+	}
+	if (filter->A_dst_img) {
+		NvCVImage_Destroy(filter->A_dst_img);
+		filter->A_dst_img = NULL;
+	}
+	if (filter->stage) {
+		NvCVImage_Destroy(filter->stage);
+		filter->stage = NULL;
+	}
+
+	/* 1. create alpha texture */
+	obs_enter_graphics();
+	if (filter->alpha_texture) {
+		gs_texture_destroy(filter->alpha_texture);
+	}
+	filter->alpha_texture =
+		gs_texture_create(width, height, GS_A8, 1, NULL, 0);
+	if (filter->alpha_texture) {
+		d11texture = (struct ID3D11Texture2D *)gs_texture_get_obj(
+			filter->alpha_texture);
+	}
+	/* Always leave the graphics context, including on failure. */
+	obs_leave_graphics();
+	if (filter->alpha_texture == NULL) {
+		error("Alpha texture couldn't be created");
+		goto fail;
+	}
+
+	/* 2. Create NvCVImage which will hold final alpha texture. */
+	if (NvCVImage_Create(width, height, NVCV_A, NVCV_U8, NVCV_CHUNKY,
+			     NVCV_GPU, 1, &filter->dst_img) != NVCV_SUCCESS) {
+		goto fail;
+	}
+
+	vfxErr = NvCVImage_InitFromD3D11Texture(filter->dst_img, d11texture);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error passing dst ID3D11Texture to img; error %i: %s",
+		      vfxErr, errString);
+		goto fail;
+	}
+
+	/* 3. create texrender */
+	obs_enter_graphics();
+	if (filter->render)
+		gs_texrender_destroy(filter->render);
+	filter->render = gs_texrender_create(GS_BGRA_UNORM, GS_ZS_NONE);
+	obs_leave_graphics();
+	if (!filter->render) {
+		error("Failed to create a texture renderer");
+		goto fail;
+	}
+
+	/* 4. Create and allocate BGR NvCVImage (fx src). */
+	if (NvCVImage_Create(width, height, NVCV_BGR, NVCV_U8, NVCV_CHUNKY,
+			     NVCV_GPU, 1,
+			     &filter->BGR_src_img) != NVCV_SUCCESS) {
+		goto fail;
+	}
+	if (NvCVImage_Alloc(filter->BGR_src_img, width, height, NVCV_BGR,
+			    NVCV_U8, NVCV_CHUNKY, NVCV_GPU,
+			    1) != NVCV_SUCCESS) {
+		goto fail;
+	}
+
+	/* 5. Create and allocate Alpha NvCVimage (fx dst). */
+	if (NvCVImage_Create(width, height, NVCV_A, NVCV_U8, NVCV_CHUNKY,
+			     NVCV_GPU, 1, &filter->A_dst_img) != NVCV_SUCCESS) {
+		goto fail;
+	}
+	if (NvCVImage_Alloc(filter->A_dst_img, width, height, NVCV_A, NVCV_U8,
+			    NVCV_CHUNKY, NVCV_GPU, 1) != NVCV_SUCCESS) {
+		goto fail;
+	}
+
+	/* 6. Create stage NvCVImage which will be used as buffer for transfer */
+	if (NvCVImage_Create(width, height, NVCV_RGBA, NVCV_U8, NVCV_PLANAR,
+			     NVCV_GPU, 1, &filter->stage) != NVCV_SUCCESS) {
+		goto fail;
+	}
+	if (NvCVImage_Alloc(filter->stage, width, height, NVCV_RGBA, NVCV_U8,
+			    NVCV_PLANAR, NVCV_GPU, 1) != NVCV_SUCCESS) {
+		goto fail;
+	}
+
+	/* 7. Set input & output images for nv FX. */
+	if (NvVFX_SetImage(filter->handle, NVVFX_INPUT_IMAGE,
+			   filter->BGR_src_img) != NVCV_SUCCESS) {
+		goto fail;
+	}
+	if (NvVFX_SetImage(filter->handle, NVVFX_OUTPUT_IMAGE,
+			   filter->A_dst_img) != NVCV_SUCCESS) {
+		goto fail;
+	}
+
+	filter->images_allocated = true;
+	return;
+fail:
+	error("Error during allocation of images");
+	filter->processing_stop = true;
+	return;
+}
+
+/*
+ * Runs the FX on the current source texture and writes the resulting mask
+ * into the alpha texture.
+ *
+ * Steps: map src_img, transfer it to BGR_src_img, unmap, run the FX, map
+ * dst_img, transfer A_dst_img into it, unmap.
+ *
+ * Returns true on success. On any failure the error is logged,
+ * processing_stop is set and false is returned.
+ */
+static bool process_texture_greenscreen(struct nv_greenscreen_data *filter)
+{
+	NvCV_Status vfxErr;
+
+	/* 1. Map src img holding texture. */
+	vfxErr = NvCVImage_MapResource(filter->src_img, filter->stream);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error mapping resource for source texture; error %i : %s",
+		      vfxErr, errString);
+		goto fail;
+	}
+
+	/* 2. Convert to BGR. */
+	vfxErr = NvCVImage_Transfer(filter->src_img, filter->BGR_src_img, 1.0f,
+				    filter->stream, filter->stage);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error converting src to BGR img; error %i: %s", vfxErr,
+		      errString);
+		/* Don't leave the source texture mapped when bailing out. */
+		NvCVImage_UnmapResource(filter->src_img, filter->stream);
+		goto fail;
+	}
+	vfxErr = NvCVImage_UnmapResource(filter->src_img, filter->stream);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error unmapping resource for src texture; error %i: %s",
+		      vfxErr, errString);
+		goto fail;
+	}
+
+	/*  3. run RTX fx */
+	vfxErr = NvVFX_Run(filter->handle, 1);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error running the FX; error %i: %s", vfxErr, errString);
+		goto fail;
+	}
+
+	/* 4. Map dst texture before transfer from dst img provided by FX */
+	vfxErr = NvCVImage_MapResource(filter->dst_img, filter->stream);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error mapping resource for dst texture; error %i: %s",
+		      vfxErr, errString);
+		goto fail;
+	}
+
+	vfxErr = NvCVImage_Transfer(filter->A_dst_img, filter->dst_img, 1.0f,
+				    filter->stream, filter->stage);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error transferring mask to alpha texture; error %i: %s ",
+		      vfxErr, errString);
+		/* Don't leave the alpha texture mapped when bailing out. */
+		NvCVImage_UnmapResource(filter->dst_img, filter->stream);
+		goto fail;
+	}
+
+	vfxErr = NvCVImage_UnmapResource(filter->dst_img, filter->stream);
+	if (vfxErr != NVCV_SUCCESS) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error unmapping resource for dst texture; error %i: %s",
+		      vfxErr, errString);
+		goto fail;
+	}
+
+	return true;
+fail:
+	filter->processing_stop = true;
+	return false;
+}
+
+/*
+ * create callback: allocates the filter state, creates the green screen FX,
+ * points it at the SDK models directory, attaches a CUDA stream, loads the
+ * alpha mask effect file and applies the initial settings.
+ *
+ * settings: initial obs settings of the filter
+ * context:  the filter source
+ *
+ * Returns the new filter state, or NULL when the SDK is not loaded or a
+ * required step fails (the partially built state is then queued for
+ * destruction).
+ */
+static void *nv_greenscreen_filter_create(obs_data_t *settings,
+					  obs_source_t *context)
+{
+	struct nv_greenscreen_data *filter =
+		(struct nv_greenscreen_data *)bzalloc(sizeof(*filter));
+	if (!nvvfx_loaded) {
+		nv_greenscreen_filter_destroy(filter);
+		return NULL;
+	}
+
+	NvCV_Status vfxErr;
+	filter->context = context;
+	filter->mode = -1; // should be 0 or 1; -1 triggers an update
+	filter->images_allocated = false;
+	filter->processed_frame = true; // start processing when false
+	filter->width = 0;
+	filter->height = 0;
+	filter->initial_render = false;
+	filter->processing_stop = false;
+
+	/* 1. Create FX */
+	vfxErr = NvVFX_CreateEffect(NVVFX_FX_GREEN_SCREEN, &filter->handle);
+	if (NVCV_SUCCESS != vfxErr) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error creating AI Greenscreen FX; error %i: %s", vfxErr,
+		      errString);
+		nv_greenscreen_filter_destroy(filter);
+		return NULL;
+	}
+
+	/* 2. Set models path & initialize CudaStream */
+	char buffer[MAX_PATH];
+	char modelDir[MAX_PATH];
+	nvvfx_get_sdk_path(buffer, MAX_PATH);
+	/* Bound the write by the destination buffer, not the source one. */
+	snprintf(modelDir, sizeof(modelDir), "%s\\models", buffer);
+	vfxErr = NvVFX_SetString(filter->handle, NVVFX_MODEL_DIRECTORY,
+				 modelDir);
+	if (NVCV_SUCCESS != vfxErr) {
+		/* Not fatal here; a later load failure is reported by update. */
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		warn("Error setting models directory; error %i: %s", vfxErr,
+		     errString);
+	}
+	vfxErr = NvVFX_CudaStreamCreate(&filter->stream);
+	if (NVCV_SUCCESS != vfxErr) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error creating CUDA Stream; error %i: %s", vfxErr,
+		      errString);
+		nv_greenscreen_filter_destroy(filter);
+		return NULL;
+	}
+	vfxErr = NvVFX_SetCudaStream(filter->handle, NVVFX_CUDA_STREAM,
+				     filter->stream);
+	if (NVCV_SUCCESS != vfxErr) {
+		const char *errString = NvCV_GetErrorStringFromCode(vfxErr);
+		error("Error setting CUDA Stream; error %i: %s", vfxErr,
+		      errString);
+		nv_greenscreen_filter_destroy(filter);
+		return NULL;
+	}
+	/* log sdk version */
+	unsigned int version;
+	if (NvVFX_GetVersion(&version) == NVCV_SUCCESS) {
+		uint8_t major = (version >> 24) & 0xff;
+		uint8_t minor = (version >> 16) & 0x00ff;
+		uint8_t build = (version >> 8) & 0x0000ff;
+		info("RTX VIDEO FX version: %i.%i.%i", major, minor, build);
+	}
+
+	/* 3. Load alpha mask effect. */
+	char *effect_path = obs_module_file("rtx_greenscreen.effect");
+
+	obs_enter_graphics();
+	filter->effect = gs_effect_create_from_file(effect_path, NULL);
+	bfree(effect_path);
+	if (filter->effect) {
+		filter->mask_param =
+			gs_effect_get_param_by_name(filter->effect, "mask");
+		filter->src_param =
+			gs_effect_get_param_by_name(filter->effect, "image");
+		filter->threshold_param = gs_effect_get_param_by_name(
+			filter->effect, "threshold");
+	}
+	obs_leave_graphics();
+
+	if (!filter->effect) {
+		nv_greenscreen_filter_destroy(filter);
+		return NULL;
+	}
+
+	/*---------------------------------------- */
+
+	nv_greenscreen_filter_update(filter, settings);
+
+	return filter;
+}
+
+/*
+ * get_properties callback: builds the settings UI, a mode list
+ * (quality / performance) and a threshold slider ranging from 0 to 1 in
+ * steps of 0.05.
+ */
+static obs_properties_t *nv_greenscreen_filter_properties(void *data)
+{
+	obs_properties_t *props = obs_properties_create();
+	obs_property_t *mode = obs_properties_add_list(props, S_MODE, TEXT_MODE,
+						       OBS_COMBO_TYPE_LIST,
+						       OBS_COMBO_FORMAT_INT);
+	obs_property_list_add_int(mode, TEXT_MODE_QUALITY, S_MODE_QUALITY);
+	obs_property_list_add_int(mode, TEXT_MODE_PERF, S_MODE_PERF);
+	/* The returned property handle is not needed afterwards. */
+	obs_properties_add_float_slider(props, S_THRESHOLDFX,
+					TEXT_MODE_THRESHOLD, 0, 1, 0.05);
+
+	UNUSED_PARAMETER(data);
+	return props;
+}
+
+/* get_defaults callback: quality mode and a threshold of 1.0 by default. */
+static void nv_greenscreen_filter_defaults(obs_data_t *settings)
+{
+	obs_data_set_default_int(settings, S_MODE, S_MODE_QUALITY);
+	obs_data_set_default_double(settings, S_THRESHOLDFX,
+				    S_THRESHOLDFX_DEFAULT);
+}
+/*
+ * filter_video callback: flags that a new frame arrived so that the next
+ * render pass runs the FX; the frame itself is returned untouched.
+ */
+static struct obs_source_frame *
+nv_greenscreen_filter_video(void *data, struct obs_source_frame *frame)
+{
+	struct nv_greenscreen_data *filter = (struct nv_greenscreen_data *)data;
+	os_atomic_set_bool(&filter->got_new_frame, true);
+	return frame;
+}
+
+/*
+ * video_tick callback: validates the target size, (re)allocates the images
+ * when the size changed and arms processing of the next frame.
+ *
+ * data: the struct nv_greenscreen_data of this filter instance
+ * t:    unused
+ */
+static void nv_greenscreen_filter_tick(void *data, float t)
+{
+	UNUSED_PARAMETER(t);
+
+	struct nv_greenscreen_data *filter = (struct nv_greenscreen_data *)data;
+
+	if (filter->processing_stop) {
+		return;
+	}
+	obs_source_t *target = obs_filter_get_target(filter->context);
+	if (!target) {
+		return;
+	}
+	uint32_t cx;
+	uint32_t cy;
+
+	filter->target_valid = true;
+
+	cx = obs_source_get_base_width(target);
+	cy = obs_source_get_base_height(target);
+
+	// initially the sizes are 0
+	if (!cx && !cy) {
+		filter->target_valid = false;
+		return;
+	}
+
+	/* minimum size supported by SDK is (512,288) */
+	filter->target_valid = cx >= 512 && cy >= 288;
+	if (!filter->target_valid) {
+		error("Size must be larger than (512,288)");
+		return;
+	}
+	/* A change in either dimension invalidates the allocated images. */
+	if (cx != filter->width || cy != filter->height) {
+		filter->images_allocated = false;
+		filter->width = cx;
+		filter->height = cy;
+	}
+	if (!filter->images_allocated) {
+		init_images_greenscreen(filter);
+		/* Force render to rebind src_img to the new texture. */
+		filter->initial_render = false;
+	}
+
+	filter->processed_frame = false;
+}
+
+/*
+ * Draws the source texture with the alpha mask applied through the "Draw"
+ * technique of the mask effect. Does nothing when
+ * obs_source_process_filter_begin() fails.
+ */
+static void draw_greenscreen(struct nv_greenscreen_data *filter)
+{
+	/* Render alpha mask */
+	if (!obs_source_process_filter_begin(filter->context, GS_BGRA_UNORM,
+					     OBS_ALLOW_DIRECT_RENDERING)) {
+		return;
+	}
+	gs_effect_set_texture(filter->mask_param, filter->alpha_texture);
+	gs_effect_set_texture(filter->src_param, filter->src_texture);
+	gs_effect_set_float(filter->threshold_param, (float)filter->threshold);
+	while (gs_effect_loop(filter->effect, "Draw")) {
+		gs_draw_sprite(NULL, 0, filter->width, filter->height);
+	}
+	obs_source_process_filter_end(filter->context, filter->effect, 0, 0);
+}
+
+/*
+ * Same as draw_greenscreen() but binds the textures with the sRGB setters
+ * and draws with framebuffer sRGB enabled; the previous framebuffer sRGB
+ * state is restored after drawing.
+ */
+static void draw_greenscreen_srgb(struct nv_greenscreen_data *filter)
+{
+	/* Render alpha mask */
+	if (!obs_source_process_filter_begin(filter->context, GS_BGRA_UNORM,
+					     OBS_ALLOW_DIRECT_RENDERING)) {
+		return;
+	}
+	const bool previous = gs_framebuffer_srgb_enabled();
+	gs_enable_framebuffer_srgb(true);
+	gs_effect_set_texture_srgb(filter->mask_param, filter->alpha_texture);
+	gs_effect_set_texture_srgb(filter->src_param, filter->src_texture);
+	gs_effect_set_float(filter->threshold_param, (float)filter->threshold);
+	while (gs_effect_loop(filter->effect, "Draw")) {
+		gs_draw_sprite(NULL, 0, filter->width, filter->height);
+	}
+	gs_enable_framebuffer_srgb(previous);
+	obs_source_process_filter_end(filter->context, filter->effect, 0, 0);
+}
+
+/*
+ * video_render callback.
+ *
+ * 1. Renders the filter target into the texrender to obtain a texture.
+ * 2. On the first pass after (re)allocation, wraps that texture in src_img.
+ * 3. Runs the FX when a new frame was flagged, then draws the masked result.
+ *
+ * The filter is skipped while no frame is pending or the resources are not
+ * ready; nothing is done once processing_stop is set.
+ *
+ * data:   the struct nv_greenscreen_data of this filter instance
+ * effect: unused
+ */
+static void nv_greenscreen_filter_render(void *data, gs_effect_t *effect)
+{
+	NvCV_Status vfxErr;
+	bool ret;
+	struct nv_greenscreen_data *filter = (struct nv_greenscreen_data *)data;
+
+	UNUSED_PARAMETER(effect);
+
+	if (filter->processing_stop)
+		return;
+	obs_source_t *target = obs_filter_get_target(filter->context);
+	obs_source_t *parent = obs_filter_get_parent(filter->context);
+	gs_texrender_t *render;
+
+	/* Skip if processing of a frame hasn't yet started */
+	if (!filter->target_valid || !target || !parent ||
+	    filter->processed_frame) {
+		obs_source_skip_video_filter(filter->context);
+		return;
+	}
+
+	/* 1. Render to retrieve texture. */
+	render = filter->render;
+	if (!render) {
+		obs_source_skip_video_filter(filter->context);
+		return;
+	}
+	uint32_t target_flags = obs_source_get_output_flags(target);
+	uint32_t parent_flags = obs_source_get_output_flags(parent);
+
+	bool custom_draw = (target_flags & OBS_SOURCE_CUSTOM_DRAW) != 0;
+	bool async = (target_flags & OBS_SOURCE_ASYNC) != 0;
+	bool srgb_draw = (parent_flags & OBS_SOURCE_SRGB) != 0;
+
+	gs_texrender_reset(render);
+	gs_blend_state_push();
+	gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO);
+	if (gs_texrender_begin(render, filter->width, filter->height)) {
+		struct vec4 clear_color;
+
+		vec4_zero(&clear_color);
+		gs_clear(GS_CLEAR_COLOR, &clear_color, 0.0f, 0);
+		gs_ortho(0.0f, (float)filter->width, 0.0f,
+			 (float)filter->height, -100.0f, 100.0f);
+
+		if (target == parent && !custom_draw && !async)
+			obs_source_default_render(target);
+		else
+			obs_source_video_render(target);
+
+		gs_texrender_end(render);
+	}
+	gs_blend_state_pop();
+
+	/* 2. Initialize src_texture (only at startup or reset) */
+	if (!filter->initial_render) {
+		obs_enter_graphics();
+		filter->src_texture = gs_texrender_get_texture(filter->render);
+		struct ID3D11Texture2D *d11texture2 =
+			(struct ID3D11Texture2D *)gs_texture_get_obj(
+				filter->src_texture);
+		obs_leave_graphics();
+
+		if (!d11texture2) {
+			error("Couldn't retrieve d3d11texture2d.");
+			return;
+		}
+		/* Release the image wrapping the previous texture (reset after
+		 * a size change) so it is not leaked. */
+		if (filter->src_img) {
+			NvCVImage_Destroy(filter->src_img);
+			filter->src_img = NULL;
+		}
+		vfxErr = NvCVImage_Create(filter->width, filter->height,
+					  NVCV_BGRA, NVCV_U8, NVCV_CHUNKY,
+					  NVCV_GPU, 1, &filter->src_img);
+		if (vfxErr != NVCV_SUCCESS) {
+			const char *errString =
+				NvCV_GetErrorStringFromCode(vfxErr);
+			error("Error creating src img; error %i: %s", vfxErr,
+			      errString);
+			filter->processing_stop = true;
+			return;
+		}
+		vfxErr = NvCVImage_InitFromD3D11Texture(filter->src_img,
+							d11texture2);
+		if (vfxErr != NVCV_SUCCESS) {
+			const char *errString =
+				NvCV_GetErrorStringFromCode(vfxErr);
+			error("Error passing src ID3D11Texture to img; error %i: %s",
+			      vfxErr, errString);
+			filter->processing_stop = true;
+			return;
+		}
+		filter->initial_render = true;
+	}
+
+	/* 3. Process FX (outputs a mask) & draw. */
+	if (filter->initial_render && filter->images_allocated) {
+		ret = true;
+		/* The flag is written atomically by filter_video; read it
+		 * the same way. */
+		if (os_atomic_load_bool(&filter->got_new_frame)) {
+			ret = process_texture_greenscreen(filter);
+			os_atomic_set_bool(&filter->got_new_frame, false);
+		}
+
+		if (ret) {
+			if (!srgb_draw)
+				draw_greenscreen(filter);
+			else
+				draw_greenscreen_srgb(filter);
+			filter->processed_frame = true;
+		}
+	} else {
+		obs_source_skip_video_filter(filter->context);
+		return;
+	}
+}
+
+bool load_nvvfx(void)
+{
+	if (!load_nv_vfx_libs()) {
+		blog(LOG_INFO,
+		     "[NVIDIA RTX VIDEO FX]: FX disabled, redistributable not found.");
+		return false;
+	}
+
+#define LOAD_SYM_FROM_LIB(sym, lib, dll)                            \
+	if (!(sym = (sym##_t)GetProcAddress(lib, #sym))) {          \
+		DWORD err = GetLastError();                         \
+		printf("[NVIDIA RTX VIDEO FX]: Couldn't load " #sym \
+		       " from " dll ": %lu (0x%lx)",                \
+		       err, err);                                   \
+		release_nv_vfx();                                   \
+		goto unload_everything;                             \
+	}
+
+#define LOAD_SYM(sym) LOAD_SYM_FROM_LIB(sym, nv_videofx, "NVVideoEffects.dll")
+	LOAD_SYM(NvVFX_GetVersion);
+	LOAD_SYM(NvVFX_CreateEffect);
+	LOAD_SYM(NvVFX_DestroyEffect);
+	LOAD_SYM(NvVFX_SetU32);
+	LOAD_SYM(NvVFX_SetS32);
+	LOAD_SYM(NvVFX_SetF32);
+	LOAD_SYM(NvVFX_SetF64);
+	LOAD_SYM(NvVFX_SetU64);
+	LOAD_SYM(NvVFX_SetObject);
+	LOAD_SYM(NvVFX_SetCudaStream);
+	LOAD_SYM(NvVFX_SetImage);
+	LOAD_SYM(NvVFX_SetString);
+	LOAD_SYM(NvVFX_GetU32);
+	LOAD_SYM(NvVFX_GetS32);
+	LOAD_SYM(NvVFX_GetF32);
+	LOAD_SYM(NvVFX_GetF64);
+	LOAD_SYM(NvVFX_GetU64);
+	LOAD_SYM(NvVFX_GetObject);
+	LOAD_SYM(NvVFX_GetCudaStream);
+	LOAD_SYM(NvVFX_GetImage);
+	LOAD_SYM(NvVFX_GetString);
+	LOAD_SYM(NvVFX_Run);
+	LOAD_SYM(NvVFX_Load);
+	LOAD_SYM(NvVFX_CudaStreamCreate);
+	LOAD_SYM(NvVFX_CudaStreamDestroy);
+#undef LOAD_SYM
+#define LOAD_SYM(sym) LOAD_SYM_FROM_LIB(sym, nv_cvimage, "NVCVImage.dll")
+	LOAD_SYM(NvCV_GetErrorStringFromCode);
+	LOAD_SYM(NvCVImage_Init);
+	LOAD_SYM(NvCVImage_InitView);
+	LOAD_SYM(NvCVImage_Alloc);
+	LOAD_SYM(NvCVImage_Realloc);
+	LOAD_SYM(NvCVImage_Dealloc);
+	LOAD_SYM(NvCVImage_Create);
+	LOAD_SYM(NvCVImage_Destroy);
+	LOAD_SYM(NvCVImage_ComponentOffsets);
+	LOAD_SYM(NvCVImage_Transfer);
+	LOAD_SYM(NvCVImage_TransferRect);
+	LOAD_SYM(NvCVImage_TransferFromYUV);
+	LOAD_SYM(NvCVImage_TransferToYUV);
+	LOAD_SYM(NvCVImage_MapResource);
+	LOAD_SYM(NvCVImage_UnmapResource);
+	LOAD_SYM(NvCVImage_Composite);
+	LOAD_SYM(NvCVImage_CompositeRect);
+	LOAD_SYM(NvCVImage_CompositeOverConstant);
+	LOAD_SYM(NvCVImage_FlipY);
+	LOAD_SYM(NvCVImage_GetYUVPointers);
+	LOAD_SYM(NvCVImage_InitFromD3D11Texture);
+	LOAD_SYM(NvCVImage_ToD3DFormat);
+	LOAD_SYM(NvCVImage_FromD3DFormat);
+	LOAD_SYM(NvCVImage_ToD3DColorSpace);
+	LOAD_SYM(NvCVImage_FromD3DColorSpace);
+#undef LOAD_SYM
+#define LOAD_SYM(sym) LOAD_SYM_FROM_LIB(sym, nv_cudart, "cudart64_110.dll")
+	LOAD_SYM(cudaMalloc);
+	LOAD_SYM(cudaStreamSynchronize);
+	LOAD_SYM(cudaFree);
+	LOAD_SYM(cudaMemcpy);
+	LOAD_SYM(cudaMemsetAsync);
+#undef LOAD_SYM
+	int err;
+	NvVFX_Handle h = NULL;
+
+	/* load the effect to check if the GPU is supported */
+	err = NvVFX_CreateEffect(NVVFX_FX_GREEN_SCREEN, &h);
+	if (err != NVCV_SUCCESS) {
+		if (err == NVCV_ERR_UNSUPPORTEDGPU) {
+			blog(LOG_INFO,
+			     "[NVIDIA RTX VIDEO FX]: disabled, unsupported GPU");
+		} else {
+			blog(LOG_ERROR,
+			     "[NVIDIA RTX VIDEO FX]: disabled, error %i", err);
+		}
+		goto unload_everything;
+	}
+	NvVFX_DestroyEffect(h);
+	nvvfx_loaded = true;
+	blog(LOG_INFO, "[NVIDIA RTX VIDEO FX]: enabled, redistributable found");
+	return true;
+
+unload_everything:
+	nvvfx_loaded = false;
+	release_nv_vfx();
+	return false;
+}
+
+#ifdef LIBNVVFX_ENABLED
+/* Releases the NVIDIA Video FX libraries by delegating to release_nv_vfx(). */
+void unload_nvvfx(void)
+{
+	release_nv_vfx();
+}
+#endif
+
+/* Source registration: a video filter wired to the callbacks defined above. */
+struct obs_source_info nvidia_greenscreen_filter_info = {
+	.id = "nv_greenscreen_filter",
+	.type = OBS_SOURCE_TYPE_FILTER,
+	.output_flags = OBS_SOURCE_VIDEO,
+	.get_name = nv_greenscreen_filter_name,
+	.create = nv_greenscreen_filter_create,
+	.destroy = nv_greenscreen_filter_destroy,
+	.get_defaults = nv_greenscreen_filter_defaults,
+	.get_properties = nv_greenscreen_filter_properties,
+	.update = nv_greenscreen_filter_update,
+	.filter_video = nv_greenscreen_filter_video,
+	.video_render = nv_greenscreen_filter_render,
+	.video_tick = nv_greenscreen_filter_tick,
+};

+ 785 - 0
plugins/obs-filters/nvvfx-load.h

@@ -0,0 +1,785 @@
+#pragma once
+#include <Windows.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <util/platform.h>
+#include <dxgitype.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+/* Decoration applied to the NVIDIA Video Effects function-pointer types.
+ * Only a Windows build that defines NVVFX_API_EXPORT gets an export
+ * decoration; every other configuration leaves the macro empty.
+ * TODO: Linux code goes here. */
+#if !defined(NvVFX_API)
+#if defined(_WIN32) && defined(NVVFX_API_EXPORT)
+#define NvVFX_API __declspec(dllexport) __cdecl
+#else
+#define NvVFX_API
+#endif
+#endif /* !defined(NvVFX_API) */
+
+/* Same scheme for the NvCVImage function-pointer types, keyed on
+ * NVCV_API_EXPORT. TODO: Linux code goes here. */
+#if !defined(NvCV_API)
+#if defined(_WIN32) && defined(NVCV_API_EXPORT)
+#define NvCV_API __declspec(dllexport) __cdecl
+#else
+#define NvCV_API
+#endif
+#endif /* !defined(NvCV_API) */
+
+/* The CUDA runtime function-pointer types carry no decoration. */
+#define CUDARTAPI
+
+#ifdef LIBNVVFX_ENABLED
+// Module handles for the dynamically loaded NVIDIA libraries; NULL while the
+// corresponding library is not loaded. Keeping them as separate handles
+// allows for future loading of a second fx.
+static HMODULE nv_videofx = NULL;
+static HMODULE nv_cvimage = NULL;
+static HMODULE nv_cudart = NULL;
+// NOTE(review): nv_cuda is neither assigned nor freed anywhere in this
+// header; confirm whether it is used elsewhere or can be dropped.
+static HMODULE nv_cuda = NULL;
+
+//! Status codes returned from APIs.
+//! NVCV_SUCCESS is zero; every error code listed here is negative.
+typedef enum NvCV_Status {
+	NVCV_SUCCESS = 0,      //!< The procedure returned successfully.
+	NVCV_ERR_GENERAL = -1, //!< An otherwise unspecified error has occurred.
+	NVCV_ERR_UNIMPLEMENTED =
+		-2, //!< The requested feature is not yet implemented.
+	NVCV_ERR_MEMORY =
+		-3, //!< There is not enough memory for the requested operation.
+	NVCV_ERR_EFFECT = -4, //!< An invalid effect handle has been supplied.
+	NVCV_ERR_SELECTOR =
+		-5, //!< The given parameter selector is not valid in this effect filter.
+	NVCV_ERR_BUFFER = -6, //!< An image buffer has not been specified.
+	NVCV_ERR_PARAMETER =
+		-7, //!< An invalid parameter value has been supplied for this effect+selector.
+	NVCV_ERR_MISMATCH =
+		-8, //!< Some parameters are not appropriately matched.
+	NVCV_ERR_PIXELFORMAT =
+		-9, //!< The specified pixel format is not accommodated.
+	NVCV_ERR_MODEL = -10,   //!< Error while loading the TRT model.
+	NVCV_ERR_LIBRARY = -11, //!< Error loading the dynamic library.
+	NVCV_ERR_INITIALIZATION =
+		-12,         //!< The effect has not been properly initialized.
+	NVCV_ERR_FILE = -13, //!< The file could not be found.
+	NVCV_ERR_FEATURENOTFOUND = -14, //!< The requested feature was not found
+	NVCV_ERR_MISSINGINPUT = -15,    //!< A required parameter was not set
+	NVCV_ERR_RESOLUTION =
+		-16, //!< The specified image resolution is not supported.
+	NVCV_ERR_UNSUPPORTEDGPU = -17, //!< The GPU is not supported
+	NVCV_ERR_WRONGGPU = -18, //!< The current GPU is not the one selected.
+	NVCV_ERR_UNSUPPORTEDDRIVER =
+		-19, //!< The currently installed graphics driver is not supported
+	NVCV_ERR_MODELDEPENDENCIES =
+		-20, //!< There is no model with dependencies that match this system
+	NVCV_ERR_PARSE =
+		-21, //!< There has been a parsing or syntax error while reading a file
+	NVCV_ERR_MODELSUBSTITUTION =
+		-22, //!< The specified model does not exist and has been substituted.
+	NVCV_ERR_READ = -23,  //!< An error occurred while reading a file.
+	NVCV_ERR_WRITE = -24, //!< An error occurred while writing a file.
+	NVCV_ERR_PARAMREADONLY = -25, //!< The selected parameter is read-only.
+	NVCV_ERR_TRT_ENQUEUE = -26,   //!< TensorRT enqueue failed.
+	NVCV_ERR_TRT_BINDINGS = -27,  //!< Unexpected TensorRT bindings.
+	NVCV_ERR_TRT_CONTEXT =
+		-28, //!< An error occurred while creating a TensorRT context.
+	NVCV_ERR_TRT_INFER =
+		-29, ///< There was a problem creating the inference engine.
+	NVCV_ERR_TRT_ENGINE =
+		-30, ///< There was a problem deserializing the inference runtime engine.
+	NVCV_ERR_NPP = -31, //!< An error has occurred in the NPP library.
+	NVCV_ERR_CONFIG =
+		-32, //!< No suitable model exists for the specified parameter configuration.
+
+	NVCV_ERR_DIRECT3D = -99, //!< A Direct3D error has occurred.
+
+	NVCV_ERR_CUDA_BASE = -100, //!< CUDA errors are offset from this value.
+	NVCV_ERR_CUDA_VALUE =
+		-101, //!< A CUDA parameter is not within the acceptable range.
+	NVCV_ERR_CUDA_MEMORY =
+		-102, //!< There is not enough CUDA memory for the requested operation.
+	NVCV_ERR_CUDA_PITCH =
+		-112, //!< A CUDA pitch is not within the acceptable range.
+	NVCV_ERR_CUDA_INIT =
+		-127, //!< The CUDA driver and runtime could not be initialized.
+	NVCV_ERR_CUDA_LAUNCH = -819, //!< The CUDA kernel launch has failed.
+	NVCV_ERR_CUDA_KERNEL =
+		-309, //!< No suitable kernel image is available for the device.
+	NVCV_ERR_CUDA_DRIVER =
+		-135, //!< The installed NVIDIA CUDA driver is older than the CUDA runtime library.
+	NVCV_ERR_CUDA_UNSUPPORTED =
+		-901, //!< The CUDA operation is not supported on the current system or device.
+	NVCV_ERR_CUDA_ILLEGAL_ADDRESS =
+		-800, //!< CUDA tried to load or store on an invalid memory address.
+	NVCV_ERR_CUDA =
+		-1099, //!< An otherwise unspecified CUDA error has been reported.
+} NvCV_Status;
+
+/**  Filter selectors */
+#define NVVFX_FX_TRANSFER "Transfer"
+#define NVVFX_FX_GREEN_SCREEN "GreenScreen"             // Green Screen
+#define NVVFX_FX_BGBLUR "BackgroundBlur"                // Background blur
+#define NVVFX_FX_ARTIFACT_REDUCTION "ArtifactReduction" // Artifact Reduction
+#define NVVFX_FX_SUPER_RES "SuperRes"                   // Super Res
+#define NVVFX_FX_SR_UPSCALE "Upscale"                   // Super Res Upscale
+#define NVVFX_FX_DENOISING "Denoising"                  // Denoising
+
+/** Parameter selectors */
+#define NVVFX_INPUT_IMAGE_0 "SrcImage0"
+#define NVVFX_INPUT_IMAGE NVVFX_INPUT_IMAGE_0
+#define NVVFX_INPUT_IMAGE_1 "SrcImage1"
+#define NVVFX_OUTPUT_IMAGE_0 "DstImage0"
+#define NVVFX_OUTPUT_IMAGE NVVFX_OUTPUT_IMAGE_0
+#define NVVFX_MODEL_DIRECTORY "ModelDir"
+#define NVVFX_CUDA_STREAM "CudaStream" //!< The CUDA stream to use
+#define NVVFX_CUDA_GRAPH "CudaGraph"   //!< Enable CUDA graph to use
+#define NVVFX_INFO "Info"              //!< Get info about the effects
+#define NVVFX_SCALE "Scale"            //!< Scale factor
+#define NVVFX_STRENGTH "Strength"      //!< Strength for different filters
+#define NVVFX_STRENGTH_LEVELS "StrengthLevels" //!< Number of strength levels
+#define NVVFX_MODE "Mode"                      //!< Mode for different filters
+#define NVVFX_TEMPORAL "Temporal"    //!< Temporal mode: 0=image, 1=video
+#define NVVFX_GPU "GPU"              //!< Preferred GPU (optional)
+#define NVVFX_BATCH_SIZE "BatchSize" //!< Batch Size (default 1)
+#define NVVFX_MODEL_BATCH "ModelBatch"
+#define NVVFX_STATE "State"          //!< State variable
+#define NVVFX_STATE_SIZE "StateSize" //!< Number of bytes needed to store state
+
+//! The format of pixels in an image.
+typedef enum NvCVImage_PixelFormat {
+	NVCV_FORMAT_UNKNOWN = 0, //!< Unknown pixel format.
+	NVCV_Y = 1,              //!< Luminance (gray).
+	NVCV_A = 2,              //!< Alpha (opacity)
+	NVCV_YA = 3,             //!< { Luminance, Alpha }
+	NVCV_RGB = 4,            //!< { Red, Green, Blue }
+	NVCV_BGR = 5,            //!< { Blue, Green, Red }
+	NVCV_RGBA = 6,           //!< { Red, Green, Blue, Alpha }
+	NVCV_BGRA = 7,           //!< { Blue, Green, Red, Alpha }
+	NVCV_ARGB = 8,           //!< { Alpha, Red, Green, Blue }
+	NVCV_ABGR = 9,           //!< { Alpha, Blue, Green, Red }
+	NVCV_YUV420 =
+		10, //!< Luminance and subsampled Chrominance { Y, Cb, Cr }
+	NVCV_YUV422 =
+		11, //!< Luminance and subsampled Chrominance { Y, Cb, Cr }
+	NVCV_YUV444 =
+		12, //!< Luminance and full bandwidth Chrominance { Y, Cb, Cr }
+} NvCVImage_PixelFormat;
+
+//! The data type used to represent each component of an image.
+typedef enum NvCVImage_ComponentType {
+	NVCV_TYPE_UNKNOWN = 0, //!< Unknown type of component.
+	NVCV_U8 = 1,           //!< Unsigned 8-bit integer.
+	NVCV_U16 = 2,          //!< Unsigned 16-bit integer.
+	NVCV_S16 = 3,          //!< Signed 16-bit integer.
+	NVCV_F16 = 4,          //!< 16-bit floating-point.
+	NVCV_U32 = 5,          //!< Unsigned 32-bit integer.
+	NVCV_S32 = 6,          //!< Signed 32-bit integer.
+	NVCV_F32 = 7,          //!< 32-bit floating-point (float).
+	NVCV_U64 = 8,          //!< Unsigned 64-bit integer.
+	NVCV_S64 = 9,          //!< Signed 64-bit integer.
+	NVCV_F64 = 10,         //!< 64-bit floating-point (double).
+} NvCVImage_ComponentType;
+
+//! Value for the planar field or layout argument. Two values are currently accommodated for RGB:
+//! Interleaved or chunky storage locates all components of a pixel adjacent in memory,
+//! e.g. RGBRGBRGB... (denoted [RGB]).
+//! Planar storage locates the same component of all pixels adjacent in memory,
+//! e.g. RRRRR...GGGGG...BBBBB... (denoted [R][G][B])
+//! YUV has many more variants.
+//! 4:2:2 can be chunky, planar or semi-planar, with different orderings.
+//! 4:2:0 can be planar or semi-planar, with different orderings.
+//! Aliases are provided for FOURCCs defined at fourcc.org.
+//! Note: the LSB can be used to distinguish between chunky and planar formats.
+#define NVCV_INTERLEAVED \
+	0 //!< All components of pixel(x,y) are adjacent (same as chunky) (default for non-YUV).
+#define NVCV_CHUNKY \
+	0 //!< All components of pixel(x,y) are adjacent (same as interleaved).
+#define NVCV_PLANAR 1 //!< The same component of all pixels are adjacent.
+#define NVCV_UYVY 2   //!< [UYVY]    Chunky 4:2:2 (default for 4:2:2)
+#define NVCV_VYUY 4   //!< [VYUY]    Chunky 4:2:2
+#define NVCV_YUYV 6   //!< [YUYV]    Chunky 4:2:2
+#define NVCV_YVYU 8   //!< [YVYU]    Chunky 4:2:2
+#define NVCV_CYUV 10  //!< [YUV]     Chunky 4:4:4
+#define NVCV_CYVU 12  //!< [YVU]     Chunky 4:4:4
+#define NVCV_YUV 3    //!< [Y][U][V] Planar 4:2:2 or 4:2:0 or 4:4:4
+#define NVCV_YVU 5    //!< [Y][V][U] Planar 4:2:2 or 4:2:0 or 4:4:4
+#define NVCV_YCUV \
+	7 //!< [Y][UV]   Semi-planar 4:2:2 or 4:2:0 (default for 4:2:0)
+#define NVCV_YCVU 9 //!< [Y][VU]   Semi-planar 4:2:2 or 4:2:0
+
+//! The following are FOURCC aliases for specific layouts. Note that it is still required to specify the format as well
+//! as the layout, e.g. NVCV_YUV420 and NVCV_NV12, even though the NV12 layout is only associated with YUV420 sampling.
+#define NVCV_I420 NVCV_YUV  //!< [Y][U][V] Planar 4:2:0
+#define NVCV_IYUV NVCV_YUV  //!< [Y][U][V] Planar 4:2:0
+#define NVCV_YV12 NVCV_YVU  //!< [Y][V][U] Planar 4:2:0
+#define NVCV_NV12 NVCV_YCUV //!< [Y][UV]   Semi-planar 4:2:0 (default for 4:2:0)
+#define NVCV_NV21 NVCV_YCVU //!< [Y][VU]   Semi-planar 4:2:0
+#define NVCV_YUY2 NVCV_YUYV //!< [YUYV]    Chunky 4:2:2
+#define NVCV_I444 NVCV_YUV  //!< [Y][U][V] Planar 4:4:4
+#define NVCV_YM24 NVCV_YUV  //!< [Y][U][V] Planar 4:4:4
+#define NVCV_YM42 NVCV_YVU  //!< [Y][V][U] Planar 4:4:4
+#define NVCV_NV24 NVCV_YCUV //!< [Y][UV]   Semi-planar 4:4:4
+#define NVCV_NV42 NVCV_YCVU //!< [Y][VU]   Semi-planar 4:4:4
+
+//! The following are ORed together for the colorspace field for YUV.
+//! NVCV_601 and NVCV_709 describe the color axes of YUV.
+//! NVCV_VIDEO_RANGE and NVCV_FULL_RANGE describe the range, [16, 235] or [0, 255], respectively.
+//! NVCV_CHROMA_COSITED and NVCV_CHROMA_INTSTITIAL describe the location of the chroma samples.
+#define NVCV_601 0x00  //!< The Rec.601  YUV colorspace, typically used for SD.
+#define NVCV_709 0x01  //!< The Rec.709  YUV colorspace, typically used for HD.
+#define NVCV_2020 0x02 //!< The Rec.2020 YUV colorspace.
+#define NVCV_VIDEO_RANGE 0x00 //!< The video range is [16, 235].
+#define NVCV_FULL_RANGE 0x04  //!< The video range is [ 0, 255].
+#define NVCV_CHROMA_COSITED \
+	0x00 //!< The chroma is sampled at the same location as the luma samples horizontally.
+#define NVCV_CHROMA_INTSTITIAL \
+	0x08 //!< The chroma is sampled between luma samples horizontally.
+#define NVCV_CHROMA_TOPLEFT \
+	0x10 //!< The chroma is sampled at the same location as the luma samples horizontally and vertically.
+#define NVCV_CHROMA_MPEG2 NVCV_CHROMA_COSITED //!< As is most video.
+#define NVCV_CHROMA_MPEG1 NVCV_CHROMA_INTSTITIAL
+#define NVCV_CHROMA_JPEG NVCV_CHROMA_INTSTITIAL
+#define NVCV_CHROMA_H261 NVCV_CHROMA_INTSTITIAL
+#define NVCV_CHROMA_INTERSTITIAL NVCV_CHROMA_INTSTITIAL //!< Correct spelling
+
+//! This is the value for the gpuMem field or the memSpace argument.
+#define NVCV_CPU 0        //!< The buffer is stored in CPU memory.
+#define NVCV_GPU 1        //!< The buffer is stored in CUDA memory.
+#define NVCV_CUDA 1       //!< The buffer is stored in CUDA memory.
+#define NVCV_CPU_PINNED 2 //!< The buffer is stored in pinned CPU memory.
+#define NVCV_CUDA_ARRAY 3 //!< A CUDA array is used for storage.
+
+/** Parameter selectors */
+/* NOTE(review): this list repeats, token for token, the parameter selectors
+ * already defined earlier in this header; the redefinitions are identical
+ * and therefore harmless, but one copy could be removed. */
+#define NVVFX_INPUT_IMAGE_0 "SrcImage0"
+#define NVVFX_INPUT_IMAGE NVVFX_INPUT_IMAGE_0
+#define NVVFX_INPUT_IMAGE_1 "SrcImage1"
+#define NVVFX_OUTPUT_IMAGE_0 "DstImage0"
+#define NVVFX_OUTPUT_IMAGE NVVFX_OUTPUT_IMAGE_0
+#define NVVFX_MODEL_DIRECTORY "ModelDir"
+#define NVVFX_CUDA_STREAM "CudaStream" //!< The CUDA stream to use
+#define NVVFX_CUDA_GRAPH "CudaGraph"   //!< Enable CUDA graph to use
+#define NVVFX_INFO "Info"              //!< Get info about the effects
+#define NVVFX_SCALE "Scale"            //!< Scale factor
+#define NVVFX_STRENGTH "Strength"      //!< Strength for different filters
+#define NVVFX_STRENGTH_LEVELS "StrengthLevels" //!< Number of strength levels
+#define NVVFX_MODE "Mode"                      //!< Mode for different filters
+#define NVVFX_TEMPORAL "Temporal"    //!< Temporal mode: 0=image, 1=video
+#define NVVFX_GPU "GPU"              //!< Preferred GPU (optional)
+#define NVVFX_BATCH_SIZE "BatchSize" //!< Batch Size (default 1)
+#define NVVFX_MODEL_BATCH "ModelBatch"
+#define NVVFX_STATE "State"          //!< State variable
+#define NVVFX_STATE_SIZE "StateSize" //!< Number of bytes needed to store state
+
+//! Image descriptor.
+typedef struct
+#ifdef _MSC_VER
+	__declspec(dllexport)
+#endif // _MSC_VER
+		NvCVImage {
+	unsigned int width; //!< The number of pixels horizontally in the image.
+	unsigned int height; //!< The number of pixels  vertically  in the image.
+	signed int pitch;    //!< The byte stride between pixels vertically.
+	NvCVImage_PixelFormat
+		pixelFormat; //!< The format of the pixels in the image.
+	NvCVImage_ComponentType
+		componentType; //!< The data type used to represent each component of the image.
+	unsigned char pixelBytes; //!< The number of bytes in a chunky pixel.
+	unsigned char
+		componentBytes; //!< The number of bytes in each pixel component.
+	unsigned char numComponents; //!< The number of components in each pixel.
+	unsigned char planar; //!< NVCV_CHUNKY, NVCV_PLANAR, NVCV_UYVY, ....
+	unsigned char gpuMem; //!< NVCV_CPU, NVCV_CPU_PINNED, NVCV_CUDA, NVCV_GPU
+	unsigned char
+		colorspace; //!< An OR of colorspace, range and chroma phase.
+	unsigned char reserved
+		[2];  //!< For structure padding and future expansion. Set to 0.
+	void *pixels; //!< Pointer to pixel(0,0) in the image.
+	void *deletePtr; //!< Buffer memory to be deleted (can be NULL).
+	void (*deleteProc)(
+		void *p); //!< Delete procedure to call rather than free().
+	unsigned long long
+		bufferBytes; //!< The maximum amount of memory available through pixels.
+} NvCVImage;
+
+//! Integer rectangle.
+typedef struct NvCVRect2i {
+	int x;      //!< The left edge of the rectangle.
+	int y;      //!< The top  edge of the rectangle.
+	int width;  //!< The width  of the rectangle.
+	int height; //!< The height of the rectangle.
+} NvCVRect2i;
+
+//! Integer point.
+typedef struct NvCVPoint2i {
+	int x; //!< The horizontal coordinate.
+	int y; //!< The vertical coordinate
+} NvCVPoint2i;
+
+typedef struct CUstream_st *CUstream;
+typedef const char *NvVFX_EffectSelector;
+typedef const char *NvVFX_ParameterSelector;
+typedef void *NvVFX_Handle;
+
+/* nvvfx functions */
+typedef NvCV_Status NvVFX_API (*NvVFX_GetVersion_t)(unsigned int *version);
+typedef NvCV_Status NvVFX_API (*NvVFX_CreateEffect_t)(NvVFX_EffectSelector code,
+						      NvVFX_Handle *effect);
+typedef NvCV_Status NvVFX_API (*NvVFX_DestroyEffect_t)(NvVFX_Handle effect);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_SetU32_t)(NvVFX_Handle effect,
+				    NvVFX_ParameterSelector param_name,
+				    unsigned int val);
+typedef NvCV_Status NvVFX_API (*NvVFX_SetS32_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, int val);
+typedef NvCV_Status NvVFX_API (*NvVFX_SetF32_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, float val);
+typedef NvCV_Status NvVFX_API (*NvVFX_SetF64_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, double val);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_SetU64_t)(NvVFX_Handle effect,
+				    NvVFX_ParameterSelector param_name,
+				    unsigned long long val);
+typedef NvCV_Status NvVFX_API (*NvVFX_SetObject_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, void *ptr);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_SetCudaStream_t)(NvVFX_Handle effect,
+					   NvVFX_ParameterSelector param_name,
+					   CUstream stream);
+typedef NvCV_Status NvVFX_API (*NvVFX_SetImage_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, NvCVImage *im);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_SetString_t)(NvVFX_Handle effect,
+				       NvVFX_ParameterSelector param_name,
+				       const char *str);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_GetU32_t)(NvVFX_Handle effect,
+				    NvVFX_ParameterSelector param_name,
+				    unsigned int *val);
+typedef NvCV_Status NvVFX_API (*NvVFX_GetS32_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, int *val);
+typedef NvCV_Status NvVFX_API (*NvVFX_GetF32_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, float *val);
+typedef NvCV_Status NvVFX_API (*NvVFX_GetF64_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, double *val);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_GetU64_t)(NvVFX_Handle effect,
+				    NvVFX_ParameterSelector param_name,
+				    unsigned long long *val);
+typedef NvCV_Status NvVFX_API (*NvVFX_GetObject_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, void **ptr);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_GetCudaStream_t)(NvVFX_Handle effect,
+					   NvVFX_ParameterSelector param_name,
+					   CUstream *stream);
+typedef NvCV_Status NvVFX_API (*NvVFX_GetImage_t)(
+	NvVFX_Handle effect, NvVFX_ParameterSelector param_name, NvCVImage *im);
+typedef NvCV_Status
+	NvVFX_API (*NvVFX_GetString_t)(NvVFX_Handle effect,
+				       NvVFX_ParameterSelector param_name,
+				       const char **str);
+typedef NvCV_Status NvVFX_API (*NvVFX_Run_t)(NvVFX_Handle effect, int async);
+typedef NvCV_Status NvVFX_API (*NvVFX_Load_t)(NvVFX_Handle effect);
+typedef NvCV_Status NvVFX_API (*NvVFX_CudaStreamCreate_t)(CUstream *stream);
+typedef NvCV_Status NvVFX_API (*NvVFX_CudaStreamDestroy_t)(CUstream stream);
+
+/* NvCVImage functions */
+typedef NvCV_Status NvCV_API (*NvCVImage_Init_t)(
+	NvCVImage *im, unsigned width, unsigned height, int pitch, void *pixels,
+	NvCVImage_PixelFormat format, NvCVImage_ComponentType type,
+	unsigned layout, unsigned memSpace);
+typedef NvCV_Status NvCV_API (*NvCVImage_InitView_t)(NvCVImage *subImg,
+						     NvCVImage *fullImg, int x,
+						     int y, unsigned width,
+						     unsigned height);
+typedef NvCV_Status NvCV_API (*NvCVImage_Alloc_t)(
+	NvCVImage *im, unsigned width, unsigned height,
+	NvCVImage_PixelFormat format, NvCVImage_ComponentType type,
+	unsigned layout, unsigned memSpace, unsigned alignment);
+typedef NvCV_Status NvCV_API (*NvCVImage_Realloc_t)(
+	NvCVImage *im, unsigned width, unsigned height,
+	NvCVImage_PixelFormat format, NvCVImage_ComponentType type,
+	unsigned layout, unsigned memSpace, unsigned alignment);
+typedef NvCV_Status NvCV_API (*NvCVImage_Dealloc_t)(NvCVImage *im);
+typedef NvCV_Status NvCV_API (*NvCVImage_Create_t)(
+	unsigned width, unsigned height, NvCVImage_PixelFormat format,
+	NvCVImage_ComponentType type, unsigned layout, unsigned memSpace,
+	unsigned alignment, NvCVImage **out);
+typedef NvCV_Status NvCV_API (*NvCVImage_Destroy_t)(NvCVImage *im);
+typedef NvCV_Status NvCV_API (*NvCVImage_ComponentOffsets_t)(
+	NvCVImage_PixelFormat format, int *rOff, int *gOff, int *bOff,
+	int *aOff, int *yOff);
+typedef NvCV_Status NvCV_API (*NvCVImage_Transfer_t)(const NvCVImage *src,
+						     NvCVImage *dst,
+						     float scale,
+						     struct CUstream_st *stream,
+						     NvCVImage *tmp);
+typedef NvCV_Status NvCV_API (*NvCVImage_TransferRect_t)(
+	const NvCVImage *src, const NvCVRect2i *srcRect, NvCVImage *dst,
+	const NvCVPoint2i *dstPt, float scale, struct CUstream_st *stream,
+	NvCVImage *tmp);
+typedef NvCV_Status NvCV_API (*NvCVImage_TransferFromYUV_t)(
+	const void *y, int yPixBytes, int yPitch, const void *u, const void *v,
+	int uvPixBytes, int uvPitch, NvCVImage_PixelFormat yuvFormat,
+	NvCVImage_ComponentType yuvType, unsigned yuvColorSpace,
+	unsigned yuvMemSpace, NvCVImage *dst, const NvCVRect2i *dstRect,
+	float scale, struct CUstream_st *stream, NvCVImage *tmp);
+typedef NvCV_Status NvCV_API (*NvCVImage_TransferToYUV_t)(
+	const NvCVImage *src, const NvCVRect2i *srcRect, const void *y,
+	int yPixBytes, int yPitch, const void *u, const void *v, int uvPixBytes,
+	int uvPitch, NvCVImage_PixelFormat yuvFormat,
+	NvCVImage_ComponentType yuvType, unsigned yuvColorSpace,
+	unsigned yuvMemSpace, float scale, struct CUstream_st *stream,
+	NvCVImage *tmp);
+typedef NvCV_Status
+	NvCV_API (*NvCVImage_MapResource_t)(NvCVImage *im,
+					    struct CUstream_st *stream);
+
+typedef NvCV_Status
+	NvCV_API (*NvCVImage_UnmapResource_t)(NvCVImage *im,
+					      struct CUstream_st *stream);
+typedef NvCV_Status NvCV_API (*NvCVImage_Composite_t)(
+	const NvCVImage *fg, const NvCVImage *bg, const NvCVImage *mat,
+	NvCVImage *dst, struct CUstream_st *stream);
+typedef NvCV_Status NvCV_API (*NvCVImage_CompositeRect_t)(
+	const NvCVImage *fg, const NvCVPoint2i *fgOrg, const NvCVImage *bg,
+	const NvCVPoint2i *bgOrg, const NvCVImage *mat, unsigned mode,
+	NvCVImage *dst, const NvCVPoint2i *dstOrg, struct CUstream_st *stream);
+typedef NvCV_Status NvCV_API (*NvCVImage_CompositeOverConstant_t)(
+	const NvCVImage *src, const NvCVImage *mat, const void *bgColor,
+	NvCVImage *dst, struct CUstream_st *stream);
+typedef NvCV_Status NvCV_API (*NvCVImage_FlipY_t)(const NvCVImage *src,
+						  NvCVImage *dst);
+typedef NvCV_Status NvCV_API (*NvCVImage_GetYUVPointers_t)(
+	NvCVImage *im, unsigned char **y, unsigned char **u, unsigned char **v,
+	int *yPixBytes, int *cPixBytes, int *yRowBytes, int *cRowBytes);
+
+typedef const char *(*NvCV_GetErrorStringFromCode_t)(NvCV_Status code);
+
+/* beware: this is experimental : D3D functions */
+typedef NvCV_Status NvCV_API (*NvCVImage_ToD3DFormat_t)(
+	NvCVImage_PixelFormat format, NvCVImage_ComponentType type,
+	unsigned layout, DXGI_FORMAT *d3dFormat);
+typedef NvCV_Status NvCV_API (*NvCVImage_FromD3DFormat_t)(
+	DXGI_FORMAT d3dFormat, NvCVImage_PixelFormat *format,
+	NvCVImage_ComponentType *type, unsigned char *layout);
+typedef NvCV_Status NvCV_API (*NvCVImage_ToD3DColorSpace_t)(
+	unsigned char nvcvColorSpace, DXGI_COLOR_SPACE_TYPE *pD3dColorSpace);
+typedef NvCV_Status NvCV_API (*NvCVImage_FromD3DColorSpace_t)(
+	DXGI_COLOR_SPACE_TYPE d3dColorSpace, unsigned char *pNvcvColorSpace);
+typedef NvCV_Status NvCV_API (*NvCVImage_InitFromD3D11Texture_t)(
+	NvCVImage *im, struct ID3D11Texture2D *tx);
+typedef NvCV_Status
+	NvCV_API (*NvCVImage_InitFromD3DTexture_t)(NvCVImage *im,
+						   struct ID3D11Texture2D *tx);
+/* cuda runtime */
+typedef enum cudaError {
+	cudaSuccess = 0,
+	cudaErrorInvalidValue = 1,
+	cudaErrorMemoryAllocation = 2,
+	cudaErrorInitializationError = 3,
+	cudaErrorCudartUnloading = 4,
+	cudaErrorProfilerDisabled = 5,
+	cudaErrorProfilerNotInitialized = 6,
+	cudaErrorProfilerAlreadyStarted = 7,
+	cudaErrorProfilerAlreadyStopped = 8,
+	cudaErrorInvalidConfiguration = 9,
+	cudaErrorInvalidPitchValue = 12,
+	cudaErrorInvalidSymbol = 13,
+	cudaErrorInvalidHostPointer = 16,
+	cudaErrorInvalidDevicePointer = 17,
+	cudaErrorInvalidTexture = 18,
+	cudaErrorInvalidTextureBinding = 19,
+	cudaErrorInvalidChannelDescriptor = 20,
+	cudaErrorInvalidMemcpyDirection = 21,
+	cudaErrorAddressOfConstant = 22,
+	cudaErrorTextureFetchFailed = 23,
+	cudaErrorTextureNotBound = 24,
+	cudaErrorSynchronizationError = 25,
+	cudaErrorInvalidFilterSetting = 26,
+	cudaErrorInvalidNormSetting = 27,
+	cudaErrorMixedDeviceExecution = 28,
+	cudaErrorNotYetImplemented = 31,
+	cudaErrorMemoryValueTooLarge = 32,
+	cudaErrorStubLibrary = 34,
+	cudaErrorInsufficientDriver = 35,
+	cudaErrorCallRequiresNewerDriver = 36,
+	cudaErrorInvalidSurface = 37,
+	cudaErrorDuplicateVariableName = 43,
+	cudaErrorDuplicateTextureName = 44,
+	cudaErrorDuplicateSurfaceName = 45,
+	cudaErrorDevicesUnavailable = 46,
+	cudaErrorIncompatibleDriverContext = 49,
+	cudaErrorMissingConfiguration = 52,
+	cudaErrorPriorLaunchFailure = 53,
+	cudaErrorLaunchMaxDepthExceeded = 65,
+	cudaErrorLaunchFileScopedTex = 66,
+	cudaErrorLaunchFileScopedSurf = 67,
+	cudaErrorSyncDepthExceeded = 68,
+	cudaErrorLaunchPendingCountExceeded = 69,
+	cudaErrorInvalidDeviceFunction = 98,
+	cudaErrorNoDevice = 100,
+	cudaErrorInvalidDevice = 101,
+	cudaErrorDeviceNotLicensed = 102,
+	cudaErrorSoftwareValidityNotEstablished = 103,
+	cudaErrorStartupFailure = 127,
+	cudaErrorInvalidKernelImage = 200,
+	cudaErrorDeviceUninitialized = 201,
+	cudaErrorMapBufferObjectFailed = 205,
+	cudaErrorUnmapBufferObjectFailed = 206,
+	cudaErrorArrayIsMapped = 207,
+	cudaErrorAlreadyMapped = 208,
+	cudaErrorNoKernelImageForDevice = 209,
+	cudaErrorAlreadyAcquired = 210,
+	cudaErrorNotMapped = 211,
+	cudaErrorNotMappedAsArray = 212,
+	cudaErrorNotMappedAsPointer = 213,
+	cudaErrorECCUncorrectable = 214,
+	cudaErrorUnsupportedLimit = 215,
+	cudaErrorDeviceAlreadyInUse = 216,
+	cudaErrorPeerAccessUnsupported = 217,
+	cudaErrorInvalidPtx = 218,
+	cudaErrorInvalidGraphicsContext = 219,
+	cudaErrorNvlinkUncorrectable = 220,
+	cudaErrorJitCompilerNotFound = 221,
+	cudaErrorUnsupportedPtxVersion = 222,
+	cudaErrorJitCompilationDisabled = 223,
+	cudaErrorInvalidSource = 300,
+	cudaErrorFileNotFound = 301,
+	cudaErrorSharedObjectSymbolNotFound = 302,
+	cudaErrorSharedObjectInitFailed = 303,
+	cudaErrorOperatingSystem = 304,
+	cudaErrorInvalidResourceHandle = 400,
+	cudaErrorIllegalState = 401,
+	cudaErrorSymbolNotFound = 500,
+	cudaErrorNotReady = 600,
+	cudaErrorIllegalAddress = 700,
+	cudaErrorLaunchOutOfResources = 701,
+	cudaErrorLaunchTimeout = 702,
+	cudaErrorLaunchIncompatibleTexturing = 703,
+	cudaErrorPeerAccessAlreadyEnabled = 704,
+	cudaErrorPeerAccessNotEnabled = 705,
+	cudaErrorSetOnActiveProcess = 708,
+	cudaErrorContextIsDestroyed = 709,
+	cudaErrorAssert = 710,
+	cudaErrorTooManyPeers = 711,
+	cudaErrorHostMemoryAlreadyRegistered = 712,
+	cudaErrorHostMemoryNotRegistered = 713,
+	cudaErrorHardwareStackError = 714,
+	cudaErrorIllegalInstruction = 715,
+	cudaErrorMisalignedAddress = 716,
+	cudaErrorInvalidAddressSpace = 717,
+	cudaErrorInvalidPc = 718,
+	cudaErrorLaunchFailure = 719,
+	cudaErrorCooperativeLaunchTooLarge = 720,
+	cudaErrorNotPermitted = 800,
+	cudaErrorNotSupported = 801,
+	cudaErrorSystemNotReady = 802,
+	cudaErrorSystemDriverMismatch = 803,
+	cudaErrorCompatNotSupportedOnDevice = 804,
+	cudaErrorStreamCaptureUnsupported = 900,
+	cudaErrorStreamCaptureInvalidated = 901,
+	cudaErrorStreamCaptureMerge = 902,
+	cudaErrorStreamCaptureUnmatched = 903,
+	cudaErrorStreamCaptureUnjoined = 904,
+	cudaErrorStreamCaptureIsolation = 905,
+	cudaErrorStreamCaptureImplicit = 906,
+	cudaErrorCapturedEvent = 907,
+	cudaErrorStreamCaptureWrongThread = 908,
+	cudaErrorTimeout = 909,
+	cudaErrorGraphExecUpdateFailure = 910,
+	cudaErrorUnknown = 999,
+	cudaErrorApiFailureBase = 10000
+} cudaError;
+
+typedef enum cudaMemcpyKind {
+	cudaMemcpyHostToHost = 0,     /**< Host   -> Host */
+	cudaMemcpyHostToDevice = 1,   /**< Host   -> Device */
+	cudaMemcpyDeviceToHost = 2,   /**< Device -> Host */
+	cudaMemcpyDeviceToDevice = 3, /**< Device -> Device */
+	cudaMemcpyDefault = 4
+} cudaMemcpyKind;
+
+typedef enum cudaError cudaError_t;
+
+typedef cudaError_t CUDARTAPI (*cudaMalloc_t)(void **devPtr, size_t size);
+typedef cudaError_t CUDARTAPI (*cudaStreamSynchronize_t)(CUstream stream);
+typedef cudaError_t CUDARTAPI (*cudaFree_t)(void *devPtr);
+typedef cudaError_t CUDARTAPI (*cudaMemsetAsync_t)(void *devPtr, int value,
+						   size_t count,
+						   CUstream stream);
+typedef cudaError_t CUDARTAPI (*cudaMemcpy_t)(void *dst, const void *src,
+					      size_t count,
+					      enum cudaMemcpyKind kind);
+
+/* nvvfx */
+static NvVFX_GetVersion_t NvVFX_GetVersion = NULL;
+static NvVFX_CreateEffect_t NvVFX_CreateEffect = NULL;
+static NvVFX_DestroyEffect_t NvVFX_DestroyEffect = NULL;
+static NvVFX_SetU32_t NvVFX_SetU32 = NULL;
+static NvVFX_SetS32_t NvVFX_SetS32 = NULL;
+static NvVFX_SetF32_t NvVFX_SetF32 = NULL;
+static NvVFX_SetF64_t NvVFX_SetF64 = NULL;
+static NvVFX_SetU64_t NvVFX_SetU64 = NULL;
+static NvVFX_SetObject_t NvVFX_SetObject = NULL;
+static NvVFX_SetCudaStream_t NvVFX_SetCudaStream = NULL;
+static NvVFX_SetImage_t NvVFX_SetImage = NULL;
+static NvVFX_SetString_t NvVFX_SetString = NULL;
+static NvVFX_GetU32_t NvVFX_GetU32 = NULL;
+static NvVFX_GetS32_t NvVFX_GetS32 = NULL;
+static NvVFX_GetF32_t NvVFX_GetF32 = NULL;
+static NvVFX_GetF64_t NvVFX_GetF64 = NULL;
+static NvVFX_GetU64_t NvVFX_GetU64 = NULL;
+static NvVFX_GetObject_t NvVFX_GetObject = NULL;
+static NvVFX_GetCudaStream_t NvVFX_GetCudaStream = NULL;
+static NvVFX_GetImage_t NvVFX_GetImage = NULL;
+static NvVFX_GetString_t NvVFX_GetString = NULL;
+static NvVFX_Run_t NvVFX_Run = NULL;
+static NvVFX_Load_t NvVFX_Load = NULL;
+static NvVFX_CudaStreamCreate_t NvVFX_CudaStreamCreate = NULL;
+static NvVFX_CudaStreamDestroy_t NvVFX_CudaStreamDestroy = NULL;
+
+/*nvcvimage */
+static NvCVImage_Init_t NvCVImage_Init = NULL;
+static NvCVImage_InitView_t NvCVImage_InitView = NULL;
+static NvCVImage_Alloc_t NvCVImage_Alloc = NULL;
+static NvCVImage_Realloc_t NvCVImage_Realloc = NULL;
+static NvCVImage_Dealloc_t NvCVImage_Dealloc = NULL;
+static NvCVImage_Create_t NvCVImage_Create = NULL;
+static NvCVImage_Destroy_t NvCVImage_Destroy = NULL;
+static NvCVImage_ComponentOffsets_t NvCVImage_ComponentOffsets = NULL;
+static NvCVImage_Transfer_t NvCVImage_Transfer = NULL;
+static NvCVImage_TransferRect_t NvCVImage_TransferRect = NULL;
+static NvCVImage_TransferFromYUV_t NvCVImage_TransferFromYUV = NULL;
+static NvCVImage_TransferToYUV_t NvCVImage_TransferToYUV = NULL;
+static NvCVImage_MapResource_t NvCVImage_MapResource = NULL;
+static NvCVImage_UnmapResource_t NvCVImage_UnmapResource = NULL;
+static NvCVImage_Composite_t NvCVImage_Composite = NULL;
+static NvCVImage_CompositeRect_t NvCVImage_CompositeRect = NULL;
+static NvCVImage_CompositeOverConstant_t NvCVImage_CompositeOverConstant = NULL;
+static NvCVImage_FlipY_t NvCVImage_FlipY = NULL;
+static NvCVImage_GetYUVPointers_t NvCVImage_GetYUVPointers = NULL;
+/* nvcvimage  D3D*/
+static NvCVImage_ToD3DFormat_t NvCVImage_ToD3DFormat = NULL;
+static NvCVImage_FromD3DFormat_t NvCVImage_FromD3DFormat = NULL;
+static NvCVImage_ToD3DColorSpace_t NvCVImage_ToD3DColorSpace = NULL;
+static NvCVImage_FromD3DColorSpace_t NvCVImage_FromD3DColorSpace = NULL;
+static NvCVImage_InitFromD3D11Texture_t NvCVImage_InitFromD3D11Texture = NULL;
+/* error codes */
+static NvCV_GetErrorStringFromCode_t NvCV_GetErrorStringFromCode = NULL;
+
+/* cuda runtime */
+static cudaMalloc_t cudaMalloc = NULL;
+static cudaStreamSynchronize_t cudaStreamSynchronize = NULL;
+static cudaFree_t cudaFree = NULL;
+static cudaMemcpy_t cudaMemcpy = NULL;
+static cudaMemsetAsync_t cudaMemsetAsync = NULL;
+
+/* Tears down everything set up for the NVIDIA Video FX libraries: every
+ * resolved function pointer is reset to NULL and each of the three library
+ * handles (NVVideoEffects, NVCVImage, CUDA runtime) is freed if it is set.
+ * Handles are reset to NULL after FreeLibrary, so calling this again is a
+ * no-op. Pointers are cleared before their library is freed. */
+static inline void release_nv_vfx(void)
+{
+	/* NVVideoEffects entry points */
+	NvVFX_GetVersion = NULL;
+	NvVFX_CreateEffect = NULL;
+	NvVFX_DestroyEffect = NULL;
+	NvVFX_SetU32 = NULL;
+	NvVFX_SetS32 = NULL;
+	NvVFX_SetF32 = NULL;
+	NvVFX_SetF64 = NULL;
+	NvVFX_SetU64 = NULL;
+	NvVFX_SetObject = NULL;
+	NvVFX_SetCudaStream = NULL;
+	NvVFX_SetImage = NULL;
+	NvVFX_SetString = NULL;
+	NvVFX_GetU32 = NULL;
+	NvVFX_GetS32 = NULL;
+	NvVFX_GetF32 = NULL;
+	NvVFX_GetF64 = NULL;
+	NvVFX_GetU64 = NULL;
+	NvVFX_GetObject = NULL;
+	NvVFX_GetCudaStream = NULL;
+	NvVFX_GetImage = NULL;
+	NvVFX_GetString = NULL;
+	NvVFX_Run = NULL;
+	NvVFX_Load = NULL;
+	NvVFX_CudaStreamCreate = NULL;
+	NvVFX_CudaStreamDestroy = NULL;
+	NvCV_GetErrorStringFromCode = NULL;
+	if (nv_videofx) {
+		FreeLibrary(nv_videofx);
+		nv_videofx = NULL;
+	}
+
+	/* NVCVImage entry points (including the D3D helpers) */
+	NvCVImage_Init = NULL;
+	NvCVImage_InitView = NULL;
+	NvCVImage_Alloc = NULL;
+	NvCVImage_Realloc = NULL;
+	NvCVImage_Dealloc = NULL;
+	NvCVImage_Create = NULL;
+	NvCVImage_Destroy = NULL;
+	NvCVImage_ComponentOffsets = NULL;
+	NvCVImage_Transfer = NULL;
+	NvCVImage_TransferRect = NULL;
+	NvCVImage_TransferFromYUV = NULL;
+	NvCVImage_TransferToYUV = NULL;
+	NvCVImage_MapResource = NULL;
+	NvCVImage_UnmapResource = NULL;
+	NvCVImage_Composite = NULL;
+	NvCVImage_CompositeRect = NULL;
+	NvCVImage_CompositeOverConstant = NULL;
+	NvCVImage_FlipY = NULL;
+	NvCVImage_GetYUVPointers = NULL;
+	NvCVImage_InitFromD3D11Texture = NULL;
+	NvCVImage_ToD3DFormat = NULL;
+	NvCVImage_FromD3DFormat = NULL;
+	NvCVImage_ToD3DColorSpace = NULL;
+	NvCVImage_FromD3DColorSpace = NULL;
+	if (nv_cvimage) {
+		FreeLibrary(nv_cvimage);
+		nv_cvimage = NULL;
+	}
+
+	/* CUDA runtime entry points */
+	cudaMalloc = NULL;
+	cudaStreamSynchronize = NULL;
+	cudaFree = NULL;
+	cudaMemcpy = NULL;
+	cudaMemsetAsync = NULL;
+	if (nv_cudart) {
+		FreeLibrary(nv_cudart);
+		nv_cudart = NULL;
+	}
+}
+
+/* Writes the directory of the NVIDIA Video Effects redistributable into
+ * buffer (capacity len bytes, including the terminating NUL).
+ *
+ * The NV_VIDEO_EFFECTS_PATH environment variable is used when it is set and
+ * fits; otherwise the path falls back to
+ * "%ProgramFiles%\NVIDIA Corporation\NVIDIA Video Effects\".
+ * If neither can be resolved, buffer is set to the empty string.
+ * Nothing is written when buffer is NULL or len is 0. */
+static inline void nvvfx_get_sdk_path(char *buffer, const size_t len)
+{
+	if (!buffer || len == 0)
+		return;
+
+	DWORD ret = GetEnvironmentVariableA("NV_VIDEO_EFFECTS_PATH", buffer,
+					    (DWORD)len);
+
+	/* ret == 0: variable missing or lookup failed;
+	 * ret >= len - 1: value did not (comfortably) fit in buffer. */
+	if (!ret || ret >= len - 1) {
+		char path[MAX_PATH];
+		DWORD pf_len =
+			GetEnvironmentVariableA("ProgramFiles", path, MAX_PATH);
+
+		/* Never format an uninitialized or truncated path. */
+		if (!pf_len || pf_len >= MAX_PATH) {
+			buffer[0] = '\0';
+			return;
+		}
+
+		/* Bound the write by the caller's capacity, not by the size
+		 * of the local scratch array. */
+		snprintf(buffer, len,
+			 "%s\\NVIDIA Corporation\\NVIDIA Video Effects\\",
+			 path);
+	}
+}
+
+/* Loads NVVideoEffects.dll, NVCVImage.dll and cudart64_110.dll with the SDK
+ * directory temporarily added to the DLL search path, storing the handles in
+ * nv_videofx, nv_cvimage and nv_cudart.
+ *
+ * Returns true only when all three libraries loaded. On a partial failure the
+ * handles that did load are left set; release_nv_vfx() frees them. */
+static inline bool load_nv_vfx_libs(void)
+{
+	char fullPath[MAX_PATH];
+	nvvfx_get_sdk_path(fullPath, MAX_PATH);
+	SetDllDirectoryA(fullPath);
+	/* The names are wide literals, so call the W variant explicitly
+	 * instead of relying on UNICODE being defined for the TCHAR macro. */
+	nv_videofx = LoadLibraryW(L"NVVideoEffects.dll");
+	nv_cvimage = LoadLibraryW(L"NVCVImage.dll");
+	nv_cudart = LoadLibraryW(L"cudart64_110.dll");
+	/* Restore the default DLL search order. */
+	SetDllDirectoryA(NULL);
+	return nv_videofx != NULL && nv_cvimage != NULL && nv_cudart != NULL;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus

+ 2 - 0
plugins/obs-filters/obs-filters-config.h.in

@@ -9,3 +9,5 @@
 #endif
 
 #cmakedefine HAS_NOISEREDUCTION
+/* NOTE(review): the CMake logic for this plugin sets LIBNVVFX_FOUND, not
+ * NVVFX_FOUND; confirm that NVVFX_FOUND is actually defined when this
+ * template is configured, otherwise the macro expands to nothing. */
+#define NVIDIA_GREENSCREEN_ENABLED @NVVFX_FOUND@
+

+ 14 - 2
plugins/obs-filters/obs-filters.c

@@ -37,6 +37,11 @@ extern struct obs_source_info limiter_filter;
 extern struct obs_source_info expander_filter;
 extern struct obs_source_info luma_key_filter;
 extern struct obs_source_info luma_key_filter_v2;
+#ifdef LIBNVVFX_ENABLED
+extern struct obs_source_info nvidia_greenscreen_filter_info;
+extern bool load_nvvfx(void);
+extern void unload_nvvfx(void);
+#endif
 
 bool obs_module_load(void)
 {
@@ -72,12 +77,19 @@ bool obs_module_load(void)
 	obs_register_source(&expander_filter);
 	obs_register_source(&luma_key_filter);
 	obs_register_source(&luma_key_filter_v2);
+#ifdef LIBNVVFX_ENABLED
+	if (load_nvvfx())
+		obs_register_source(&nvidia_greenscreen_filter_info);
+#endif
 	return true;
 }
 
-#ifdef LIBNVAFX_ENABLED
 void obs_module_unload(void)
 {
+#ifdef LIBNVAFX_ENABLED
 	unload_nvafx();
-}
 #endif
+#ifdef LIBNVVFX_ENABLED
+	unload_nvvfx();
+#endif
+}