فهرست منبع

obs-ffmpeg: Add texture-based NVENC encoder implementation

Adds a texture-based NVENC implementation which passes OBS NV12 output
textures directly to NVENC without downloading them off of the GPU,
increasing NVENC performance by a significant margin.

If NV12 textures are unavailable or the new encoder fails to initialize
for whatever reason, it will fall back to the FFmpeg NVENC
implementation safely.
jp9000 6 سال پیش
والد
کامیت
ed0c7bcd6a

+ 8 - 0
plugins/obs-ffmpeg/CMakeLists.txt

@@ -29,6 +29,14 @@ if(UNIX AND NOT APPLE)
 		${LIBVA_LBRARIES})
 endif()
 
+if(WIN32)
+	list(APPEND obs-ffmpeg_SOURCES
+		jim-nvenc.c
+		jim-nvenc-helpers.c)
+	list(APPEND obs-ffmpeg_HEADERS
+		jim-nvenc.h)
+endif()
+
 add_library(obs-ffmpeg MODULE
 	${obs-ffmpeg_HEADERS}
 	${obs-ffmpeg_SOURCES})

+ 134 - 0
plugins/obs-ffmpeg/jim-nvenc-helpers.c

@@ -0,0 +1,134 @@
+#include "jim-nvenc.h"
+#include <util/platform.h>
+#include <util/threading.h>
+
+static void *nvenc_lib = NULL;
+static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
+NV_CREATE_INSTANCE_FUNC nv_create_instance = NULL;
+
+/* Opens the NVENC runtime DLL that matches the pointer width of this build
+ * and stores the handle in nvenc_lib. Returns true if the library opened. */
+bool load_nvenc_lib(void)
+{
+	const char *dll_name = (sizeof(void*) == 8)
+		? "nvEncodeAPI64.dll"
+		: "nvEncodeAPI.dll";
+
+	nvenc_lib = os_dlopen(dll_name);
+	return nvenc_lib != NULL;
+}
+
+/* Resolves an exported symbol from the loaded NVENC library. Logs an error
+ * and returns NULL when the symbol cannot be found. */
+static void *load_nv_func(const char *func)
+{
+	void *sym = os_dlsym(nvenc_lib, func);
+	if (sym)
+		return sym;
+
+	error("Could not load function: %s", func);
+	return NULL;
+}
+
+typedef NVENCSTATUS (NVENCAPI *NV_MAX_VER_FUNC)(uint32_t*);
+
+/* Maps an NVENCSTATUS code to the spelling of its enumerator, for logging.
+ * Any code not listed in the switch yields "Unknown Error". */
+const char *nv_error_name(NVENCSTATUS err)
+{
+/* Expands to a case label that returns the enumerator's own name */
+#define RETURN_CASE(x) \
+	case x: return #x
+
+	switch (err) {
+	RETURN_CASE(NV_ENC_SUCCESS);
+	RETURN_CASE(NV_ENC_ERR_NO_ENCODE_DEVICE);
+	RETURN_CASE(NV_ENC_ERR_UNSUPPORTED_DEVICE);
+	RETURN_CASE(NV_ENC_ERR_INVALID_ENCODERDEVICE);
+	RETURN_CASE(NV_ENC_ERR_INVALID_DEVICE);
+	RETURN_CASE(NV_ENC_ERR_DEVICE_NOT_EXIST);
+	RETURN_CASE(NV_ENC_ERR_INVALID_PTR);
+	RETURN_CASE(NV_ENC_ERR_INVALID_EVENT);
+	RETURN_CASE(NV_ENC_ERR_INVALID_PARAM);
+	RETURN_CASE(NV_ENC_ERR_INVALID_CALL);
+	RETURN_CASE(NV_ENC_ERR_OUT_OF_MEMORY);
+	RETURN_CASE(NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
+	RETURN_CASE(NV_ENC_ERR_UNSUPPORTED_PARAM);
+	RETURN_CASE(NV_ENC_ERR_LOCK_BUSY);
+	RETURN_CASE(NV_ENC_ERR_NOT_ENOUGH_BUFFER);
+	RETURN_CASE(NV_ENC_ERR_INVALID_VERSION);
+	RETURN_CASE(NV_ENC_ERR_MAP_FAILED);
+	RETURN_CASE(NV_ENC_ERR_NEED_MORE_INPUT);
+	RETURN_CASE(NV_ENC_ERR_ENCODER_BUSY);
+	RETURN_CASE(NV_ENC_ERR_EVENT_NOT_REGISTERD);
+	RETURN_CASE(NV_ENC_ERR_GENERIC);
+	RETURN_CASE(NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY);
+	RETURN_CASE(NV_ENC_ERR_UNIMPLEMENTED);
+	RETURN_CASE(NV_ENC_ERR_RESOURCE_REGISTER_FAILED);
+	RETURN_CASE(NV_ENC_ERR_RESOURCE_NOT_REGISTERED);
+	RETURN_CASE(NV_ENC_ERR_RESOURCE_NOT_MAPPED);
+	}
+#undef RETURN_CASE
+
+	return "Unknown Error";
+}
+
+/* One-time NVENC API setup: checks the version reported by the library
+ * against the version this file was compiled with, then fills the global
+ * function list `nv`. The outcome is cached in function-local statics, so
+ * later calls return the first result without retrying. The caller
+ * (init_nvenc) holds init_mutex around this function. */
+static inline bool init_nvenc_internal(void)
+{
+	static bool initialized = false;
+	static bool success = false;
+
+	if (initialized)
+		return success;
+	/* Marked before any work is done: a failed attempt is remembered too */
+	initialized = true;
+
+	NV_MAX_VER_FUNC nv_max_ver = (NV_MAX_VER_FUNC)
+		load_nv_func("NvEncodeAPIGetMaxSupportedVersion");
+	if (!nv_max_ver) {
+		return false;
+	}
+
+	uint32_t ver = 0;
+	if (NV_FAILED(nv_max_ver(&ver))) {
+		return false;
+	}
+
+	/* Pack the compile-time API version as (major << 4) | minor so it can
+	 * be compared with the value returned above */
+	uint32_t cur_ver =
+		(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
+	if (cur_ver > ver) {
+		error("Current driver version does not support this NVENC "
+				"version, please upgrade your driver");
+		return false;
+	}
+
+	nv_create_instance = (NV_CREATE_INSTANCE_FUNC)
+		load_nv_func("NvEncodeAPICreateInstance");
+	if (!nv_create_instance) {
+		return false;
+	}
+
+	/* Populates the global function table used by the encoder */
+	if (NV_FAILED(nv_create_instance(&nv))) {
+		return false;
+	}
+
+	success = true;
+	return true;
+}
+
+/* Serialized entry point for the one-time NVENC API initialization.
+ * Returns the (cached) result of init_nvenc_internal(). */
+bool init_nvenc(void)
+{
+	pthread_mutex_lock(&init_mutex);
+	const bool ok = init_nvenc_internal();
+	pthread_mutex_unlock(&init_mutex);
+
+	return ok;
+}
+
+extern struct obs_encoder_info nvenc_info;
+
+/* Module-load hook: initializes init_mutex and registers the texture-based
+ * NVENC encoder (nvenc_info) with OBS. */
+void jim_nvenc_load(void)
+{
+	pthread_mutex_init(&init_mutex, NULL);
+	obs_register_encoder(&nvenc_info);
+}
+
+/* Module-unload hook: destroys init_mutex. */
+void jim_nvenc_unload(void)
+{
+	pthread_mutex_destroy(&init_mutex);
+}

+ 922 - 0
plugins/obs-ffmpeg/jim-nvenc.c

@@ -0,0 +1,922 @@
+#include "jim-nvenc.h"
+#include <util/circlebuf.h>
+#include <util/darray.h>
+#include <util/dstr.h>
+#include <obs-avc.h>
+#define INITGUID
+#include <dxgi.h>
+#include <d3d11.h>
+#include <d3d11_1.h>
+
+/* ========================================================================= */
+
+#define EXTRA_BUFFERS 5
+
+/* Logs `msg` with the calling function's name and the HRESULT held in the
+ * caller's local variable `hr`. The value is cast to unsigned long so it
+ * matches the %lX conversion, and the expansion carries no trailing
+ * semicolon so the macro behaves like an ordinary statement. */
+#define error_hr(msg) \
+	error("%s: %s: 0x%08lX", __FUNCTION__, msg, (unsigned long)hr)
+
+struct nv_bitstream;
+struct nv_texture;
+
+/* Cache entry pairing a shared texture handle with the texture opened from
+ * it on the encoder's device and that texture's keyed mutex (see
+ * get_tex_from_handle). */
+struct handle_tex {
+	uint32_t handle;
+	ID3D11Texture2D *tex;
+	IDXGIKeyedMutex *km;
+};
+
+/* ------------------------------------------------------------------------- */
+/* Main Implementation Structure                                             */
+
+/* Per-encoder state for the texture-based NVENC implementation. */
+struct nvenc_data {
+	obs_encoder_t *encoder;
+
+	/* NVENC session handle and the parameters it was initialized with */
+	void                     *session;
+	NV_ENC_INITIALIZE_PARAMS params;
+	NV_ENC_CONFIG            config;
+	/* Number of bitstream/texture slots allocated for the session */
+	size_t                   buf_count;
+	/* Frames that must be queued before output is fetched
+	 * (buf_count - 1) */
+	size_t                   output_delay;
+	/* Frames submitted to NVENC whose output has not been fetched yet */
+	size_t                   buffers_queued;
+	/* Slot index used for the next submitted frame */
+	size_t                   next_bitstream;
+	/* Slot index of the next output to fetch */
+	size_t                   cur_bitstream;
+	/* Set once a frame has been submitted successfully */
+	bool                     encode_started;
+	/* True until the first packet is fetched (headers are split from it) */
+	bool                     first_packet;
+	/* Whether nvenc_update() may reconfigure the bitrate */
+	bool                     can_change_bitrate;
+	/* True when the "bf" setting is greater than zero */
+	bool                     bframes;
+
+	DARRAY(struct nv_bitstream) bitstreams;
+	DARRAY(struct nv_texture)   textures;
+	DARRAY(struct handle_tex)   input_textures;
+	/* Queue of input timestamps, popped in order to produce packet DTS */
+	struct circlebuf            dts_list;
+
+	/* Most recently fetched packet */
+	DARRAY(uint8_t) packet_data;
+	int64_t         packet_pts;
+	bool            packet_keyframe;
+
+	ID3D11Device        *device;
+	ID3D11DeviceContext *context;
+
+	/* Encode dimensions, taken from the video output info */
+	uint32_t cx;
+	uint32_t cy;
+
+	/* Header and SEI data extracted from the first packet */
+	uint8_t *header;
+	size_t  header_size;
+
+	uint8_t *sei;
+	size_t  sei_size;
+};
+
+/* ------------------------------------------------------------------------- */
+/* Bitstream Buffer                                                          */
+
+/* One output slot: an NVENC bitstream buffer and the Win32 event that is
+ * registered with the session as that slot's completion event. */
+struct nv_bitstream {
+	void   *ptr;
+	HANDLE event;
+};
+
+/* Creates an NVENC bitstream buffer plus a completion event registered with
+ * the session, and stores both in `bs`. On failure everything created so
+ * far is released, `bs` is left untouched and false is returned. */
+static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs)
+{
+	NV_ENC_CREATE_BITSTREAM_BUFFER buf = {NV_ENC_CREATE_BITSTREAM_BUFFER_VER};
+	NV_ENC_EVENT_PARAMS params = {NV_ENC_EVENT_PARAMS_VER};
+	HANDLE event = NULL;
+
+	if (NV_FAILED(nv.nvEncCreateBitstreamBuffer(enc->session, &buf))) {
+		return false;
+	}
+
+	/* Manual-reset event, created in the signaled state */
+	event = CreateEvent(NULL, true, true, NULL);
+	if (!event) {
+		error("%s: %s", __FUNCTION__, "Failed to create event");
+		goto fail;
+	}
+
+	params.completionEvent = event;
+	if (NV_FAILED(nv.nvEncRegisterAsyncEvent(enc->session, &params))) {
+		goto fail;
+	}
+
+	bs->ptr = buf.bitstreamBuffer;
+	bs->event = event;
+	return true;
+
+fail:
+	if (event) {
+		CloseHandle(event);
+	}
+	if (buf.bitstreamBuffer) {
+		nv.nvEncDestroyBitstreamBuffer(enc->session,
+				buf.bitstreamBuffer);
+	}
+	return false;
+}
+
+/* Releases a slot created by nv_bitstream_init: destroys the bitstream
+ * buffer, unregisters the completion event from the session and closes it.
+ * Does nothing when the slot has no buffer. */
+static void nv_bitstream_free(struct nvenc_data *enc, struct nv_bitstream *bs)
+{
+	if (bs->ptr) {
+		nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr);
+
+		NV_ENC_EVENT_PARAMS params = {NV_ENC_EVENT_PARAMS_VER};
+		params.completionEvent = bs->event;
+		nv.nvEncUnregisterAsyncEvent(enc->session, &params);
+		CloseHandle(bs->event);
+	}
+}
+
+/* ------------------------------------------------------------------------- */
+/* Texture Resource                                                          */
+
+/* One input slot: an NV12 texture owned by the encoder, its NVENC
+ * registration handle, and the mapping handle while the texture is mapped
+ * for an in-flight encode. */
+struct nv_texture {
+	void            *res;
+	ID3D11Texture2D *tex;
+	void            *mapped_res;
+};
+
+/* Creates an NV12 render-target texture of the encode size on the encoder's
+ * D3D11 device and registers it with the NVENC session as an input resource.
+ * On success all fields of `nvtex` are set (mapped_res is NULL because the
+ * texture is not mapped yet). On failure the texture is released, `nvtex` is
+ * left untouched and false is returned. */
+static bool nv_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex)
+{
+	ID3D11Device *device = enc->device;
+	ID3D11Texture2D *tex;
+	HRESULT hr;
+
+	D3D11_TEXTURE2D_DESC desc = {0};
+	desc.Width                = enc->cx;
+	desc.Height               = enc->cy;
+	desc.MipLevels            = 1;
+	desc.ArraySize            = 1;
+	desc.Format               = DXGI_FORMAT_NV12;
+	desc.SampleDesc.Count     = 1;
+	desc.BindFlags            = D3D11_BIND_RENDER_TARGET;
+
+	hr = device->lpVtbl->CreateTexture2D(device, &desc, NULL, &tex);
+	if (FAILED(hr)) {
+		error_hr("Failed to create texture");
+		return false;
+	}
+
+	tex->lpVtbl->SetEvictionPriority(tex, DXGI_RESOURCE_PRIORITY_MAXIMUM);
+
+	NV_ENC_REGISTER_RESOURCE res = {NV_ENC_REGISTER_RESOURCE_VER};
+	res.resourceType             = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
+	res.resourceToRegister       = tex;
+	res.width                    = enc->cx;
+	res.height                   = enc->cy;
+	res.bufferFormat             = NV_ENC_BUFFER_FORMAT_NV12;
+
+	if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) {
+		tex->lpVtbl->Release(tex);
+		return false;
+	}
+
+	nvtex->res = res.registeredResource;
+	nvtex->tex = tex;
+	/* The caller may pass uninitialized storage; without this, the
+	 * mapped_res checks in nv_texture_free/get_encoded_packet could act
+	 * on a garbage pointer for slots that were never encoded */
+	nvtex->mapped_res = NULL;
+	return true;
+}
+
+/* Releases an input slot: unmaps it if it is still mapped, unregisters it
+ * from the NVENC session and drops the texture reference. Slots without a
+ * registration handle are ignored. */
+static void nv_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex)
+{
+	if (!nvtex->res)
+		return;
+
+	if (nvtex->mapped_res) {
+		nv.nvEncUnmapInputResource(enc->session, nvtex->mapped_res);
+	}
+
+	nv.nvEncUnregisterResource(enc->session, nvtex->res);
+	nvtex->tex->lpVtbl->Release(nvtex->tex);
+}
+
+/* ------------------------------------------------------------------------- */
+/* Implementation                                                            */
+
+/* Returns the display name of this encoder; type_data is unused. */
+static const char *nvenc_get_name(void *type_data)
+{
+	UNUSED_PARAMETER(type_data);
+	return "NVIDIA NVENC H.264 (new)";
+}
+
+/* Queries a single H.264 encoder capability from the open session.
+ * Returns the capability value, or 0 when there is no session or the query
+ * fails (the failure is logged). */
+static inline int nv_get_cap(struct nvenc_data *enc, NV_ENC_CAPS cap)
+{
+	if (!enc->session)
+		return 0;
+
+	NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
+	/* Start from 0 so a failed query never yields an indeterminate value */
+	int v = 0;
+
+	param.capsToQuery = cap;
+	if (NV_FAILED(nv.nvEncGetEncodeCaps(enc->session,
+			NV_ENC_CODEC_H264_GUID, &param, &v))) {
+		return 0;
+	}
+	return v;
+}
+
+/* Applies updated settings to a running encoder. Only the bitrate is
+ * reconfigured, and only when init_encoder() left can_change_bitrate set.
+ * Returns false if NVENC rejects the reconfiguration, true otherwise
+ * (including when nothing was changed). */
+static bool nvenc_update(void *data, obs_data_t *settings)
+{
+	struct nvenc_data *enc = data;
+
+	/* Only support reconfiguration of CBR bitrate */
+	if (enc->can_change_bitrate) {
+		int bitrate = (int)obs_data_get_int(settings, "bitrate");
+
+		enc->config.rcParams.averageBitRate = bitrate * 1000;
+		enc->config.rcParams.maxBitRate     = bitrate * 1000;
+
+		/* enc->params.encodeConfig points at enc->config, so the
+		 * copied init params carry the new rate-control values */
+		NV_ENC_RECONFIGURE_PARAMS params = {0};
+		params.version                   = NV_ENC_RECONFIGURE_PARAMS_VER;
+		params.reInitEncodeParams        = enc->params;
+
+		/* NVENC calls return NVENCSTATUS rather than HRESULT, so the
+		 * HRESULT-oriented FAILED() sign test does not detect their
+		 * errors; use NV_FAILED() like the rest of this file */
+		if (NV_FAILED(nv.nvEncReconfigureEncoder(enc->session, &params))) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Returns a module handle for `lib`, reusing the already-loaded module when
+ * there is one and loading it otherwise. Logs and returns NULL on failure. */
+static HANDLE get_lib(const char *lib)
+{
+	HMODULE mod = GetModuleHandleA(lib);
+
+	if (!mod) {
+		mod = LoadLibraryA(lib);
+		if (!mod)
+			error("Failed to load %s", lib);
+	}
+
+	return mod;
+}
+
+typedef HRESULT (WINAPI *CREATEDXGIFACTORY1PROC)(REFIID, void **);
+
+/* Creates a D3D11 device and immediate context on the adapter whose index is
+ * given by the "gpu" setting, and stores them in enc->device/enc->context.
+ * The DXGI and D3D11 entry points are resolved at runtime through
+ * get_lib()/GetProcAddress. Returns false on any failure. */
+static bool init_d3d11(struct nvenc_data *enc, obs_data_t *settings)
+{
+	HMODULE                 dxgi  = get_lib("DXGI.dll");
+	HMODULE                 d3d11 = get_lib("D3D11.dll");
+	CREATEDXGIFACTORY1PROC  create_dxgi;
+	PFN_D3D11_CREATE_DEVICE create_device;
+	IDXGIFactory1           *factory;
+	IDXGIAdapter            *adapter;
+	ID3D11Device            *device;
+	ID3D11DeviceContext     *context;
+	HRESULT                 hr;
+
+	int gpu = (int)obs_data_get_int(settings, "gpu");
+
+	/* get_lib() has already logged which library failed to load */
+	if (!dxgi || !d3d11) {
+		return false;
+	}
+
+	create_dxgi = (CREATEDXGIFACTORY1PROC)GetProcAddress(dxgi,
+			"CreateDXGIFactory1");
+	create_device = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(d3d11,
+			"D3D11CreateDevice");
+
+	if (!create_dxgi || !create_device) {
+		error("Failed to load D3D11/DXGI procedures");
+		return false;
+	}
+
+	hr = create_dxgi(&IID_IDXGIFactory1, &factory);
+	if (FAILED(hr)) {
+		error_hr("CreateDXGIFactory1 failed");
+		return false;
+	}
+
+	/* The factory is only needed to look up the adapter; release it
+	 * before checking the result so it is freed on both paths */
+	hr = factory->lpVtbl->EnumAdapters(factory, gpu, &adapter);
+	factory->lpVtbl->Release(factory);
+	if (FAILED(hr)) {
+		error_hr("EnumAdapters failed");
+		return false;
+	}
+
+	/* Same pattern: the adapter is released whether or not device
+	 * creation succeeded */
+	hr = create_device(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0,
+			NULL, 0, D3D11_SDK_VERSION, &device, NULL, &context);
+	adapter->lpVtbl->Release(adapter);
+	if (FAILED(hr)) {
+		error_hr("D3D11CreateDevice failed");
+		return false;
+	}
+
+	enc->device = device;
+	enc->context = context;
+	return true;
+}
+
+/* Opens an NVENC encode session on the encoder's D3D11 device and stores
+ * the session handle in enc->session. Returns false if NVENC reports an
+ * error. */
+static bool init_session(struct nvenc_data *enc)
+{
+	NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params =
+			{NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
+
+	params.apiVersion = NVENCAPI_VERSION;
+	params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
+	params.device = enc->device;
+
+	return !NV_FAILED(nv.nvEncOpenEncodeSessionEx(&params, &enc->session));
+}
+
+/* Builds the NVENC configuration from the OBS settings and initializes the
+ * encoder on the already-open session. On success this also fills in
+ * enc->cx/cy, enc->params/config, enc->bframes, enc->can_change_bitrate,
+ * enc->buf_count and enc->output_delay, and logs the chosen settings.
+ * Returns false if fetching the preset config or initializing fails. */
+static bool init_encoder(struct nvenc_data *enc, obs_data_t *settings)
+{
+	const char *rc = obs_data_get_string(settings, "rate_control");
+	int bitrate = (int)obs_data_get_int(settings, "bitrate");
+	int max_bitrate = (int)obs_data_get_int(settings, "max_bitrate");
+	int cqp = (int)obs_data_get_int(settings, "cqp");
+	int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
+	const char *preset = obs_data_get_string(settings, "preset");
+	const char *profile = obs_data_get_string(settings, "profile");
+	bool psycho_aq = obs_data_get_bool(settings, "psycho_aq");
+	bool lookahead = obs_data_get_bool(settings, "lookahead");
+	int gpu = (int)obs_data_get_int(settings, "gpu");
+	int bf = (int)obs_data_get_int(settings, "bf");
+	bool vbr = astrcmpi(rc, "VBR") == 0;
+	NVENCSTATUS err;
+
+	video_t *video = obs_encoder_video(enc->encoder);
+	const struct video_output_info *voi = video_output_get_info(video);
+
+	enc->cx = voi->width;
+	enc->cy = voi->height;
+
+	/* -------------------------- */
+	/* get preset                 */
+
+	GUID nv_preset = NV_ENC_PRESET_DEFAULT_GUID;
+	bool twopass = false;
+	bool hp = false;
+	bool ll = false;
+
+	if (astrcmpi(preset, "hq") == 0) {
+		nv_preset = NV_ENC_PRESET_HQ_GUID;
+
+	} else if (astrcmpi(preset, "mq") == 0) {
+		/* "mq" is the HQ preset with two-pass rate control */
+		nv_preset = NV_ENC_PRESET_HQ_GUID;
+		twopass = true;
+
+	} else if (astrcmpi(preset, "hp") == 0) {
+		nv_preset = NV_ENC_PRESET_HP_GUID;
+		hp = true;
+
+	} else if (astrcmpi(preset, "ll") == 0) {
+		nv_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
+		ll = true;
+
+	} else if (astrcmpi(preset, "llhq") == 0) {
+		nv_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
+		ll = true;
+
+	} else if (astrcmpi(preset, "llhp") == 0) {
+		nv_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
+		hp = true;
+		ll = true;
+	}
+
+	/* Lossless rate control overrides whatever preset was chosen above */
+	if (astrcmpi(rc, "lossless") == 0) {
+		nv_preset = hp
+			? NV_ENC_PRESET_LOSSLESS_HP_GUID
+			: NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID;
+	}
+
+	/* -------------------------- */
+	/* get preset default config  */
+
+	NV_ENC_PRESET_CONFIG preset_config =
+			{NV_ENC_PRESET_CONFIG_VER, {NV_ENC_CONFIG_VER}};
+
+	err = nv.nvEncGetEncodePresetConfig(enc->session,
+			NV_ENC_CODEC_H264_GUID, nv_preset, &preset_config);
+	if (nv_failed(err, __FUNCTION__, "nvEncGetEncodePresetConfig")) {
+		return false;
+	}
+
+	/* -------------------------- */
+	/* main configuration         */
+
+	enc->config = preset_config.presetCfg;
+
+	/* Keyframe interval in frames; 250 when no interval is configured */
+	uint32_t gop_size = (keyint_sec)
+		? keyint_sec * voi->fps_num / voi->fps_den
+		: 250;
+
+	NV_ENC_INITIALIZE_PARAMS *params = &enc->params;
+	NV_ENC_CONFIG *config = &enc->config;
+	NV_ENC_CONFIG_H264 *h264_config = &config->encodeCodecConfig.h264Config;
+	NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui_params =
+		&h264_config->h264VUIParameters;
+
+	memset(params, 0, sizeof(*params));
+	params->version = NV_ENC_INITIALIZE_PARAMS_VER;
+	params->encodeGUID = NV_ENC_CODEC_H264_GUID;
+	params->presetGUID = nv_preset;
+	params->encodeWidth = voi->width;
+	params->encodeHeight = voi->height;
+	params->darWidth = voi->width;
+	params->darHeight = voi->height;
+	params->frameRateNum = voi->fps_num;
+	params->frameRateDen = voi->fps_den;
+	params->enableEncodeAsync = 1;
+	params->enablePTD = 1;
+	/* Points into enc, so later edits to enc->config are visible through
+	 * enc->params (nvenc_update relies on this) */
+	params->encodeConfig = &enc->config;
+	params->maxEncodeWidth = voi->width;
+	params->maxEncodeHeight = voi->height;
+	config->rcParams.averageBitRate = bitrate * 1000;
+	config->rcParams.maxBitRate = vbr ? max_bitrate * 1000 : bitrate * 1000;
+	config->gopLength = gop_size;
+	config->frameIntervalP = 1 + bf;
+	h264_config->idrPeriod = gop_size;
+	vui_params->videoSignalTypePresentFlag = 1;
+	vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL);
+	vui_params->colourDescriptionPresentFlag = 1;
+	/* NOTE(review): presumably H.264 VUI codes (1 = BT.709, 5 = the
+	 * BT.601 family) -- confirm against the H.264 specification */
+	vui_params->colourMatrix = (voi->colorspace == VIDEO_CS_709) ? 1 : 5;
+	vui_params->colourPrimaries = 1;
+	vui_params->transferCharacteristics = 1;
+
+	enc->bframes = bf > 0;
+
+	/* lookahead */
+	if (lookahead && nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOOKAHEAD)) {
+		config->rcParams.lookaheadDepth = 8;
+		config->rcParams.enableLookahead = 1;
+	}
+
+	/* psycho aq */
+	if (nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ)) {
+		config->rcParams.enableAQ = psycho_aq;
+		config->rcParams.enableTemporalAQ = psycho_aq;
+	}
+
+	/* -------------------------- */
+	/* rate control               */
+
+	enc->can_change_bitrate =
+		nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
+
+	/* Default to VBR; the branches below adjust it per rate_control */
+	config->rcParams.rateControlMode = twopass
+		? NV_ENC_PARAMS_RC_VBR_HQ
+		: NV_ENC_PARAMS_RC_VBR;
+
+	if (astrcmpi(rc, "cqp") == 0) {
+		config->rcParams.targetQuality = cqp;
+		config->rcParams.averageBitRate = 0;
+		config->rcParams.maxBitRate = 0;
+		enc->can_change_bitrate = false;
+
+	} else if (astrcmpi(rc, "lossless") == 0) {
+		config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+		config->rcParams.constQP.qpInterP = 0;
+		config->rcParams.constQP.qpInterB = 0;
+		config->rcParams.constQP.qpIntra = 0;
+		config->rcParams.averageBitRate = 0;
+		config->rcParams.maxBitRate = 0;
+		enc->can_change_bitrate = false;
+
+	} else if (astrcmpi(rc, "vbr") != 0) { /* CBR by default */
+		h264_config->outputBufferingPeriodSEI = 1;
+		h264_config->outputPictureTimingSEI = 1;
+		config->rcParams.rateControlMode = twopass
+			? NV_ENC_PARAMS_RC_2_PASS_QUALITY
+			: NV_ENC_PARAMS_RC_CBR;
+	}
+
+	/* -------------------------- */
+	/* profile                    */
+
+	/* Anything other than "main"/"baseline" selects the high profile */
+	if (astrcmpi(profile, "main") == 0) {
+		config->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
+	} else if (astrcmpi(profile, "baseline") == 0) {
+		config->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
+	} else {
+		config->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+	}
+
+	/* -------------------------- */
+	/* initialize                 */
+
+	if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, params))) {
+		return false;
+	}
+
+	/* Slot count grows with the B-frame interval and lookahead depth;
+	 * output is fetched once buf_count - 1 frames are queued */
+	enc->buf_count = config->frameIntervalP +
+		config->rcParams.lookaheadDepth + EXTRA_BUFFERS;
+	enc->output_delay = enc->buf_count - 1;
+
+	info("settings:\n"
+	     "\trate_control: %s\n"
+	     "\tbitrate:      %d\n"
+	     "\tcqp:          %d\n"
+	     "\tkeyint:       %d\n"
+	     "\tpreset:       %s\n"
+	     "\tprofile:      %s\n"
+	     "\twidth:        %d\n"
+	     "\theight:       %d\n"
+	     "\t2-pass:       %s\n"
+	     "\tb-frames:     %d\n"
+	     "\tlookahead:    %s\n"
+	     "\tpsycho_aq:    %s\n"
+	     "\tGPU:          %d\n",
+	     rc, bitrate, cqp, gop_size,
+	     preset, profile,
+	     enc->cx, enc->cy,
+	     twopass ? "true" : "false",
+	     bf,
+	     lookahead ? "true" : "false",
+	     psycho_aq ? "true" : "false",
+	     gpu);
+
+	return true;
+}
+
+/* Allocates enc->buf_count output slots and appends them to
+ * enc->bitstreams. Stops and returns false at the first slot that fails to
+ * initialize; slots created before that stay in the array. */
+static bool init_bitstreams(struct nvenc_data *enc)
+{
+	const size_t total = enc->buf_count;
+
+	da_reserve(enc->bitstreams, total);
+
+	for (size_t idx = 0; idx != total; ++idx) {
+		struct nv_bitstream bs;
+
+		if (!nv_bitstream_init(enc, &bs))
+			return false;
+
+		da_push_back(enc->bitstreams, &bs);
+	}
+
+	return true;
+}
+
+/* Allocates enc->buf_count input slots and appends them to enc->textures.
+ * Stops and returns false at the first slot that fails to initialize; slots
+ * created before that stay in the array. */
+static bool init_textures(struct nvenc_data *enc)
+{
+	/* Reserve space in the array that is actually filled below */
+	da_reserve(enc->textures, enc->buf_count);
+	for (size_t i = 0; i < enc->buf_count; i++) {
+		/* Zero the slot so every field (notably mapped_res) has a
+		 * defined value before it is copied into the array */
+		struct nv_texture texture = {0};
+		if (!nv_texture_init(enc, &texture)) {
+			return false;
+		}
+
+		da_push_back(enc->textures, &texture);
+	}
+
+	return true;
+}
+
+static void nvenc_destroy(void *data);
+
+/* Encoder create callback. Runs the initialization steps in order (NV12
+ * texture support check, NVENC API, D3D11 device, session, encoder, output
+ * slots, input slots). If any step fails, the partially built state is
+ * destroyed and creation is rerouted to the "ffmpeg_nvenc" encoder. */
+static void *nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
+{
+	NV_ENCODE_API_FUNCTION_LIST init = {NV_ENCODE_API_FUNCTION_LIST_VER};
+	struct nvenc_data *enc = bzalloc(sizeof(*enc));
+	enc->encoder = encoder;
+	enc->first_packet = true;
+
+	if (!obs_nv12_tex_active()) {
+		goto fail;
+	}
+	if (!init_nvenc()) {
+		goto fail;
+	}
+	/* NOTE(review): `init` is filled here but not used afterwards; this
+	 * looks like a sanity check of the API entry point -- confirm */
+	if (NV_FAILED(nv_create_instance(&init))) {
+		goto fail;
+	}
+	if (!init_d3d11(enc, settings)) {
+		goto fail;
+	}
+	if (!init_session(enc)) {
+		goto fail;
+	}
+	if (!init_encoder(enc, settings)) {
+		goto fail;
+	}
+	if (!init_bitstreams(enc)) {
+		goto fail;
+	}
+	if (!init_textures(enc)) {
+		goto fail;
+	}
+
+	return enc;
+
+fail:
+	nvenc_destroy(enc);
+	return obs_encoder_create_rerouted(encoder, "ffmpeg_nvenc");
+}
+
+static bool get_encoded_packet(struct nvenc_data *enc, bool finalize);
+
+/* Encoder destroy callback; also used by nvenc_create() to tear down a
+ * partially initialized encoder, so every step tolerates missing state.
+ *
+ * Order matters: the encoder is flushed first, while the input slots are
+ * still registered. get_encoded_packet() unmaps each in-flight texture and
+ * clears its mapped_res, so the texture cleanup that follows only unmaps
+ * slots the flush did not reach and never unmaps the same handle twice. */
+static void nvenc_destroy(void *data)
+{
+	struct nvenc_data *enc = data;
+
+	if (enc->encode_started) {
+		size_t next_bitstream = enc->next_bitstream;
+		HANDLE next_event = enc->bitstreams.array[next_bitstream].event;
+
+		/* Submit an end-of-stream picture, then drain queued output */
+		NV_ENC_PIC_PARAMS params = {NV_ENC_PIC_PARAMS_VER};
+		params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+		params.completionEvent = next_event;
+		nv.nvEncEncodePicture(enc->session, &params);
+		get_encoded_packet(enc, true);
+	}
+	for (size_t i = 0; i < enc->textures.num; i++) {
+		nv_texture_free(enc, &enc->textures.array[i]);
+	}
+	for (size_t i = 0; i < enc->bitstreams.num; i++) {
+		nv_bitstream_free(enc, &enc->bitstreams.array[i]);
+	}
+	if (enc->session) {
+		nv.nvEncDestroyEncoder(enc->session);
+	}
+	/* Drop the references taken in get_tex_from_handle() */
+	for (size_t i = 0; i < enc->input_textures.num; i++) {
+		ID3D11Texture2D *tex = enc->input_textures.array[i].tex;
+		IDXGIKeyedMutex *km = enc->input_textures.array[i].km;
+		tex->lpVtbl->Release(tex);
+		km->lpVtbl->Release(km);
+	}
+	if (enc->context) {
+		enc->context->lpVtbl->Release(enc->context);
+	}
+	if (enc->device) {
+		enc->device->lpVtbl->Release(enc->device);
+	}
+
+	bfree(enc->header);
+	bfree(enc->sei);
+	circlebuf_free(&enc->dts_list);
+	da_free(enc->textures);
+	da_free(enc->bitstreams);
+	da_free(enc->input_textures);
+	da_free(enc->packet_data);
+	bfree(enc);
+}
+
+/* Returns the texture for a shared handle, opened on the encoder's device,
+ * and writes its keyed mutex to *km_out. Textures are cached per handle in
+ * enc->input_textures, so each handle is opened only once. Returns NULL
+ * (leaving *km_out unset) if the handle cannot be opened or the texture
+ * does not expose IDXGIKeyedMutex. */
+static ID3D11Texture2D *get_tex_from_handle(struct nvenc_data *enc,
+		uint32_t handle, IDXGIKeyedMutex **km_out)
+{
+	ID3D11Device    *device = enc->device;
+	IDXGIKeyedMutex *km;
+	ID3D11Texture2D *input_tex;
+	HRESULT         hr;
+
+	/* Fast path: handle already opened on an earlier frame */
+	for (size_t i = 0; i < enc->input_textures.num; i++) {
+		struct handle_tex *ht = &enc->input_textures.array[i];
+		if (ht->handle == handle) {
+			*km_out = ht->km;
+			return ht->tex;
+		}
+	}
+
+	hr = device->lpVtbl->OpenSharedResource(device,
+			(HANDLE)(uintptr_t)handle,
+			&IID_ID3D11Texture2D, &input_tex);
+	if (FAILED(hr)) {
+		error_hr("OpenSharedResource failed");
+		return NULL;
+	}
+
+	hr = input_tex->lpVtbl->QueryInterface(input_tex, &IID_IDXGIKeyedMutex,
+			&km);
+	if (FAILED(hr)) {
+		error_hr("QueryInterface(IDXGIKeyedMutex) failed");
+		input_tex->lpVtbl->Release(input_tex);
+		return NULL;
+	}
+
+	input_tex->lpVtbl->SetEvictionPriority(input_tex,
+			DXGI_RESOURCE_PRIORITY_MAXIMUM);
+
+	*km_out = km;
+
+	/* Cache both interfaces; they are released in nvenc_destroy() */
+	struct handle_tex new_ht = {handle, input_tex, km};
+	da_push_back(enc->input_textures, &new_ht);
+	return input_tex;
+}
+
+/* Fetches encoded output into enc->packet_data / packet_pts /
+ * packet_keyframe.
+ *
+ * finalize == false: fetches one packet, but only once at least
+ * output_delay frames are queued; otherwise packet_data is left empty.
+ * finalize == true: processes every queued frame; each iteration overwrites
+ * packet_data, so only the last packet's data remains afterwards.
+ *
+ * Returns false if locking, unlocking or unmapping fails, true otherwise
+ * (including when there was nothing to fetch). */
+static bool get_encoded_packet(struct nvenc_data *enc, bool finalize)
+{
+	void *s = enc->session;
+
+	/* An empty packet_data tells the caller no packet was produced */
+	da_resize(enc->packet_data, 0);
+
+	if (!enc->buffers_queued)
+		return true;
+	if (!finalize && enc->buffers_queued < enc->output_delay)
+		return true;
+
+	size_t count = finalize ? enc->buffers_queued : 1;
+
+	for (size_t i = 0; i < count; i++) {
+		/* Output and input slots share the same index */
+		size_t cur_bs_idx          = enc->cur_bitstream;
+		struct nv_bitstream *bs    = &enc->bitstreams.array[cur_bs_idx];
+		struct nv_texture   *nvtex = &enc->textures.array[cur_bs_idx];
+
+		/* ---------------- */
+
+		NV_ENC_LOCK_BITSTREAM lock = {NV_ENC_LOCK_BITSTREAM_VER};
+		lock.outputBitstream       = bs->ptr;
+		lock.doNotWait             = false;
+
+		if (NV_FAILED(nv.nvEncLockBitstream(s, &lock))) {
+			return false;
+		}
+
+		if (enc->first_packet) {
+			uint8_t *new_packet;
+			size_t size;
+
+			/* Split header and SEI data out of the first packet
+			 * and keep them for the extra-data/SEI callbacks */
+			enc->first_packet = false;
+			obs_extract_avc_headers(
+					lock.bitstreamBufferPtr,
+					lock.bitstreamSizeInBytes,
+					&new_packet, &size,
+					&enc->header, &enc->header_size,
+					&enc->sei, &enc->sei_size);
+
+			da_copy_array(enc->packet_data, new_packet, size);
+			bfree(new_packet);
+		} else {
+			da_copy_array(enc->packet_data,
+					lock.bitstreamBufferPtr,
+					lock.bitstreamSizeInBytes);
+		}
+
+		enc->packet_pts = (int64_t)lock.outputTimeStamp;
+		enc->packet_keyframe = lock.pictureType == NV_ENC_PIC_TYPE_IDR;
+
+		if (NV_FAILED(nv.nvEncUnlockBitstream(s, bs->ptr))) {
+			return false;
+		}
+
+		/* ---------------- */
+
+		/* The input texture of this slot is no longer needed */
+		if (nvtex->mapped_res) {
+			NVENCSTATUS err;
+			err = nv.nvEncUnmapInputResource(s, nvtex->mapped_res);
+			if (nv_failed(err, __FUNCTION__, "unmap")) {
+				return false;
+			}
+			nvtex->mapped_res = NULL;
+		}
+
+		/* ---------------- */
+
+		/* Advance the read index, wrapping around the slot ring */
+		if (++enc->cur_bitstream == enc->buf_count)
+			enc->cur_bitstream = 0;
+
+		enc->buffers_queued--;
+	}
+
+	return true;
+}
+
+/* Texture encode callback. Copies the shared input texture identified by
+ * `handle` into the next encoder-owned slot, submits that slot to NVENC and,
+ * if a finished packet is available, returns it through `packet`.
+ *
+ * lock_key is the keyed-mutex key used to acquire the input texture; the
+ * texture is released with *next_key. When the function fails before the
+ * texture is acquired, *next_key is set to lock_key.
+ * *received_packet is set on the success path only.
+ * Returns false on any failure. */
+static bool nvenc_encode_tex(void *data, uint32_t handle, int64_t pts,
+		uint64_t lock_key, uint64_t *next_key,
+		struct encoder_packet *packet, bool *received_packet)
+{
+	struct nvenc_data   *enc     = data;
+	ID3D11Device        *device  = enc->device;
+	ID3D11DeviceContext *context = enc->context;
+	ID3D11Texture2D     *input_tex;
+	ID3D11Texture2D     *output_tex;
+	IDXGIKeyedMutex     *km;
+	struct nv_texture   *nvtex;
+	struct nv_bitstream *bs;
+	NVENCSTATUS         err;
+
+	if (handle == GS_INVALID_HANDLE) {
+		error("Encode failed: bad texture handle");
+		*next_key = lock_key;
+		return false;
+	}
+
+	/* Input and output slots are paired by index */
+	bs    = &enc->bitstreams.array[enc->next_bitstream];
+	nvtex = &enc->textures.array[enc->next_bitstream];
+
+	input_tex  = get_tex_from_handle(enc, handle, &km);
+	output_tex = nvtex->tex;
+
+	if (!input_tex) {
+		*next_key = lock_key;
+		return false;
+	}
+
+	/* Input timestamps are queued and popped in order as packet DTS */
+	circlebuf_push_back(&enc->dts_list, &pts, sizeof(pts));
+
+	/* ------------------------------------ */
+	/* wait for output bitstream/tex        */
+
+	WaitForSingleObject(bs->event, INFINITE);
+
+	/* ------------------------------------ */
+	/* copy to output tex                   */
+
+	km->lpVtbl->AcquireSync(km, lock_key, INFINITE);
+
+	context->lpVtbl->CopyResource(context,
+			(ID3D11Resource *)output_tex,
+			(ID3D11Resource *)input_tex);
+
+	km->lpVtbl->ReleaseSync(km, *next_key);
+
+	/* ------------------------------------ */
+	/* map output tex so nvenc can use it   */
+
+	NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
+	map.registeredResource        = nvtex->res;
+	if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) {
+		return false;
+	}
+
+	/* Unmapped again in get_encoded_packet() or nv_texture_free() */
+	nvtex->mapped_res = map.mappedResource;
+
+	/* ------------------------------------ */
+	/* do actual encode call                */
+
+	NV_ENC_PIC_PARAMS params = {0};
+	params.version           = NV_ENC_PIC_PARAMS_VER;
+	params.pictureStruct     = NV_ENC_PIC_STRUCT_FRAME;
+	params.inputBuffer       = nvtex->mapped_res;
+	params.bufferFmt         = NV_ENC_BUFFER_FORMAT_NV12;
+	params.inputTimeStamp    = (uint64_t)pts;
+	params.inputWidth        = enc->cx;
+	params.inputHeight       = enc->cy;
+	params.outputBitstream   = bs->ptr;
+	params.completionEvent   = bs->event;
+
+	/* NV_ENC_ERR_NEED_MORE_INPUT is not treated as a failure here */
+	err = nv.nvEncEncodePicture(enc->session, &params);
+	if (err != NV_ENC_SUCCESS && err != NV_ENC_ERR_NEED_MORE_INPUT) {
+		nv_failed(err, __FUNCTION__, "nvEncEncodePicture");
+		return false;
+	}
+
+	enc->encode_started = true;
+	enc->buffers_queued++;
+
+	/* Advance the write index, wrapping around the slot ring */
+	if (++enc->next_bitstream == enc->buf_count) {
+		enc->next_bitstream = 0;
+	}
+
+	/* ------------------------------------ */
+	/* check for encoded packet and parse   */
+
+	if (!get_encoded_packet(enc, false)) {
+		return false;
+	}
+
+	/* ------------------------------------ */
+	/* output encoded packet                */
+
+	if (enc->packet_data.num) {
+		int64_t dts;
+		circlebuf_pop_front(&enc->dts_list, &dts, sizeof(dts));
+
+		/* subtract bframe delay from dts */
+		if (enc->bframes)
+			dts--;
+
+		/* packet->data points into enc->packet_data, which is reused
+		 * by the next call to get_encoded_packet() */
+		*received_packet = true;
+		packet->data     = enc->packet_data.array;
+		packet->size     = enc->packet_data.num;
+		packet->type     = OBS_ENCODER_VIDEO;
+		packet->pts      = enc->packet_pts;
+		packet->dts      = dts;
+		packet->keyframe = enc->packet_keyframe;
+	} else {
+		*received_packet = false;
+	}
+
+	return true;
+}
+
+extern void nvenc_defaults(obs_data_t *settings);
+extern obs_properties_t *nvenc_properties(void *unused);
+
+/* Extra-data callback: hands out the header captured from the first
+ * packet. Returns false (outputs untouched) while no header is stored. */
+static bool nvenc_extra_data(void *data, uint8_t **header, size_t *size)
+{
+	struct nvenc_data *enc = data;
+
+	if (enc->header) {
+		*header = enc->header;
+		*size   = enc->header_size;
+		return true;
+	}
+
+	return false;
+}
+
+/* SEI callback: hands out the SEI data captured from the first packet.
+ * Returns false (outputs untouched) while no SEI data is stored. */
+static bool nvenc_sei_data(void *data, uint8_t **sei, size_t *size)
+{
+	struct nvenc_data *enc = data;
+
+	if (!enc->sei) {
+		return false;
+	}
+
+	*sei  = enc->sei;
+	/* Report the SEI buffer's own length, not the header's */
+	*size = enc->sei_size;
+	return true;
+}
+
+/* Encoder registration record for the texture-based NVENC H.264 encoder;
+ * registered with OBS by jim_nvenc_load(). The defaults and properties
+ * callbacks are declared extern above and defined outside this file. */
+struct obs_encoder_info nvenc_info = {
+	.id                      = "jim_nvenc",
+	.codec                   = "h264",
+	.type                    = OBS_ENCODER_VIDEO,
+	.caps                    = OBS_ENCODER_CAP_PASS_TEXTURE,
+	.get_name                = nvenc_get_name,
+	.create                  = nvenc_create,
+	.destroy                 = nvenc_destroy,
+	.update                  = nvenc_update,
+	.encode_texture          = nvenc_encode_tex,
+	.get_defaults            = nvenc_defaults,
+	.get_properties          = nvenc_properties,
+	.get_extra_data          = nvenc_extra_data,
+	.get_sei_data            = nvenc_sei_data,
+};

+ 35 - 0
plugins/obs-ffmpeg/jim-nvenc.h

@@ -0,0 +1,35 @@
+#pragma once
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include <obs-module.h>
+#include "nvEncodeAPI.h"
+
+/* Logging helpers: every message is prefixed with "[jim-nvenc] " and passed
+ * to blog() at the corresponding level. */
+#define do_log(level, format, ...) \
+	blog(level, "[jim-nvenc] " format, ##__VA_ARGS__)
+
+#define error(format, ...) do_log(LOG_ERROR,   format, ##__VA_ARGS__)
+#define warn(format, ...)  do_log(LOG_WARNING, format, ##__VA_ARGS__)
+#define info(format, ...)  do_log(LOG_INFO,    format, ##__VA_ARGS__)
+#define debug(format, ...) do_log(LOG_DEBUG,   format, ##__VA_ARGS__)
+
+typedef NVENCSTATUS (NVENCAPI *NV_CREATE_INSTANCE_FUNC)(NV_ENCODE_API_FUNCTION_LIST*);
+
+extern const char *nv_error_name(NVENCSTATUS err);
+extern NV_ENCODE_API_FUNCTION_LIST nv;
+extern NV_CREATE_INSTANCE_FUNC nv_create_instance;
+extern bool init_nvenc(void);
+
+/* Reports whether an NVENC status is a failure. On failure, logs the calling
+ * function (`func`), the attempted call (`call`), the numeric status and its
+ * symbolic name. */
+static inline bool nv_failed(NVENCSTATUS err, const char *func,
+		const char *call)
+{
+	const bool failed = (err != NV_ENC_SUCCESS);
+
+	if (failed) {
+		error("%s: %s failed: %d (%s)", func, call, (int)err,
+				nv_error_name(err));
+	}
+
+	return failed;
+}
+
+/* Evaluates the NVENC call `x` once and returns true if it failed, logging
+ * the enclosing function name and the source text of the call. */
+#define NV_FAILED(x) nv_failed(x, __FUNCTION__, #x)

+ 22 - 5
plugins/obs-ffmpeg/obs-ffmpeg.c

@@ -223,6 +223,10 @@ finish:
 }
 #endif
 
+#ifdef _WIN32
+extern bool load_nvenc_lib(void);
+#endif
+
 static bool nvenc_supported(void)
 {
 	av_register_all();
@@ -241,11 +245,9 @@ static bool nvenc_supported(void)
 	if (!nvenc_device_available()) {
 		goto cleanup;
 	}
-
-	if (sizeof(void*) == 8) {
-		lib = os_dlopen("nvEncodeAPI64.dll");
-	} else {
-		lib = os_dlopen("nvEncodeAPI.dll");
+	if (load_nvenc_lib()) {
+		success = true;
+		goto finish;
 	}
 #else
 	lib = os_dlopen("libnvidia-encode.so.1");
@@ -258,6 +260,7 @@ static bool nvenc_supported(void)
 cleanup:
 	if (lib)
 		os_dlclose(lib);
+finish:
 	profile_end(nvenc_check_name);
 	return success;
 }
@@ -272,6 +275,11 @@ static bool vaapi_supported(void)
 }
 #endif
 
+#ifdef _WIN32
+extern void jim_nvenc_load(void);
+extern void jim_nvenc_unload(void);
+#endif
+
 bool obs_module_load(void)
 {
 	da_init(active_log_contexts);
@@ -288,6 +296,11 @@ bool obs_module_load(void)
 #ifndef __APPLE__
 	if (nvenc_supported()) {
 		blog(LOG_INFO, "NVENC supported");
+#ifdef _WIN32
+		if (get_win_ver_int() > 0x0601) {
+			jim_nvenc_load();
+		}
+#endif
 		obs_register_encoder(&nvenc_encoder_info);
 	}
 #if !defined(_WIN32) && defined(LIBAVUTIL_VAAPI_AVAILABLE)
@@ -317,4 +330,8 @@ void obs_module_unload(void)
 
 	da_free(active_log_contexts);
 	da_free(cached_log_contexts);
+
+#ifdef _WIN32
+	jim_nvenc_unload();
+#endif
 }