obs-nvenc.c 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464
  1. #include "obs-nvenc.h"
  2. #include <util/deque.h>
  3. #include <util/darray.h>
  4. #include <util/dstr.h>
  5. #include <obs-avc.h>
  6. #include <obs-hevc.h>
  7. #include <libavutil/rational.h>
  8. #ifdef _WIN32
  9. #define INITGUID
  10. #include <dxgi.h>
  11. #include <d3d11.h>
  12. #include <d3d11_1.h>
  13. #else
  14. #include <glad/glad.h>
  15. #endif
  16. /* ========================================================================= */
  17. /* a hack of the ages: nvenc backward compatibility */
  18. #define CONFIGURED_NVENC_MAJOR 12
  19. #define CONFIGURED_NVENC_MINOR 1
  20. /* we cannot guarantee structures haven't changed, so purposely break on
  21. * version change to force the programmer to update or remove backward
  22. * compatibility NVENC code. */
  23. #if CONFIGURED_NVENC_MAJOR != NVENCAPI_MAJOR_VERSION || \
  24. CONFIGURED_NVENC_MINOR < NVENCAPI_MINOR_VERSION
  25. #error NVENC version changed, update or remove NVENC compatibility code
  26. #endif
  27. #undef NVENCAPI_STRUCT_VERSION
  28. #define NVENCAPI_STRUCT_VERSION(ver) \
  29. ((uint32_t)(enc->needs_compat_ver ? NVENC_COMPAT_VER \
  30. : NVENCAPI_VERSION) | \
  31. ((ver) << 16) | (0x7 << 28))
  32. #define NV_ENC_CONFIG_COMPAT_VER (NVENCAPI_STRUCT_VERSION(7) | (1 << 31))
  33. #define NV_ENC_INITIALIZE_PARAMS_COMPAT_VER \
  34. (NVENCAPI_STRUCT_VERSION(5) | (1 << 31))
  35. #define NV_ENC_PIC_PARAMS_COMPAT_VER (NVENCAPI_STRUCT_VERSION(4) | (1 << 31))
  36. #define NV_ENC_LOCK_BITSTREAM_COMPAT_VER NVENCAPI_STRUCT_VERSION(1)
  37. #define NV_ENC_REGISTER_RESOURCE_COMPAT_VER NVENCAPI_STRUCT_VERSION(3)
  38. #define COMPATIBILITY_VERSION \
  39. (NVENC_COMPAT_MAJOR_VER << 4 | NVENC_COMPAT_MINOR_VER)
  40. /* ========================================================================= */
  #define EXTRA_BUFFERS 5

  /* Logging helpers. Each message is prefixed with the encoder's name, so a
   * `struct nvenc_data *enc` must be in scope wherever they are used. */
  #define do_log(level, format, ...)               \
      blog(level, "[obs-nvenc: '%s'] " format, \
           obs_encoder_get_name(enc->encoder), ##__VA_ARGS__)

  #define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__)
  #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
  #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
  #define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)

  /* Logs `msg` together with the HRESULT held in a local named `hr`.
   * The value is narrowed to 32 bits and then widened to unsigned long so the
   * argument type matches the %lX conversion. The macro no longer carries its
   * own trailing semicolon; call sites supply it. */
  #define error_hr(msg)                                 \
      error("%s: %s: 0x%08lX", __FUNCTION__, msg, \
            (unsigned long)(uint32_t)hr)

  #ifndef _WIN32
  /* NOTE: both macros evaluate their arguments more than once. */
  #define min(a, b) (((a) < (b)) ? (a) : (b))
  #define max(a, b) (((a) > (b)) ? (a) : (b))
  #endif
  54. struct nv_bitstream;
  55. struct nv_texture;
  56. struct handle_tex {
  57. #ifdef _WIN32
  58. uint32_t handle;
  59. ID3D11Texture2D *tex;
  60. IDXGIKeyedMutex *km;
  61. #else
  62. GLuint tex_id;
  63. CUgraphicsResource res_y;
  64. CUgraphicsResource res_uv;
  65. #endif
  66. };
  67. /* ------------------------------------------------------------------------- */
  68. /* Main Implementation Structure */
  /* Codecs this encoder implementation can be instantiated for. */
  enum codec_type {
      CODEC_H264,
      CODEC_HEVC,
      CODEC_AV1,
  };

  /* Returns a short display name for `type`, or "Unknown" for any value that
   * is not one of the enumerators above. */
  static const char *get_codec_name(enum codec_type type)
  {
      if (type == CODEC_H264)
          return "H264";
      if (type == CODEC_HEVC)
          return "HEVC";
      if (type == CODEC_AV1)
          return "AV1";

      return "Unknown";
  }
  86. struct nvenc_data {
  87. obs_encoder_t *encoder;
  88. enum codec_type codec;
  89. GUID codec_guid;
  90. void *session;
  91. NV_ENC_INITIALIZE_PARAMS params;
  92. NV_ENC_CONFIG config;
  93. int rc_lookahead;
  94. uint32_t buf_count;
  95. int output_delay;
  96. int buffers_queued;
  97. size_t next_bitstream;
  98. size_t cur_bitstream;
  99. bool encode_started;
  100. bool first_packet;
  101. bool can_change_bitrate;
  102. bool needs_compat_ver;
  103. bool fallback;
  104. int32_t bframes;
  105. DARRAY(struct handle_tex) input_textures;
  106. DARRAY(struct nv_bitstream) bitstreams;
  107. DARRAY(struct nv_cuda_surface) surfaces;
  108. NV_ENC_BUFFER_FORMAT surface_format;
  109. struct deque dts_list;
  110. DARRAY(uint8_t) packet_data;
  111. int64_t packet_pts;
  112. bool packet_keyframe;
  113. #ifdef _WIN32
  114. DARRAY(struct nv_texture) textures;
  115. ID3D11Device *device;
  116. ID3D11DeviceContext *context;
  117. #endif
  118. uint32_t cx;
  119. uint32_t cy;
  120. enum video_format in_format;
  121. uint8_t *header;
  122. size_t header_size;
  123. uint8_t *sei;
  124. size_t sei_size;
  125. int8_t *roi_map;
  126. size_t roi_map_size;
  127. uint32_t roi_increment;
  128. CUcontext cu_ctx;
  129. };
  130. /* ------------------------------------------------------------------------- */
  131. /* Bitstream Buffer */
  /* Wraps one NVENC output bitstream buffer handle. */
  struct nv_bitstream {
      void *ptr;
  };

  /* Shorthands around nv_fail()/nv_failed(); both need `enc` in scope.
   * NV_FAILED also forwards the calling function's name and the literal text
   * of the checked expression. */
  #define NV_FAIL(format, ...) nv_fail(enc->encoder, format, ##__VA_ARGS__)
  #define NV_FAILED(x) nv_failed(enc->encoder, x, __FUNCTION__, #x)
  137. static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs)
  138. {
  139. NV_ENC_CREATE_BITSTREAM_BUFFER buf = {
  140. NV_ENC_CREATE_BITSTREAM_BUFFER_VER};
  141. if (NV_FAILED(nv.nvEncCreateBitstreamBuffer(enc->session, &buf))) {
  142. return false;
  143. }
  144. bs->ptr = buf.bitstreamBuffer;
  145. return true;
  146. }
  147. static void nv_bitstream_free(struct nvenc_data *enc, struct nv_bitstream *bs)
  148. {
  149. if (bs->ptr) {
  150. nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr);
  151. }
  152. }
  153. /* ------------------------------------------------------------------------- */
  154. /* Texture Resource */
  155. #ifdef _WIN32
  156. struct nv_texture {
  157. void *res;
  158. ID3D11Texture2D *tex;
  159. void *mapped_res;
  160. };
  161. static bool nv_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex)
  162. {
  163. const bool p010 = obs_p010_tex_active();
  164. D3D11_TEXTURE2D_DESC desc = {0};
  165. desc.Width = enc->cx;
  166. desc.Height = enc->cy;
  167. desc.MipLevels = 1;
  168. desc.ArraySize = 1;
  169. desc.Format = p010 ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12;
  170. desc.SampleDesc.Count = 1;
  171. desc.BindFlags = D3D11_BIND_RENDER_TARGET;
  172. ID3D11Device *const device = enc->device;
  173. ID3D11Texture2D *tex;
  174. HRESULT hr = device->lpVtbl->CreateTexture2D(device, &desc, NULL, &tex);
  175. if (FAILED(hr)) {
  176. error_hr("Failed to create texture");
  177. return false;
  178. }
  179. tex->lpVtbl->SetEvictionPriority(tex, DXGI_RESOURCE_PRIORITY_MAXIMUM);
  180. uint32_t struct_ver = enc->needs_compat_ver
  181. ? NV_ENC_REGISTER_RESOURCE_COMPAT_VER
  182. : NV_ENC_REGISTER_RESOURCE_VER;
  183. NV_ENC_REGISTER_RESOURCE res = {struct_ver};
  184. res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
  185. res.resourceToRegister = tex;
  186. res.width = enc->cx;
  187. res.height = enc->cy;
  188. res.bufferFormat = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  189. : NV_ENC_BUFFER_FORMAT_NV12;
  190. if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) {
  191. tex->lpVtbl->Release(tex);
  192. return false;
  193. }
  194. nvtex->res = res.registeredResource;
  195. nvtex->tex = tex;
  196. nvtex->mapped_res = NULL;
  197. return true;
  198. }
  199. static void nv_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex)
  200. {
  201. if (nvtex->res) {
  202. if (nvtex->mapped_res) {
  203. nv.nvEncUnmapInputResource(enc->session,
  204. nvtex->mapped_res);
  205. }
  206. nv.nvEncUnregisterResource(enc->session, nvtex->res);
  207. nvtex->tex->lpVtbl->Release(nvtex->tex);
  208. }
  209. }
  210. #endif
  211. /* ------------------------------------------------------------------------- */
  212. /* CUDA Stuff */
  213. /* CUDA error handling */
  214. static inline bool cuda_error_check(struct nvenc_data *enc, CUresult res,
  215. const char *func, const char *call)
  216. {
  217. if (res == CUDA_SUCCESS)
  218. return true;
  219. const char *name, *desc;
  220. if (cuda_get_error_desc(res, &name, &desc)) {
  221. error("%s: CUDA call \"%s\" failed with %s (%d): %s", func,
  222. call, name, res, desc);
  223. } else {
  224. error("%s: CUDA call \"%s\" failed with %d", func, call, res);
  225. }
  226. return false;
  227. }
  /* Evaluates a CUDA call and returns false from the enclosing function when
   * it fails. Requires `enc` in scope. Expands to a bare `if` statement, so
   * it may be written with or without a trailing semicolon. */
  #define CU_FAILED(call)                                        \
      if (!cuda_error_check(enc, call, __FUNCTION__, #call)) \
          return false;

  /* Same check for functions with a cleanup path: on failure sets the local
   * `success` to false and jumps to the local label `unmap`, both of which
   * the enclosing function must provide. */
  #define CU_CHECK(call)                                           \
      if (!cuda_error_check(enc, call, __FUNCTION__, #call)) { \
          success = false;                                 \
          goto unmap;                                      \
      }
  236. /* CUDA Surfaces */
  237. struct nv_cuda_surface {
  238. CUarray tex;
  239. NV_ENC_REGISTERED_PTR res;
  240. NV_ENC_INPUT_PTR *mapped_res;
  241. };
  242. /* Missing from ffmpeg nvcodec headers, required for CUDA arrays to be usable in NVENC */
  243. static const int CUDA_ARRAY3D_SURFACE_LDST = 0x02;
  244. static bool nv_cuda_surface_init(struct nvenc_data *enc,
  245. struct nv_cuda_surface *nvsurf)
  246. {
  247. const bool p010 = obs_p010_tex_active();
  248. CUDA_ARRAY3D_DESCRIPTOR desc;
  249. desc.Width = enc->cx;
  250. desc.Height = enc->cy;
  251. desc.Depth = 0;
  252. desc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
  253. desc.NumChannels = 1;
  254. if (!enc->fallback) {
  255. desc.Format = p010 ? CU_AD_FORMAT_UNSIGNED_INT16
  256. : CU_AD_FORMAT_UNSIGNED_INT8;
  257. desc.Height = enc->cy + enc->cy / 2;
  258. } else {
  259. switch (enc->surface_format) {
  260. case NV_ENC_BUFFER_FORMAT_NV12:
  261. desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
  262. // Additional half-height plane for UV data
  263. desc.Height += enc->cy / 2;
  264. break;
  265. case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
  266. desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
  267. desc.Height += enc->cy / 2;
  268. desc.NumChannels = 2; // number of bytes per element
  269. break;
  270. case NV_ENC_BUFFER_FORMAT_YUV444:
  271. desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
  272. desc.Height *= 3; // 3 full-size planes
  273. break;
  274. default:
  275. error("Unknown input format: %d", enc->surface_format);
  276. return false;
  277. }
  278. }
  279. CU_FAILED(cu->cuArray3DCreate(&nvsurf->tex, &desc))
  280. NV_ENC_REGISTER_RESOURCE res = {0};
  281. res.version = enc->needs_compat_ver
  282. ? NV_ENC_REGISTER_RESOURCE_COMPAT_VER
  283. : NV_ENC_REGISTER_RESOURCE_VER;
  284. res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY;
  285. res.resourceToRegister = (void *)nvsurf->tex;
  286. res.width = enc->cx;
  287. res.height = enc->cy;
  288. res.pitch = (uint32_t)(desc.Width * desc.NumChannels);
  289. if (!enc->fallback) {
  290. res.bufferFormat = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  291. : NV_ENC_BUFFER_FORMAT_NV12;
  292. } else {
  293. res.bufferFormat = enc->surface_format;
  294. }
  295. if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) {
  296. return false;
  297. }
  298. nvsurf->res = res.registeredResource;
  299. nvsurf->mapped_res = NULL;
  300. return true;
  301. }
  302. static void nv_cuda_surface_free(struct nvenc_data *enc,
  303. struct nv_cuda_surface *nvsurf)
  304. {
  305. if (nvsurf->res) {
  306. if (nvsurf->mapped_res) {
  307. nv.nvEncUnmapInputResource(enc->session,
  308. nvsurf->mapped_res);
  309. }
  310. nv.nvEncUnregisterResource(enc->session, nvsurf->res);
  311. cu->cuArrayDestroy(nvsurf->tex);
  312. }
  313. }
  314. /* ------------------------------------------------------------------------- */
  315. /* Implementation */
  /* Display name of the H.264 encoder; `type_data` is ignored. */
  static const char *h264_nvenc_get_name(void *type_data)
  {
      static const char *const display_name = "NVIDIA NVENC H.264";

      UNUSED_PARAMETER(type_data);
      return display_name;
  }
  /* Display name of the H.264 fallback encoder; `type_data` is ignored. */
  static const char *h264_nvenc_soft_get_name(void *type_data)
  {
      static const char *const display_name = "NVIDIA NVENC H.264 (Fallback)";

      UNUSED_PARAMETER(type_data);
      return display_name;
  }
  326. #ifdef ENABLE_HEVC
  /* Display name of the HEVC encoder; `type_data` is ignored. */
  static const char *hevc_nvenc_get_name(void *type_data)
  {
      static const char *const display_name = "NVIDIA NVENC HEVC";

      UNUSED_PARAMETER(type_data);
      return display_name;
  }
  /* Display name of the HEVC fallback encoder; `type_data` is ignored. */
  static const char *hevc_nvenc_soft_get_name(void *type_data)
  {
      static const char *const display_name = "NVIDIA NVENC HEVC (Fallback)";

      UNUSED_PARAMETER(type_data);
      return display_name;
  }
  337. #endif
  /* Display name of the AV1 encoder; `type_data` is ignored. */
  static const char *av1_nvenc_get_name(void *type_data)
  {
      static const char *const display_name = "NVIDIA NVENC AV1";

      UNUSED_PARAMETER(type_data);
      return display_name;
  }
  /* Display name of the AV1 fallback encoder; `type_data` is ignored. */
  static const char *av1_nvenc_soft_get_name(void *type_data)
  {
      static const char *const display_name = "NVIDIA NVENC AV1 (Fallback)";

      UNUSED_PARAMETER(type_data);
      return display_name;
  }
  348. static inline int nv_get_cap(struct nvenc_data *enc, NV_ENC_CAPS cap)
  349. {
  350. if (!enc->session)
  351. return 0;
  352. NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
  353. int v;
  354. param.capsToQuery = cap;
  355. nv.nvEncGetEncodeCaps(enc->session, enc->codec_guid, &param, &v);
  356. return v;
  357. }
  358. static bool nvenc_update(void *data, obs_data_t *settings)
  359. {
  360. struct nvenc_data *enc = data;
  361. /* Only support reconfiguration of CBR bitrate */
  362. if (enc->can_change_bitrate) {
  363. int bitrate = (int)obs_data_get_int(settings, "bitrate");
  364. int max_bitrate =
  365. (int)obs_data_get_int(settings, "max_bitrate");
  366. bool vbr = (enc->config.rcParams.rateControlMode ==
  367. NV_ENC_PARAMS_RC_VBR);
  368. enc->config.rcParams.averageBitRate = bitrate * 1000;
  369. enc->config.rcParams.maxBitRate = vbr ? max_bitrate * 1000
  370. : bitrate * 1000;
  371. NV_ENC_RECONFIGURE_PARAMS params = {0};
  372. params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
  373. params.reInitEncodeParams = enc->params;
  374. params.resetEncoder = 1;
  375. params.forceIDR = 1;
  376. if (NV_FAILED(nv.nvEncReconfigureEncoder(enc->session,
  377. &params))) {
  378. return false;
  379. }
  380. }
  381. return true;
  382. }
  383. #ifdef _WIN32
  384. static HANDLE get_lib(struct nvenc_data *enc, const char *lib)
  385. {
  386. HMODULE mod = GetModuleHandleA(lib);
  387. if (mod)
  388. return mod;
  389. mod = LoadLibraryA(lib);
  390. if (!mod)
  391. error("Failed to load %s", lib);
  392. return mod;
  393. }
  394. typedef HRESULT(WINAPI *CREATEDXGIFACTORY1PROC)(REFIID, void **);
  395. static bool init_d3d11(struct nvenc_data *enc, obs_data_t *settings)
  396. {
  397. HMODULE dxgi = get_lib(enc, "DXGI.dll");
  398. HMODULE d3d11 = get_lib(enc, "D3D11.dll");
  399. CREATEDXGIFACTORY1PROC create_dxgi;
  400. PFN_D3D11_CREATE_DEVICE create_device;
  401. IDXGIFactory1 *factory;
  402. IDXGIAdapter *adapter;
  403. ID3D11Device *device;
  404. ID3D11DeviceContext *context;
  405. HRESULT hr;
  406. if (!dxgi || !d3d11) {
  407. return false;
  408. }
  409. create_dxgi = (CREATEDXGIFACTORY1PROC)GetProcAddress(
  410. dxgi, "CreateDXGIFactory1");
  411. create_device = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(
  412. d3d11, "D3D11CreateDevice");
  413. if (!create_dxgi || !create_device) {
  414. error("Failed to load D3D11/DXGI procedures");
  415. return false;
  416. }
  417. hr = create_dxgi(&IID_IDXGIFactory1, &factory);
  418. if (FAILED(hr)) {
  419. error_hr("CreateDXGIFactory1 failed");
  420. return false;
  421. }
  422. hr = factory->lpVtbl->EnumAdapters(factory, 0, &adapter);
  423. factory->lpVtbl->Release(factory);
  424. if (FAILED(hr)) {
  425. error_hr("EnumAdapters failed");
  426. return false;
  427. }
  428. hr = create_device(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0,
  429. D3D11_SDK_VERSION, &device, NULL, &context);
  430. adapter->lpVtbl->Release(adapter);
  431. if (FAILED(hr)) {
  432. error_hr("D3D11CreateDevice failed");
  433. return false;
  434. }
  435. enc->device = device;
  436. enc->context = context;
  437. return true;
  438. }
  439. #endif
  440. static bool init_session(struct nvenc_data *enc)
  441. {
  442. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {
  443. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
  444. params.apiVersion = enc->needs_compat_ver ? NVENC_COMPAT_VER
  445. : NVENCAPI_VERSION;
  446. #ifdef _WIN32
  447. if (enc->fallback) {
  448. params.device = enc->cu_ctx;
  449. params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  450. } else {
  451. params.device = enc->device;
  452. params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
  453. }
  454. #else
  455. params.device = enc->cu_ctx;
  456. params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  457. #endif
  458. if (NV_FAILED(nv.nvEncOpenEncodeSessionEx(&params, &enc->session))) {
  459. return false;
  460. }
  461. return true;
  462. }
  463. static void initialize_params(struct nvenc_data *enc, const GUID *nv_preset,
  464. NV_ENC_TUNING_INFO nv_tuning, uint32_t width,
  465. uint32_t height, uint32_t fps_num,
  466. uint32_t fps_den)
  467. {
  468. int darWidth, darHeight;
  469. av_reduce(&darWidth, &darHeight, width, height, 1024 * 1024);
  470. NV_ENC_INITIALIZE_PARAMS *params = &enc->params;
  471. memset(params, 0, sizeof(*params));
  472. params->version = enc->needs_compat_ver
  473. ? NV_ENC_INITIALIZE_PARAMS_COMPAT_VER
  474. : NV_ENC_INITIALIZE_PARAMS_VER;
  475. params->encodeGUID = enc->codec_guid;
  476. params->presetGUID = *nv_preset;
  477. params->encodeWidth = width;
  478. params->encodeHeight = height;
  479. params->darWidth = enc->codec == CODEC_AV1 ? width : (uint32_t)darWidth;
  480. params->darHeight = enc->codec == CODEC_AV1 ? height
  481. : (uint32_t)darHeight;
  482. params->frameRateNum = fps_num;
  483. params->frameRateDen = fps_den;
  484. params->enableEncodeAsync = 0;
  485. params->enablePTD = 1;
  486. params->encodeConfig = &enc->config;
  487. params->tuningInfo = nv_tuning;
  488. }
  489. static inline GUID get_nv_preset2(const char *preset2)
  490. {
  491. if (astrcmpi(preset2, "p1") == 0) {
  492. return NV_ENC_PRESET_P1_GUID;
  493. } else if (astrcmpi(preset2, "p2") == 0) {
  494. return NV_ENC_PRESET_P2_GUID;
  495. } else if (astrcmpi(preset2, "p3") == 0) {
  496. return NV_ENC_PRESET_P3_GUID;
  497. } else if (astrcmpi(preset2, "p4") == 0) {
  498. return NV_ENC_PRESET_P4_GUID;
  499. } else if (astrcmpi(preset2, "p6") == 0) {
  500. return NV_ENC_PRESET_P6_GUID;
  501. } else if (astrcmpi(preset2, "p7") == 0) {
  502. return NV_ENC_PRESET_P7_GUID;
  503. } else {
  504. return NV_ENC_PRESET_P5_GUID;
  505. }
  506. }
  507. static inline NV_ENC_TUNING_INFO get_nv_tuning(const char *tuning)
  508. {
  509. if (astrcmpi(tuning, "ll") == 0) {
  510. return NV_ENC_TUNING_INFO_LOW_LATENCY;
  511. } else if (astrcmpi(tuning, "ull") == 0) {
  512. return NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
  513. } else {
  514. return NV_ENC_TUNING_INFO_HIGH_QUALITY;
  515. }
  516. }
  517. static inline NV_ENC_MULTI_PASS get_nv_multipass(const char *multipass)
  518. {
  519. if (astrcmpi(multipass, "qres") == 0) {
  520. return NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  521. } else if (astrcmpi(multipass, "fullres") == 0) {
  522. return NV_ENC_TWO_PASS_FULL_RESOLUTION;
  523. } else {
  524. return NV_ENC_MULTI_PASS_DISABLED;
  525. }
  526. }
  527. static bool is_10_bit(const struct nvenc_data *enc)
  528. {
  529. return enc->fallback ? enc->in_format == VIDEO_FORMAT_P010
  530. : obs_p010_tex_active();
  531. }
  532. static bool init_encoder_base(struct nvenc_data *enc, obs_data_t *settings,
  533. int bf, bool compatibility, bool *lossless)
  534. {
  535. const char *rc = obs_data_get_string(settings, "rate_control");
  536. int bitrate = (int)obs_data_get_int(settings, "bitrate");
  537. int max_bitrate = (int)obs_data_get_int(settings, "max_bitrate");
  538. int cqp = (int)obs_data_get_int(settings, "cqp");
  539. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  540. const char *preset = obs_data_get_string(settings, "preset");
  541. const char *preset2 = obs_data_get_string(settings, "preset2");
  542. const char *tuning = obs_data_get_string(settings, "tune");
  543. const char *multipass = obs_data_get_string(settings, "multipass");
  544. const char *profile = obs_data_get_string(settings, "profile");
  545. bool lookahead = obs_data_get_bool(settings, "lookahead");
  546. bool vbr = astrcmpi(rc, "VBR") == 0;
  547. bool psycho_aq = !compatibility &&
  548. obs_data_get_bool(settings, "psycho_aq");
  549. bool disable_scenecut = obs_data_get_bool(settings, "disable_scenecut");
  550. NVENCSTATUS err;
  551. video_t *video = obs_encoder_video(enc->encoder);
  552. const struct video_output_info *voi = video_output_get_info(video);
  553. enc->cx = obs_encoder_get_width(enc->encoder);
  554. enc->cy = obs_encoder_get_height(enc->encoder);
  555. /* -------------------------- */
  556. /* get preset */
  557. GUID nv_preset = get_nv_preset2(preset2);
  558. NV_ENC_TUNING_INFO nv_tuning = get_nv_tuning(tuning);
  559. NV_ENC_MULTI_PASS nv_multipass = compatibility
  560. ? NV_ENC_MULTI_PASS_DISABLED
  561. : get_nv_multipass(multipass);
  562. if (obs_data_has_user_value(settings, "preset") &&
  563. !obs_data_has_user_value(settings, "preset2") &&
  564. enc->codec == CODEC_H264) {
  565. if (astrcmpi(preset, "mq") == 0) {
  566. nv_preset = NV_ENC_PRESET_P5_GUID;
  567. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  568. nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  569. } else if (astrcmpi(preset, "hq") == 0) {
  570. nv_preset = NV_ENC_PRESET_P5_GUID;
  571. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  572. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  573. } else if (astrcmpi(preset, "default") == 0) {
  574. nv_preset = NV_ENC_PRESET_P3_GUID;
  575. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  576. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  577. } else if (astrcmpi(preset, "hp") == 0) {
  578. nv_preset = NV_ENC_PRESET_P1_GUID;
  579. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  580. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  581. } else if (astrcmpi(preset, "ll") == 0) {
  582. nv_preset = NV_ENC_PRESET_P3_GUID;
  583. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  584. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  585. } else if (astrcmpi(preset, "llhq") == 0) {
  586. nv_preset = NV_ENC_PRESET_P4_GUID;
  587. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  588. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  589. } else if (astrcmpi(preset, "llhp") == 0) {
  590. nv_preset = NV_ENC_PRESET_P2_GUID;
  591. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  592. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  593. }
  594. } else if (obs_data_has_user_value(settings, "preset") &&
  595. !obs_data_has_user_value(settings, "preset2") &&
  596. enc->codec == CODEC_HEVC) {
  597. if (astrcmpi(preset, "mq") == 0) {
  598. nv_preset = NV_ENC_PRESET_P6_GUID;
  599. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  600. nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  601. } else if (astrcmpi(preset, "hq") == 0) {
  602. nv_preset = NV_ENC_PRESET_P6_GUID;
  603. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  604. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  605. } else if (astrcmpi(preset, "default") == 0) {
  606. nv_preset = NV_ENC_PRESET_P5_GUID;
  607. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  608. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  609. } else if (astrcmpi(preset, "hp") == 0) {
  610. nv_preset = NV_ENC_PRESET_P1_GUID;
  611. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  612. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  613. } else if (astrcmpi(preset, "ll") == 0) {
  614. nv_preset = NV_ENC_PRESET_P3_GUID;
  615. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  616. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  617. } else if (astrcmpi(preset, "llhq") == 0) {
  618. nv_preset = NV_ENC_PRESET_P4_GUID;
  619. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  620. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  621. } else if (astrcmpi(preset, "llhp") == 0) {
  622. nv_preset = NV_ENC_PRESET_P2_GUID;
  623. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  624. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  625. }
  626. }
  627. const bool rc_lossless = astrcmpi(rc, "lossless") == 0;
  628. *lossless = rc_lossless;
  629. if (rc_lossless) {
  630. *lossless =
  631. nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
  632. if (*lossless) {
  633. nv_tuning = NV_ENC_TUNING_INFO_LOSSLESS;
  634. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  635. } else {
  636. warn("lossless encode is not supported, ignoring");
  637. nv_preset = NV_ENC_PRESET_P5_GUID;
  638. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  639. nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  640. }
  641. }
  642. /* -------------------------- */
  643. /* get preset default config */
  644. uint32_t config_ver = enc->needs_compat_ver ? NV_ENC_CONFIG_COMPAT_VER
  645. : NV_ENC_CONFIG_VER;
  646. NV_ENC_PRESET_CONFIG preset_config = {NV_ENC_PRESET_CONFIG_VER,
  647. {config_ver}};
  648. err = nv.nvEncGetEncodePresetConfigEx(enc->session, enc->codec_guid,
  649. nv_preset, nv_tuning,
  650. &preset_config);
  651. if (nv_failed(enc->encoder, err, __FUNCTION__,
  652. "nvEncGetEncodePresetConfig")) {
  653. return false;
  654. }
  655. /* -------------------------- */
  656. /* main configuration */
  657. enc->config = preset_config.presetCfg;
  658. uint32_t gop_size =
  659. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  660. NV_ENC_CONFIG *config = &enc->config;
  661. initialize_params(enc, &nv_preset, nv_tuning, voi->width, voi->height,
  662. voi->fps_num, voi->fps_den);
  663. config->gopLength = gop_size;
  664. config->frameIntervalP = 1 + bf;
  665. enc->bframes = bf;
  666. /* lookahead */
  667. const bool use_profile_lookahead = config->rcParams.enableLookahead;
  668. lookahead = nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOOKAHEAD) &&
  669. (lookahead || use_profile_lookahead);
  670. if (lookahead) {
  671. enc->rc_lookahead = use_profile_lookahead
  672. ? config->rcParams.lookaheadDepth
  673. : 8;
  674. }
  675. int buf_count = max(4, config->frameIntervalP * 2 * 2);
  676. if (lookahead) {
  677. buf_count = max(buf_count, config->frameIntervalP +
  678. enc->rc_lookahead +
  679. EXTRA_BUFFERS);
  680. }
  681. buf_count = min(64, buf_count);
  682. enc->buf_count = buf_count;
  683. const int output_delay = buf_count - 1;
  684. enc->output_delay = output_delay;
  685. if (lookahead) {
  686. const int lkd_bound = output_delay - config->frameIntervalP - 4;
  687. if (lkd_bound >= 0) {
  688. config->rcParams.enableLookahead = 1;
  689. config->rcParams.lookaheadDepth =
  690. max(enc->rc_lookahead, lkd_bound);
  691. config->rcParams.disableIadapt = 0;
  692. config->rcParams.disableBadapt = 0;
  693. } else {
  694. lookahead = false;
  695. }
  696. }
  697. enc->config.rcParams.disableIadapt = disable_scenecut;
  698. /* psycho aq */
  699. if (!compatibility) {
  700. if (nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ)) {
  701. config->rcParams.enableAQ = psycho_aq;
  702. config->rcParams.aqStrength = 8;
  703. config->rcParams.enableTemporalAQ = psycho_aq;
  704. } else {
  705. warn("Ignoring Psycho Visual Tuning request since GPU is not capable");
  706. }
  707. }
  708. /* -------------------------- */
  709. /* rate control */
  710. enc->can_change_bitrate =
  711. nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
  712. config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
  713. if (astrcmpi(rc, "cqp") == 0 || rc_lossless) {
  714. if (*lossless)
  715. cqp = 0;
  716. int cqp_val = enc->codec == CODEC_AV1 ? cqp * 4 : cqp;
  717. config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
  718. config->rcParams.constQP.qpInterP = cqp_val;
  719. config->rcParams.constQP.qpInterB = cqp_val;
  720. config->rcParams.constQP.qpIntra = cqp_val;
  721. enc->can_change_bitrate = false;
  722. bitrate = 0;
  723. max_bitrate = 0;
  724. } else if (astrcmpi(rc, "vbr") != 0) { /* CBR by default */
  725. config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
  726. }
  727. config->rcParams.averageBitRate = bitrate * 1000;
  728. config->rcParams.maxBitRate = vbr ? max_bitrate * 1000 : bitrate * 1000;
  729. config->rcParams.vbvBufferSize = bitrate * 1000;
  730. config->rcParams.multiPass = nv_multipass;
  731. config->rcParams.qpMapMode = NV_ENC_QP_MAP_DELTA;
  732. /* -------------------------- */
  733. /* initialize */
  734. info("settings:\n"
  735. "\tcodec: %s\n"
  736. "\trate_control: %s\n"
  737. "\tbitrate: %d\n"
  738. "\tcqp: %d\n"
  739. "\tkeyint: %d\n"
  740. "\tpreset: %s\n"
  741. "\ttuning: %s\n"
  742. "\tmultipass: %s\n"
  743. "\tprofile: %s\n"
  744. "\twidth: %d\n"
  745. "\theight: %d\n"
  746. "\tb-frames: %d\n"
  747. "\tlookahead: %s\n"
  748. "\tpsycho_aq: %s\n",
  749. get_codec_name(enc->codec), rc, bitrate, cqp, gop_size, preset2,
  750. tuning, multipass, profile, enc->cx, enc->cy, bf,
  751. lookahead ? "true" : "false", psycho_aq ? "true" : "false");
  752. return true;
  753. }
  754. static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings,
  755. int bf, bool compatibility)
  756. {
  757. const char *rc = obs_data_get_string(settings, "rate_control");
  758. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  759. const char *profile = obs_data_get_string(settings, "profile");
  760. bool lossless;
  761. if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) {
  762. return false;
  763. }
  764. NV_ENC_CONFIG *config = &enc->config;
  765. NV_ENC_CONFIG_H264 *h264_config = &config->encodeCodecConfig.h264Config;
  766. NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui_params =
  767. &h264_config->h264VUIParameters;
  768. video_t *video = obs_encoder_video(enc->encoder);
  769. const struct video_output_info *voi = video_output_get_info(video);
  770. uint32_t gop_size =
  771. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  772. h264_config->idrPeriod = gop_size;
  773. bool repeat_headers = obs_data_get_bool(settings, "repeat_headers");
  774. if (repeat_headers) {
  775. h264_config->repeatSPSPPS = 1;
  776. h264_config->disableSPSPPS = 0;
  777. h264_config->outputAUD = 1;
  778. }
  779. h264_config->sliceMode = 3;
  780. h264_config->sliceModeData = 1;
  781. h264_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED;
  782. /* Enable CBR padding */
  783. if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
  784. h264_config->enableFillerDataInsertion = 1;
  785. vui_params->videoSignalTypePresentFlag = 1;
  786. vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL);
  787. vui_params->colourDescriptionPresentFlag = 1;
  788. switch (voi->colorspace) {
  789. case VIDEO_CS_601:
  790. vui_params->colourPrimaries = 6;
  791. vui_params->transferCharacteristics = 6;
  792. vui_params->colourMatrix = 6;
  793. break;
  794. case VIDEO_CS_DEFAULT:
  795. case VIDEO_CS_709:
  796. vui_params->colourPrimaries = 1;
  797. vui_params->transferCharacteristics = 1;
  798. vui_params->colourMatrix = 1;
  799. break;
  800. case VIDEO_CS_SRGB:
  801. vui_params->colourPrimaries = 1;
  802. vui_params->transferCharacteristics = 13;
  803. vui_params->colourMatrix = 1;
  804. break;
  805. default:
  806. break;
  807. }
  808. if (astrcmpi(rc, "lossless") == 0) {
  809. h264_config->qpPrimeYZeroTransformBypassFlag = 1;
  810. } else if (astrcmpi(rc, "vbr") != 0) { /* CBR */
  811. h264_config->outputBufferingPeriodSEI = 1;
  812. }
  813. h264_config->outputPictureTimingSEI = 1;
  814. /* -------------------------- */
  815. /* profile */
  816. if (enc->in_format == VIDEO_FORMAT_I444) {
  817. config->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
  818. h264_config->chromaFormatIDC = 3;
  819. } else if (astrcmpi(profile, "main") == 0) {
  820. config->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
  821. } else if (astrcmpi(profile, "baseline") == 0) {
  822. config->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
  823. } else if (!lossless) {
  824. config->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
  825. }
  826. if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) {
  827. return false;
  828. }
  829. return true;
  830. }
  831. static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings,
  832. int bf, bool compatibility)
  833. {
  834. const char *rc = obs_data_get_string(settings, "rate_control");
  835. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  836. const char *profile = obs_data_get_string(settings, "profile");
  837. bool lossless;
  838. if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) {
  839. return false;
  840. }
  841. NV_ENC_CONFIG *config = &enc->config;
  842. NV_ENC_CONFIG_HEVC *hevc_config = &config->encodeCodecConfig.hevcConfig;
  843. NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui_params =
  844. &hevc_config->hevcVUIParameters;
  845. video_t *video = obs_encoder_video(enc->encoder);
  846. const struct video_output_info *voi = video_output_get_info(video);
  847. uint32_t gop_size =
  848. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  849. hevc_config->idrPeriod = gop_size;
  850. bool repeat_headers = obs_data_get_bool(settings, "repeat_headers");
  851. if (repeat_headers) {
  852. hevc_config->repeatSPSPPS = 1;
  853. hevc_config->disableSPSPPS = 0;
  854. hevc_config->outputAUD = 1;
  855. }
  856. hevc_config->sliceMode = 3;
  857. hevc_config->sliceModeData = 1;
  858. hevc_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED;
  859. /* Enable CBR padding */
  860. if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
  861. hevc_config->enableFillerDataInsertion = 1;
  862. vui_params->videoSignalTypePresentFlag = 1;
  863. vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL);
  864. vui_params->colourDescriptionPresentFlag = 1;
  865. switch (voi->colorspace) {
  866. case VIDEO_CS_601:
  867. vui_params->colourPrimaries = 6;
  868. vui_params->transferCharacteristics = 6;
  869. vui_params->colourMatrix = 6;
  870. break;
  871. case VIDEO_CS_DEFAULT:
  872. case VIDEO_CS_709:
  873. vui_params->colourPrimaries = 1;
  874. vui_params->transferCharacteristics = 1;
  875. vui_params->colourMatrix = 1;
  876. break;
  877. case VIDEO_CS_SRGB:
  878. vui_params->colourPrimaries = 1;
  879. vui_params->transferCharacteristics = 13;
  880. vui_params->colourMatrix = 1;
  881. break;
  882. case VIDEO_CS_2100_PQ:
  883. vui_params->colourPrimaries = 9;
  884. vui_params->transferCharacteristics = 16;
  885. vui_params->colourMatrix = 9;
  886. vui_params->chromaSampleLocationFlag = 1;
  887. vui_params->chromaSampleLocationTop = 2;
  888. vui_params->chromaSampleLocationBot = 2;
  889. break;
  890. case VIDEO_CS_2100_HLG:
  891. vui_params->colourPrimaries = 9;
  892. vui_params->transferCharacteristics = 18;
  893. vui_params->colourMatrix = 9;
  894. vui_params->chromaSampleLocationFlag = 1;
  895. vui_params->chromaSampleLocationTop = 2;
  896. vui_params->chromaSampleLocationBot = 2;
  897. }
  898. hevc_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0;
  899. if (astrcmpi(rc, "cbr") == 0) {
  900. hevc_config->outputBufferingPeriodSEI = 1;
  901. }
  902. hevc_config->outputPictureTimingSEI = 1;
  903. /* -------------------------- */
  904. /* profile */
  905. if (enc->in_format == VIDEO_FORMAT_I444) {
  906. config->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
  907. hevc_config->chromaFormatIDC = 3;
  908. } else if (astrcmpi(profile, "main10") == 0) {
  909. config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
  910. } else if (is_10_bit(enc)) {
  911. blog(LOG_WARNING, "[obs-nvenc] Forcing main10 for P010");
  912. config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
  913. } else {
  914. config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
  915. }
  916. if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) {
  917. return false;
  918. }
  919. return true;
  920. }
  921. static bool init_encoder_av1(struct nvenc_data *enc, obs_data_t *settings,
  922. int bf, bool compatibility)
  923. {
  924. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  925. bool lossless;
  926. if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) {
  927. return false;
  928. }
  929. NV_ENC_CONFIG *config = &enc->config;
  930. NV_ENC_CONFIG_AV1 *av1_config = &config->encodeCodecConfig.av1Config;
  931. video_t *video = obs_encoder_video(enc->encoder);
  932. const struct video_output_info *voi = video_output_get_info(video);
  933. uint32_t gop_size =
  934. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  935. av1_config->idrPeriod = gop_size;
  936. av1_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED;
  937. av1_config->colorRange = (voi->range == VIDEO_RANGE_FULL);
  938. /* Enable CBR padding */
  939. if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
  940. av1_config->enableBitstreamPadding = 1;
  941. #define PIXELCOUNT_4K (3840 * 2160)
  942. /* If size is 4K+, set tiles to 2 uniform columns. */
  943. if ((voi->width * voi->height) >= PIXELCOUNT_4K) {
  944. av1_config->enableCustomTileConfig = 0;
  945. av1_config->numTileColumns = 2;
  946. }
  947. switch (voi->colorspace) {
  948. case VIDEO_CS_601:
  949. av1_config->colorPrimaries = 6;
  950. av1_config->transferCharacteristics = 6;
  951. av1_config->matrixCoefficients = 6;
  952. break;
  953. case VIDEO_CS_DEFAULT:
  954. case VIDEO_CS_709:
  955. av1_config->colorPrimaries = 1;
  956. av1_config->transferCharacteristics = 1;
  957. av1_config->matrixCoefficients = 1;
  958. break;
  959. case VIDEO_CS_SRGB:
  960. av1_config->colorPrimaries = 1;
  961. av1_config->transferCharacteristics = 13;
  962. av1_config->matrixCoefficients = 1;
  963. break;
  964. case VIDEO_CS_2100_PQ:
  965. av1_config->colorPrimaries = 9;
  966. av1_config->transferCharacteristics = 16;
  967. av1_config->matrixCoefficients = 9;
  968. break;
  969. case VIDEO_CS_2100_HLG:
  970. av1_config->colorPrimaries = 9;
  971. av1_config->transferCharacteristics = 18;
  972. av1_config->matrixCoefficients = 9;
  973. }
  974. /* -------------------------- */
  975. /* profile */
  976. config->profileGUID = NV_ENC_AV1_PROFILE_MAIN_GUID;
  977. av1_config->tier = NV_ENC_TIER_AV1_0;
  978. av1_config->level = NV_ENC_LEVEL_AV1_AUTOSELECT;
  979. av1_config->chromaFormatIDC = 1;
  980. av1_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0;
  981. av1_config->inputPixelBitDepthMinus8 = av1_config->pixelBitDepthMinus8;
  982. av1_config->numFwdRefs = 1;
  983. av1_config->numBwdRefs = 1;
  984. av1_config->repeatSeqHdr = 1;
  985. if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) {
  986. return false;
  987. }
  988. return true;
  989. }
  990. static bool init_bitstreams(struct nvenc_data *enc)
  991. {
  992. da_reserve(enc->bitstreams, enc->buf_count);
  993. for (uint32_t i = 0; i < enc->buf_count; i++) {
  994. struct nv_bitstream bitstream;
  995. if (!nv_bitstream_init(enc, &bitstream)) {
  996. return false;
  997. }
  998. da_push_back(enc->bitstreams, &bitstream);
  999. }
  1000. return true;
  1001. }
  #ifdef _WIN32
  /*
   * Creates enc->buf_count encoder textures and appends them to
   * enc->textures (Windows builds only).
   *
   * Returns false as soon as one texture fails to initialize; textures that
   * were already appended stay in the array.
   */
  static bool init_textures(struct nvenc_data *enc)
  {
      da_reserve(enc->textures, enc->buf_count);

      uint32_t created = 0;
      while (created < enc->buf_count) {
          struct nv_texture tex;

          if (!nv_texture_init(enc, &tex))
              return false;

          da_push_back(enc->textures, &tex);
          created++;
      }

      return true;
  }
  #endif
  1016. static bool init_cuda_surfaces(struct nvenc_data *enc)
  1017. {
  1018. switch (enc->in_format) {
  1019. case VIDEO_FORMAT_P010:
  1020. enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
  1021. break;
  1022. case VIDEO_FORMAT_I444:
  1023. enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV444;
  1024. break;
  1025. default:
  1026. enc->surface_format = NV_ENC_BUFFER_FORMAT_NV12;
  1027. }
  1028. da_reserve(enc->surfaces, enc->buf_count);
  1029. CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
  1030. for (uint32_t i = 0; i < enc->buf_count; i++) {
  1031. struct nv_cuda_surface buf;
  1032. if (!nv_cuda_surface_init(enc, &buf)) {
  1033. return false;
  1034. }
  1035. da_push_back(enc->surfaces, &buf);
  1036. }
  1037. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1038. return true;
  1039. }
  1040. static bool init_cuda_ctx(struct nvenc_data *enc, obs_data_t *settings,
  1041. const bool texture)
  1042. {
  1043. int count, gpu;
  1044. CUdevice device;
  1045. bool cuda_override;
  1046. /* Allow CUDA device override for texture encoders (experimental) */
  1047. if (obs_data_has_user_value(settings, "cuda_device")) {
  1048. gpu = (int)obs_data_get_int(settings, "cuda_device");
  1049. cuda_override = true;
  1050. } else {
  1051. gpu = (int)obs_data_get_int(settings, "gpu");
  1052. cuda_override = false;
  1053. }
  1054. CU_FAILED(cu->cuInit(0))
  1055. CU_FAILED(cu->cuDeviceGetCount(&count))
  1056. if (!count) {
  1057. NV_FAIL("No CUDA devices found");
  1058. return false;
  1059. }
  1060. #ifdef _WIN32
  1061. CU_FAILED(cu->cuDeviceGet(&device, gpu))
  1062. #else
  1063. if (!texture || cuda_override) {
  1064. CU_FAILED(cu->cuDeviceGet(&device, gpu))
  1065. } else {
  1066. unsigned int ctx_count = 0;
  1067. CUdevice devices[2];
  1068. obs_enter_graphics();
  1069. CUresult res = cu->cuGLGetDevices(&ctx_count, devices, 2,
  1070. CU_GL_DEVICE_LIST_ALL);
  1071. obs_leave_graphics();
  1072. if (res != CUDA_SUCCESS || !ctx_count) {
  1073. /* CUDA_ERROR_INVALID_GRAPHICS_CONTEXT should be treated
  1074. * as non-fatal fallback (probably running on iGPU). */
  1075. if (res == 219) {
  1076. info("Not running on NVIDIA GPU, falling back to non-texture encoder");
  1077. } else {
  1078. const char *name, *desc;
  1079. if (cuda_get_error_desc(res, &name, &desc)) {
  1080. error("Failed to get a CUDA device for the current OpenGL context: %s: %s",
  1081. name, desc);
  1082. } else {
  1083. error("Failed to get a CUDA device for the current OpenGL context: %d",
  1084. res);
  1085. }
  1086. }
  1087. return false;
  1088. }
  1089. /* Documentation indicates this should only ever happen with SLI, i.e. never for OBS. */
  1090. if (ctx_count > 1) {
  1091. warn("Got more than one CUDA devices for OpenGL context, this is untested.");
  1092. }
  1093. device = devices[0];
  1094. debug("Loading up CUDA on device %u", device);
  1095. }
  1096. #endif
  1097. CU_FAILED(cu->cuCtxCreate(&enc->cu_ctx, 0, device))
  1098. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1099. return true;
  1100. }
  1101. static enum video_format get_preferred_format(enum video_format format)
  1102. {
  1103. switch (format) {
  1104. case VIDEO_FORMAT_I010:
  1105. case VIDEO_FORMAT_P010:
  1106. return VIDEO_FORMAT_P010;
  1107. case VIDEO_FORMAT_RGBA:
  1108. case VIDEO_FORMAT_BGRA:
  1109. case VIDEO_FORMAT_BGRX:
  1110. case VIDEO_FORMAT_I444:
  1111. return VIDEO_FORMAT_I444;
  1112. default:
  1113. return VIDEO_FORMAT_NV12;
  1114. }
  1115. }
  1116. static void nvenc_destroy(void *data);
  1117. static bool init_specific_encoder(struct nvenc_data *enc, obs_data_t *settings,
  1118. int bf, bool compatibility)
  1119. {
  1120. switch (enc->codec) {
  1121. case CODEC_HEVC:
  1122. return init_encoder_hevc(enc, settings, bf, compatibility);
  1123. case CODEC_H264:
  1124. return init_encoder_h264(enc, settings, bf, compatibility);
  1125. case CODEC_AV1:
  1126. return init_encoder_av1(enc, settings, bf, compatibility);
  1127. }
  1128. return false;
  1129. }
  1130. static bool init_encoder(struct nvenc_data *enc, enum codec_type codec,
  1131. obs_data_t *settings, obs_encoder_t *encoder)
  1132. {
  1133. UNUSED_PARAMETER(codec);
  1134. UNUSED_PARAMETER(encoder);
  1135. int bf = (int)obs_data_get_int(settings, "bf");
  1136. const bool support_10bit =
  1137. nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
  1138. const int bf_max = nv_get_cap(enc, NV_ENC_CAPS_NUM_MAX_BFRAMES);
  1139. video_t *video = obs_encoder_video(enc->encoder);
  1140. const struct video_output_info *voi = video_output_get_info(video);
  1141. enc->in_format = get_preferred_format(voi->format);
  1142. if (is_10_bit(enc) && !support_10bit) {
  1143. NV_FAIL(obs_module_text("NVENC.10bitUnsupported"));
  1144. return false;
  1145. }
  1146. switch (voi->format) {
  1147. case VIDEO_FORMAT_I010:
  1148. case VIDEO_FORMAT_P010:
  1149. break;
  1150. default:
  1151. switch (voi->colorspace) {
  1152. case VIDEO_CS_2100_PQ:
  1153. case VIDEO_CS_2100_HLG:
  1154. NV_FAIL(obs_module_text("NVENC.8bitUnsupportedHdr"));
  1155. return false;
  1156. default:
  1157. break;
  1158. }
  1159. }
  1160. if (bf > bf_max) {
  1161. blog(LOG_WARNING,
  1162. "[obs-nvenc] Max B-frames setting (%d) is more than encoder supports (%d).\n"
  1163. "Setting B-frames to %d",
  1164. bf, bf_max, bf_max);
  1165. bf = bf_max;
  1166. }
  1167. if (!init_specific_encoder(enc, settings, bf, false)) {
  1168. blog(LOG_WARNING, "[obs-nvenc] init_specific_encoder failed, "
  1169. "trying again with compatibility options");
  1170. nv.nvEncDestroyEncoder(enc->session);
  1171. enc->session = NULL;
  1172. if (!init_session(enc)) {
  1173. return false;
  1174. }
  1175. /* try without multipass and psycho aq */
  1176. if (!init_specific_encoder(enc, settings, bf, true)) {
  1177. return false;
  1178. }
  1179. }
  1180. return true;
  1181. }
  1182. static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings,
  1183. obs_encoder_t *encoder, bool texture)
  1184. {
  1185. struct nvenc_data *enc = bzalloc(sizeof(*enc));
  1186. enc->encoder = encoder;
  1187. enc->codec = codec;
  1188. enc->first_packet = true;
  1189. enc->fallback = !texture;
  1190. if (get_nvenc_ver() == COMPATIBILITY_VERSION) {
  1191. enc->needs_compat_ver = true;
  1192. }
  1193. NV_ENCODE_API_FUNCTION_LIST init = {NV_ENCODE_API_FUNCTION_LIST_VER};
  1194. switch (enc->codec) {
  1195. case CODEC_H264:
  1196. enc->codec_guid = NV_ENC_CODEC_H264_GUID;
  1197. break;
  1198. case CODEC_HEVC:
  1199. enc->codec_guid = NV_ENC_CODEC_HEVC_GUID;
  1200. break;
  1201. case CODEC_AV1:
  1202. enc->codec_guid = NV_ENC_CODEC_AV1_GUID;
  1203. break;
  1204. }
  1205. if (!init_nvenc(encoder)) {
  1206. goto fail;
  1207. }
  1208. if (
  1209. #ifdef _WIN32
  1210. !texture &&
  1211. #endif
  1212. !init_cuda(encoder)) {
  1213. goto fail;
  1214. }
  1215. if (NV_FAILED(nv_create_instance(&init))) {
  1216. goto fail;
  1217. }
  1218. #ifdef _WIN32
  1219. if (texture && !init_d3d11(enc, settings)) {
  1220. goto fail;
  1221. }
  1222. #endif
  1223. if (
  1224. #ifdef _WIN32
  1225. !texture &&
  1226. #endif
  1227. !init_cuda_ctx(enc, settings, texture)) {
  1228. goto fail;
  1229. }
  1230. if (!init_session(enc)) {
  1231. goto fail;
  1232. }
  1233. if (!init_encoder(enc, codec, settings, encoder)) {
  1234. goto fail;
  1235. }
  1236. if (!init_bitstreams(enc)) {
  1237. goto fail;
  1238. }
  1239. #ifdef _WIN32
  1240. if (texture && !init_textures(enc)) {
  1241. goto fail;
  1242. }
  1243. #endif
  1244. if (
  1245. #ifdef _WIN32
  1246. !texture &&
  1247. #endif
  1248. !init_cuda_surfaces(enc)) {
  1249. goto fail;
  1250. }
  1251. enc->codec = codec;
  1252. if (enc->cu_ctx)
  1253. cu->cuCtxPopCurrent(NULL);
  1254. return enc;
  1255. fail:
  1256. nvenc_destroy(enc);
  1257. return NULL;
  1258. }
  1259. static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings,
  1260. obs_encoder_t *encoder, bool texture)
  1261. {
  1262. /* this encoder requires shared textures, this cannot be used on a
  1263. * gpu other than the one OBS is currently running on. */
  1264. const int gpu = (int)obs_data_get_int(settings, "gpu");
  1265. if (gpu != 0 && texture) {
  1266. blog(LOG_INFO,
  1267. "[obs-nvenc] different GPU selected by user, falling back "
  1268. "to non-texture encoder");
  1269. goto reroute;
  1270. }
  1271. if (obs_encoder_scaling_enabled(encoder)) {
  1272. if (obs_encoder_gpu_scaling_enabled(encoder)) {
  1273. blog(LOG_INFO, "[obs-nvenc] GPU scaling enabled");
  1274. } else if (texture) {
  1275. blog(LOG_INFO,
  1276. "[obs-nvenc] CPU scaling enabled, falling back to"
  1277. " non-texture encoder");
  1278. goto reroute;
  1279. }
  1280. }
  1281. if (texture && !obs_p010_tex_active() && !obs_nv12_tex_active()) {
  1282. blog(LOG_INFO,
  1283. "[obs-nvenc] nv12/p010 not active, falling back to "
  1284. "non-texture encoder");
  1285. goto reroute;
  1286. }
  1287. struct nvenc_data *enc =
  1288. nvenc_create_internal(codec, settings, encoder, texture);
  1289. if (enc) {
  1290. return enc;
  1291. }
  1292. reroute:
  1293. if (!texture) {
  1294. blog(LOG_ERROR,
  1295. "Already in fallback encoder, can't fall back further!");
  1296. return NULL;
  1297. }
  1298. switch (codec) {
  1299. case CODEC_H264:
  1300. return obs_encoder_create_rerouted(encoder,
  1301. "obs_nvenc_h264_cuda");
  1302. case CODEC_HEVC:
  1303. return obs_encoder_create_rerouted(encoder,
  1304. "obs_nvenc_hevc_cuda");
  1305. case CODEC_AV1:
  1306. return obs_encoder_create_rerouted(encoder,
  1307. "obs_nvenc_av1_cuda");
  1308. }
  1309. return NULL;
  1310. }
  1311. static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
  1312. {
  1313. return nvenc_create_base(CODEC_H264, settings, encoder, true);
  1314. }
  #ifdef ENABLE_HEVC
  /* Creates an HEVC NVENC encoder, requesting the shared-texture path. */
  static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
  {
      const bool use_texture = true;

      return nvenc_create_base(CODEC_HEVC, settings, encoder, use_texture);
  }
  #endif
  1321. static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
  1322. {
  1323. return nvenc_create_base(CODEC_AV1, settings, encoder, true);
  1324. }
  1325. static void *h264_nvenc_soft_create(obs_data_t *settings,
  1326. obs_encoder_t *encoder)
  1327. {
  1328. return nvenc_create_base(CODEC_H264, settings, encoder, false);
  1329. }
  #ifdef ENABLE_HEVC
  /*
   * Creates an HEVC NVENC encoder on the non-texture (fallback) path;
   * no further rerouting is possible from here.
   */
  static void *hevc_nvenc_soft_create(obs_data_t *settings,
                                      obs_encoder_t *encoder)
  {
      const bool use_texture = false;

      return nvenc_create_base(CODEC_HEVC, settings, encoder, use_texture);
  }
  #endif
  1337. static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder)
  1338. {
  1339. return nvenc_create_base(CODEC_AV1, settings, encoder, false);
  1340. }
  1341. static bool get_encoded_packet(struct nvenc_data *enc, bool finalize);
  1342. static void nvenc_destroy(void *data)
  1343. {
  1344. struct nvenc_data *enc = data;
  1345. if (enc->cu_ctx)
  1346. cu->cuCtxPushCurrent(enc->cu_ctx);
  1347. if (enc->encode_started) {
  1348. uint32_t struct_ver = enc->needs_compat_ver
  1349. ? NV_ENC_PIC_PARAMS_COMPAT_VER
  1350. : NV_ENC_PIC_PARAMS_VER;
  1351. NV_ENC_PIC_PARAMS params = {struct_ver};
  1352. params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
  1353. nv.nvEncEncodePicture(enc->session, &params);
  1354. get_encoded_packet(enc, true);
  1355. }
  1356. #ifdef _WIN32
  1357. for (size_t i = 0; i < enc->textures.num; i++) {
  1358. nv_texture_free(enc, &enc->textures.array[i]);
  1359. }
  1360. #endif
  1361. for (size_t i = 0; i < enc->surfaces.num; i++) {
  1362. nv_cuda_surface_free(enc, &enc->surfaces.array[i]);
  1363. }
  1364. for (size_t i = 0; i < enc->bitstreams.num; i++) {
  1365. nv_bitstream_free(enc, &enc->bitstreams.array[i]);
  1366. }
  1367. if (enc->session) {
  1368. nv.nvEncDestroyEncoder(enc->session);
  1369. }
  1370. #ifdef _WIN32
  1371. for (size_t i = 0; i < enc->input_textures.num; i++) {
  1372. ID3D11Texture2D *tex = enc->input_textures.array[i].tex;
  1373. IDXGIKeyedMutex *km = enc->input_textures.array[i].km;
  1374. tex->lpVtbl->Release(tex);
  1375. km->lpVtbl->Release(km);
  1376. }
  1377. if (enc->context) {
  1378. enc->context->lpVtbl->Release(enc->context);
  1379. }
  1380. if (enc->device) {
  1381. enc->device->lpVtbl->Release(enc->device);
  1382. }
  1383. #else
  1384. for (size_t i = 0; i < enc->input_textures.num; i++) {
  1385. CUgraphicsResource res_y = enc->input_textures.array[i].res_y;
  1386. CUgraphicsResource res_uv = enc->input_textures.array[i].res_uv;
  1387. cu->cuGraphicsUnregisterResource(res_y);
  1388. cu->cuGraphicsUnregisterResource(res_uv);
  1389. }
  1390. #endif
  1391. if (enc->cu_ctx) {
  1392. cu->cuCtxPopCurrent(NULL);
  1393. cu->cuCtxDestroy(enc->cu_ctx);
  1394. }
  1395. bfree(enc->header);
  1396. bfree(enc->sei);
  1397. deque_free(&enc->dts_list);
  1398. da_free(enc->surfaces);
  1399. da_free(enc->input_textures);
  1400. da_free(enc->bitstreams);
  1401. #ifdef _WIN32
  1402. da_free(enc->textures);
  1403. #endif
  1404. da_free(enc->packet_data);
  1405. bfree(enc->roi_map);
  1406. bfree(enc);
  1407. }
  #ifdef _WIN32
  /*
   * Returns the D3D11 texture for a shared handle, opening and caching it
   * on first use (Windows builds only).
   *
   * enc     encoder state; enc->device opens the resource and
   *         enc->input_textures is the cache
   * handle  shared resource handle of the input texture
   * km_out  receives the texture's keyed mutex on success
   *
   * Returns the texture, or NULL if the shared resource cannot be opened or
   * does not expose IDXGIKeyedMutex. Cached entries are released in
   * nvenc_destroy().
   */
  static ID3D11Texture2D *get_tex_from_handle(struct nvenc_data *enc,
                                              uint32_t handle,
                                              IDXGIKeyedMutex **km_out)
  {
      ID3D11Device *device = enc->device;
      IDXGIKeyedMutex *km;
      ID3D11Texture2D *input_tex;
      HRESULT hr;

      /* fast path: this handle was opened before */
      for (size_t idx = 0; idx < enc->input_textures.num; idx++) {
          struct handle_tex *cached = &enc->input_textures.array[idx];

          if (cached->handle != handle)
              continue;

          *km_out = cached->km;
          return cached->tex;
      }

      hr = device->lpVtbl->OpenSharedResource(device,
                                              (HANDLE)(uintptr_t)handle,
                                              &IID_ID3D11Texture2D,
                                              &input_tex);
      if (FAILED(hr)) {
          error_hr("OpenSharedResource failed");
          return NULL;
      }

      hr = input_tex->lpVtbl->QueryInterface(input_tex, &IID_IDXGIKeyedMutex,
                                             &km);
      if (FAILED(hr)) {
          error_hr("QueryInterface(IDXGIKeyedMutex) failed");
          /* drop the reference taken by OpenSharedResource */
          input_tex->lpVtbl->Release(input_tex);
          return NULL;
      }

      input_tex->lpVtbl->SetEvictionPriority(input_tex,
                                             DXGI_RESOURCE_PRIORITY_MAXIMUM);

      /* remember the texture so later frames hit the fast path */
      struct handle_tex entry = {handle, input_tex, km};
      da_push_back(enc->input_textures, &entry);

      *km_out = km;
      return input_tex;
  }
  #endif
  1447. static bool get_encoded_packet(struct nvenc_data *enc, bool finalize)
  1448. {
  1449. void *s = enc->session;
  1450. da_resize(enc->packet_data, 0);
  1451. if (!enc->buffers_queued)
  1452. return true;
  1453. if (!finalize && enc->buffers_queued < enc->output_delay)
  1454. return true;
  1455. size_t count = finalize ? enc->buffers_queued : 1;
  1456. for (size_t i = 0; i < count; i++) {
  1457. size_t cur_bs_idx = enc->cur_bitstream;
  1458. struct nv_bitstream *bs = &enc->bitstreams.array[cur_bs_idx];
  1459. #ifdef _WIN32
  1460. struct nv_texture *nvtex =
  1461. enc->fallback ? NULL : &enc->textures.array[cur_bs_idx];
  1462. struct nv_cuda_surface *surf =
  1463. enc->fallback ? &enc->surfaces.array[cur_bs_idx] : NULL;
  1464. #else
  1465. struct nv_cuda_surface *surf = &enc->surfaces.array[cur_bs_idx];
  1466. #endif
  1467. /* ---------------- */
  1468. uint32_t struct_ver = enc->needs_compat_ver
  1469. ? NV_ENC_LOCK_BITSTREAM_COMPAT_VER
  1470. : NV_ENC_LOCK_BITSTREAM_VER;
  1471. NV_ENC_LOCK_BITSTREAM lock = {struct_ver};
  1472. lock.outputBitstream = bs->ptr;
  1473. lock.doNotWait = false;
  1474. if (NV_FAILED(nv.nvEncLockBitstream(s, &lock))) {
  1475. return false;
  1476. }
  1477. if (enc->first_packet) {
  1478. NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {0};
  1479. uint8_t buf[256];
  1480. uint32_t size = 0;
  1481. payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
  1482. payload.spsppsBuffer = buf;
  1483. payload.inBufferSize = sizeof(buf);
  1484. payload.outSPSPPSPayloadSize = &size;
  1485. nv.nvEncGetSequenceParams(s, &payload);
  1486. enc->header = bmemdup(buf, size);
  1487. enc->header_size = size;
  1488. enc->first_packet = false;
  1489. }
  1490. da_copy_array(enc->packet_data, lock.bitstreamBufferPtr,
  1491. lock.bitstreamSizeInBytes);
  1492. enc->packet_pts = (int64_t)lock.outputTimeStamp;
  1493. enc->packet_keyframe = lock.pictureType == NV_ENC_PIC_TYPE_IDR;
  1494. if (NV_FAILED(nv.nvEncUnlockBitstream(s, bs->ptr))) {
  1495. return false;
  1496. }
  1497. /* ---------------- */
  1498. #ifdef _WIN32
  1499. if (nvtex && nvtex->mapped_res) {
  1500. NVENCSTATUS err;
  1501. err = nv.nvEncUnmapInputResource(s, nvtex->mapped_res);
  1502. if (nv_failed(enc->encoder, err, __FUNCTION__,
  1503. "unmap")) {
  1504. return false;
  1505. }
  1506. nvtex->mapped_res = NULL;
  1507. }
  1508. #endif
  1509. /* ---------------- */
  1510. if (surf && surf->mapped_res) {
  1511. NVENCSTATUS err;
  1512. err = nv.nvEncUnmapInputResource(s, surf->mapped_res);
  1513. if (nv_failed(enc->encoder, err, __FUNCTION__,
  1514. "unmap")) {
  1515. return false;
  1516. }
  1517. surf->mapped_res = NULL;
  1518. }
  1519. /* ---------------- */
  1520. if (++enc->cur_bitstream == enc->buf_count)
  1521. enc->cur_bitstream = 0;
  1522. enc->buffers_queued--;
  1523. }
  1524. return true;
  1525. }
/* Context handed to roi_cb() when add_roi() enumerates the encoder's ROIs. */
struct roi_params {
	/* Map dimensions, counted in blocks of mb_size x mb_size pixels */
	uint32_t mb_width;
	uint32_t mb_height;
	/* Edge length in pixels of one block; used to convert ROI pixel
	 * coordinates into block indices */
	uint32_t mb_size;
	/* True for AV1, which selects the wider QP delta scale in roi_cb() */
	bool av1;
	/* Row-major QP delta map of mb_width * mb_height entries */
	int8_t *map;
};
  1533. static void roi_cb(void *param, struct obs_encoder_roi *roi)
  1534. {
  1535. const struct roi_params *rp = param;
  1536. int8_t qp_val;
  1537. /* AV1 has a larger QP range than HEVC/H.264 */
  1538. if (rp->av1) {
  1539. qp_val = (int8_t)(-128.0f * roi->priority);
  1540. } else {
  1541. qp_val = (int8_t)(-51.0f * roi->priority);
  1542. }
  1543. const uint32_t roi_left = roi->left / rp->mb_size;
  1544. const uint32_t roi_top = roi->top / rp->mb_size;
  1545. const uint32_t roi_right = (roi->right - 1) / rp->mb_size;
  1546. const uint32_t roi_bottom = (roi->bottom - 1) / rp->mb_size;
  1547. for (uint32_t mb_y = 0; mb_y < rp->mb_height; mb_y++) {
  1548. if (mb_y < roi_top || mb_y > roi_bottom)
  1549. continue;
  1550. for (uint32_t mb_x = 0; mb_x < rp->mb_width; mb_x++) {
  1551. if (mb_x < roi_left || mb_x > roi_right)
  1552. continue;
  1553. rp->map[mb_y * rp->mb_width + mb_x] = qp_val;
  1554. }
  1555. }
  1556. }
  1557. static void add_roi(struct nvenc_data *enc, NV_ENC_PIC_PARAMS *params)
  1558. {
  1559. const uint32_t increment = obs_encoder_get_roi_increment(enc->encoder);
  1560. if (enc->roi_map && enc->roi_increment == increment) {
  1561. params->qpDeltaMap = enc->roi_map;
  1562. params->qpDeltaMapSize = (uint32_t)enc->roi_map_size;
  1563. return;
  1564. }
  1565. uint32_t mb_size = 0;
  1566. switch (enc->codec) {
  1567. case CODEC_H264:
  1568. /* H.264 is always 16x16 */
  1569. mb_size = 16;
  1570. break;
  1571. case CODEC_HEVC:
  1572. /* HEVC can be 16x16, 32x32, or 64x64, but NVENC is always 32x32 */
  1573. mb_size = 32;
  1574. break;
  1575. case CODEC_AV1:
  1576. /* AV1 can be 64x64 or 128x128, but NVENC is always 64x64 */
  1577. mb_size = 64;
  1578. break;
  1579. }
  1580. const uint32_t mb_width = (enc->cx + mb_size - 1) / mb_size;
  1581. const uint32_t mb_height = (enc->cy + mb_size - 1) / mb_size;
  1582. const size_t map_size = mb_width * mb_height * sizeof(int8_t);
  1583. if (map_size != enc->roi_map_size) {
  1584. enc->roi_map = brealloc(enc->roi_map, map_size);
  1585. enc->roi_map_size = map_size;
  1586. }
  1587. memset(enc->roi_map, 0, enc->roi_map_size);
  1588. struct roi_params par = {
  1589. .mb_width = mb_width,
  1590. .mb_height = mb_height,
  1591. .mb_size = mb_size,
  1592. .av1 = enc->codec == CODEC_AV1,
  1593. .map = enc->roi_map,
  1594. };
  1595. obs_encoder_enum_roi(enc->encoder, roi_cb, &par);
  1596. enc->roi_increment = increment;
  1597. params->qpDeltaMap = enc->roi_map;
  1598. params->qpDeltaMapSize = (uint32_t)map_size;
  1599. }
  1600. static bool nvenc_encode_shared(struct nvenc_data *enc, struct nv_bitstream *bs,
  1601. void *pic, int64_t pts,
  1602. struct encoder_packet *packet,
  1603. bool *received_packet)
  1604. {
  1605. NV_ENC_PIC_PARAMS params = {0};
  1606. params.version = enc->needs_compat_ver ? NV_ENC_PIC_PARAMS_COMPAT_VER
  1607. : NV_ENC_PIC_PARAMS_VER;
  1608. params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
  1609. params.inputBuffer = pic;
  1610. params.inputTimeStamp = (uint64_t)pts;
  1611. params.inputWidth = enc->cx;
  1612. params.inputHeight = enc->cy;
  1613. params.inputPitch = enc->cx;
  1614. params.outputBitstream = bs->ptr;
  1615. if (enc->fallback) {
  1616. params.bufferFmt = enc->surface_format;
  1617. } else {
  1618. params.bufferFmt = obs_p010_tex_active()
  1619. ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  1620. : NV_ENC_BUFFER_FORMAT_NV12;
  1621. }
  1622. /* Add ROI map if enabled */
  1623. if (obs_encoder_has_roi(enc->encoder))
  1624. add_roi(enc, &params);
  1625. NVENCSTATUS err = nv.nvEncEncodePicture(enc->session, &params);
  1626. if (err != NV_ENC_SUCCESS && err != NV_ENC_ERR_NEED_MORE_INPUT) {
  1627. nv_failed(enc->encoder, err, __FUNCTION__,
  1628. "nvEncEncodePicture");
  1629. return false;
  1630. }
  1631. enc->encode_started = true;
  1632. enc->buffers_queued++;
  1633. if (++enc->next_bitstream == enc->buf_count) {
  1634. enc->next_bitstream = 0;
  1635. }
  1636. /* ------------------------------------ */
  1637. /* check for encoded packet and parse */
  1638. if (!get_encoded_packet(enc, false)) {
  1639. return false;
  1640. }
  1641. /* ------------------------------------ */
  1642. /* output encoded packet */
  1643. if (enc->packet_data.num) {
  1644. int64_t dts;
  1645. deque_pop_front(&enc->dts_list, &dts, sizeof(dts));
  1646. /* subtract bframe delay from dts */
  1647. dts -= (int64_t)enc->bframes * packet->timebase_num;
  1648. *received_packet = true;
  1649. packet->data = enc->packet_data.array;
  1650. packet->size = enc->packet_data.num;
  1651. packet->type = OBS_ENCODER_VIDEO;
  1652. packet->pts = enc->packet_pts;
  1653. packet->dts = dts;
  1654. packet->keyframe = enc->packet_keyframe;
  1655. } else {
  1656. *received_packet = false;
  1657. }
  1658. return true;
  1659. }
  1660. #ifdef _WIN32
  1661. static bool nvenc_encode_tex(void *data, uint32_t handle, int64_t pts,
  1662. uint64_t lock_key, uint64_t *next_key,
  1663. struct encoder_packet *packet,
  1664. bool *received_packet)
  1665. {
  1666. struct nvenc_data *enc = data;
  1667. ID3D11DeviceContext *context = enc->context;
  1668. ID3D11Texture2D *input_tex;
  1669. ID3D11Texture2D *output_tex;
  1670. IDXGIKeyedMutex *km;
  1671. struct nv_texture *nvtex;
  1672. struct nv_bitstream *bs;
  1673. if (handle == GS_INVALID_HANDLE) {
  1674. error("Encode failed: bad texture handle");
  1675. *next_key = lock_key;
  1676. return false;
  1677. }
  1678. bs = &enc->bitstreams.array[enc->next_bitstream];
  1679. nvtex = &enc->textures.array[enc->next_bitstream];
  1680. input_tex = get_tex_from_handle(enc, handle, &km);
  1681. output_tex = nvtex->tex;
  1682. if (!input_tex) {
  1683. *next_key = lock_key;
  1684. return false;
  1685. }
  1686. deque_push_back(&enc->dts_list, &pts, sizeof(pts));
  1687. /* ------------------------------------ */
  1688. /* copy to output tex */
  1689. km->lpVtbl->AcquireSync(km, lock_key, INFINITE);
  1690. context->lpVtbl->CopyResource(context, (ID3D11Resource *)output_tex,
  1691. (ID3D11Resource *)input_tex);
  1692. km->lpVtbl->ReleaseSync(km, *next_key);
  1693. /* ------------------------------------ */
  1694. /* map output tex so nvenc can use it */
  1695. NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
  1696. map.registeredResource = nvtex->res;
  1697. if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) {
  1698. return false;
  1699. }
  1700. nvtex->mapped_res = map.mappedResource;
  1701. /* ------------------------------------ */
  1702. /* do actual encode call */
  1703. return nvenc_encode_shared(enc, bs, nvtex->mapped_res, pts, packet,
  1704. received_packet);
  1705. }
  1706. #else
  1707. static inline bool get_res_for_tex_ids(struct nvenc_data *enc, GLuint tex_id_y,
  1708. GLuint tex_id_uv,
  1709. CUgraphicsResource *tex_y,
  1710. CUgraphicsResource *tex_uv)
  1711. {
  1712. bool success = true;
  1713. for (size_t idx = 0; idx < enc->input_textures.num; idx++) {
  1714. struct handle_tex *ht = &enc->input_textures.array[idx];
  1715. if (ht->tex_id != tex_id_y)
  1716. continue;
  1717. *tex_y = ht->res_y;
  1718. *tex_uv = ht->res_uv;
  1719. return success;
  1720. }
  1721. CU_CHECK(cu->cuGraphicsGLRegisterImage(
  1722. tex_y, tex_id_y, GL_TEXTURE_2D,
  1723. CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY))
  1724. CU_CHECK(cu->cuGraphicsGLRegisterImage(
  1725. tex_uv, tex_id_uv, GL_TEXTURE_2D,
  1726. CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY))
  1727. struct handle_tex ht = {tex_id_y, *tex_y, *tex_uv};
  1728. da_push_back(enc->input_textures, &ht);
  1729. unmap:
  1730. if (!success) {
  1731. cu->cuGraphicsUnregisterResource(*tex_y);
  1732. cu->cuGraphicsUnregisterResource(*tex_uv);
  1733. }
  1734. return success;
  1735. }
  1736. static inline bool copy_tex_cuda(struct nvenc_data *enc, const bool p010,
  1737. GLuint tex[2], struct nv_cuda_surface *surf)
  1738. {
  1739. bool success = true;
  1740. CUgraphicsResource mapped_tex[2] = {0};
  1741. CUarray mapped_cuda;
  1742. if (!get_res_for_tex_ids(enc, tex[0], tex[1], &mapped_tex[0],
  1743. &mapped_tex[1]))
  1744. return false;
  1745. CU_CHECK(cu->cuGraphicsMapResources(2, mapped_tex, 0))
  1746. CUDA_MEMCPY2D m = {0};
  1747. m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  1748. m.srcMemoryType = CU_MEMORYTYPE_ARRAY;
  1749. m.dstArray = surf->tex;
  1750. m.WidthInBytes = p010 ? enc->cx * 2 : enc->cx;
  1751. m.Height = enc->cy;
  1752. // Map and copy Y texture
  1753. CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda,
  1754. mapped_tex[0], 0, 0));
  1755. m.srcArray = mapped_cuda;
  1756. CU_CHECK(cu->cuMemcpy2D(&m))
  1757. // Map and copy UV texture
  1758. CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda,
  1759. mapped_tex[1], 0, 0))
  1760. m.srcArray = mapped_cuda;
  1761. m.dstY += enc->cy;
  1762. m.Height = enc->cy / 2;
  1763. CU_CHECK(cu->cuMemcpy2D(&m))
  1764. unmap:
  1765. cu->cuGraphicsUnmapResources(2, mapped_tex, 0);
  1766. return success;
  1767. }
  1768. static bool nvenc_encode_tex2(void *data, struct encoder_texture *tex,
  1769. int64_t pts, uint64_t lock_key,
  1770. uint64_t *next_key, struct encoder_packet *packet,
  1771. bool *received_packet)
  1772. {
  1773. struct nvenc_data *enc = data;
  1774. struct nv_cuda_surface *surf;
  1775. struct nv_bitstream *bs;
  1776. const bool p010 = obs_p010_tex_active();
  1777. GLuint input_tex[2];
  1778. if (tex == NULL || tex->tex[0] == NULL) {
  1779. error("Encode failed: bad texture handle");
  1780. *next_key = lock_key;
  1781. return false;
  1782. }
  1783. bs = &enc->bitstreams.array[enc->next_bitstream];
  1784. surf = &enc->surfaces.array[enc->next_bitstream];
  1785. deque_push_back(&enc->dts_list, &pts, sizeof(pts));
  1786. /* ------------------------------------ */
  1787. /* copy to CUDA data */
  1788. CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
  1789. obs_enter_graphics();
  1790. input_tex[0] = *(GLuint *)gs_texture_get_obj(tex->tex[0]);
  1791. input_tex[1] = *(GLuint *)gs_texture_get_obj(tex->tex[1]);
  1792. bool success = copy_tex_cuda(enc, p010, input_tex, surf);
  1793. obs_leave_graphics();
  1794. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1795. if (!success)
  1796. return false;
  1797. /* ------------------------------------ */
  1798. /* map output tex so nvenc can use it */
  1799. NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
  1800. map.registeredResource = surf->res;
  1801. map.mappedBufferFmt = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  1802. : NV_ENC_BUFFER_FORMAT_NV12;
  1803. if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map)))
  1804. return false;
  1805. surf->mapped_res = map.mappedResource;
  1806. /* ------------------------------------ */
  1807. /* do actual encode call */
  1808. return nvenc_encode_shared(enc, bs, surf->mapped_res, pts, packet,
  1809. received_packet);
  1810. }
  1811. #endif
  1812. static inline bool nvenc_copy_frame(struct nvenc_data *enc,
  1813. struct encoder_frame *frame,
  1814. struct nv_cuda_surface *surf)
  1815. {
  1816. bool success = true;
  1817. size_t height = enc->cy;
  1818. size_t width = enc->cx;
  1819. CUDA_MEMCPY2D m = {0};
  1820. m.srcMemoryType = CU_MEMORYTYPE_HOST;
  1821. m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  1822. m.dstArray = surf->tex;
  1823. m.WidthInBytes = width;
  1824. m.Height = height;
  1825. CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
  1826. if (enc->surface_format == NV_ENC_BUFFER_FORMAT_NV12) {
  1827. /* Page-locks the host memory so that it can be DMAd directly
  1828. * rather than CUDA doing an internal copy to page-locked
  1829. * memory before actually DMA-ing to the GPU. */
  1830. CU_CHECK(cu->cuMemHostRegister(frame->data[0],
  1831. frame->linesize[0] * height, 0))
  1832. CU_CHECK(cu->cuMemHostRegister(
  1833. frame->data[1], frame->linesize[1] * height / 2, 0))
  1834. m.srcPitch = frame->linesize[0];
  1835. m.srcHost = frame->data[0];
  1836. CU_FAILED(cu->cuMemcpy2D(&m))
  1837. m.srcPitch = frame->linesize[1];
  1838. m.srcHost = frame->data[1];
  1839. m.dstY += height;
  1840. m.Height /= 2;
  1841. CU_FAILED(cu->cuMemcpy2D(&m))
  1842. } else if (enc->surface_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) {
  1843. CU_CHECK(cu->cuMemHostRegister(frame->data[0],
  1844. frame->linesize[0] * height, 0))
  1845. CU_CHECK(cu->cuMemHostRegister(
  1846. frame->data[1], frame->linesize[1] * height / 2, 0))
  1847. // P010 lines are double the size (16 bit per pixel)
  1848. m.WidthInBytes *= 2;
  1849. m.srcPitch = frame->linesize[0];
  1850. m.srcHost = frame->data[0];
  1851. CU_FAILED(cu->cuMemcpy2D(&m))
  1852. m.srcPitch = frame->linesize[1];
  1853. m.srcHost = frame->data[1];
  1854. m.dstY += height;
  1855. m.Height /= 2;
  1856. CU_FAILED(cu->cuMemcpy2D(&m))
  1857. } else { // I444
  1858. CU_CHECK(cu->cuMemHostRegister(frame->data[0],
  1859. frame->linesize[0] * height, 0))
  1860. CU_CHECK(cu->cuMemHostRegister(frame->data[1],
  1861. frame->linesize[1] * height, 0))
  1862. CU_CHECK(cu->cuMemHostRegister(frame->data[2],
  1863. frame->linesize[2] * height, 0))
  1864. m.srcPitch = frame->linesize[0];
  1865. m.srcHost = frame->data[0];
  1866. CU_FAILED(cu->cuMemcpy2D(&m))
  1867. m.srcPitch = frame->linesize[1];
  1868. m.srcHost = frame->data[1];
  1869. m.dstY += height;
  1870. CU_FAILED(cu->cuMemcpy2D(&m))
  1871. m.srcPitch = frame->linesize[2];
  1872. m.srcHost = frame->data[2];
  1873. m.dstY += height;
  1874. CU_FAILED(cu->cuMemcpy2D(&m))
  1875. }
  1876. unmap:
  1877. if (frame->data[0])
  1878. cu->cuMemHostUnregister(frame->data[0]);
  1879. if (frame->data[1])
  1880. cu->cuMemHostUnregister(frame->data[1]);
  1881. if (frame->data[2])
  1882. cu->cuMemHostUnregister(frame->data[2]);
  1883. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1884. return success;
  1885. }
  1886. static bool nvenc_encode_soft(void *data, struct encoder_frame *frame,
  1887. struct encoder_packet *packet,
  1888. bool *received_packet)
  1889. {
  1890. struct nvenc_data *enc = data;
  1891. struct nv_cuda_surface *surf;
  1892. struct nv_bitstream *bs;
  1893. bs = &enc->bitstreams.array[enc->next_bitstream];
  1894. surf = &enc->surfaces.array[enc->next_bitstream];
  1895. deque_push_back(&enc->dts_list, &frame->pts, sizeof(frame->pts));
  1896. /* ------------------------------------ */
  1897. /* copy to CUDA surface */
  1898. if (!nvenc_copy_frame(enc, frame, surf))
  1899. return false;
  1900. /* ------------------------------------ */
  1901. /* map output tex so nvenc can use it */
  1902. NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
  1903. map.registeredResource = surf->res;
  1904. map.mappedBufferFmt = enc->surface_format;
  1905. if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map)))
  1906. return false;
  1907. surf->mapped_res = map.mappedResource;
  1908. /* ------------------------------------ */
  1909. /* do actual encode call */
  1910. return nvenc_encode_shared(enc, bs, surf->mapped_res, frame->pts,
  1911. packet, received_packet);
  1912. }
  1913. static void nvenc_soft_video_info(void *data, struct video_scale_info *info)
  1914. {
  1915. struct nvenc_data *enc = data;
  1916. info->format = enc->in_format;
  1917. }
/*
 * Per-codec settings callbacks (defaults and properties) used by the
 * obs_encoder_info tables below. They are only declared here; their
 * definitions are not part of this section of the file.
 */
extern void h264_nvenc_defaults(obs_data_t *settings);
extern obs_properties_t *h264_nvenc_properties(void *unused);
#ifdef ENABLE_HEVC
/* HEVC variants exist only when the build enables HEVC */
extern void hevc_nvenc_defaults(obs_data_t *settings);
extern obs_properties_t *hevc_nvenc_properties(void *unused);
#endif
extern obs_properties_t *av1_nvenc_properties(void *unused);
extern void av1_nvenc_defaults(obs_data_t *settings);
  1926. static bool nvenc_extra_data(void *data, uint8_t **header, size_t *size)
  1927. {
  1928. struct nvenc_data *enc = data;
  1929. if (!enc->header) {
  1930. return false;
  1931. }
  1932. *header = enc->header;
  1933. *size = enc->header_size;
  1934. return true;
  1935. }
  1936. static bool nvenc_sei_data(void *data, uint8_t **sei, size_t *size)
  1937. {
  1938. struct nvenc_data *enc = data;
  1939. if (!enc->sei) {
  1940. return false;
  1941. }
  1942. *sei = enc->sei;
  1943. *size = enc->sei_size;
  1944. return true;
  1945. }
  1946. struct obs_encoder_info h264_nvenc_info = {
  1947. .id = "jim_nvenc",
  1948. .codec = "h264",
  1949. .type = OBS_ENCODER_VIDEO,
  1950. .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
  1951. OBS_ENCODER_CAP_ROI,
  1952. .get_name = h264_nvenc_get_name,
  1953. .create = h264_nvenc_create,
  1954. .destroy = nvenc_destroy,
  1955. .update = nvenc_update,
  1956. #ifdef _WIN32
  1957. .encode_texture = nvenc_encode_tex,
  1958. #else
  1959. .encode_texture2 = nvenc_encode_tex2,
  1960. #endif
  1961. .get_defaults = h264_nvenc_defaults,
  1962. .get_properties = h264_nvenc_properties,
  1963. .get_extra_data = nvenc_extra_data,
  1964. .get_sei_data = nvenc_sei_data,
  1965. };
  1966. #ifdef ENABLE_HEVC
  1967. struct obs_encoder_info hevc_nvenc_info = {
  1968. .id = "jim_hevc_nvenc",
  1969. .codec = "hevc",
  1970. .type = OBS_ENCODER_VIDEO,
  1971. .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
  1972. OBS_ENCODER_CAP_ROI,
  1973. .get_name = hevc_nvenc_get_name,
  1974. .create = hevc_nvenc_create,
  1975. .destroy = nvenc_destroy,
  1976. .update = nvenc_update,
  1977. #ifdef _WIN32
  1978. .encode_texture = nvenc_encode_tex,
  1979. #else
  1980. .encode_texture2 = nvenc_encode_tex2,
  1981. #endif
  1982. .get_defaults = hevc_nvenc_defaults,
  1983. .get_properties = hevc_nvenc_properties,
  1984. .get_extra_data = nvenc_extra_data,
  1985. .get_sei_data = nvenc_sei_data,
  1986. };
  1987. #endif
  1988. struct obs_encoder_info av1_nvenc_info = {
  1989. .id = "jim_av1_nvenc",
  1990. .codec = "av1",
  1991. .type = OBS_ENCODER_VIDEO,
  1992. .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
  1993. OBS_ENCODER_CAP_ROI,
  1994. .get_name = av1_nvenc_get_name,
  1995. .create = av1_nvenc_create,
  1996. .destroy = nvenc_destroy,
  1997. .update = nvenc_update,
  1998. #ifdef _WIN32
  1999. .encode_texture = nvenc_encode_tex,
  2000. #else
  2001. .encode_texture2 = nvenc_encode_tex2,
  2002. #endif
  2003. .get_defaults = av1_nvenc_defaults,
  2004. .get_properties = av1_nvenc_properties,
  2005. .get_extra_data = nvenc_extra_data,
  2006. };
  2007. struct obs_encoder_info h264_nvenc_soft_info = {
  2008. .id = "obs_nvenc_h264_cuda",
  2009. .codec = "h264",
  2010. .type = OBS_ENCODER_VIDEO,
  2011. .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
  2012. OBS_ENCODER_CAP_INTERNAL,
  2013. .get_name = h264_nvenc_soft_get_name,
  2014. .create = h264_nvenc_soft_create,
  2015. .destroy = nvenc_destroy,
  2016. .update = nvenc_update,
  2017. .encode = nvenc_encode_soft,
  2018. .get_defaults = h264_nvenc_defaults,
  2019. .get_properties = h264_nvenc_properties,
  2020. .get_extra_data = nvenc_extra_data,
  2021. .get_sei_data = nvenc_sei_data,
  2022. .get_video_info = nvenc_soft_video_info,
  2023. };
  2024. #ifdef ENABLE_HEVC
  2025. struct obs_encoder_info hevc_nvenc_soft_info = {
  2026. .id = "obs_nvenc_hevc_cuda",
  2027. .codec = "hevc",
  2028. .type = OBS_ENCODER_VIDEO,
  2029. .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
  2030. OBS_ENCODER_CAP_INTERNAL,
  2031. .get_name = hevc_nvenc_soft_get_name,
  2032. .create = hevc_nvenc_soft_create,
  2033. .destroy = nvenc_destroy,
  2034. .update = nvenc_update,
  2035. .encode = nvenc_encode_soft,
  2036. .get_defaults = hevc_nvenc_defaults,
  2037. .get_properties = hevc_nvenc_properties,
  2038. .get_extra_data = nvenc_extra_data,
  2039. .get_sei_data = nvenc_sei_data,
  2040. .get_video_info = nvenc_soft_video_info,
  2041. };
  2042. #endif
  2043. struct obs_encoder_info av1_nvenc_soft_info = {
  2044. .id = "obs_nvenc_av1_cuda",
  2045. .codec = "av1",
  2046. .type = OBS_ENCODER_VIDEO,
  2047. .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
  2048. OBS_ENCODER_CAP_INTERNAL,
  2049. .get_name = av1_nvenc_soft_get_name,
  2050. .create = av1_nvenc_soft_create,
  2051. .destroy = nvenc_destroy,
  2052. .update = nvenc_update,
  2053. .encode = nvenc_encode_soft,
  2054. .get_defaults = av1_nvenc_defaults,
  2055. .get_properties = av1_nvenc_properties,
  2056. .get_extra_data = nvenc_extra_data,
  2057. .get_video_info = nvenc_soft_video_info,
  2058. };