/* obs-nvenc.c */
  1. #include "obs-nvenc.h"
  2. #include <util/deque.h>
  3. #include <util/darray.h>
  4. #include <util/dstr.h>
  5. #include <obs-avc.h>
  6. #include <obs-hevc.h>
  7. #include <libavutil/rational.h>
  8. #ifdef _WIN32
  9. #define INITGUID
  10. #include <dxgi.h>
  11. #include <d3d11.h>
  12. #include <d3d11_1.h>
  13. #else
  14. #include <glad/glad.h>
  15. #endif
  16. /* ========================================================================= */
  17. /* a hack of the ages: nvenc backward compatibility */
  18. #define CONFIGURED_NVENC_MAJOR 12
  19. #define CONFIGURED_NVENC_MINOR 1
  20. /* we cannot guarantee structures haven't changed, so purposely break on
  21. * version change to force the programmer to update or remove backward
  22. * compatibility NVENC code. */
  23. #if CONFIGURED_NVENC_MAJOR != NVENCAPI_MAJOR_VERSION || \
  24. CONFIGURED_NVENC_MINOR < NVENCAPI_MINOR_VERSION
  25. #error NVENC version changed, update or remove NVENC compatibility code
  26. #endif
  27. #undef NVENCAPI_STRUCT_VERSION
  28. #define NVENCAPI_STRUCT_VERSION(ver) \
  29. ((uint32_t)(enc->needs_compat_ver ? NVENC_COMPAT_VER \
  30. : NVENCAPI_VERSION) | \
  31. ((ver) << 16) | (0x7 << 28))
  32. #define NV_ENC_CONFIG_COMPAT_VER (NVENCAPI_STRUCT_VERSION(7) | (1 << 31))
  33. #define NV_ENC_INITIALIZE_PARAMS_COMPAT_VER \
  34. (NVENCAPI_STRUCT_VERSION(5) | (1 << 31))
  35. #define NV_ENC_PIC_PARAMS_COMPAT_VER (NVENCAPI_STRUCT_VERSION(4) | (1 << 31))
  36. #define NV_ENC_LOCK_BITSTREAM_COMPAT_VER NVENCAPI_STRUCT_VERSION(1)
  37. #define NV_ENC_REGISTER_RESOURCE_COMPAT_VER NVENCAPI_STRUCT_VERSION(3)
  38. #define COMPATIBILITY_VERSION \
  39. (NVENC_COMPAT_MAJOR_VER << 4 | NVENC_COMPAT_MINOR_VER)
  40. /* ========================================================================= */
  #define EXTRA_BUFFERS 5

  /* Logging helpers. Each expands to a blog() call prefixed with the encoder
   * name; the expansion reads enc->encoder, so a variable named `enc` must be
   * in scope at the call site. */
  #define do_log(level, format, ...)           \
      blog(level, "[obs-nvenc: '%s'] " format, \
           obs_encoder_get_name(enc->encoder), ##__VA_ARGS__)
  #define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__)
  #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__)
  #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__)
  #define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)

  /* Logs `msg` together with the calling function's name and the value of a
   * local named `hr`. The value is truncated to 32 bits and then widened to
   * unsigned long so the argument type matches the %lX conversion.
   * NOTE: the expansion ends in a semicolon, which existing call sites rely on
   * being harmless; do not use this as the sole body of an if/else. */
  #define error_hr(msg)                              \
      error("%s: %s: 0x%08lX", __FUNCTION__, msg,    \
            (unsigned long)(uint32_t)hr);

  /* min/max are only defined here for non-Windows builds. Both macros evaluate
   * their arguments more than once. */
  #ifndef _WIN32
  #define min(a, b) (((a) < (b)) ? (a) : (b))
  #define max(a, b) (((a) > (b)) ? (a) : (b))
  #endif
  54. struct nv_bitstream;
  55. struct nv_texture;
  56. struct handle_tex {
  57. #ifdef _WIN32
  58. uint32_t handle;
  59. ID3D11Texture2D *tex;
  60. IDXGIKeyedMutex *km;
  61. #else
  62. GLuint tex_id;
  63. CUgraphicsResource res_y;
  64. CUgraphicsResource res_uv;
  65. #endif
  66. };
  67. /* ------------------------------------------------------------------------- */
  68. /* Main Implementation Structure */
  /* Codecs this encoder implementation distinguishes between. */
  enum codec_type {
      CODEC_H264,
      CODEC_HEVC,
      CODEC_AV1,
  };

  /* Returns a short display name for `type`, or "Unknown" for any value that
   * is not one of the enumerators above. The returned string is static. */
  static const char *get_codec_name(enum codec_type type)
  {
      if (type == CODEC_H264)
          return "H264";
      if (type == CODEC_HEVC)
          return "HEVC";
      if (type == CODEC_AV1)
          return "AV1";

      return "Unknown";
  }
  86. struct nvenc_data {
  87. obs_encoder_t *encoder;
  88. enum codec_type codec;
  89. GUID codec_guid;
  90. void *session;
  91. NV_ENC_INITIALIZE_PARAMS params;
  92. NV_ENC_CONFIG config;
  93. int rc_lookahead;
  94. uint32_t buf_count;
  95. int output_delay;
  96. int buffers_queued;
  97. size_t next_bitstream;
  98. size_t cur_bitstream;
  99. bool encode_started;
  100. bool first_packet;
  101. bool can_change_bitrate;
  102. bool needs_compat_ver;
  103. bool fallback;
  104. int32_t bframes;
  105. DARRAY(struct handle_tex) input_textures;
  106. DARRAY(struct nv_bitstream) bitstreams;
  107. DARRAY(struct nv_cuda_surface) surfaces;
  108. NV_ENC_BUFFER_FORMAT surface_format;
  109. struct deque dts_list;
  110. DARRAY(uint8_t) packet_data;
  111. int64_t packet_pts;
  112. bool packet_keyframe;
  113. #ifdef _WIN32
  114. DARRAY(struct nv_texture) textures;
  115. ID3D11Device *device;
  116. ID3D11DeviceContext *context;
  117. #endif
  118. uint32_t cx;
  119. uint32_t cy;
  120. enum video_format in_format;
  121. uint8_t *header;
  122. size_t header_size;
  123. uint8_t *sei;
  124. size_t sei_size;
  125. int8_t *roi_map;
  126. size_t roi_map_size;
  127. uint32_t roi_increment;
  128. CUcontext cu_ctx;
  129. };
  130. /* ------------------------------------------------------------------------- */
  131. /* Bitstream Buffer */
  /* One NVENC output bitstream buffer; `ptr` is the handle returned by
   * nvEncCreateBitstreamBuffer (see nv_bitstream_init()). */
  struct nv_bitstream {
      void *ptr;
  };

  /* Error helpers bound to the encoder of the local `enc`. NV_FAILED also
   * passes the calling function name and the stringified call expression. */
  #define NV_FAIL(format, ...) nv_fail(enc->encoder, format, ##__VA_ARGS__)
  #define NV_FAILED(x) nv_failed(enc->encoder, x, __FUNCTION__, #x)
  137. static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs)
  138. {
  139. NV_ENC_CREATE_BITSTREAM_BUFFER buf = {
  140. NV_ENC_CREATE_BITSTREAM_BUFFER_VER};
  141. if (NV_FAILED(nv.nvEncCreateBitstreamBuffer(enc->session, &buf))) {
  142. return false;
  143. }
  144. bs->ptr = buf.bitstreamBuffer;
  145. return true;
  146. }
  147. static void nv_bitstream_free(struct nvenc_data *enc, struct nv_bitstream *bs)
  148. {
  149. if (bs->ptr) {
  150. nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr);
  151. }
  152. }
  153. /* ------------------------------------------------------------------------- */
  154. /* Texture Resource */
  155. #ifdef _WIN32
  156. struct nv_texture {
  157. void *res;
  158. ID3D11Texture2D *tex;
  159. void *mapped_res;
  160. };
  161. static bool nv_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex)
  162. {
  163. const bool p010 = obs_p010_tex_active();
  164. D3D11_TEXTURE2D_DESC desc = {0};
  165. desc.Width = enc->cx;
  166. desc.Height = enc->cy;
  167. desc.MipLevels = 1;
  168. desc.ArraySize = 1;
  169. desc.Format = p010 ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12;
  170. desc.SampleDesc.Count = 1;
  171. desc.BindFlags = D3D11_BIND_RENDER_TARGET;
  172. ID3D11Device *const device = enc->device;
  173. ID3D11Texture2D *tex;
  174. HRESULT hr = device->lpVtbl->CreateTexture2D(device, &desc, NULL, &tex);
  175. if (FAILED(hr)) {
  176. error_hr("Failed to create texture");
  177. return false;
  178. }
  179. tex->lpVtbl->SetEvictionPriority(tex, DXGI_RESOURCE_PRIORITY_MAXIMUM);
  180. uint32_t struct_ver = enc->needs_compat_ver
  181. ? NV_ENC_REGISTER_RESOURCE_COMPAT_VER
  182. : NV_ENC_REGISTER_RESOURCE_VER;
  183. NV_ENC_REGISTER_RESOURCE res = {struct_ver};
  184. res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
  185. res.resourceToRegister = tex;
  186. res.width = enc->cx;
  187. res.height = enc->cy;
  188. res.bufferFormat = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  189. : NV_ENC_BUFFER_FORMAT_NV12;
  190. if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) {
  191. tex->lpVtbl->Release(tex);
  192. return false;
  193. }
  194. nvtex->res = res.registeredResource;
  195. nvtex->tex = tex;
  196. nvtex->mapped_res = NULL;
  197. return true;
  198. }
  199. static void nv_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex)
  200. {
  201. if (nvtex->res) {
  202. if (nvtex->mapped_res) {
  203. nv.nvEncUnmapInputResource(enc->session,
  204. nvtex->mapped_res);
  205. }
  206. nv.nvEncUnregisterResource(enc->session, nvtex->res);
  207. nvtex->tex->lpVtbl->Release(nvtex->tex);
  208. }
  209. }
  210. #endif
  211. /* ------------------------------------------------------------------------- */
  212. /* CUDA Stuff */
  213. /* CUDA error handling */
  214. static inline bool cuda_error_check(struct nvenc_data *enc, CUresult res,
  215. const char *func, const char *call)
  216. {
  217. if (res == CUDA_SUCCESS)
  218. return true;
  219. struct dstr message = {0};
  220. const char *name, *desc;
  221. if (cuda_get_error_desc(res, &name, &desc)) {
  222. dstr_printf(&message,
  223. "%s: CUDA call \"%s\" failed with %s (%d): %s",
  224. func, call, name, res, desc);
  225. } else {
  226. dstr_printf(&message, "%s: CUDA call \"%s\" failed with %d",
  227. func, call, res);
  228. }
  229. error("%s", message.array);
  230. obs_encoder_set_last_error(enc->encoder, message.array);
  231. dstr_free(&message);
  232. return false;
  233. }
  /* Runs a CUDA call and makes the enclosing function return false if it
   * fails. Needs `enc` in scope. Written so that it can be used without a
   * trailing semicolon. */
  #define CU_FAILED(call)                                      \
      if (!cuda_error_check(enc, call, __FUNCTION__, #call)) { \
          return false;                                        \
      }

  /* Runs a CUDA call; on failure sets the local `success` to false and jumps
   * to the enclosing function's `unmap` label. Needs `enc`, `success` and the
   * label in scope. */
  #define CU_CHECK(call)                                       \
      if (!cuda_error_check(enc, call, __FUNCTION__, #call)) { \
          success = false;                                     \
          goto unmap;                                          \
      }
  242. /* CUDA Surfaces */
  243. struct nv_cuda_surface {
  244. CUarray tex;
  245. NV_ENC_REGISTERED_PTR res;
  246. NV_ENC_INPUT_PTR *mapped_res;
  247. };
  248. /* Missing from ffmpeg nvcodec headers, required for CUDA arrays to be usable in NVENC */
  249. static const int CUDA_ARRAY3D_SURFACE_LDST = 0x02;
  250. static bool nv_cuda_surface_init(struct nvenc_data *enc,
  251. struct nv_cuda_surface *nvsurf)
  252. {
  253. const bool p010 = obs_p010_tex_active();
  254. CUDA_ARRAY3D_DESCRIPTOR desc;
  255. desc.Width = enc->cx;
  256. desc.Height = enc->cy;
  257. desc.Depth = 0;
  258. desc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
  259. desc.NumChannels = 1;
  260. if (!enc->fallback) {
  261. desc.Format = p010 ? CU_AD_FORMAT_UNSIGNED_INT16
  262. : CU_AD_FORMAT_UNSIGNED_INT8;
  263. desc.Height = enc->cy + enc->cy / 2;
  264. } else {
  265. switch (enc->surface_format) {
  266. case NV_ENC_BUFFER_FORMAT_NV12:
  267. desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
  268. // Additional half-height plane for UV data
  269. desc.Height += enc->cy / 2;
  270. break;
  271. case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
  272. desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
  273. desc.Height += enc->cy / 2;
  274. desc.NumChannels = 2; // number of bytes per element
  275. break;
  276. case NV_ENC_BUFFER_FORMAT_YUV444:
  277. desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
  278. desc.Height *= 3; // 3 full-size planes
  279. break;
  280. default:
  281. error("Unknown input format: %d", enc->surface_format);
  282. return false;
  283. }
  284. }
  285. CU_FAILED(cu->cuArray3DCreate(&nvsurf->tex, &desc))
  286. NV_ENC_REGISTER_RESOURCE res = {0};
  287. res.version = enc->needs_compat_ver
  288. ? NV_ENC_REGISTER_RESOURCE_COMPAT_VER
  289. : NV_ENC_REGISTER_RESOURCE_VER;
  290. res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY;
  291. res.resourceToRegister = (void *)nvsurf->tex;
  292. res.width = enc->cx;
  293. res.height = enc->cy;
  294. res.pitch = (uint32_t)(desc.Width * desc.NumChannels);
  295. if (!enc->fallback) {
  296. res.bufferFormat = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  297. : NV_ENC_BUFFER_FORMAT_NV12;
  298. } else {
  299. res.bufferFormat = enc->surface_format;
  300. }
  301. if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) {
  302. return false;
  303. }
  304. nvsurf->res = res.registeredResource;
  305. nvsurf->mapped_res = NULL;
  306. return true;
  307. }
  308. static void nv_cuda_surface_free(struct nvenc_data *enc,
  309. struct nv_cuda_surface *nvsurf)
  310. {
  311. if (nvsurf->res) {
  312. if (nvsurf->mapped_res) {
  313. nv.nvEncUnmapInputResource(enc->session,
  314. nvsurf->mapped_res);
  315. }
  316. nv.nvEncUnregisterResource(enc->session, nvsurf->res);
  317. cu->cuArrayDestroy(nvsurf->tex);
  318. }
  319. }
  320. /* ------------------------------------------------------------------------- */
  321. /* Implementation */
  /* Display name of the H.264 encoder; `type_data` is ignored. */
  static const char *h264_nvenc_get_name(void *type_data)
  {
      static const char name[] = "NVIDIA NVENC H.264";

      UNUSED_PARAMETER(type_data);
      return name;
  }
  /* Display name of the H.264 fallback encoder; `type_data` is ignored. */
  static const char *h264_nvenc_soft_get_name(void *type_data)
  {
      static const char name[] = "NVIDIA NVENC H.264 (Fallback)";

      UNUSED_PARAMETER(type_data);
      return name;
  }
  332. #ifdef ENABLE_HEVC
  /* Display name of the HEVC encoder; `type_data` is ignored. */
  static const char *hevc_nvenc_get_name(void *type_data)
  {
      static const char name[] = "NVIDIA NVENC HEVC";

      UNUSED_PARAMETER(type_data);
      return name;
  }
  /* Display name of the HEVC fallback encoder; `type_data` is ignored. */
  static const char *hevc_nvenc_soft_get_name(void *type_data)
  {
      static const char name[] = "NVIDIA NVENC HEVC (Fallback)";

      UNUSED_PARAMETER(type_data);
      return name;
  }
  343. #endif
  /* Display name of the AV1 encoder; `type_data` is ignored. */
  static const char *av1_nvenc_get_name(void *type_data)
  {
      static const char name[] = "NVIDIA NVENC AV1";

      UNUSED_PARAMETER(type_data);
      return name;
  }
  /* Display name of the AV1 fallback encoder; `type_data` is ignored. */
  static const char *av1_nvenc_soft_get_name(void *type_data)
  {
      static const char name[] = "NVIDIA NVENC AV1 (Fallback)";

      UNUSED_PARAMETER(type_data);
      return name;
  }
  354. static inline int nv_get_cap(struct nvenc_data *enc, NV_ENC_CAPS cap)
  355. {
  356. if (!enc->session)
  357. return 0;
  358. NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
  359. int v;
  360. param.capsToQuery = cap;
  361. nv.nvEncGetEncodeCaps(enc->session, enc->codec_guid, &param, &v);
  362. return v;
  363. }
  364. static bool nvenc_update(void *data, obs_data_t *settings)
  365. {
  366. struct nvenc_data *enc = data;
  367. /* Only support reconfiguration of CBR bitrate */
  368. if (enc->can_change_bitrate) {
  369. int bitrate = (int)obs_data_get_int(settings, "bitrate");
  370. int max_bitrate =
  371. (int)obs_data_get_int(settings, "max_bitrate");
  372. bool vbr = (enc->config.rcParams.rateControlMode ==
  373. NV_ENC_PARAMS_RC_VBR);
  374. enc->config.rcParams.averageBitRate = bitrate * 1000;
  375. enc->config.rcParams.maxBitRate = vbr ? max_bitrate * 1000
  376. : bitrate * 1000;
  377. NV_ENC_RECONFIGURE_PARAMS params = {0};
  378. params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
  379. params.reInitEncodeParams = enc->params;
  380. params.resetEncoder = 1;
  381. params.forceIDR = 1;
  382. if (NV_FAILED(nv.nvEncReconfigureEncoder(enc->session,
  383. &params))) {
  384. return false;
  385. }
  386. }
  387. return true;
  388. }
  389. #ifdef _WIN32
  390. static HANDLE get_lib(struct nvenc_data *enc, const char *lib)
  391. {
  392. HMODULE mod = GetModuleHandleA(lib);
  393. if (mod)
  394. return mod;
  395. mod = LoadLibraryA(lib);
  396. if (!mod)
  397. error("Failed to load %s", lib);
  398. return mod;
  399. }
  400. typedef HRESULT(WINAPI *CREATEDXGIFACTORY1PROC)(REFIID, void **);
  401. static bool init_d3d11(struct nvenc_data *enc, obs_data_t *settings)
  402. {
  403. HMODULE dxgi = get_lib(enc, "DXGI.dll");
  404. HMODULE d3d11 = get_lib(enc, "D3D11.dll");
  405. CREATEDXGIFACTORY1PROC create_dxgi;
  406. PFN_D3D11_CREATE_DEVICE create_device;
  407. IDXGIFactory1 *factory;
  408. IDXGIAdapter *adapter;
  409. ID3D11Device *device;
  410. ID3D11DeviceContext *context;
  411. HRESULT hr;
  412. if (!dxgi || !d3d11) {
  413. return false;
  414. }
  415. create_dxgi = (CREATEDXGIFACTORY1PROC)GetProcAddress(
  416. dxgi, "CreateDXGIFactory1");
  417. create_device = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(
  418. d3d11, "D3D11CreateDevice");
  419. if (!create_dxgi || !create_device) {
  420. error("Failed to load D3D11/DXGI procedures");
  421. return false;
  422. }
  423. hr = create_dxgi(&IID_IDXGIFactory1, &factory);
  424. if (FAILED(hr)) {
  425. error_hr("CreateDXGIFactory1 failed");
  426. return false;
  427. }
  428. hr = factory->lpVtbl->EnumAdapters(factory, 0, &adapter);
  429. factory->lpVtbl->Release(factory);
  430. if (FAILED(hr)) {
  431. error_hr("EnumAdapters failed");
  432. return false;
  433. }
  434. hr = create_device(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0,
  435. D3D11_SDK_VERSION, &device, NULL, &context);
  436. adapter->lpVtbl->Release(adapter);
  437. if (FAILED(hr)) {
  438. error_hr("D3D11CreateDevice failed");
  439. return false;
  440. }
  441. enc->device = device;
  442. enc->context = context;
  443. return true;
  444. }
  445. #endif
  446. static bool init_session(struct nvenc_data *enc)
  447. {
  448. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {
  449. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
  450. params.apiVersion = enc->needs_compat_ver ? NVENC_COMPAT_VER
  451. : NVENCAPI_VERSION;
  452. #ifdef _WIN32
  453. if (enc->fallback) {
  454. params.device = enc->cu_ctx;
  455. params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  456. } else {
  457. params.device = enc->device;
  458. params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
  459. }
  460. #else
  461. params.device = enc->cu_ctx;
  462. params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  463. #endif
  464. if (NV_FAILED(nv.nvEncOpenEncodeSessionEx(&params, &enc->session))) {
  465. return false;
  466. }
  467. return true;
  468. }
  469. static void initialize_params(struct nvenc_data *enc, const GUID *nv_preset,
  470. NV_ENC_TUNING_INFO nv_tuning, uint32_t width,
  471. uint32_t height, uint32_t fps_num,
  472. uint32_t fps_den)
  473. {
  474. int darWidth, darHeight;
  475. av_reduce(&darWidth, &darHeight, width, height, 1024 * 1024);
  476. NV_ENC_INITIALIZE_PARAMS *params = &enc->params;
  477. memset(params, 0, sizeof(*params));
  478. params->version = enc->needs_compat_ver
  479. ? NV_ENC_INITIALIZE_PARAMS_COMPAT_VER
  480. : NV_ENC_INITIALIZE_PARAMS_VER;
  481. params->encodeGUID = enc->codec_guid;
  482. params->presetGUID = *nv_preset;
  483. params->encodeWidth = width;
  484. params->encodeHeight = height;
  485. params->darWidth = enc->codec == CODEC_AV1 ? width : (uint32_t)darWidth;
  486. params->darHeight = enc->codec == CODEC_AV1 ? height
  487. : (uint32_t)darHeight;
  488. params->frameRateNum = fps_num;
  489. params->frameRateDen = fps_den;
  490. params->enableEncodeAsync = 0;
  491. params->enablePTD = 1;
  492. params->encodeConfig = &enc->config;
  493. params->tuningInfo = nv_tuning;
  494. }
  495. static inline GUID get_nv_preset2(const char *preset2)
  496. {
  497. if (astrcmpi(preset2, "p1") == 0) {
  498. return NV_ENC_PRESET_P1_GUID;
  499. } else if (astrcmpi(preset2, "p2") == 0) {
  500. return NV_ENC_PRESET_P2_GUID;
  501. } else if (astrcmpi(preset2, "p3") == 0) {
  502. return NV_ENC_PRESET_P3_GUID;
  503. } else if (astrcmpi(preset2, "p4") == 0) {
  504. return NV_ENC_PRESET_P4_GUID;
  505. } else if (astrcmpi(preset2, "p6") == 0) {
  506. return NV_ENC_PRESET_P6_GUID;
  507. } else if (astrcmpi(preset2, "p7") == 0) {
  508. return NV_ENC_PRESET_P7_GUID;
  509. } else {
  510. return NV_ENC_PRESET_P5_GUID;
  511. }
  512. }
  513. static inline NV_ENC_TUNING_INFO get_nv_tuning(const char *tuning)
  514. {
  515. if (astrcmpi(tuning, "ll") == 0) {
  516. return NV_ENC_TUNING_INFO_LOW_LATENCY;
  517. } else if (astrcmpi(tuning, "ull") == 0) {
  518. return NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
  519. } else {
  520. return NV_ENC_TUNING_INFO_HIGH_QUALITY;
  521. }
  522. }
  523. static inline NV_ENC_MULTI_PASS get_nv_multipass(const char *multipass)
  524. {
  525. if (astrcmpi(multipass, "qres") == 0) {
  526. return NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  527. } else if (astrcmpi(multipass, "fullres") == 0) {
  528. return NV_ENC_TWO_PASS_FULL_RESOLUTION;
  529. } else {
  530. return NV_ENC_MULTI_PASS_DISABLED;
  531. }
  532. }
  533. static bool is_10_bit(const struct nvenc_data *enc)
  534. {
  535. return enc->fallback ? enc->in_format == VIDEO_FORMAT_P010
  536. : obs_p010_tex_active();
  537. }
  538. static bool init_encoder_base(struct nvenc_data *enc, obs_data_t *settings,
  539. int bf, bool compatibility, bool *lossless)
  540. {
  541. const char *rc = obs_data_get_string(settings, "rate_control");
  542. int bitrate = (int)obs_data_get_int(settings, "bitrate");
  543. int max_bitrate = (int)obs_data_get_int(settings, "max_bitrate");
  544. int cqp = (int)obs_data_get_int(settings, "cqp");
  545. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  546. const char *preset = obs_data_get_string(settings, "preset");
  547. const char *preset2 = obs_data_get_string(settings, "preset2");
  548. const char *tuning = obs_data_get_string(settings, "tune");
  549. const char *multipass = obs_data_get_string(settings, "multipass");
  550. const char *profile = obs_data_get_string(settings, "profile");
  551. bool lookahead = obs_data_get_bool(settings, "lookahead");
  552. bool vbr = astrcmpi(rc, "VBR") == 0;
  553. bool psycho_aq = !compatibility &&
  554. obs_data_get_bool(settings, "psycho_aq");
  555. bool disable_scenecut = obs_data_get_bool(settings, "disable_scenecut");
  556. NVENCSTATUS err;
  557. video_t *video = obs_encoder_video(enc->encoder);
  558. const struct video_output_info *voi = video_output_get_info(video);
  559. enc->cx = obs_encoder_get_width(enc->encoder);
  560. enc->cy = obs_encoder_get_height(enc->encoder);
  561. /* -------------------------- */
  562. /* get preset */
  563. GUID nv_preset = get_nv_preset2(preset2);
  564. NV_ENC_TUNING_INFO nv_tuning = get_nv_tuning(tuning);
  565. NV_ENC_MULTI_PASS nv_multipass = compatibility
  566. ? NV_ENC_MULTI_PASS_DISABLED
  567. : get_nv_multipass(multipass);
  568. if (obs_data_has_user_value(settings, "preset") &&
  569. !obs_data_has_user_value(settings, "preset2") &&
  570. enc->codec == CODEC_H264) {
  571. if (astrcmpi(preset, "mq") == 0) {
  572. nv_preset = NV_ENC_PRESET_P5_GUID;
  573. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  574. nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  575. } else if (astrcmpi(preset, "hq") == 0) {
  576. nv_preset = NV_ENC_PRESET_P5_GUID;
  577. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  578. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  579. } else if (astrcmpi(preset, "default") == 0) {
  580. nv_preset = NV_ENC_PRESET_P3_GUID;
  581. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  582. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  583. } else if (astrcmpi(preset, "hp") == 0) {
  584. nv_preset = NV_ENC_PRESET_P1_GUID;
  585. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  586. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  587. } else if (astrcmpi(preset, "ll") == 0) {
  588. nv_preset = NV_ENC_PRESET_P3_GUID;
  589. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  590. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  591. } else if (astrcmpi(preset, "llhq") == 0) {
  592. nv_preset = NV_ENC_PRESET_P4_GUID;
  593. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  594. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  595. } else if (astrcmpi(preset, "llhp") == 0) {
  596. nv_preset = NV_ENC_PRESET_P2_GUID;
  597. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  598. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  599. }
  600. } else if (obs_data_has_user_value(settings, "preset") &&
  601. !obs_data_has_user_value(settings, "preset2") &&
  602. enc->codec == CODEC_HEVC) {
  603. if (astrcmpi(preset, "mq") == 0) {
  604. nv_preset = NV_ENC_PRESET_P6_GUID;
  605. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  606. nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  607. } else if (astrcmpi(preset, "hq") == 0) {
  608. nv_preset = NV_ENC_PRESET_P6_GUID;
  609. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  610. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  611. } else if (astrcmpi(preset, "default") == 0) {
  612. nv_preset = NV_ENC_PRESET_P5_GUID;
  613. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  614. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  615. } else if (astrcmpi(preset, "hp") == 0) {
  616. nv_preset = NV_ENC_PRESET_P1_GUID;
  617. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  618. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  619. } else if (astrcmpi(preset, "ll") == 0) {
  620. nv_preset = NV_ENC_PRESET_P3_GUID;
  621. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  622. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  623. } else if (astrcmpi(preset, "llhq") == 0) {
  624. nv_preset = NV_ENC_PRESET_P4_GUID;
  625. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  626. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  627. } else if (astrcmpi(preset, "llhp") == 0) {
  628. nv_preset = NV_ENC_PRESET_P2_GUID;
  629. nv_tuning = NV_ENC_TUNING_INFO_LOW_LATENCY;
  630. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  631. }
  632. }
  633. const bool rc_lossless = astrcmpi(rc, "lossless") == 0;
  634. *lossless = rc_lossless;
  635. if (rc_lossless) {
  636. *lossless =
  637. nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
  638. if (*lossless) {
  639. nv_tuning = NV_ENC_TUNING_INFO_LOSSLESS;
  640. nv_multipass = NV_ENC_MULTI_PASS_DISABLED;
  641. } else {
  642. warn("lossless encode is not supported, ignoring");
  643. nv_preset = NV_ENC_PRESET_P5_GUID;
  644. nv_tuning = NV_ENC_TUNING_INFO_HIGH_QUALITY;
  645. nv_multipass = NV_ENC_TWO_PASS_QUARTER_RESOLUTION;
  646. }
  647. }
  648. /* -------------------------- */
  649. /* get preset default config */
  650. uint32_t config_ver = enc->needs_compat_ver ? NV_ENC_CONFIG_COMPAT_VER
  651. : NV_ENC_CONFIG_VER;
  652. NV_ENC_PRESET_CONFIG preset_config = {NV_ENC_PRESET_CONFIG_VER,
  653. {config_ver}};
  654. err = nv.nvEncGetEncodePresetConfigEx(enc->session, enc->codec_guid,
  655. nv_preset, nv_tuning,
  656. &preset_config);
  657. if (nv_failed(enc->encoder, err, __FUNCTION__,
  658. "nvEncGetEncodePresetConfig")) {
  659. return false;
  660. }
  661. /* -------------------------- */
  662. /* main configuration */
  663. enc->config = preset_config.presetCfg;
  664. uint32_t gop_size =
  665. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  666. NV_ENC_CONFIG *config = &enc->config;
  667. initialize_params(enc, &nv_preset, nv_tuning, voi->width, voi->height,
  668. voi->fps_num, voi->fps_den);
  669. config->gopLength = gop_size;
  670. config->frameIntervalP = 1 + bf;
  671. enc->bframes = bf;
  672. /* lookahead */
  673. const bool use_profile_lookahead = config->rcParams.enableLookahead;
  674. lookahead = nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_LOOKAHEAD) &&
  675. (lookahead || use_profile_lookahead);
  676. if (lookahead) {
  677. enc->rc_lookahead = use_profile_lookahead
  678. ? config->rcParams.lookaheadDepth
  679. : 8;
  680. }
  681. int buf_count = max(4, config->frameIntervalP * 2 * 2);
  682. if (lookahead) {
  683. buf_count = max(buf_count, config->frameIntervalP +
  684. enc->rc_lookahead +
  685. EXTRA_BUFFERS);
  686. }
  687. buf_count = min(64, buf_count);
  688. enc->buf_count = buf_count;
  689. const int output_delay = buf_count - 1;
  690. enc->output_delay = output_delay;
  691. if (lookahead) {
  692. const int lkd_bound = output_delay - config->frameIntervalP - 4;
  693. if (lkd_bound >= 0) {
  694. config->rcParams.enableLookahead = 1;
  695. config->rcParams.lookaheadDepth =
  696. max(enc->rc_lookahead, lkd_bound);
  697. config->rcParams.disableIadapt = 0;
  698. config->rcParams.disableBadapt = 0;
  699. } else {
  700. lookahead = false;
  701. }
  702. }
  703. enc->config.rcParams.disableIadapt = disable_scenecut;
  704. /* psycho aq */
  705. if (!compatibility) {
  706. if (nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ)) {
  707. config->rcParams.enableAQ = psycho_aq;
  708. config->rcParams.aqStrength = 8;
  709. config->rcParams.enableTemporalAQ = psycho_aq;
  710. } else {
  711. warn("Ignoring Psycho Visual Tuning request since GPU is not capable");
  712. }
  713. }
  714. /* -------------------------- */
  715. /* rate control */
  716. enc->can_change_bitrate =
  717. nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
  718. config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
  719. if (astrcmpi(rc, "cqp") == 0 || rc_lossless) {
  720. if (*lossless)
  721. cqp = 0;
  722. int cqp_val = enc->codec == CODEC_AV1 ? cqp * 4 : cqp;
  723. config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
  724. config->rcParams.constQP.qpInterP = cqp_val;
  725. config->rcParams.constQP.qpInterB = cqp_val;
  726. config->rcParams.constQP.qpIntra = cqp_val;
  727. enc->can_change_bitrate = false;
  728. bitrate = 0;
  729. max_bitrate = 0;
  730. } else if (astrcmpi(rc, "vbr") != 0) { /* CBR by default */
  731. config->rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
  732. }
  733. config->rcParams.averageBitRate = bitrate * 1000;
  734. config->rcParams.maxBitRate = vbr ? max_bitrate * 1000 : bitrate * 1000;
  735. config->rcParams.vbvBufferSize = bitrate * 1000;
  736. config->rcParams.multiPass = nv_multipass;
  737. config->rcParams.qpMapMode = NV_ENC_QP_MAP_DELTA;
  738. /* -------------------------- */
  739. /* initialize */
  740. info("settings:\n"
  741. "\tcodec: %s\n"
  742. "\trate_control: %s\n"
  743. "\tbitrate: %d\n"
  744. "\tcqp: %d\n"
  745. "\tkeyint: %d\n"
  746. "\tpreset: %s\n"
  747. "\ttuning: %s\n"
  748. "\tmultipass: %s\n"
  749. "\tprofile: %s\n"
  750. "\twidth: %d\n"
  751. "\theight: %d\n"
  752. "\tb-frames: %d\n"
  753. "\tlookahead: %s\n"
  754. "\tpsycho_aq: %s\n",
  755. get_codec_name(enc->codec), rc, bitrate, cqp, gop_size, preset2,
  756. tuning, multipass, profile, enc->cx, enc->cy, bf,
  757. lookahead ? "true" : "false", psycho_aq ? "true" : "false");
  758. return true;
  759. }
  760. static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings,
  761. int bf, bool compatibility)
  762. {
  763. const char *rc = obs_data_get_string(settings, "rate_control");
  764. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  765. const char *profile = obs_data_get_string(settings, "profile");
  766. bool lossless;
  767. if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) {
  768. return false;
  769. }
  770. NV_ENC_CONFIG *config = &enc->config;
  771. NV_ENC_CONFIG_H264 *h264_config = &config->encodeCodecConfig.h264Config;
  772. NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui_params =
  773. &h264_config->h264VUIParameters;
  774. video_t *video = obs_encoder_video(enc->encoder);
  775. const struct video_output_info *voi = video_output_get_info(video);
  776. uint32_t gop_size =
  777. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  778. h264_config->idrPeriod = gop_size;
  779. bool repeat_headers = obs_data_get_bool(settings, "repeat_headers");
  780. if (repeat_headers) {
  781. h264_config->repeatSPSPPS = 1;
  782. h264_config->disableSPSPPS = 0;
  783. h264_config->outputAUD = 1;
  784. }
  785. h264_config->sliceMode = 3;
  786. h264_config->sliceModeData = 1;
  787. h264_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED;
  788. /* Enable CBR padding */
  789. if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
  790. h264_config->enableFillerDataInsertion = 1;
  791. vui_params->videoSignalTypePresentFlag = 1;
  792. vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL);
  793. vui_params->colourDescriptionPresentFlag = 1;
  794. switch (voi->colorspace) {
  795. case VIDEO_CS_601:
  796. vui_params->colourPrimaries = 6;
  797. vui_params->transferCharacteristics = 6;
  798. vui_params->colourMatrix = 6;
  799. break;
  800. case VIDEO_CS_DEFAULT:
  801. case VIDEO_CS_709:
  802. vui_params->colourPrimaries = 1;
  803. vui_params->transferCharacteristics = 1;
  804. vui_params->colourMatrix = 1;
  805. break;
  806. case VIDEO_CS_SRGB:
  807. vui_params->colourPrimaries = 1;
  808. vui_params->transferCharacteristics = 13;
  809. vui_params->colourMatrix = 1;
  810. break;
  811. default:
  812. break;
  813. }
  814. if (astrcmpi(rc, "lossless") == 0) {
  815. h264_config->qpPrimeYZeroTransformBypassFlag = 1;
  816. } else if (astrcmpi(rc, "vbr") != 0) { /* CBR */
  817. h264_config->outputBufferingPeriodSEI = 1;
  818. }
  819. h264_config->outputPictureTimingSEI = 1;
  820. /* -------------------------- */
  821. /* profile */
  822. if (enc->in_format == VIDEO_FORMAT_I444) {
  823. config->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
  824. h264_config->chromaFormatIDC = 3;
  825. } else if (astrcmpi(profile, "main") == 0) {
  826. config->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
  827. } else if (astrcmpi(profile, "baseline") == 0) {
  828. config->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
  829. } else if (!lossless) {
  830. config->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
  831. }
  832. if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) {
  833. return false;
  834. }
  835. return true;
  836. }
  837. static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings,
  838. int bf, bool compatibility)
  839. {
  840. const char *rc = obs_data_get_string(settings, "rate_control");
  841. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  842. const char *profile = obs_data_get_string(settings, "profile");
  843. bool lossless;
  844. if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) {
  845. return false;
  846. }
  847. NV_ENC_CONFIG *config = &enc->config;
  848. NV_ENC_CONFIG_HEVC *hevc_config = &config->encodeCodecConfig.hevcConfig;
  849. NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui_params =
  850. &hevc_config->hevcVUIParameters;
  851. video_t *video = obs_encoder_video(enc->encoder);
  852. const struct video_output_info *voi = video_output_get_info(video);
  853. uint32_t gop_size =
  854. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  855. hevc_config->idrPeriod = gop_size;
  856. bool repeat_headers = obs_data_get_bool(settings, "repeat_headers");
  857. if (repeat_headers) {
  858. hevc_config->repeatSPSPPS = 1;
  859. hevc_config->disableSPSPPS = 0;
  860. hevc_config->outputAUD = 1;
  861. }
  862. hevc_config->sliceMode = 3;
  863. hevc_config->sliceModeData = 1;
  864. hevc_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED;
  865. /* Enable CBR padding */
  866. if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
  867. hevc_config->enableFillerDataInsertion = 1;
  868. vui_params->videoSignalTypePresentFlag = 1;
  869. vui_params->videoFullRangeFlag = (voi->range == VIDEO_RANGE_FULL);
  870. vui_params->colourDescriptionPresentFlag = 1;
  871. switch (voi->colorspace) {
  872. case VIDEO_CS_601:
  873. vui_params->colourPrimaries = 6;
  874. vui_params->transferCharacteristics = 6;
  875. vui_params->colourMatrix = 6;
  876. break;
  877. case VIDEO_CS_DEFAULT:
  878. case VIDEO_CS_709:
  879. vui_params->colourPrimaries = 1;
  880. vui_params->transferCharacteristics = 1;
  881. vui_params->colourMatrix = 1;
  882. break;
  883. case VIDEO_CS_SRGB:
  884. vui_params->colourPrimaries = 1;
  885. vui_params->transferCharacteristics = 13;
  886. vui_params->colourMatrix = 1;
  887. break;
  888. case VIDEO_CS_2100_PQ:
  889. vui_params->colourPrimaries = 9;
  890. vui_params->transferCharacteristics = 16;
  891. vui_params->colourMatrix = 9;
  892. vui_params->chromaSampleLocationFlag = 1;
  893. vui_params->chromaSampleLocationTop = 2;
  894. vui_params->chromaSampleLocationBot = 2;
  895. break;
  896. case VIDEO_CS_2100_HLG:
  897. vui_params->colourPrimaries = 9;
  898. vui_params->transferCharacteristics = 18;
  899. vui_params->colourMatrix = 9;
  900. vui_params->chromaSampleLocationFlag = 1;
  901. vui_params->chromaSampleLocationTop = 2;
  902. vui_params->chromaSampleLocationBot = 2;
  903. }
  904. hevc_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0;
  905. if (astrcmpi(rc, "cbr") == 0) {
  906. hevc_config->outputBufferingPeriodSEI = 1;
  907. }
  908. hevc_config->outputPictureTimingSEI = 1;
  909. /* -------------------------- */
  910. /* profile */
  911. if (enc->in_format == VIDEO_FORMAT_I444) {
  912. config->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
  913. hevc_config->chromaFormatIDC = 3;
  914. } else if (astrcmpi(profile, "main10") == 0) {
  915. config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
  916. } else if (is_10_bit(enc)) {
  917. blog(LOG_WARNING, "[obs-nvenc] Forcing main10 for P010");
  918. config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
  919. } else {
  920. config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
  921. }
  922. if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) {
  923. return false;
  924. }
  925. return true;
  926. }
  927. static bool init_encoder_av1(struct nvenc_data *enc, obs_data_t *settings,
  928. int bf, bool compatibility)
  929. {
  930. int keyint_sec = (int)obs_data_get_int(settings, "keyint_sec");
  931. bool lossless;
  932. if (!init_encoder_base(enc, settings, bf, compatibility, &lossless)) {
  933. return false;
  934. }
  935. NV_ENC_CONFIG *config = &enc->config;
  936. NV_ENC_CONFIG_AV1 *av1_config = &config->encodeCodecConfig.av1Config;
  937. video_t *video = obs_encoder_video(enc->encoder);
  938. const struct video_output_info *voi = video_output_get_info(video);
  939. uint32_t gop_size =
  940. (keyint_sec) ? keyint_sec * voi->fps_num / voi->fps_den : 250;
  941. av1_config->idrPeriod = gop_size;
  942. av1_config->useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_DISABLED;
  943. av1_config->colorRange = (voi->range == VIDEO_RANGE_FULL);
  944. /* Enable CBR padding */
  945. if (config->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR)
  946. av1_config->enableBitstreamPadding = 1;
  947. #define PIXELCOUNT_4K (3840 * 2160)
  948. /* If size is 4K+, set tiles to 2 uniform columns. */
  949. if ((voi->width * voi->height) >= PIXELCOUNT_4K) {
  950. av1_config->enableCustomTileConfig = 0;
  951. av1_config->numTileColumns = 2;
  952. }
  953. switch (voi->colorspace) {
  954. case VIDEO_CS_601:
  955. av1_config->colorPrimaries = 6;
  956. av1_config->transferCharacteristics = 6;
  957. av1_config->matrixCoefficients = 6;
  958. break;
  959. case VIDEO_CS_DEFAULT:
  960. case VIDEO_CS_709:
  961. av1_config->colorPrimaries = 1;
  962. av1_config->transferCharacteristics = 1;
  963. av1_config->matrixCoefficients = 1;
  964. break;
  965. case VIDEO_CS_SRGB:
  966. av1_config->colorPrimaries = 1;
  967. av1_config->transferCharacteristics = 13;
  968. av1_config->matrixCoefficients = 1;
  969. break;
  970. case VIDEO_CS_2100_PQ:
  971. av1_config->colorPrimaries = 9;
  972. av1_config->transferCharacteristics = 16;
  973. av1_config->matrixCoefficients = 9;
  974. break;
  975. case VIDEO_CS_2100_HLG:
  976. av1_config->colorPrimaries = 9;
  977. av1_config->transferCharacteristics = 18;
  978. av1_config->matrixCoefficients = 9;
  979. }
  980. /* -------------------------- */
  981. /* profile */
  982. config->profileGUID = NV_ENC_AV1_PROFILE_MAIN_GUID;
  983. av1_config->tier = NV_ENC_TIER_AV1_0;
  984. av1_config->level = NV_ENC_LEVEL_AV1_AUTOSELECT;
  985. av1_config->chromaFormatIDC = 1;
  986. av1_config->pixelBitDepthMinus8 = is_10_bit(enc) ? 2 : 0;
  987. av1_config->inputPixelBitDepthMinus8 = av1_config->pixelBitDepthMinus8;
  988. av1_config->numFwdRefs = 1;
  989. av1_config->numBwdRefs = 1;
  990. av1_config->repeatSeqHdr = 1;
  991. if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) {
  992. return false;
  993. }
  994. return true;
  995. }
  996. static bool init_bitstreams(struct nvenc_data *enc)
  997. {
  998. da_reserve(enc->bitstreams, enc->buf_count);
  999. for (uint32_t i = 0; i < enc->buf_count; i++) {
  1000. struct nv_bitstream bitstream;
  1001. if (!nv_bitstream_init(enc, &bitstream)) {
  1002. return false;
  1003. }
  1004. da_push_back(enc->bitstreams, &bitstream);
  1005. }
  1006. return true;
  1007. }
  1008. #ifdef _WIN32
  1009. static bool init_textures(struct nvenc_data *enc)
  1010. {
  1011. da_reserve(enc->textures, enc->buf_count);
  1012. for (uint32_t i = 0; i < enc->buf_count; i++) {
  1013. struct nv_texture texture;
  1014. if (!nv_texture_init(enc, &texture)) {
  1015. return false;
  1016. }
  1017. da_push_back(enc->textures, &texture);
  1018. }
  1019. return true;
  1020. }
  1021. #endif
  1022. static bool init_cuda_surfaces(struct nvenc_data *enc)
  1023. {
  1024. switch (enc->in_format) {
  1025. case VIDEO_FORMAT_P010:
  1026. enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
  1027. break;
  1028. case VIDEO_FORMAT_I444:
  1029. enc->surface_format = NV_ENC_BUFFER_FORMAT_YUV444;
  1030. break;
  1031. default:
  1032. enc->surface_format = NV_ENC_BUFFER_FORMAT_NV12;
  1033. }
  1034. da_reserve(enc->surfaces, enc->buf_count);
  1035. CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
  1036. for (uint32_t i = 0; i < enc->buf_count; i++) {
  1037. struct nv_cuda_surface buf;
  1038. if (!nv_cuda_surface_init(enc, &buf)) {
  1039. return false;
  1040. }
  1041. da_push_back(enc->surfaces, &buf);
  1042. }
  1043. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1044. return true;
  1045. }
  1046. static bool init_cuda_ctx(struct nvenc_data *enc, obs_data_t *settings,
  1047. const bool texture)
  1048. {
  1049. int count, gpu;
  1050. CUdevice device;
  1051. bool cuda_override;
  1052. /* Allow CUDA device override for texture encoders (experimental) */
  1053. if (obs_data_has_user_value(settings, "cuda_device")) {
  1054. gpu = (int)obs_data_get_int(settings, "cuda_device");
  1055. cuda_override = true;
  1056. } else {
  1057. gpu = (int)obs_data_get_int(settings, "gpu");
  1058. cuda_override = false;
  1059. }
  1060. CU_FAILED(cu->cuInit(0))
  1061. CU_FAILED(cu->cuDeviceGetCount(&count))
  1062. if (!count) {
  1063. NV_FAIL("No CUDA devices found");
  1064. return false;
  1065. }
  1066. #ifdef _WIN32
  1067. CU_FAILED(cu->cuDeviceGet(&device, gpu))
  1068. #else
  1069. if (!texture || cuda_override) {
  1070. CU_FAILED(cu->cuDeviceGet(&device, gpu))
  1071. } else {
  1072. unsigned int ctx_count = 0;
  1073. CUdevice devices[2];
  1074. obs_enter_graphics();
  1075. CUresult res = cu->cuGLGetDevices(&ctx_count, devices, 2,
  1076. CU_GL_DEVICE_LIST_ALL);
  1077. obs_leave_graphics();
  1078. if (res != CUDA_SUCCESS || !ctx_count) {
  1079. /* CUDA_ERROR_INVALID_GRAPHICS_CONTEXT should be treated
  1080. * as non-fatal fallback (probably running on iGPU). */
  1081. if (res == 219) {
  1082. info("Not running on NVIDIA GPU, falling back to non-texture encoder");
  1083. } else {
  1084. const char *name, *desc;
  1085. if (cuda_get_error_desc(res, &name, &desc)) {
  1086. error("Failed to get a CUDA device for the current OpenGL context: %s: %s",
  1087. name, desc);
  1088. } else {
  1089. error("Failed to get a CUDA device for the current OpenGL context: %d",
  1090. res);
  1091. }
  1092. }
  1093. return false;
  1094. }
  1095. /* Documentation indicates this should only ever happen with SLI, i.e. never for OBS. */
  1096. if (ctx_count > 1) {
  1097. warn("Got more than one CUDA devices for OpenGL context, this is untested.");
  1098. }
  1099. device = devices[0];
  1100. debug("Loading up CUDA on device %u", device);
  1101. }
  1102. #endif
  1103. CU_FAILED(cu->cuCtxCreate(&enc->cu_ctx, 0, device))
  1104. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1105. return true;
  1106. }
  1107. static enum video_format get_preferred_format(enum video_format format)
  1108. {
  1109. switch (format) {
  1110. case VIDEO_FORMAT_I010:
  1111. case VIDEO_FORMAT_P010:
  1112. return VIDEO_FORMAT_P010;
  1113. case VIDEO_FORMAT_RGBA:
  1114. case VIDEO_FORMAT_BGRA:
  1115. case VIDEO_FORMAT_BGRX:
  1116. case VIDEO_FORMAT_I444:
  1117. return VIDEO_FORMAT_I444;
  1118. default:
  1119. return VIDEO_FORMAT_NV12;
  1120. }
  1121. }
  1122. static void nvenc_destroy(void *data);
  1123. static bool init_specific_encoder(struct nvenc_data *enc, obs_data_t *settings,
  1124. int bf, bool compatibility)
  1125. {
  1126. switch (enc->codec) {
  1127. case CODEC_HEVC:
  1128. return init_encoder_hevc(enc, settings, bf, compatibility);
  1129. case CODEC_H264:
  1130. return init_encoder_h264(enc, settings, bf, compatibility);
  1131. case CODEC_AV1:
  1132. return init_encoder_av1(enc, settings, bf, compatibility);
  1133. }
  1134. return false;
  1135. }
  1136. static bool init_encoder(struct nvenc_data *enc, enum codec_type codec,
  1137. obs_data_t *settings, obs_encoder_t *encoder)
  1138. {
  1139. UNUSED_PARAMETER(codec);
  1140. UNUSED_PARAMETER(encoder);
  1141. int bf = (int)obs_data_get_int(settings, "bf");
  1142. const bool support_10bit =
  1143. nv_get_cap(enc, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
  1144. const int bf_max = nv_get_cap(enc, NV_ENC_CAPS_NUM_MAX_BFRAMES);
  1145. video_t *video = obs_encoder_video(enc->encoder);
  1146. const struct video_output_info *voi = video_output_get_info(video);
  1147. enc->in_format = get_preferred_format(voi->format);
  1148. if (is_10_bit(enc) && !support_10bit) {
  1149. NV_FAIL(obs_module_text("NVENC.10bitUnsupported"));
  1150. return false;
  1151. }
  1152. switch (voi->format) {
  1153. case VIDEO_FORMAT_I010:
  1154. case VIDEO_FORMAT_P010:
  1155. break;
  1156. default:
  1157. switch (voi->colorspace) {
  1158. case VIDEO_CS_2100_PQ:
  1159. case VIDEO_CS_2100_HLG:
  1160. NV_FAIL(obs_module_text("NVENC.8bitUnsupportedHdr"));
  1161. return false;
  1162. default:
  1163. break;
  1164. }
  1165. }
  1166. if (bf > bf_max) {
  1167. blog(LOG_WARNING,
  1168. "[obs-nvenc] Max B-frames setting (%d) is more than encoder supports (%d).\n"
  1169. "Setting B-frames to %d",
  1170. bf, bf_max, bf_max);
  1171. bf = bf_max;
  1172. }
  1173. if (!init_specific_encoder(enc, settings, bf, false)) {
  1174. blog(LOG_WARNING, "[obs-nvenc] init_specific_encoder failed, "
  1175. "trying again with compatibility options");
  1176. nv.nvEncDestroyEncoder(enc->session);
  1177. enc->session = NULL;
  1178. if (!init_session(enc)) {
  1179. return false;
  1180. }
  1181. /* try without multipass and psycho aq */
  1182. if (!init_specific_encoder(enc, settings, bf, true)) {
  1183. return false;
  1184. }
  1185. }
  1186. return true;
  1187. }
  1188. static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings,
  1189. obs_encoder_t *encoder, bool texture)
  1190. {
  1191. struct nvenc_data *enc = bzalloc(sizeof(*enc));
  1192. enc->encoder = encoder;
  1193. enc->codec = codec;
  1194. enc->first_packet = true;
  1195. enc->fallback = !texture;
  1196. if (get_nvenc_ver() == COMPATIBILITY_VERSION) {
  1197. enc->needs_compat_ver = true;
  1198. }
  1199. NV_ENCODE_API_FUNCTION_LIST init = {NV_ENCODE_API_FUNCTION_LIST_VER};
  1200. switch (enc->codec) {
  1201. case CODEC_H264:
  1202. enc->codec_guid = NV_ENC_CODEC_H264_GUID;
  1203. break;
  1204. case CODEC_HEVC:
  1205. enc->codec_guid = NV_ENC_CODEC_HEVC_GUID;
  1206. break;
  1207. case CODEC_AV1:
  1208. enc->codec_guid = NV_ENC_CODEC_AV1_GUID;
  1209. break;
  1210. }
  1211. if (!init_nvenc(encoder)) {
  1212. goto fail;
  1213. }
  1214. if (
  1215. #ifdef _WIN32
  1216. !texture &&
  1217. #endif
  1218. !init_cuda(encoder)) {
  1219. goto fail;
  1220. }
  1221. if (NV_FAILED(nv_create_instance(&init))) {
  1222. goto fail;
  1223. }
  1224. #ifdef _WIN32
  1225. if (texture && !init_d3d11(enc, settings)) {
  1226. goto fail;
  1227. }
  1228. #endif
  1229. if (
  1230. #ifdef _WIN32
  1231. !texture &&
  1232. #endif
  1233. !init_cuda_ctx(enc, settings, texture)) {
  1234. goto fail;
  1235. }
  1236. if (!init_session(enc)) {
  1237. goto fail;
  1238. }
  1239. if (!init_encoder(enc, codec, settings, encoder)) {
  1240. goto fail;
  1241. }
  1242. if (!init_bitstreams(enc)) {
  1243. goto fail;
  1244. }
  1245. #ifdef _WIN32
  1246. if (texture && !init_textures(enc)) {
  1247. goto fail;
  1248. }
  1249. #endif
  1250. if (
  1251. #ifdef _WIN32
  1252. !texture &&
  1253. #endif
  1254. !init_cuda_surfaces(enc)) {
  1255. goto fail;
  1256. }
  1257. enc->codec = codec;
  1258. if (enc->cu_ctx)
  1259. cu->cuCtxPopCurrent(NULL);
  1260. return enc;
  1261. fail:
  1262. nvenc_destroy(enc);
  1263. return NULL;
  1264. }
  1265. static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings,
  1266. obs_encoder_t *encoder, bool texture)
  1267. {
  1268. /* this encoder requires shared textures, this cannot be used on a
  1269. * gpu other than the one OBS is currently running on. */
  1270. const int gpu = (int)obs_data_get_int(settings, "gpu");
  1271. if (gpu != 0 && texture) {
  1272. blog(LOG_INFO,
  1273. "[obs-nvenc] different GPU selected by user, falling back "
  1274. "to non-texture encoder");
  1275. goto reroute;
  1276. }
  1277. if (obs_encoder_scaling_enabled(encoder)) {
  1278. if (obs_encoder_gpu_scaling_enabled(encoder)) {
  1279. blog(LOG_INFO, "[obs-nvenc] GPU scaling enabled");
  1280. } else if (texture) {
  1281. blog(LOG_INFO,
  1282. "[obs-nvenc] CPU scaling enabled, falling back to"
  1283. " non-texture encoder");
  1284. goto reroute;
  1285. }
  1286. }
  1287. if (texture && !obs_p010_tex_active() && !obs_nv12_tex_active()) {
  1288. blog(LOG_INFO,
  1289. "[obs-nvenc] nv12/p010 not active, falling back to "
  1290. "non-texture encoder");
  1291. goto reroute;
  1292. }
  1293. struct nvenc_data *enc =
  1294. nvenc_create_internal(codec, settings, encoder, texture);
  1295. if (enc) {
  1296. return enc;
  1297. }
  1298. reroute:
  1299. if (!texture) {
  1300. blog(LOG_ERROR,
  1301. "Already in fallback encoder, can't fall back further!");
  1302. return NULL;
  1303. }
  1304. switch (codec) {
  1305. case CODEC_H264:
  1306. return obs_encoder_create_rerouted(encoder,
  1307. "obs_nvenc_h264_cuda");
  1308. case CODEC_HEVC:
  1309. return obs_encoder_create_rerouted(encoder,
  1310. "obs_nvenc_hevc_cuda");
  1311. case CODEC_AV1:
  1312. return obs_encoder_create_rerouted(encoder,
  1313. "obs_nvenc_av1_cuda");
  1314. }
  1315. return NULL;
  1316. }
  1317. static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
  1318. {
  1319. return nvenc_create_base(CODEC_H264, settings, encoder, true);
  1320. }
  1321. #ifdef ENABLE_HEVC
  1322. static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
  1323. {
  1324. return nvenc_create_base(CODEC_HEVC, settings, encoder, true);
  1325. }
  1326. #endif
  1327. static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder)
  1328. {
  1329. return nvenc_create_base(CODEC_AV1, settings, encoder, true);
  1330. }
  1331. static void *h264_nvenc_soft_create(obs_data_t *settings,
  1332. obs_encoder_t *encoder)
  1333. {
  1334. return nvenc_create_base(CODEC_H264, settings, encoder, false);
  1335. }
  1336. #ifdef ENABLE_HEVC
  1337. static void *hevc_nvenc_soft_create(obs_data_t *settings,
  1338. obs_encoder_t *encoder)
  1339. {
  1340. return nvenc_create_base(CODEC_HEVC, settings, encoder, false);
  1341. }
  1342. #endif
  1343. static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder)
  1344. {
  1345. return nvenc_create_base(CODEC_AV1, settings, encoder, false);
  1346. }
  1347. static bool get_encoded_packet(struct nvenc_data *enc, bool finalize);
  1348. static void nvenc_destroy(void *data)
  1349. {
  1350. struct nvenc_data *enc = data;
  1351. if (enc->cu_ctx)
  1352. cu->cuCtxPushCurrent(enc->cu_ctx);
  1353. if (enc->encode_started) {
  1354. uint32_t struct_ver = enc->needs_compat_ver
  1355. ? NV_ENC_PIC_PARAMS_COMPAT_VER
  1356. : NV_ENC_PIC_PARAMS_VER;
  1357. NV_ENC_PIC_PARAMS params = {struct_ver};
  1358. params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
  1359. nv.nvEncEncodePicture(enc->session, &params);
  1360. get_encoded_packet(enc, true);
  1361. }
  1362. #ifdef _WIN32
  1363. for (size_t i = 0; i < enc->textures.num; i++) {
  1364. nv_texture_free(enc, &enc->textures.array[i]);
  1365. }
  1366. #endif
  1367. for (size_t i = 0; i < enc->surfaces.num; i++) {
  1368. nv_cuda_surface_free(enc, &enc->surfaces.array[i]);
  1369. }
  1370. for (size_t i = 0; i < enc->bitstreams.num; i++) {
  1371. nv_bitstream_free(enc, &enc->bitstreams.array[i]);
  1372. }
  1373. if (enc->session) {
  1374. nv.nvEncDestroyEncoder(enc->session);
  1375. }
  1376. #ifdef _WIN32
  1377. for (size_t i = 0; i < enc->input_textures.num; i++) {
  1378. ID3D11Texture2D *tex = enc->input_textures.array[i].tex;
  1379. IDXGIKeyedMutex *km = enc->input_textures.array[i].km;
  1380. tex->lpVtbl->Release(tex);
  1381. km->lpVtbl->Release(km);
  1382. }
  1383. if (enc->context) {
  1384. enc->context->lpVtbl->Release(enc->context);
  1385. }
  1386. if (enc->device) {
  1387. enc->device->lpVtbl->Release(enc->device);
  1388. }
  1389. #else
  1390. for (size_t i = 0; i < enc->input_textures.num; i++) {
  1391. CUgraphicsResource res_y = enc->input_textures.array[i].res_y;
  1392. CUgraphicsResource res_uv = enc->input_textures.array[i].res_uv;
  1393. cu->cuGraphicsUnregisterResource(res_y);
  1394. cu->cuGraphicsUnregisterResource(res_uv);
  1395. }
  1396. #endif
  1397. if (enc->cu_ctx) {
  1398. cu->cuCtxPopCurrent(NULL);
  1399. cu->cuCtxDestroy(enc->cu_ctx);
  1400. }
  1401. bfree(enc->header);
  1402. bfree(enc->sei);
  1403. deque_free(&enc->dts_list);
  1404. da_free(enc->surfaces);
  1405. da_free(enc->input_textures);
  1406. da_free(enc->bitstreams);
  1407. #ifdef _WIN32
  1408. da_free(enc->textures);
  1409. #endif
  1410. da_free(enc->packet_data);
  1411. bfree(enc->roi_map);
  1412. bfree(enc);
  1413. }
  1414. #ifdef _WIN32
  1415. static ID3D11Texture2D *get_tex_from_handle(struct nvenc_data *enc,
  1416. uint32_t handle,
  1417. IDXGIKeyedMutex **km_out)
  1418. {
  1419. ID3D11Device *device = enc->device;
  1420. IDXGIKeyedMutex *km;
  1421. ID3D11Texture2D *input_tex;
  1422. HRESULT hr;
  1423. for (size_t i = 0; i < enc->input_textures.num; i++) {
  1424. struct handle_tex *ht = &enc->input_textures.array[i];
  1425. if (ht->handle == handle) {
  1426. *km_out = ht->km;
  1427. return ht->tex;
  1428. }
  1429. }
  1430. hr = device->lpVtbl->OpenSharedResource(device,
  1431. (HANDLE)(uintptr_t)handle,
  1432. &IID_ID3D11Texture2D,
  1433. &input_tex);
  1434. if (FAILED(hr)) {
  1435. error_hr("OpenSharedResource failed");
  1436. return NULL;
  1437. }
  1438. hr = input_tex->lpVtbl->QueryInterface(input_tex, &IID_IDXGIKeyedMutex,
  1439. &km);
  1440. if (FAILED(hr)) {
  1441. error_hr("QueryInterface(IDXGIKeyedMutex) failed");
  1442. input_tex->lpVtbl->Release(input_tex);
  1443. return NULL;
  1444. }
  1445. input_tex->lpVtbl->SetEvictionPriority(input_tex,
  1446. DXGI_RESOURCE_PRIORITY_MAXIMUM);
  1447. *km_out = km;
  1448. struct handle_tex new_ht = {handle, input_tex, km};
  1449. da_push_back(enc->input_textures, &new_ht);
  1450. return input_tex;
  1451. }
  1452. #endif
  1453. static bool get_encoded_packet(struct nvenc_data *enc, bool finalize)
  1454. {
  1455. void *s = enc->session;
  1456. da_resize(enc->packet_data, 0);
  1457. if (!enc->buffers_queued)
  1458. return true;
  1459. if (!finalize && enc->buffers_queued < enc->output_delay)
  1460. return true;
  1461. size_t count = finalize ? enc->buffers_queued : 1;
  1462. for (size_t i = 0; i < count; i++) {
  1463. size_t cur_bs_idx = enc->cur_bitstream;
  1464. struct nv_bitstream *bs = &enc->bitstreams.array[cur_bs_idx];
  1465. #ifdef _WIN32
  1466. struct nv_texture *nvtex =
  1467. enc->fallback ? NULL : &enc->textures.array[cur_bs_idx];
  1468. struct nv_cuda_surface *surf =
  1469. enc->fallback ? &enc->surfaces.array[cur_bs_idx] : NULL;
  1470. #else
  1471. struct nv_cuda_surface *surf = &enc->surfaces.array[cur_bs_idx];
  1472. #endif
  1473. /* ---------------- */
  1474. uint32_t struct_ver = enc->needs_compat_ver
  1475. ? NV_ENC_LOCK_BITSTREAM_COMPAT_VER
  1476. : NV_ENC_LOCK_BITSTREAM_VER;
  1477. NV_ENC_LOCK_BITSTREAM lock = {struct_ver};
  1478. lock.outputBitstream = bs->ptr;
  1479. lock.doNotWait = false;
  1480. if (NV_FAILED(nv.nvEncLockBitstream(s, &lock))) {
  1481. return false;
  1482. }
  1483. if (enc->first_packet) {
  1484. NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {0};
  1485. uint8_t buf[256];
  1486. uint32_t size = 0;
  1487. payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
  1488. payload.spsppsBuffer = buf;
  1489. payload.inBufferSize = sizeof(buf);
  1490. payload.outSPSPPSPayloadSize = &size;
  1491. nv.nvEncGetSequenceParams(s, &payload);
  1492. enc->header = bmemdup(buf, size);
  1493. enc->header_size = size;
  1494. enc->first_packet = false;
  1495. }
  1496. da_copy_array(enc->packet_data, lock.bitstreamBufferPtr,
  1497. lock.bitstreamSizeInBytes);
  1498. enc->packet_pts = (int64_t)lock.outputTimeStamp;
  1499. enc->packet_keyframe = lock.pictureType == NV_ENC_PIC_TYPE_IDR;
  1500. if (NV_FAILED(nv.nvEncUnlockBitstream(s, bs->ptr))) {
  1501. return false;
  1502. }
  1503. /* ---------------- */
  1504. #ifdef _WIN32
  1505. if (nvtex && nvtex->mapped_res) {
  1506. NVENCSTATUS err;
  1507. err = nv.nvEncUnmapInputResource(s, nvtex->mapped_res);
  1508. if (nv_failed(enc->encoder, err, __FUNCTION__,
  1509. "unmap")) {
  1510. return false;
  1511. }
  1512. nvtex->mapped_res = NULL;
  1513. }
  1514. #endif
  1515. /* ---------------- */
  1516. if (surf && surf->mapped_res) {
  1517. NVENCSTATUS err;
  1518. err = nv.nvEncUnmapInputResource(s, surf->mapped_res);
  1519. if (nv_failed(enc->encoder, err, __FUNCTION__,
  1520. "unmap")) {
  1521. return false;
  1522. }
  1523. surf->mapped_res = NULL;
  1524. }
  1525. /* ---------------- */
  1526. if (++enc->cur_bitstream == enc->buf_count)
  1527. enc->cur_bitstream = 0;
  1528. enc->buffers_queued--;
  1529. }
  1530. return true;
  1531. }
  1532. struct roi_params {
  1533. uint32_t mb_width;
  1534. uint32_t mb_height;
  1535. uint32_t mb_size;
  1536. bool av1;
  1537. int8_t *map;
  1538. };
  1539. static void roi_cb(void *param, struct obs_encoder_roi *roi)
  1540. {
  1541. const struct roi_params *rp = param;
  1542. int8_t qp_val;
  1543. /* AV1 has a larger QP range than HEVC/H.264 */
  1544. if (rp->av1) {
  1545. qp_val = (int8_t)(-128.0f * roi->priority);
  1546. } else {
  1547. qp_val = (int8_t)(-51.0f * roi->priority);
  1548. }
  1549. const uint32_t roi_left = roi->left / rp->mb_size;
  1550. const uint32_t roi_top = roi->top / rp->mb_size;
  1551. const uint32_t roi_right = (roi->right - 1) / rp->mb_size;
  1552. const uint32_t roi_bottom = (roi->bottom - 1) / rp->mb_size;
  1553. for (uint32_t mb_y = 0; mb_y < rp->mb_height; mb_y++) {
  1554. if (mb_y < roi_top || mb_y > roi_bottom)
  1555. continue;
  1556. for (uint32_t mb_x = 0; mb_x < rp->mb_width; mb_x++) {
  1557. if (mb_x < roi_left || mb_x > roi_right)
  1558. continue;
  1559. rp->map[mb_y * rp->mb_width + mb_x] = qp_val;
  1560. }
  1561. }
  1562. }
  1563. static void add_roi(struct nvenc_data *enc, NV_ENC_PIC_PARAMS *params)
  1564. {
  1565. const uint32_t increment = obs_encoder_get_roi_increment(enc->encoder);
  1566. if (enc->roi_map && enc->roi_increment == increment) {
  1567. params->qpDeltaMap = enc->roi_map;
  1568. params->qpDeltaMapSize = (uint32_t)enc->roi_map_size;
  1569. return;
  1570. }
  1571. uint32_t mb_size = 0;
  1572. switch (enc->codec) {
  1573. case CODEC_H264:
  1574. /* H.264 is always 16x16 */
  1575. mb_size = 16;
  1576. break;
  1577. case CODEC_HEVC:
  1578. /* HEVC can be 16x16, 32x32, or 64x64, but NVENC is always 32x32 */
  1579. mb_size = 32;
  1580. break;
  1581. case CODEC_AV1:
  1582. /* AV1 can be 64x64 or 128x128, but NVENC is always 64x64 */
  1583. mb_size = 64;
  1584. break;
  1585. }
  1586. const uint32_t mb_width = (enc->cx + mb_size - 1) / mb_size;
  1587. const uint32_t mb_height = (enc->cy + mb_size - 1) / mb_size;
  1588. const size_t map_size = mb_width * mb_height * sizeof(int8_t);
  1589. if (map_size != enc->roi_map_size) {
  1590. enc->roi_map = brealloc(enc->roi_map, map_size);
  1591. enc->roi_map_size = map_size;
  1592. }
  1593. memset(enc->roi_map, 0, enc->roi_map_size);
  1594. struct roi_params par = {
  1595. .mb_width = mb_width,
  1596. .mb_height = mb_height,
  1597. .mb_size = mb_size,
  1598. .av1 = enc->codec == CODEC_AV1,
  1599. .map = enc->roi_map,
  1600. };
  1601. obs_encoder_enum_roi(enc->encoder, roi_cb, &par);
  1602. enc->roi_increment = increment;
  1603. params->qpDeltaMap = enc->roi_map;
  1604. params->qpDeltaMapSize = (uint32_t)map_size;
  1605. }
  1606. static bool nvenc_encode_shared(struct nvenc_data *enc, struct nv_bitstream *bs,
  1607. void *pic, int64_t pts,
  1608. struct encoder_packet *packet,
  1609. bool *received_packet)
  1610. {
  1611. NV_ENC_PIC_PARAMS params = {0};
  1612. params.version = enc->needs_compat_ver ? NV_ENC_PIC_PARAMS_COMPAT_VER
  1613. : NV_ENC_PIC_PARAMS_VER;
  1614. params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
  1615. params.inputBuffer = pic;
  1616. params.inputTimeStamp = (uint64_t)pts;
  1617. params.inputWidth = enc->cx;
  1618. params.inputHeight = enc->cy;
  1619. params.inputPitch = enc->cx;
  1620. params.outputBitstream = bs->ptr;
  1621. if (enc->fallback) {
  1622. params.bufferFmt = enc->surface_format;
  1623. } else {
  1624. params.bufferFmt = obs_p010_tex_active()
  1625. ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  1626. : NV_ENC_BUFFER_FORMAT_NV12;
  1627. }
  1628. /* Add ROI map if enabled */
  1629. if (obs_encoder_has_roi(enc->encoder))
  1630. add_roi(enc, &params);
  1631. NVENCSTATUS err = nv.nvEncEncodePicture(enc->session, &params);
  1632. if (err != NV_ENC_SUCCESS && err != NV_ENC_ERR_NEED_MORE_INPUT) {
  1633. nv_failed(enc->encoder, err, __FUNCTION__,
  1634. "nvEncEncodePicture");
  1635. return false;
  1636. }
  1637. enc->encode_started = true;
  1638. enc->buffers_queued++;
  1639. if (++enc->next_bitstream == enc->buf_count) {
  1640. enc->next_bitstream = 0;
  1641. }
  1642. /* ------------------------------------ */
  1643. /* check for encoded packet and parse */
  1644. if (!get_encoded_packet(enc, false)) {
  1645. return false;
  1646. }
  1647. /* ------------------------------------ */
  1648. /* output encoded packet */
  1649. if (enc->packet_data.num) {
  1650. int64_t dts;
  1651. deque_pop_front(&enc->dts_list, &dts, sizeof(dts));
  1652. /* subtract bframe delay from dts */
  1653. dts -= (int64_t)enc->bframes * packet->timebase_num;
  1654. *received_packet = true;
  1655. packet->data = enc->packet_data.array;
  1656. packet->size = enc->packet_data.num;
  1657. packet->type = OBS_ENCODER_VIDEO;
  1658. packet->pts = enc->packet_pts;
  1659. packet->dts = dts;
  1660. packet->keyframe = enc->packet_keyframe;
  1661. } else {
  1662. *received_packet = false;
  1663. }
  1664. return true;
  1665. }
  1666. #ifdef _WIN32
  1667. static bool nvenc_encode_tex(void *data, uint32_t handle, int64_t pts,
  1668. uint64_t lock_key, uint64_t *next_key,
  1669. struct encoder_packet *packet,
  1670. bool *received_packet)
  1671. {
  1672. struct nvenc_data *enc = data;
  1673. ID3D11DeviceContext *context = enc->context;
  1674. ID3D11Texture2D *input_tex;
  1675. ID3D11Texture2D *output_tex;
  1676. IDXGIKeyedMutex *km;
  1677. struct nv_texture *nvtex;
  1678. struct nv_bitstream *bs;
  1679. if (handle == GS_INVALID_HANDLE) {
  1680. error("Encode failed: bad texture handle");
  1681. *next_key = lock_key;
  1682. return false;
  1683. }
  1684. bs = &enc->bitstreams.array[enc->next_bitstream];
  1685. nvtex = &enc->textures.array[enc->next_bitstream];
  1686. input_tex = get_tex_from_handle(enc, handle, &km);
  1687. output_tex = nvtex->tex;
  1688. if (!input_tex) {
  1689. *next_key = lock_key;
  1690. return false;
  1691. }
  1692. deque_push_back(&enc->dts_list, &pts, sizeof(pts));
  1693. /* ------------------------------------ */
  1694. /* copy to output tex */
  1695. km->lpVtbl->AcquireSync(km, lock_key, INFINITE);
  1696. context->lpVtbl->CopyResource(context, (ID3D11Resource *)output_tex,
  1697. (ID3D11Resource *)input_tex);
  1698. km->lpVtbl->ReleaseSync(km, *next_key);
  1699. /* ------------------------------------ */
  1700. /* map output tex so nvenc can use it */
  1701. NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
  1702. map.registeredResource = nvtex->res;
  1703. if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map))) {
  1704. return false;
  1705. }
  1706. nvtex->mapped_res = map.mappedResource;
  1707. /* ------------------------------------ */
  1708. /* do actual encode call */
  1709. return nvenc_encode_shared(enc, bs, nvtex->mapped_res, pts, packet,
  1710. received_packet);
  1711. }
  1712. #else
  1713. static inline bool get_res_for_tex_ids(struct nvenc_data *enc, GLuint tex_id_y,
  1714. GLuint tex_id_uv,
  1715. CUgraphicsResource *tex_y,
  1716. CUgraphicsResource *tex_uv)
  1717. {
  1718. bool success = true;
  1719. for (size_t idx = 0; idx < enc->input_textures.num; idx++) {
  1720. struct handle_tex *ht = &enc->input_textures.array[idx];
  1721. if (ht->tex_id != tex_id_y)
  1722. continue;
  1723. *tex_y = ht->res_y;
  1724. *tex_uv = ht->res_uv;
  1725. return success;
  1726. }
  1727. CU_CHECK(cu->cuGraphicsGLRegisterImage(
  1728. tex_y, tex_id_y, GL_TEXTURE_2D,
  1729. CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY))
  1730. CU_CHECK(cu->cuGraphicsGLRegisterImage(
  1731. tex_uv, tex_id_uv, GL_TEXTURE_2D,
  1732. CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY))
  1733. struct handle_tex ht = {tex_id_y, *tex_y, *tex_uv};
  1734. da_push_back(enc->input_textures, &ht);
  1735. unmap:
  1736. if (!success) {
  1737. cu->cuGraphicsUnregisterResource(*tex_y);
  1738. cu->cuGraphicsUnregisterResource(*tex_uv);
  1739. }
  1740. return success;
  1741. }
  1742. static inline bool copy_tex_cuda(struct nvenc_data *enc, const bool p010,
  1743. GLuint tex[2], struct nv_cuda_surface *surf)
  1744. {
  1745. bool success = true;
  1746. CUgraphicsResource mapped_tex[2] = {0};
  1747. CUarray mapped_cuda;
  1748. if (!get_res_for_tex_ids(enc, tex[0], tex[1], &mapped_tex[0],
  1749. &mapped_tex[1]))
  1750. return false;
  1751. CU_CHECK(cu->cuGraphicsMapResources(2, mapped_tex, 0))
  1752. CUDA_MEMCPY2D m = {0};
  1753. m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  1754. m.srcMemoryType = CU_MEMORYTYPE_ARRAY;
  1755. m.dstArray = surf->tex;
  1756. m.WidthInBytes = p010 ? enc->cx * 2 : enc->cx;
  1757. m.Height = enc->cy;
  1758. // Map and copy Y texture
  1759. CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda,
  1760. mapped_tex[0], 0, 0));
  1761. m.srcArray = mapped_cuda;
  1762. CU_CHECK(cu->cuMemcpy2D(&m))
  1763. // Map and copy UV texture
  1764. CU_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&mapped_cuda,
  1765. mapped_tex[1], 0, 0))
  1766. m.srcArray = mapped_cuda;
  1767. m.dstY += enc->cy;
  1768. m.Height = enc->cy / 2;
  1769. CU_CHECK(cu->cuMemcpy2D(&m))
  1770. unmap:
  1771. cu->cuGraphicsUnmapResources(2, mapped_tex, 0);
  1772. return success;
  1773. }
  1774. static bool nvenc_encode_tex2(void *data, struct encoder_texture *tex,
  1775. int64_t pts, uint64_t lock_key,
  1776. uint64_t *next_key, struct encoder_packet *packet,
  1777. bool *received_packet)
  1778. {
  1779. struct nvenc_data *enc = data;
  1780. struct nv_cuda_surface *surf;
  1781. struct nv_bitstream *bs;
  1782. const bool p010 = obs_p010_tex_active();
  1783. GLuint input_tex[2];
  1784. if (tex == NULL || tex->tex[0] == NULL) {
  1785. error("Encode failed: bad texture handle");
  1786. *next_key = lock_key;
  1787. return false;
  1788. }
  1789. bs = &enc->bitstreams.array[enc->next_bitstream];
  1790. surf = &enc->surfaces.array[enc->next_bitstream];
  1791. deque_push_back(&enc->dts_list, &pts, sizeof(pts));
  1792. /* ------------------------------------ */
  1793. /* copy to CUDA data */
  1794. CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
  1795. obs_enter_graphics();
  1796. input_tex[0] = *(GLuint *)gs_texture_get_obj(tex->tex[0]);
  1797. input_tex[1] = *(GLuint *)gs_texture_get_obj(tex->tex[1]);
  1798. bool success = copy_tex_cuda(enc, p010, input_tex, surf);
  1799. obs_leave_graphics();
  1800. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1801. if (!success)
  1802. return false;
  1803. /* ------------------------------------ */
  1804. /* map output tex so nvenc can use it */
  1805. NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
  1806. map.registeredResource = surf->res;
  1807. map.mappedBufferFmt = p010 ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT
  1808. : NV_ENC_BUFFER_FORMAT_NV12;
  1809. if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map)))
  1810. return false;
  1811. surf->mapped_res = map.mappedResource;
  1812. /* ------------------------------------ */
  1813. /* do actual encode call */
  1814. return nvenc_encode_shared(enc, bs, surf->mapped_res, pts, packet,
  1815. received_packet);
  1816. }
  1817. #endif
  1818. static inline bool nvenc_copy_frame(struct nvenc_data *enc,
  1819. struct encoder_frame *frame,
  1820. struct nv_cuda_surface *surf)
  1821. {
  1822. bool success = true;
  1823. size_t height = enc->cy;
  1824. size_t width = enc->cx;
  1825. CUDA_MEMCPY2D m = {0};
  1826. m.srcMemoryType = CU_MEMORYTYPE_HOST;
  1827. m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  1828. m.dstArray = surf->tex;
  1829. m.WidthInBytes = width;
  1830. m.Height = height;
  1831. CU_FAILED(cu->cuCtxPushCurrent(enc->cu_ctx))
  1832. if (enc->surface_format == NV_ENC_BUFFER_FORMAT_NV12) {
  1833. /* Page-locks the host memory so that it can be DMAd directly
  1834. * rather than CUDA doing an internal copy to page-locked
  1835. * memory before actually DMA-ing to the GPU. */
  1836. CU_CHECK(cu->cuMemHostRegister(frame->data[0],
  1837. frame->linesize[0] * height, 0))
  1838. CU_CHECK(cu->cuMemHostRegister(
  1839. frame->data[1], frame->linesize[1] * height / 2, 0))
  1840. m.srcPitch = frame->linesize[0];
  1841. m.srcHost = frame->data[0];
  1842. CU_FAILED(cu->cuMemcpy2D(&m))
  1843. m.srcPitch = frame->linesize[1];
  1844. m.srcHost = frame->data[1];
  1845. m.dstY += height;
  1846. m.Height /= 2;
  1847. CU_FAILED(cu->cuMemcpy2D(&m))
  1848. } else if (enc->surface_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) {
  1849. CU_CHECK(cu->cuMemHostRegister(frame->data[0],
  1850. frame->linesize[0] * height, 0))
  1851. CU_CHECK(cu->cuMemHostRegister(
  1852. frame->data[1], frame->linesize[1] * height / 2, 0))
  1853. // P010 lines are double the size (16 bit per pixel)
  1854. m.WidthInBytes *= 2;
  1855. m.srcPitch = frame->linesize[0];
  1856. m.srcHost = frame->data[0];
  1857. CU_FAILED(cu->cuMemcpy2D(&m))
  1858. m.srcPitch = frame->linesize[1];
  1859. m.srcHost = frame->data[1];
  1860. m.dstY += height;
  1861. m.Height /= 2;
  1862. CU_FAILED(cu->cuMemcpy2D(&m))
  1863. } else { // I444
  1864. CU_CHECK(cu->cuMemHostRegister(frame->data[0],
  1865. frame->linesize[0] * height, 0))
  1866. CU_CHECK(cu->cuMemHostRegister(frame->data[1],
  1867. frame->linesize[1] * height, 0))
  1868. CU_CHECK(cu->cuMemHostRegister(frame->data[2],
  1869. frame->linesize[2] * height, 0))
  1870. m.srcPitch = frame->linesize[0];
  1871. m.srcHost = frame->data[0];
  1872. CU_FAILED(cu->cuMemcpy2D(&m))
  1873. m.srcPitch = frame->linesize[1];
  1874. m.srcHost = frame->data[1];
  1875. m.dstY += height;
  1876. CU_FAILED(cu->cuMemcpy2D(&m))
  1877. m.srcPitch = frame->linesize[2];
  1878. m.srcHost = frame->data[2];
  1879. m.dstY += height;
  1880. CU_FAILED(cu->cuMemcpy2D(&m))
  1881. }
  1882. unmap:
  1883. if (frame->data[0])
  1884. cu->cuMemHostUnregister(frame->data[0]);
  1885. if (frame->data[1])
  1886. cu->cuMemHostUnregister(frame->data[1]);
  1887. if (frame->data[2])
  1888. cu->cuMemHostUnregister(frame->data[2]);
  1889. CU_FAILED(cu->cuCtxPopCurrent(NULL))
  1890. return success;
  1891. }
  1892. static bool nvenc_encode_soft(void *data, struct encoder_frame *frame,
  1893. struct encoder_packet *packet,
  1894. bool *received_packet)
  1895. {
  1896. struct nvenc_data *enc = data;
  1897. struct nv_cuda_surface *surf;
  1898. struct nv_bitstream *bs;
  1899. bs = &enc->bitstreams.array[enc->next_bitstream];
  1900. surf = &enc->surfaces.array[enc->next_bitstream];
  1901. deque_push_back(&enc->dts_list, &frame->pts, sizeof(frame->pts));
  1902. /* ------------------------------------ */
  1903. /* copy to CUDA surface */
  1904. if (!nvenc_copy_frame(enc, frame, surf))
  1905. return false;
  1906. /* ------------------------------------ */
  1907. /* map output tex so nvenc can use it */
  1908. NV_ENC_MAP_INPUT_RESOURCE map = {NV_ENC_MAP_INPUT_RESOURCE_VER};
  1909. map.registeredResource = surf->res;
  1910. map.mappedBufferFmt = enc->surface_format;
  1911. if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &map)))
  1912. return false;
  1913. surf->mapped_res = map.mappedResource;
  1914. /* ------------------------------------ */
  1915. /* do actual encode call */
  1916. return nvenc_encode_shared(enc, bs, surf->mapped_res, frame->pts,
  1917. packet, received_packet);
  1918. }
  1919. static void nvenc_soft_video_info(void *data, struct video_scale_info *info)
  1920. {
  1921. struct nvenc_data *enc = data;
  1922. info->format = enc->in_format;
  1923. }
  1924. extern void h264_nvenc_defaults(obs_data_t *settings);
  1925. extern obs_properties_t *h264_nvenc_properties(void *unused);
  1926. #ifdef ENABLE_HEVC
  1927. extern void hevc_nvenc_defaults(obs_data_t *settings);
  1928. extern obs_properties_t *hevc_nvenc_properties(void *unused);
  1929. #endif
  1930. extern obs_properties_t *av1_nvenc_properties(void *unused);
  1931. extern void av1_nvenc_defaults(obs_data_t *settings);
  1932. static bool nvenc_extra_data(void *data, uint8_t **header, size_t *size)
  1933. {
  1934. struct nvenc_data *enc = data;
  1935. if (!enc->header) {
  1936. return false;
  1937. }
  1938. *header = enc->header;
  1939. *size = enc->header_size;
  1940. return true;
  1941. }
  1942. static bool nvenc_sei_data(void *data, uint8_t **sei, size_t *size)
  1943. {
  1944. struct nvenc_data *enc = data;
  1945. if (!enc->sei) {
  1946. return false;
  1947. }
  1948. *sei = enc->sei;
  1949. *size = enc->sei_size;
  1950. return true;
  1951. }
  1952. struct obs_encoder_info h264_nvenc_info = {
  1953. .id = "jim_nvenc",
  1954. .codec = "h264",
  1955. .type = OBS_ENCODER_VIDEO,
  1956. .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
  1957. OBS_ENCODER_CAP_ROI,
  1958. .get_name = h264_nvenc_get_name,
  1959. .create = h264_nvenc_create,
  1960. .destroy = nvenc_destroy,
  1961. .update = nvenc_update,
  1962. #ifdef _WIN32
  1963. .encode_texture = nvenc_encode_tex,
  1964. #else
  1965. .encode_texture2 = nvenc_encode_tex2,
  1966. #endif
  1967. .get_defaults = h264_nvenc_defaults,
  1968. .get_properties = h264_nvenc_properties,
  1969. .get_extra_data = nvenc_extra_data,
  1970. .get_sei_data = nvenc_sei_data,
  1971. };
  1972. #ifdef ENABLE_HEVC
  1973. struct obs_encoder_info hevc_nvenc_info = {
  1974. .id = "jim_hevc_nvenc",
  1975. .codec = "hevc",
  1976. .type = OBS_ENCODER_VIDEO,
  1977. .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
  1978. OBS_ENCODER_CAP_ROI,
  1979. .get_name = hevc_nvenc_get_name,
  1980. .create = hevc_nvenc_create,
  1981. .destroy = nvenc_destroy,
  1982. .update = nvenc_update,
  1983. #ifdef _WIN32
  1984. .encode_texture = nvenc_encode_tex,
  1985. #else
  1986. .encode_texture2 = nvenc_encode_tex2,
  1987. #endif
  1988. .get_defaults = hevc_nvenc_defaults,
  1989. .get_properties = hevc_nvenc_properties,
  1990. .get_extra_data = nvenc_extra_data,
  1991. .get_sei_data = nvenc_sei_data,
  1992. };
  1993. #endif
  1994. struct obs_encoder_info av1_nvenc_info = {
  1995. .id = "jim_av1_nvenc",
  1996. .codec = "av1",
  1997. .type = OBS_ENCODER_VIDEO,
  1998. .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE |
  1999. OBS_ENCODER_CAP_ROI,
  2000. .get_name = av1_nvenc_get_name,
  2001. .create = av1_nvenc_create,
  2002. .destroy = nvenc_destroy,
  2003. .update = nvenc_update,
  2004. #ifdef _WIN32
  2005. .encode_texture = nvenc_encode_tex,
  2006. #else
  2007. .encode_texture2 = nvenc_encode_tex2,
  2008. #endif
  2009. .get_defaults = av1_nvenc_defaults,
  2010. .get_properties = av1_nvenc_properties,
  2011. .get_extra_data = nvenc_extra_data,
  2012. };
  2013. struct obs_encoder_info h264_nvenc_soft_info = {
  2014. .id = "obs_nvenc_h264_cuda",
  2015. .codec = "h264",
  2016. .type = OBS_ENCODER_VIDEO,
  2017. .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
  2018. OBS_ENCODER_CAP_INTERNAL,
  2019. .get_name = h264_nvenc_soft_get_name,
  2020. .create = h264_nvenc_soft_create,
  2021. .destroy = nvenc_destroy,
  2022. .update = nvenc_update,
  2023. .encode = nvenc_encode_soft,
  2024. .get_defaults = h264_nvenc_defaults,
  2025. .get_properties = h264_nvenc_properties,
  2026. .get_extra_data = nvenc_extra_data,
  2027. .get_sei_data = nvenc_sei_data,
  2028. .get_video_info = nvenc_soft_video_info,
  2029. };
  2030. #ifdef ENABLE_HEVC
  2031. struct obs_encoder_info hevc_nvenc_soft_info = {
  2032. .id = "obs_nvenc_hevc_cuda",
  2033. .codec = "hevc",
  2034. .type = OBS_ENCODER_VIDEO,
  2035. .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
  2036. OBS_ENCODER_CAP_INTERNAL,
  2037. .get_name = hevc_nvenc_soft_get_name,
  2038. .create = hevc_nvenc_soft_create,
  2039. .destroy = nvenc_destroy,
  2040. .update = nvenc_update,
  2041. .encode = nvenc_encode_soft,
  2042. .get_defaults = hevc_nvenc_defaults,
  2043. .get_properties = hevc_nvenc_properties,
  2044. .get_extra_data = nvenc_extra_data,
  2045. .get_sei_data = nvenc_sei_data,
  2046. .get_video_info = nvenc_soft_video_info,
  2047. };
  2048. #endif
  2049. struct obs_encoder_info av1_nvenc_soft_info = {
  2050. .id = "obs_nvenc_av1_cuda",
  2051. .codec = "av1",
  2052. .type = OBS_ENCODER_VIDEO,
  2053. .caps = OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI |
  2054. OBS_ENCODER_CAP_INTERNAL,
  2055. .get_name = av1_nvenc_soft_get_name,
  2056. .create = av1_nvenc_soft_create,
  2057. .destroy = nvenc_destroy,
  2058. .update = nvenc_update,
  2059. .encode = nvenc_encode_soft,
  2060. .get_defaults = av1_nvenc_defaults,
  2061. .get_properties = av1_nvenc_properties,
  2062. .get_extra_data = nvenc_extra_data,
  2063. .get_video_info = nvenc_soft_video_info,
  2064. };