1
0

common_utils_linux.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. #include "common_utils.h"
  2. #include <time.h>
  3. #include <cpuid.h>
  4. #include <va/va_drm.h>
  5. #include <va/va_drmcommon.h>
  6. #include <va/va_str.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <unistd.h>
  10. #include <fcntl.h>
  11. #include <string.h>
  12. #include <dirent.h>
  13. #include <obs.h>
  14. #include <obs-encoder.h>
  15. #include <obs-nix-platform.h>
  16. #include <graphics/graphics.h>
  17. #include <util/c99defs.h>
  18. #include <util/dstr.h>
  19. #include <util/bmem.h>
  // Per-codec device paths recorded by check_adapter(). Initialize() falls back
  // to them when it is called without a frame allocator, which works around the
  // VPL dispatcher not setting a VADisplay for the MSDK runtime.
  static const char *default_h264_device = nullptr;
  static const char *default_hevc_device = nullptr;
  static const char *default_av1_device = nullptr;
  25. struct linux_data {
  26. int fd;
  27. VADisplay vaDisplay;
  28. };
  // Handle type used whenever the VADisplay is set on / fetched from a session.
  #define DEVICE_MGR_TYPE MFX_HANDLE_VA_DISPLAY
  // Capacity of the on-stack VASurfaceID scratch arrays used by the allocator.
  // Observed requests reach roughly 72 surfaces for 1440p@120 AV1, so this
  // limit may be hit in practice.
  constexpr int32_t MAX_ALLOCABLE_SURFACES = 128;
  33. struct surface_info {
  34. VASurfaceID id;
  35. int32_t width, height;
  36. gs_texture_t *tex_y;
  37. gs_texture_t *tex_uv;
  38. };
  39. mfxStatus simple_alloc(mfxHDL pthis, mfxFrameAllocRequest *request, mfxFrameAllocResponse *response)
  40. {
  41. if (request->Type & (MFX_MEMTYPE_SYSTEM_MEMORY | MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET))
  42. return MFX_ERR_UNSUPPORTED;
  43. response->mids = (mfxMemId *)nullptr;
  44. response->NumFrameActual = 0;
  45. mfxSession *session = (mfxSession *)pthis;
  46. VADisplay display;
  47. mfxStatus sts = MFXVideoCORE_GetHandle(*session, DEVICE_MGR_TYPE, &display);
  48. MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
  49. // https://ffmpeg.org/doxygen/5.1/hwcontext__vaapi_8c_source.html#l00109
  50. // though earlier comments suggest the driver ignores rt_format so we could choose whatever.
  51. unsigned int rt_format;
  52. int32_t pix_format;
  53. switch (request->Info.FourCC) {
  54. case MFX_FOURCC_P010:
  55. rt_format = VA_RT_FORMAT_YUV420_10;
  56. pix_format = VA_FOURCC_P010;
  57. break;
  58. case MFX_FOURCC_NV12:
  59. default:
  60. rt_format = VA_RT_FORMAT_YUV420;
  61. pix_format = VA_FOURCC_NV12;
  62. break;
  63. }
  64. int num_attrs = 2;
  65. VASurfaceAttrib attrs[2] = {{
  66. .type = VASurfaceAttribMemoryType,
  67. .flags = VA_SURFACE_ATTRIB_SETTABLE,
  68. .value =
  69. {
  70. .type = VAGenericValueTypeInteger,
  71. .value = {.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2},
  72. },
  73. },
  74. {
  75. .type = VASurfaceAttribPixelFormat,
  76. .flags = VA_SURFACE_ATTRIB_SETTABLE,
  77. .value =
  78. {
  79. .type = VAGenericValueTypeInteger,
  80. .value = {.i = (int)pix_format},
  81. },
  82. }};
  83. unsigned int num_surfaces = request->NumFrameSuggested;
  84. VASurfaceID temp_surfaces[MAX_ALLOCABLE_SURFACES] = {0};
  85. assert(num_surfaces < MAX_ALLOCABLE_SURFACES);
  86. VAStatus vasts;
  87. if ((vasts = vaCreateSurfaces(display, rt_format, request->Info.Width, request->Info.Height, temp_surfaces,
  88. num_surfaces, attrs, num_attrs)) != VA_STATUS_SUCCESS) {
  89. blog(LOG_ERROR, "failed to create surfaces: %d", vasts);
  90. return MFX_ERR_MEMORY_ALLOC;
  91. }
  92. // Follow the FFmpeg trick and stuff our pointer at the end.
  93. mfxMemId *mids = (mfxMemId *)bmalloc(sizeof(mfxMemId) * num_surfaces + 1);
  94. struct surface_info *surfaces = (struct surface_info *)bmalloc(sizeof(struct surface_info) * num_surfaces);
  95. mids[num_surfaces] = surfaces; // stuff it
  96. for (uint64_t i = 0; i < num_surfaces; i++) {
  97. surfaces[i].id = temp_surfaces[i];
  98. surfaces[i].width = request->Info.Width;
  99. surfaces[i].height = request->Info.Height;
  100. mids[i] = &surfaces[i];
  101. VADRMPRIMESurfaceDescriptor surfDesc = {0};
  102. if (vaExportSurfaceHandle(display, surfaces[i].id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
  103. VA_EXPORT_SURFACE_READ_WRITE, &surfDesc) != VA_STATUS_SUCCESS)
  104. return MFX_ERR_MEMORY_ALLOC;
  105. obs_enter_graphics();
  106. // TODO: P010 format support
  107. assert(surfDesc.num_objects == 1);
  108. int fds[4] = {0};
  109. uint32_t strides[4] = {0};
  110. uint32_t offsets[4] = {0};
  111. uint64_t modifiers[4] = {0};
  112. fds[0] = surfDesc.objects[surfDesc.layers[0].object_index[0]].fd;
  113. fds[1] = surfDesc.objects[surfDesc.layers[1].object_index[0]].fd;
  114. strides[0] = surfDesc.layers[0].pitch[0];
  115. strides[1] = surfDesc.layers[1].pitch[0];
  116. offsets[0] = surfDesc.layers[0].offset[0];
  117. offsets[1] = surfDesc.layers[1].offset[0];
  118. modifiers[0] = surfDesc.objects[surfDesc.layers[0].object_index[0]].drm_format_modifier;
  119. modifiers[1] = surfDesc.objects[surfDesc.layers[1].object_index[0]].drm_format_modifier;
  120. surfaces[i].tex_y = gs_texture_create_from_dmabuf(surfDesc.width, surfDesc.height,
  121. surfDesc.layers[0].drm_format, GS_R8, 1, fds, strides,
  122. offsets, modifiers);
  123. surfaces[i].tex_uv = gs_texture_create_from_dmabuf(surfDesc.width / 2, surfDesc.height,
  124. surfDesc.layers[1].drm_format, GS_R8G8, 1, fds + 1,
  125. strides + 1, offsets + 1, modifiers + 1);
  126. obs_leave_graphics();
  127. close(surfDesc.objects[surfDesc.layers[0].object_index[0]].fd);
  128. if (!surfaces[i].tex_y || !surfaces[i].tex_uv) {
  129. return MFX_ERR_MEMORY_ALLOC;
  130. }
  131. }
  132. response->mids = (mfxMemId *)mids;
  133. response->NumFrameActual = num_surfaces;
  134. return MFX_ERR_NONE;
  135. }
  136. mfxStatus simple_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
  137. {
  138. UNUSED_PARAMETER(pthis);
  139. UNUSED_PARAMETER(mid);
  140. UNUSED_PARAMETER(ptr);
  141. return MFX_ERR_UNSUPPORTED;
  142. }
  143. mfxStatus simple_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
  144. {
  145. UNUSED_PARAMETER(pthis);
  146. UNUSED_PARAMETER(mid);
  147. UNUSED_PARAMETER(ptr);
  148. return MFX_ERR_UNSUPPORTED;
  149. }
  150. mfxStatus simple_gethdl(mfxHDL pthis, mfxMemId mid, mfxHDL *handle)
  151. {
  152. UNUSED_PARAMETER(pthis);
  153. if (NULL == handle)
  154. return MFX_ERR_INVALID_HANDLE;
  155. // Seemingly undocumented, but Pair format defined by
  156. // oneVPL-intel-gpu-intel-onevpl-23.1.0/_studio/mfx_lib/encode_hw/av1/linux/base/av1ehw_base_va_packer_lin.cpp
  157. // https://github.com/intel/vpl-gpu-rt/blob/4170dd9fa1ea319dda81b6189616ecc9b178a321/_studio/shared/src/libmfx_core_vaapi.cpp#L1464
  158. mfxHDLPair *pPair = (mfxHDLPair *)handle;
  159. // first must be a pointer to a VASurfaceID and will be dereferenced by
  160. // the driver.
  161. pPair->first = &((struct surface_info *)mid)->id;
  162. pPair->second = 0;
  163. return MFX_ERR_NONE;
  164. }
  165. mfxStatus simple_free(mfxHDL pthis, mfxFrameAllocResponse *response)
  166. {
  167. if (response == nullptr)
  168. return MFX_ERR_NULL_PTR;
  169. if (response->mids == nullptr || response->NumFrameActual == 0)
  170. return MFX_ERR_NONE;
  171. mfxSession *session = (mfxSession *)pthis;
  172. VADisplay display;
  173. mfxStatus sts = MFXVideoCORE_GetHandle(*session, DEVICE_MGR_TYPE, &display);
  174. MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
  175. struct surface_info *surfs = (struct surface_info *)response->mids[response->NumFrameActual];
  176. VASurfaceID temp_surfaces[MAX_ALLOCABLE_SURFACES] = {0};
  177. obs_enter_graphics();
  178. for (int i = 0; i < response->NumFrameActual; i++) {
  179. temp_surfaces[i] = *(VASurfaceID *)response->mids[i];
  180. gs_texture_destroy(surfs[i].tex_y);
  181. gs_texture_destroy(surfs[i].tex_uv);
  182. }
  183. obs_leave_graphics();
  184. bfree(surfs);
  185. bfree(response->mids);
  186. if (vaDestroySurfaces(display, temp_surfaces, response->NumFrameActual) != VA_STATUS_SUCCESS)
  187. return MFX_ERR_MEMORY_ALLOC;
  188. return MFX_ERR_NONE;
  189. }
  190. mfxStatus simple_copytex(mfxHDL pthis, mfxMemId mid, void *tex, mfxU64 lock_key, mfxU64 *next_key)
  191. {
  192. UNUSED_PARAMETER(lock_key);
  193. UNUSED_PARAMETER(next_key);
  194. profile_start("copy_tex");
  195. mfxSession *session = (mfxSession *)pthis;
  196. VADisplay display;
  197. mfxStatus sts = MFXVideoCORE_GetHandle(*session, DEVICE_MGR_TYPE, &display);
  198. MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
  199. struct encoder_texture *ptex = (struct encoder_texture *)tex;
  200. struct surface_info *surf = (struct surface_info *)mid;
  201. obs_enter_graphics();
  202. gs_copy_texture(surf->tex_y, ptex->tex[0]);
  203. gs_copy_texture(surf->tex_uv, ptex->tex[1]);
  204. obs_leave_graphics();
  205. profile_end("copy_tex");
  206. return MFX_ERR_NONE;
  207. }
  // Search request passed to get_drm_device() through its `param` pointer.
  struct get_drm_device_params {
      const char **out_path; // receives the node string of the matching adapter
      uint32_t idx;          // adapter index being looked for
  };

  // Adapter enumeration callback: when the enumerated index equals the
  // requested one, stores `node` (the pointer itself, not a copy) through
  // out_path and returns false to stop the enumeration; returns true otherwise
  // so the enumeration continues.
  bool get_drm_device(void *param, const char *node, uint32_t idx)
  {
      struct get_drm_device_params *wanted = (struct get_drm_device_params *)param;

      if (idx != wanted->idx)
          return true;

      *wanted->out_path = node;
      return false;
  }
  221. // Initialize Intel VPL Session, device/display and memory manager
  222. mfxStatus Initialize(mfxVersion ver, mfxSession *pSession, mfxFrameAllocator *pmfxAllocator, mfxHDL *deviceHandle,
  223. bool bCreateSharedHandles, enum qsv_codec codec, void **data)
  224. {
  225. UNUSED_PARAMETER(ver);
  226. UNUSED_PARAMETER(deviceHandle);
  227. UNUSED_PARAMETER(bCreateSharedHandles);
  228. mfxStatus sts = MFX_ERR_NONE;
  229. mfxVariant impl;
  230. // Initialize Intel VPL Session
  231. mfxLoader loader = MFXLoad();
  232. mfxConfig cfg = MFXCreateConfig(loader);
  233. impl.Type = MFX_VARIANT_TYPE_U32;
  234. impl.Data.U32 = MFX_IMPL_TYPE_HARDWARE;
  235. MFXSetConfigFilterProperty(cfg, (const mfxU8 *)"mfxImplDescription.Impl", impl);
  236. impl.Type = MFX_VARIANT_TYPE_U32;
  237. impl.Data.U32 = INTEL_VENDOR_ID;
  238. MFXSetConfigFilterProperty(cfg, (const mfxU8 *)"mfxImplDescription.VendorID", impl);
  239. impl.Type = MFX_VARIANT_TYPE_U32;
  240. impl.Data.U32 = MFX_ACCEL_MODE_VIA_VAAPI_DRM_RENDER_NODE;
  241. MFXSetConfigFilterProperty(cfg, (const mfxU8 *)"mfxImplDescription.AccelerationMode", impl);
  242. const char *device_path = NULL;
  243. int fd = -1;
  244. if (pmfxAllocator) {
  245. obs_video_info ovi;
  246. obs_get_video_info(&ovi);
  247. struct get_drm_device_params params = {&device_path, (uint32_t)ovi.adapter};
  248. obs_enter_graphics();
  249. gs_enum_adapters(get_drm_device, &params);
  250. obs_leave_graphics();
  251. } else {
  252. if (codec == QSV_CODEC_AVC && default_h264_device)
  253. device_path = default_h264_device;
  254. else if (codec == QSV_CODEC_HEVC && default_hevc_device)
  255. device_path = default_hevc_device;
  256. else if (codec == QSV_CODEC_AV1 && default_av1_device)
  257. device_path = default_av1_device;
  258. }
  259. fd = open(device_path, O_RDWR);
  260. if (fd < 0) {
  261. blog(LOG_ERROR, "Failed to open device '%s'", device_path);
  262. return MFX_ERR_DEVICE_FAILED;
  263. }
  264. mfxHDL vaDisplay = vaGetDisplayDRM(fd);
  265. if (!vaDisplay) {
  266. return MFX_ERR_DEVICE_FAILED;
  267. }
  268. sts = MFXCreateSession(loader, 0, pSession);
  269. if (MFX_ERR_NONE > sts) {
  270. blog(LOG_ERROR, "Failed to initialize MFX");
  271. MSDK_PRINT_RET_MSG(sts);
  272. close(fd);
  273. return sts;
  274. }
  275. // VPL expects the VADisplay to be initialized.
  276. int major;
  277. int minor;
  278. if (vaInitialize(vaDisplay, &major, &minor) != VA_STATUS_SUCCESS) {
  279. blog(LOG_ERROR, "Failed to initialize VA-API");
  280. vaTerminate(vaDisplay);
  281. close(fd);
  282. return MFX_ERR_DEVICE_FAILED;
  283. }
  284. sts = MFXVideoCORE_SetHandle(*pSession, DEVICE_MGR_TYPE, vaDisplay);
  285. MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
  286. if (pmfxAllocator) {
  287. // Allow us to access the session during allocation.
  288. pmfxAllocator->pthis = pSession;
  289. pmfxAllocator->Alloc = simple_alloc;
  290. pmfxAllocator->Free = simple_free;
  291. pmfxAllocator->Lock = simple_lock;
  292. pmfxAllocator->Unlock = simple_unlock;
  293. pmfxAllocator->GetHDL = simple_gethdl;
  294. sts = MFXVideoCORE_SetFrameAllocator(*pSession, pmfxAllocator);
  295. MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
  296. }
  297. struct linux_data *d = (struct linux_data *)bmalloc(sizeof(struct linux_data));
  298. d->fd = fd;
  299. d->vaDisplay = (VADisplay)vaDisplay;
  300. *data = d;
  301. return sts;
  302. }
  // Global release hook; this implementation has nothing to release.
  void Release()
  {
  }
  304. // Release per session resources.
  305. void ReleaseSessionData(void *data)
  306. {
  307. struct linux_data *d = (struct linux_data *)data;
  308. if (d) {
  309. vaTerminate(d->vaDisplay);
  310. close(d->fd);
  311. bfree(d);
  312. }
  313. }
  314. void mfxGetTime(mfxTime *timestamp)
  315. {
  316. clock_gettime(CLOCK_MONOTONIC, timestamp);
  317. }
  318. double TimeDiffMsec(mfxTime tfinish, mfxTime tstart)
  319. {
  320. UNUSED_PARAMETER(tfinish);
  321. UNUSED_PARAMETER(tstart);
  322. //unused so far
  323. return 0.0;
  324. }
  // Runs CPUID for leaf `level` and stores EAX, EBX, ECX, EDX in cpuinfo[0..3].
  // When the leaf is not supported (__get_cpuid() returns 0) all four entries
  // are set to 0 instead of being left untouched.
  extern "C" void util_cpuid(int cpuinfo[4], int level)
  {
      unsigned int eax = 0;
      unsigned int ebx = 0;
      unsigned int ecx = 0;
      unsigned int edx = 0;

      if (!__get_cpuid((unsigned int)level, &eax, &ebx, &ecx, &edx)) {
          // Unsupported leaf: report zeros rather than stale caller data.
          eax = ebx = ecx = edx = 0;
      }

      cpuinfo[0] = (int)eax;
      cpuinfo[1] = (int)ebx;
      cpuinfo[2] = (int)ecx;
      cpuinfo[3] = (int)edx;
  }
  330. struct vaapi_device {
  331. int fd;
  332. VADisplay display;
  333. const char *driver;
  334. };
  335. static void vaapi_open(const char *device_path, struct vaapi_device *device)
  336. {
  337. int fd = open(device_path, O_RDWR);
  338. if (fd < 0) {
  339. return;
  340. }
  341. VADisplay display = vaGetDisplayDRM(fd);
  342. if (!display) {
  343. close(fd);
  344. return;
  345. }
  346. // VA-API is noisy by default.
  347. vaSetInfoCallback(display, nullptr, nullptr);
  348. vaSetErrorCallback(display, nullptr, nullptr);
  349. int major;
  350. int minor;
  351. if (vaInitialize(display, &major, &minor) != VA_STATUS_SUCCESS) {
  352. vaTerminate(display);
  353. close(fd);
  354. return;
  355. }
  356. const char *driver = vaQueryVendorString(display);
  357. if (strstr(driver, "Intel i965 driver") != nullptr) {
  358. blog(LOG_WARNING, "Legacy intel-vaapi-driver detected, incompatible with QSV");
  359. vaTerminate(display);
  360. close(fd);
  361. return;
  362. }
  363. device->fd = fd;
  364. device->display = display;
  365. device->driver = driver;
  366. }
  367. static void vaapi_close(struct vaapi_device *device)
  368. {
  369. vaTerminate(device->display);
  370. close(device->fd);
  371. }
  372. static uint32_t vaapi_check_support(VADisplay display, VAProfile profile, VAEntrypoint entrypoint)
  373. {
  374. VAConfigAttrib attrib[1];
  375. attrib->type = VAConfigAttribRateControl;
  376. VAStatus va_status = vaGetConfigAttributes(display, profile, entrypoint, attrib, 1);
  377. uint32_t rc = 0;
  378. switch (va_status) {
  379. case VA_STATUS_SUCCESS:
  380. rc = attrib->value;
  381. break;
  382. case VA_STATUS_ERROR_UNSUPPORTED_PROFILE:
  383. case VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT:
  384. default:
  385. break;
  386. }
  387. return (rc & VA_RC_CBR || rc & VA_RC_CQP || rc & VA_RC_VBR);
  388. }
  389. static bool vaapi_supports_av1(VADisplay display)
  390. {
  391. bool ret = false;
  392. // Are there any devices with non-LowPower entrypoints?
  393. ret |= vaapi_check_support(display, VAProfileAV1Profile0, VAEntrypointEncSlice);
  394. ret |= vaapi_check_support(display, VAProfileAV1Profile0, VAEntrypointEncSliceLP);
  395. return ret;
  396. }
  397. static bool vaapi_supports_hevc(VADisplay display)
  398. {
  399. bool ret = false;
  400. ret |= vaapi_check_support(display, VAProfileHEVCMain, VAEntrypointEncSlice);
  401. ret |= vaapi_check_support(display, VAProfileHEVCMain, VAEntrypointEncSliceLP);
  402. return ret;
  403. }
  404. bool check_adapter(void *param, const char *node, uint32_t idx)
  405. {
  406. struct vaapi_device device = {0};
  407. struct adapter_info *adapters = (struct adapter_info *)param;
  408. vaapi_open(node, &device);
  409. if (!device.display)
  410. return true;
  411. struct adapter_info *adapter = &adapters[idx];
  412. adapter->is_intel = strstr(device.driver, "Intel") != nullptr;
  413. // This is currently only used for LowPower coding which is busted on VA-API anyway.
  414. adapter->is_dgpu = false;
  415. adapter->supports_av1 = vaapi_supports_av1(device.display);
  416. adapter->supports_hevc = vaapi_supports_hevc(device.display);
  417. if (adapter->is_intel && default_h264_device == nullptr)
  418. default_h264_device = strdup(node);
  419. if (adapter->is_intel && adapter->supports_av1 && default_av1_device == nullptr)
  420. default_av1_device = strdup(node);
  421. if (adapter->is_intel && adapter->supports_hevc && default_hevc_device == nullptr)
  422. default_hevc_device = strdup(node);
  423. vaapi_close(&device);
  424. return true;
  425. }
  426. void check_adapters(struct adapter_info *adapters, size_t *adapter_count)
  427. {
  428. obs_enter_graphics();
  429. uint32_t gs_count = gs_get_adapter_count();
  430. if (*adapter_count < gs_count) {
  431. blog(LOG_WARNING, "Too many video adapters: %ld < %d", *adapter_count, gs_count);
  432. obs_leave_graphics();
  433. return;
  434. }
  435. gs_enum_adapters(check_adapter, adapters);
  436. obs_leave_graphics();
  437. }