/* obs-nvenc-test.cpp */
  1. #include <string_view>
  2. #include <unordered_map>
  3. #include <vector>
  4. #include <chrono>
  5. #include <future>
  6. #include <cstring>
  7. #include <ffnvcodec/nvEncodeAPI.h>
  8. #include <ffnvcodec/dynlink_loader.h>
  9. /*
  10. * Utility to check for NVENC support and capabilities.
  11. * Will check all GPUs and return INI-formatted results based on highest capability of all devices.
  12. */
  13. using namespace std;
  14. using namespace std::chrono_literals;
  15. static CudaFunctions *cu = nullptr;
  16. static NvencFunctions *nvenc = nullptr;
  17. NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
  18. static constexpr uint32_t NVENC_CONFIGURED_VERSION = (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
  /*
   * Minimal NVML declarations.
   *
   * nvml.h is not included by this file, so the few constants, handle types and
   * function signatures that are resolved at runtime are declared here by hand.
   */
  #define NVML_SUCCESS 0
  #define NVML_DEVICE_UUID_V2_BUFFER_SIZE 96
  #define NVML_DEVICE_NAME_V2_BUFFER_SIZE 96
  #define NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE 80

  typedef int nvmlReturn_t;
  typedef struct nvmlDevice *nvmlDevice_t;

  /* Codec selector for the encoder capacity query; values are spelled out to match NVML. */
  typedef enum nvmlEncoderType {
      NVML_ENCODER_QUERY_H264 = 0,
      NVML_ENCODER_QUERY_HEVC = 1,
      NVML_ENCODER_QUERY_AV1 = 2,
      NVML_ENCODER_QUERY_UNKNOWN = 255
  } nvmlEncoderType_t;

  /* Signatures of the NVML entry points looked up by name in NVML::Init(). */
  using NVML_GET_DRIVER_VER_FUNC = nvmlReturn_t (*)(char *, unsigned int);
  using NVML_INIT_V2 = nvmlReturn_t (*)();
  using NVML_SHUTDOWN = nvmlReturn_t (*)();
  using NVML_GET_HANDLE_BY_BUS_ID = nvmlReturn_t (*)(const char *, nvmlDevice_t *);
  using NVML_GET_DEVICE_UUID = nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned);
  using NVML_GET_DEVICE_NAME = nvmlReturn_t (*)(nvmlDevice_t, char *, unsigned);
  using NVML_GET_DEVICE_PCIE_GEN = nvmlReturn_t (*)(nvmlDevice_t, unsigned *);
  using NVML_GET_DEVICE_PCIE_WIDTH = nvmlReturn_t (*)(nvmlDevice_t, unsigned *);
  using NVML_GET_DEVICE_ARCHITECTURE = nvmlReturn_t (*)(nvmlDevice_t, unsigned *);
  using NVML_GET_ENCODER_SESSIONS = nvmlReturn_t (*)(nvmlDevice_t, unsigned *, void *);
  using NVML_GET_ENCODER_CAPACITY = nvmlReturn_t (*)(nvmlDevice_t, nvmlEncoderType, unsigned *);
  using NVML_GET_ENCODER_UTILISATION = nvmlReturn_t (*)(nvmlDevice_t, unsigned *, unsigned *);
  /*
   * Kepler is the architecture we mainly care about, so it gets a named constant.
   * The remaining keys follow the NVML_DEVICE_ARCH_* numbering from nvml.h
   * (3 = Maxwell, 4 = Pascal, ...), used to print a human-readable name.
   */
  constexpr uint32_t NVML_DEVICE_ARCH_KEPLER = 2;
  const std::unordered_map<uint32_t, const std::string_view> arch_to_name = {
      {NVML_DEVICE_ARCH_KEPLER, "Kepler"},
      {3, "Maxwell"},
      {4, "Pascal"},
      {5, "Volta"},
      {6, "Turing"},
      {7, "Ampere"},
      {8, "Ada"},
      {9, "Hopper"},
  };
  57. /* List of capabilities to be queried per codec */
  58. static const vector<pair<NV_ENC_CAPS, string>> capabilities = {
  59. {NV_ENC_CAPS_NUM_MAX_BFRAMES, "bframes"},
  60. {NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE, "lossless"},
  61. {NV_ENC_CAPS_SUPPORT_LOOKAHEAD, "lookahead"},
  62. {NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ, "temporal_aq"},
  63. {NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE, "dynamic_bitrate"},
  64. {NV_ENC_CAPS_SUPPORT_10BIT_ENCODE, "10bit"},
  65. {NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE, "bref"},
  66. {NV_ENC_CAPS_NUM_ENCODER_ENGINES, "engines"},
  67. {NV_ENC_CAPS_SUPPORT_YUV444_ENCODE, "yuv_444"},
  68. {NV_ENC_CAPS_WIDTH_MAX, "max_width"},
  69. {NV_ENC_CAPS_HEIGHT_MAX, "max_height"},
  70. #if NVENCAPI_MAJOR_VERSION > 12 || NVENCAPI_MINOR_VERSION >= 2
  71. /* SDK 12.2+ features */
  72. {NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER, "temporal_filter"},
  73. {NV_ENC_CAPS_SUPPORT_LOOKAHEAD_LEVEL, "lookahead_level"},
  74. #endif
  75. };
  76. static const vector<pair<string_view, GUID>> codecs = {{"h264", NV_ENC_CODEC_H264_GUID},
  77. {"hevc", NV_ENC_CODEC_HEVC_GUID},
  78. {"av1", NV_ENC_CODEC_AV1_GUID}};
  /* codec name -> (capability name -> value) */
  using codec_caps_map = std::unordered_map<std::string, std::unordered_map<std::string, int>>;

  /*
   * Everything reported for a single adapter.
   *
   * All numeric fields default to 0 so that a device whose NVML queries fail or
   * are skipped still prints well-defined values.
   */
  struct device_info {
      std::string pci_id;    /* PCI bus id as returned by CUDA */
      std::string nvml_uuid; /* UUID string as returned by NVML */
      std::string cuda_uuid; /* CUDA UUID rendered as lowercase hex */
      std::string name;      /* device name as returned by NVML */

      uint32_t architecture = 0; /* NVML architecture id, see arch_to_name */
      uint32_t pcie_gen = 0;
      uint32_t pcie_width = 0;
      uint32_t encoder_sessions = 0;
      uint32_t utilisation = 0;
      uint32_t sample_period = 0;
      uint32_t capacity_h264 = 0;
      uint32_t capacity_hevc = 0;
      uint32_t capacity_av1 = 0;

      codec_caps_map caps; /* per-codec capabilities of this device only */
  };
  96. /* RAII wrappers to make my life a little easier. */
  97. struct NVML {
  98. NVML_INIT_V2 init;
  99. NVML_SHUTDOWN shutdown;
  100. NVML_GET_DRIVER_VER_FUNC getDriverVersion;
  101. NVML_GET_HANDLE_BY_BUS_ID getDeviceHandleByPCIBusId;
  102. NVML_GET_DEVICE_UUID getDeviceUUID;
  103. NVML_GET_DEVICE_NAME getDeviceName;
  104. NVML_GET_DEVICE_PCIE_GEN getDevicePCIeGen;
  105. NVML_GET_DEVICE_PCIE_WIDTH getDevicePCIeWidth;
  106. NVML_GET_DEVICE_ARCHITECTURE getDeviceArchitecture;
  107. NVML_GET_ENCODER_SESSIONS getEncoderSessions;
  108. NVML_GET_ENCODER_CAPACITY getEncoderCapacity;
  109. NVML_GET_ENCODER_UTILISATION getEncoderUtilisation;
  110. NVML() = default;
  111. ~NVML()
  112. {
  113. if (initialised && shutdown)
  114. shutdown();
  115. }
  116. bool Init()
  117. {
  118. if (!load_nvml_lib()) {
  119. printf("reason=nvml_lib\n");
  120. return false;
  121. }
  122. init = (NVML_INIT_V2)load_nvml_func("nvmlInit_v2");
  123. shutdown = (NVML_SHUTDOWN)load_nvml_func("nvmlShutdown");
  124. getDriverVersion = (NVML_GET_DRIVER_VER_FUNC)load_nvml_func("nvmlSystemGetDriverVersion");
  125. getDeviceHandleByPCIBusId =
  126. (NVML_GET_HANDLE_BY_BUS_ID)load_nvml_func("nvmlDeviceGetHandleByPciBusId_v2");
  127. getDeviceUUID = (NVML_GET_DEVICE_UUID)load_nvml_func("nvmlDeviceGetUUID");
  128. getDeviceName = (NVML_GET_DEVICE_NAME)load_nvml_func("nvmlDeviceGetName");
  129. getDevicePCIeGen = (NVML_GET_DEVICE_PCIE_GEN)load_nvml_func("nvmlDeviceGetCurrPcieLinkGeneration");
  130. getDevicePCIeWidth = (NVML_GET_DEVICE_PCIE_WIDTH)load_nvml_func("nvmlDeviceGetCurrPcieLinkWidth");
  131. getDeviceArchitecture = (NVML_GET_DEVICE_ARCHITECTURE)load_nvml_func("nvmlDeviceGetArchitecture");
  132. getEncoderSessions = (NVML_GET_ENCODER_SESSIONS)load_nvml_func("nvmlDeviceGetEncoderSessions");
  133. getEncoderCapacity = (NVML_GET_ENCODER_CAPACITY)load_nvml_func("nvmlDeviceGetEncoderCapacity");
  134. getEncoderUtilisation = (NVML_GET_ENCODER_UTILISATION)load_nvml_func("nvmlDeviceGetEncoderUtilization");
  135. if (!init || !shutdown || !getDriverVersion || !getDeviceHandleByPCIBusId || !getDeviceUUID ||
  136. !getDeviceName || !getDevicePCIeGen || !getDevicePCIeWidth || !getEncoderSessions ||
  137. !getEncoderCapacity || !getEncoderUtilisation || !getDeviceArchitecture) {
  138. return false;
  139. }
  140. nvmlReturn_t res = init();
  141. if (res != 0) {
  142. printf("reason=nvml_init_%d\n", res);
  143. return false;
  144. }
  145. initialised = true;
  146. return true;
  147. }
  148. private:
  149. bool initialised = false;
  150. static inline void *nvml_lib = nullptr;
  151. bool load_nvml_lib()
  152. {
  153. #ifdef _WIN32
  154. nvml_lib = LoadLibraryA("nvml.dll");
  155. #else
  156. nvml_lib = dlopen("libnvidia-ml.so.1", RTLD_LAZY);
  157. #endif
  158. return nvml_lib != nullptr;
  159. }
  160. static void *load_nvml_func(const char *func)
  161. {
  162. #ifdef _WIN32
  163. void *func_ptr = (void *)GetProcAddress((HMODULE)nvml_lib, func);
  164. #else
  165. void *func_ptr = dlsym(nvml_lib, func);
  166. #endif
  167. return func_ptr;
  168. }
  169. };
  170. struct CUDACtx {
  171. CUcontext ctx;
  172. CUDACtx() = default;
  173. ~CUDACtx() { cu->cuCtxDestroy(ctx); }
  174. bool Init(int adapter_idx)
  175. {
  176. CUdevice dev;
  177. if (cu->cuDeviceGet(&dev, adapter_idx) != CUDA_SUCCESS)
  178. return false;
  179. return cu->cuCtxCreate(&ctx, 0, dev) == CUDA_SUCCESS;
  180. }
  181. string GetPCIBusId()
  182. {
  183. CUdevice dev;
  184. string bus_id;
  185. bus_id.resize(16);
  186. cu->cuCtxGetDevice(&dev);
  187. cu->cuDeviceGetPCIBusId(bus_id.data(), (int)bus_id.capacity(), dev);
  188. return bus_id;
  189. }
  190. string GetUUID()
  191. {
  192. CUdevice dev;
  193. CUuuid uuid;
  194. string uuid_str;
  195. cu->cuCtxGetDevice(&dev);
  196. cu->cuDeviceGetUuid_v2(&uuid, dev);
  197. uuid_str.resize(32);
  198. for (size_t idx = 0; idx < 16; idx++) {
  199. sprintf(uuid_str.data() + idx * 2, "%02x", uuid.bytes[idx] & 0xFF);
  200. }
  201. return uuid_str;
  202. }
  203. };
  204. struct NVSession {
  205. void *ptr = nullptr;
  206. NVSession() = default;
  207. ~NVSession() { nv.nvEncDestroyEncoder(ptr); }
  208. NVENCSTATUS OpenSession(const CUDACtx &ctx)
  209. {
  210. NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {};
  211. params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
  212. params.apiVersion = NVENCAPI_VERSION;
  213. params.device = ctx.ctx;
  214. params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  215. return nv.nvEncOpenEncodeSessionEx(&params, &ptr);
  216. }
  217. };
  218. static bool init_nvenc()
  219. {
  220. if (nvenc_load_functions(&nvenc, nullptr)) {
  221. printf("reason=nvenc_lib\n");
  222. return false;
  223. }
  224. NVENCSTATUS res = nvenc->NvEncodeAPICreateInstance(&nv);
  225. if (res != NV_ENC_SUCCESS) {
  226. printf("reason=nvenc_init_%d\n", res);
  227. return false;
  228. }
  229. return true;
  230. }
  231. static bool init_cuda()
  232. {
  233. if (cuda_load_functions(&cu, nullptr)) {
  234. printf("reason=cuda_lib\n");
  235. return false;
  236. }
  237. CUresult res = cu->cuInit(0);
  238. if (res != CUDA_SUCCESS) {
  239. printf("reason=cuda_init_%d\n", res);
  240. return false;
  241. }
  242. return true;
  243. }
  244. static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps, device_info &device_info, NVML &nvml,
  245. bool &session_limit)
  246. {
  247. CUDACtx cudaCtx;
  248. NVSession nvSession;
  249. if (!cudaCtx.Init(adapter_idx))
  250. return false;
  251. device_info.pci_id = cudaCtx.GetPCIBusId();
  252. device_info.cuda_uuid = cudaCtx.GetUUID();
  253. nvmlDevice_t dev;
  254. if (nvml.getDeviceHandleByPCIBusId(device_info.pci_id.data(), &dev) == NVML_SUCCESS) {
  255. char uuid[NVML_DEVICE_UUID_V2_BUFFER_SIZE];
  256. nvml.getDeviceUUID(dev, uuid, sizeof(uuid));
  257. device_info.nvml_uuid = uuid;
  258. char name[NVML_DEVICE_NAME_V2_BUFFER_SIZE];
  259. nvml.getDeviceName(dev, name, sizeof(name));
  260. device_info.name = name;
  261. nvml.getDevicePCIeGen(dev, &device_info.pcie_gen);
  262. nvml.getDevicePCIeWidth(dev, &device_info.pcie_width);
  263. nvml.getEncoderSessions(dev, &device_info.encoder_sessions, nullptr);
  264. nvml.getDeviceArchitecture(dev, &device_info.architecture);
  265. nvml.getEncoderUtilisation(dev, &device_info.utilisation, &device_info.sample_period);
  266. nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_H264, &device_info.capacity_h264);
  267. nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_HEVC, &device_info.capacity_hevc);
  268. nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_AV1, &device_info.capacity_av1);
  269. }
  270. auto res = nvSession.OpenSession(cudaCtx);
  271. session_limit = session_limit || res == NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY;
  272. if (res != NV_ENC_SUCCESS)
  273. return false;
  274. uint32_t guid_count = 0;
  275. if (nv.nvEncGetEncodeGUIDCount(nvSession.ptr, &guid_count) != NV_ENC_SUCCESS)
  276. return false;
  277. vector<GUID> guids;
  278. guids.resize(guid_count);
  279. NVENCSTATUS stat = nv.nvEncGetEncodeGUIDs(nvSession.ptr, guids.data(), guid_count, &guid_count);
  280. if (stat != NV_ENC_SUCCESS)
  281. return false;
  282. NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
  283. for (uint32_t i = 0; i < guid_count; i++) {
  284. GUID *guid = &guids[i];
  285. std::string codec_name = "unknown";
  286. for (const auto &[name, codec_guid] : codecs) {
  287. if (memcmp(&codec_guid, guid, sizeof(GUID)) == 0) {
  288. codec_name = name;
  289. break;
  290. }
  291. }
  292. caps[codec_name]["codec_supported"] = 1;
  293. device_info.caps[codec_name]["codec_supported"] = 1;
  294. for (const auto &[cap, name] : capabilities) {
  295. int v;
  296. param.capsToQuery = cap;
  297. if (nv.nvEncGetEncodeCaps(nvSession.ptr, *guid, &param, &v) != NV_ENC_SUCCESS)
  298. continue;
  299. device_info.caps[codec_name][name] = v;
  300. if (v > caps[codec_name][name])
  301. caps[codec_name][name] = v;
  302. }
  303. }
  304. return true;
  305. }
  306. bool nvenc_checks(codec_caps_map &caps, vector<device_info> &device_infos)
  307. {
  308. /* NVENC API init */
  309. if (!init_nvenc())
  310. return false;
  311. /* CUDA init */
  312. if (!init_cuda())
  313. return false;
  314. NVML nvml;
  315. if (!nvml.Init())
  316. return false;
  317. /* --------------------------------------------------------- */
  318. /* obtain adapter compatibility information */
  319. uint32_t nvenc_ver;
  320. int cuda_driver_ver;
  321. int cuda_devices = 0;
  322. int nvenc_devices = 0;
  323. char driver_ver[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
  324. bool session_limit = false;
  325. /* NVIDIA driver version */
  326. if (nvml.getDriverVersion(driver_ver, sizeof(driver_ver)) == NVML_SUCCESS) {
  327. printf("driver_ver=%s\n", driver_ver);
  328. } else {
  329. // Treat this as a non-fatal failure
  330. printf("driver_ver=0.0\n");
  331. }
  332. /* CUDA driver version and devices */
  333. if (cu->cuDriverGetVersion(&cuda_driver_ver) == CUDA_SUCCESS) {
  334. printf("cuda_ver=%d.%d\n", cuda_driver_ver / 1000, cuda_driver_ver % 1000);
  335. } else {
  336. printf("reason=no_cuda_version\n");
  337. return false;
  338. }
  339. if (cu->cuDeviceGetCount(&cuda_devices) == CUDA_SUCCESS && cuda_devices) {
  340. printf("cuda_devices=%d\n", cuda_devices);
  341. } else {
  342. printf("reason=no_devices\n");
  343. return false;
  344. }
  345. /* NVENC API version */
  346. if (nvenc->NvEncodeAPIGetMaxSupportedVersion(&nvenc_ver) == NV_ENC_SUCCESS) {
  347. printf("nvenc_ver=%d.%d\n", nvenc_ver >> 4, nvenc_ver & 0xf);
  348. } else {
  349. printf("reason=no_nvenc_version\n");
  350. return false;
  351. }
  352. device_infos.resize(cuda_devices);
  353. for (int idx = 0; idx < cuda_devices; idx++) {
  354. if (get_adapter_caps(idx, caps, device_infos[idx], nvml, session_limit))
  355. nvenc_devices++;
  356. }
  357. if (session_limit) {
  358. printf("reason=session_limit\n");
  359. return false;
  360. }
  361. if (nvenc_ver < NVENC_CONFIGURED_VERSION) {
  362. printf("reason=outdated_driver\n");
  363. return false;
  364. }
  365. printf("nvenc_devices=%d\n", nvenc_devices);
  366. if (!nvenc_devices) {
  367. printf("reason=no_supported_devices\n");
  368. return false;
  369. }
  370. return true;
  371. }
  372. int check_thread()
  373. {
  374. int ret = 0;
  375. codec_caps_map caps;
  376. vector<device_info> device_infos;
  377. caps["h264"]["codec_supported"] = 0;
  378. caps["hevc"]["codec_supported"] = 0;
  379. caps["av1"]["codec_supported"] = 0;
  380. printf("[general]\n");
  381. if (nvenc_checks(caps, device_infos)) {
  382. printf("nvenc_supported=true\n");
  383. } else {
  384. printf("nvenc_supported=false\n");
  385. ret = 1;
  386. }
  387. /* Global capabilities, based on highest supported across all devices */
  388. for (const auto &[codec, codec_caps] : caps) {
  389. printf("\n[%s]\n", codec.c_str());
  390. for (const auto &[name, value] : codec_caps) {
  391. printf("%s=%d\n", name.c_str(), value);
  392. }
  393. }
  394. /* Per-device info (mostly for debugging) */
  395. for (size_t idx = 0; idx < device_infos.size(); idx++) {
  396. const auto &info = device_infos[idx];
  397. string_view architecture = "Unknown";
  398. if (arch_to_name.count(info.architecture))
  399. architecture = arch_to_name.at(info.architecture);
  400. printf("\n[device.%zu]\n"
  401. "pci_id=%s\n"
  402. "nvml_uuid=%s\n"
  403. "cuda_uuid=%s\n"
  404. "name=%s\n"
  405. "architecture=%u\n"
  406. "architecture_name=%s\n"
  407. "pcie_link_width=%d\n"
  408. "pcie_link_gen=%d\n"
  409. "encoder_sessions=%u\n"
  410. "utilisation=%u\n"
  411. "sample_period=%u\n"
  412. "capacity_h264=%u\n"
  413. "capacity_hevc=%u\n"
  414. "capacity_av1=%u\n",
  415. idx, info.pci_id.c_str(), info.nvml_uuid.c_str(), info.cuda_uuid.c_str(), info.name.c_str(),
  416. info.architecture, architecture.data(), info.pcie_width, info.pcie_gen, info.encoder_sessions,
  417. info.utilisation, info.sample_period, info.capacity_h264, info.capacity_hevc, info.capacity_av1);
  418. for (const auto &[codec, codec_caps] : info.caps) {
  419. printf("\n[device.%zu.%s]\n", idx, codec.c_str());
  420. for (const auto &[name, value] : codec_caps) {
  421. printf("%s=%d\n", name.c_str(), value);
  422. }
  423. }
  424. }
  425. return ret;
  426. }
  427. int main(int, char **)
  428. {
  429. future<int> f = async(launch::async, check_thread);
  430. future_status status = f.wait_for(2.5s);
  431. if (status == future_status::timeout)
  432. exit(1);
  433. return f.get();
  434. }