| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561 |
- #include <string_view>
- #include <unordered_map>
- #include <vector>
- #include <chrono>
- #include <future>
- #include <cstring>
- #include <ffnvcodec/nvEncodeAPI.h>
- #include <ffnvcodec/dynlink_loader.h>
- /*
- * Utility to check for NVENC support and capabilities.
- * Will check all GPUs and return INI-formatted results based on highest capability of all devices.
- */
- using namespace std;
- using namespace std::chrono_literals;
- static CudaFunctions *cu = nullptr;
- static NvencFunctions *nvenc = nullptr;
- NV_ENCODE_API_FUNCTION_LIST nv = {NV_ENCODE_API_FUNCTION_LIST_VER};
- static constexpr uint32_t NVENC_CONFIGURED_VERSION = (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
/* NVML stuff */

/*
 * Locally declared subset of the NVML interface. The entry points are resolved
 * by name at runtime (see struct NVML), so only the constants, opaque handle
 * type and function pointer signatures used by this tool are declared here.
 */
#define NVML_SUCCESS 0
#define NVML_DEVICE_UUID_V2_BUFFER_SIZE 96
#define NVML_DEVICE_NAME_V2_BUFFER_SIZE 96
#define NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE 80

typedef int nvmlReturn_t;
typedef struct nvmlDevice *nvmlDevice_t;

/* Codec selector passed to the encoder capacity query. */
typedef enum nvmlEncoderType {
	NVML_ENCODER_QUERY_H264,
	NVML_ENCODER_QUERY_HEVC,
	NVML_ENCODER_QUERY_AV1,
	NVML_ENCODER_QUERY_UNKNOWN
} nvmlEncoderType_t;

/* Function pointer types, one per NVML entry point loaded in NVML::Init(). */
typedef nvmlReturn_t (*NVML_GET_DRIVER_VER_FUNC)(char *, unsigned int);
typedef nvmlReturn_t (*NVML_INIT_V2)();
typedef nvmlReturn_t (*NVML_SHUTDOWN)();
typedef nvmlReturn_t (*NVML_GET_HANDLE_BY_BUS_ID)(const char *, nvmlDevice_t *);
typedef nvmlReturn_t (*NVML_GET_DEVICE_UUID)(nvmlDevice_t, char *, unsigned);
typedef nvmlReturn_t (*NVML_GET_DEVICE_NAME)(nvmlDevice_t, char *, unsigned);
typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_GEN)(nvmlDevice_t, unsigned *);
typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_WIDTH)(nvmlDevice_t, unsigned *);
typedef nvmlReturn_t (*NVML_GET_DEVICE_ARCHITECTURE)(nvmlDevice_t, unsigned *);
typedef nvmlReturn_t (*NVML_GET_ENCODER_SESSIONS)(nvmlDevice_t, unsigned *, void *);
typedef nvmlReturn_t (*NVML_GET_ENCODER_CAPACITY)(nvmlDevice_t, nvmlEncoderType, unsigned *);
typedef nvmlReturn_t (*NVML_GET_ENCODER_UTILISATION)(nvmlDevice_t, unsigned *, unsigned *);
/*
 * Architecture IDs as reported by nvmlDeviceGetArchitecture, mapped to marketing names.
 * Only Kepler gets a named constant since it's the main one we care about; the remaining
 * IDs follow NVML's NVML_DEVICE_ARCH_* numbering, one step per GPU generation.
 */
constexpr uint32_t NVML_DEVICE_ARCH_KEPLER = 2;

const std::unordered_map<uint32_t, const std::string_view> arch_to_name = {
	{NVML_DEVICE_ARCH_KEPLER, "Kepler"},
	{3, "Maxwell"},
	{4, "Pascal"},
	{5, "Volta"},
	{6, "Turing"},
	{7, "Ampere"},
	{8, "Ada"},
	{9, "Hopper"},
	{10, "Blackwell"},
};
- /* List of capabilities to be queried per codec */
- static const vector<pair<NV_ENC_CAPS, string>> capabilities = {
- {NV_ENC_CAPS_NUM_MAX_BFRAMES, "bframes"},
- {NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE, "lossless"},
- {NV_ENC_CAPS_SUPPORT_LOOKAHEAD, "lookahead"},
- {NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ, "temporal_aq"},
- {NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE, "dynamic_bitrate"},
- {NV_ENC_CAPS_SUPPORT_10BIT_ENCODE, "10bit"},
- {NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE, "bref"},
- {NV_ENC_CAPS_NUM_ENCODER_ENGINES, "engines"},
- {NV_ENC_CAPS_SUPPORT_YUV444_ENCODE, "yuv_444"},
- {NV_ENC_CAPS_WIDTH_MAX, "max_width"},
- {NV_ENC_CAPS_HEIGHT_MAX, "max_height"},
- #if NVENCAPI_MAJOR_VERSION > 12 || NVENCAPI_MINOR_VERSION >= 2
- /* SDK 12.2+ features */
- {NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER, "temporal_filter"},
- {NV_ENC_CAPS_SUPPORT_LOOKAHEAD_LEVEL, "lookahead_level"},
- {NV_ENC_CAPS_SUPPORT_UNIDIRECTIONAL_B, "unidirectional_b"},
- #endif
- #if NVENCAPI_MAJOR_VERSION >= 13
- /* SDK 13.0+ features */
- {NV_ENC_CAPS_SUPPORT_YUV422_ENCODE, "yuv_422"},
- #endif
- };
- static const vector<pair<string_view, GUID>> codecs = {{"h264", NV_ENC_CODEC_H264_GUID},
- {"hevc", NV_ENC_CODEC_HEVC_GUID},
- {"av1", NV_ENC_CODEC_AV1_GUID}};
/* Maps codec name -> (capability name -> value). */
typedef std::unordered_map<std::string, std::unordered_map<std::string, int>> codec_caps_map;

/*
 * Everything collected about a single CUDA device.
 * All numeric fields default to 0 so that a device whose queries fail (or are
 * never attempted) still prints well-defined values instead of indeterminate ones.
 */
struct device_info {
	/* Identification */
	std::string pci_id;
	std::string nvml_uuid;
	std::string cuda_uuid;
	std::string name;

	/* Values reported by NVML */
	uint32_t architecture = 0;
	uint32_t pcie_gen = 0;
	uint32_t pcie_width = 0;
	uint32_t encoder_sessions = 0;
	uint32_t utilisation = 0;
	uint32_t sample_period = 0;
	uint32_t capacity_h264 = 0;
	uint32_t capacity_hevc = 0;
	uint32_t capacity_av1 = 0;

	/* Per-codec NVENC capabilities of this device */
	codec_caps_map caps;
};
- /* RAII wrappers to make my life a little easier. */
- struct NVML {
- NVML_INIT_V2 init;
- NVML_SHUTDOWN shutdown;
- NVML_GET_DRIVER_VER_FUNC getDriverVersion;
- NVML_GET_HANDLE_BY_BUS_ID getDeviceHandleByPCIBusId;
- NVML_GET_DEVICE_UUID getDeviceUUID;
- NVML_GET_DEVICE_NAME getDeviceName;
- NVML_GET_DEVICE_PCIE_GEN getDevicePCIeGen;
- NVML_GET_DEVICE_PCIE_WIDTH getDevicePCIeWidth;
- NVML_GET_DEVICE_ARCHITECTURE getDeviceArchitecture;
- NVML_GET_ENCODER_SESSIONS getEncoderSessions;
- NVML_GET_ENCODER_CAPACITY getEncoderCapacity;
- NVML_GET_ENCODER_UTILISATION getEncoderUtilisation;
- NVML() = default;
- ~NVML()
- {
- if (initialised && shutdown)
- shutdown();
- }
- bool Init()
- {
- if (!load_nvml_lib()) {
- printf("reason=nvml_lib\n");
- return false;
- }
- init = (NVML_INIT_V2)load_nvml_func("nvmlInit_v2");
- shutdown = (NVML_SHUTDOWN)load_nvml_func("nvmlShutdown");
- getDriverVersion = (NVML_GET_DRIVER_VER_FUNC)load_nvml_func("nvmlSystemGetDriverVersion");
- getDeviceHandleByPCIBusId =
- (NVML_GET_HANDLE_BY_BUS_ID)load_nvml_func("nvmlDeviceGetHandleByPciBusId_v2");
- getDeviceUUID = (NVML_GET_DEVICE_UUID)load_nvml_func("nvmlDeviceGetUUID");
- getDeviceName = (NVML_GET_DEVICE_NAME)load_nvml_func("nvmlDeviceGetName");
- getDevicePCIeGen = (NVML_GET_DEVICE_PCIE_GEN)load_nvml_func("nvmlDeviceGetCurrPcieLinkGeneration");
- getDevicePCIeWidth = (NVML_GET_DEVICE_PCIE_WIDTH)load_nvml_func("nvmlDeviceGetCurrPcieLinkWidth");
- getDeviceArchitecture = (NVML_GET_DEVICE_ARCHITECTURE)load_nvml_func("nvmlDeviceGetArchitecture");
- getEncoderSessions = (NVML_GET_ENCODER_SESSIONS)load_nvml_func("nvmlDeviceGetEncoderSessions");
- getEncoderCapacity = (NVML_GET_ENCODER_CAPACITY)load_nvml_func("nvmlDeviceGetEncoderCapacity");
- getEncoderUtilisation = (NVML_GET_ENCODER_UTILISATION)load_nvml_func("nvmlDeviceGetEncoderUtilization");
- if (!init || !shutdown || !getDriverVersion || !getDeviceHandleByPCIBusId || !getDeviceUUID ||
- !getDeviceName || !getDevicePCIeGen || !getDevicePCIeWidth || !getEncoderSessions ||
- !getEncoderCapacity || !getEncoderUtilisation || !getDeviceArchitecture) {
- return false;
- }
- nvmlReturn_t res = init();
- if (res != 0) {
- printf("reason=nvml_init_%d\n", res);
- return false;
- }
- initialised = true;
- return true;
- }
- private:
- bool initialised = false;
- static inline void *nvml_lib = nullptr;
- bool load_nvml_lib()
- {
- #ifdef _WIN32
- nvml_lib = LoadLibraryA("nvml.dll");
- #else
- nvml_lib = dlopen("libnvidia-ml.so.1", RTLD_LAZY);
- #endif
- return nvml_lib != nullptr;
- }
- static void *load_nvml_func(const char *func)
- {
- #ifdef _WIN32
- void *func_ptr = (void *)GetProcAddress((HMODULE)nvml_lib, func);
- #else
- void *func_ptr = dlsym(nvml_lib, func);
- #endif
- return func_ptr;
- }
- };
- struct CUDACtx {
- CUcontext ctx;
- CUDACtx() = default;
- ~CUDACtx() { cu->cuCtxDestroy(ctx); }
- bool Init(int adapter_idx)
- {
- CUdevice dev;
- if (cu->cuDeviceGet(&dev, adapter_idx) != CUDA_SUCCESS)
- return false;
- return cu->cuCtxCreate(&ctx, 0, dev) == CUDA_SUCCESS;
- }
- string GetPCIBusId()
- {
- CUdevice dev;
- string bus_id;
- bus_id.resize(16);
- cu->cuCtxGetDevice(&dev);
- cu->cuDeviceGetPCIBusId(bus_id.data(), (int)bus_id.capacity(), dev);
- return bus_id;
- }
- string GetUUID()
- {
- CUdevice dev;
- CUuuid uuid;
- string uuid_str;
- cu->cuCtxGetDevice(&dev);
- cu->cuDeviceGetUuid_v2(&uuid, dev);
- uuid_str.resize(32);
- for (size_t idx = 0; idx < 16; idx++) {
- sprintf(uuid_str.data() + idx * 2, "%02x", uuid.bytes[idx] & 0xFF);
- }
- return uuid_str;
- }
- };
- struct NVSession {
- void *ptr = nullptr;
- NVSession() = default;
- ~NVSession() { nv.nvEncDestroyEncoder(ptr); }
- NVENCSTATUS OpenSession(const CUDACtx &ctx)
- {
- NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {};
- params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
- params.apiVersion = NVENCAPI_VERSION;
- params.device = ctx.ctx;
- params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
- return nv.nvEncOpenEncodeSessionEx(¶ms, &ptr);
- }
- };
- static bool init_nvenc()
- {
- if (nvenc_load_functions(&nvenc, nullptr)) {
- printf("reason=nvenc_lib\n");
- return false;
- }
- NVENCSTATUS res = nvenc->NvEncodeAPICreateInstance(&nv);
- if (res != NV_ENC_SUCCESS) {
- printf("reason=nvenc_init_%d\n", res);
- return false;
- }
- return true;
- }
- static bool init_cuda()
- {
- if (cuda_load_functions(&cu, nullptr)) {
- printf("reason=cuda_lib\n");
- return false;
- }
- CUresult res = cu->cuInit(0);
- if (res != CUDA_SUCCESS) {
- printf("reason=cuda_init_%d\n", res);
- return false;
- }
- return true;
- }
- static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps, device_info &device_info, NVML &nvml,
- bool &session_limit)
- {
- CUDACtx cudaCtx;
- NVSession nvSession;
- if (!cudaCtx.Init(adapter_idx))
- return false;
- device_info.pci_id = cudaCtx.GetPCIBusId();
- device_info.cuda_uuid = cudaCtx.GetUUID();
- nvmlDevice_t dev;
- if (nvml.getDeviceHandleByPCIBusId(device_info.pci_id.data(), &dev) == NVML_SUCCESS) {
- char uuid[NVML_DEVICE_UUID_V2_BUFFER_SIZE];
- nvml.getDeviceUUID(dev, uuid, sizeof(uuid));
- device_info.nvml_uuid = uuid;
- char name[NVML_DEVICE_NAME_V2_BUFFER_SIZE];
- nvml.getDeviceName(dev, name, sizeof(name));
- device_info.name = name;
- nvml.getDevicePCIeGen(dev, &device_info.pcie_gen);
- nvml.getDevicePCIeWidth(dev, &device_info.pcie_width);
- nvml.getEncoderSessions(dev, &device_info.encoder_sessions, nullptr);
- nvml.getDeviceArchitecture(dev, &device_info.architecture);
- nvml.getEncoderUtilisation(dev, &device_info.utilisation, &device_info.sample_period);
- nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_H264, &device_info.capacity_h264);
- nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_HEVC, &device_info.capacity_hevc);
- nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_AV1, &device_info.capacity_av1);
- }
- auto res = nvSession.OpenSession(cudaCtx);
- session_limit = session_limit || res == NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY;
- if (res != NV_ENC_SUCCESS)
- return false;
- uint32_t guid_count = 0;
- if (nv.nvEncGetEncodeGUIDCount(nvSession.ptr, &guid_count) != NV_ENC_SUCCESS)
- return false;
- vector<GUID> guids;
- guids.resize(guid_count);
- NVENCSTATUS stat = nv.nvEncGetEncodeGUIDs(nvSession.ptr, guids.data(), guid_count, &guid_count);
- if (stat != NV_ENC_SUCCESS)
- return false;
- NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER};
- for (uint32_t i = 0; i < guid_count; i++) {
- GUID *guid = &guids[i];
- std::string codec_name = "unknown";
- for (const auto &[name, codec_guid] : codecs) {
- if (memcmp(&codec_guid, guid, sizeof(GUID)) == 0) {
- codec_name = name;
- break;
- }
- }
- caps[codec_name]["codec_supported"] = 1;
- device_info.caps[codec_name]["codec_supported"] = 1;
- for (const auto &[cap, name] : capabilities) {
- int v;
- param.capsToQuery = cap;
- if (nv.nvEncGetEncodeCaps(nvSession.ptr, *guid, ¶m, &v) != NV_ENC_SUCCESS)
- continue;
- device_info.caps[codec_name][name] = v;
- caps[codec_name][name] = std::max(v, caps[codec_name][name]);
- }
- #if NVENCAPI_MAJOR_VERSION > 12 || NVENCAPI_MINOR_VERSION >= 2
- /* Explicitly check if UHQ tuning is supported since temporal filtering query is true for all codecs. */
- NV_ENC_PRESET_CONFIG preset_config = {};
- preset_config.version = NV_ENC_PRESET_CONFIG_VER;
- preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
- NVENCSTATUS res = nv.nvEncGetEncodePresetConfigEx(nvSession.ptr, *guid, NV_ENC_PRESET_P7_GUID,
- NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY,
- &preset_config);
- device_info.caps[codec_name]["uhq"] = res == NV_ENC_SUCCESS ? 1 : 0;
- caps[codec_name]["uhq"] = std::max(device_info.caps[codec_name]["uhq"], caps[codec_name]["uhq"]);
- #endif
- }
- return true;
- }
- bool nvenc_checks(codec_caps_map &caps, vector<device_info> &device_infos)
- {
- /* NVENC API init */
- if (!init_nvenc())
- return false;
- /* CUDA init */
- if (!init_cuda())
- return false;
- NVML nvml;
- if (!nvml.Init())
- return false;
- /* --------------------------------------------------------- */
- /* obtain adapter compatibility information */
- uint32_t nvenc_ver;
- int cuda_driver_ver;
- int cuda_devices = 0;
- int nvenc_devices = 0;
- char driver_ver[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
- bool session_limit = false;
- /* NVIDIA driver version */
- if (nvml.getDriverVersion(driver_ver, sizeof(driver_ver)) == NVML_SUCCESS) {
- printf("driver_ver=%s\n", driver_ver);
- } else {
- // Treat this as a non-fatal failure
- printf("driver_ver=0.0\n");
- }
- /* CUDA driver version and devices */
- if (cu->cuDriverGetVersion(&cuda_driver_ver) == CUDA_SUCCESS) {
- printf("cuda_ver=%d.%d\n", cuda_driver_ver / 1000, cuda_driver_ver % 1000);
- } else {
- printf("reason=no_cuda_version\n");
- return false;
- }
- if (cu->cuDeviceGetCount(&cuda_devices) == CUDA_SUCCESS && cuda_devices) {
- printf("cuda_devices=%d\n", cuda_devices);
- } else {
- printf("reason=no_devices\n");
- return false;
- }
- /* NVENC API version */
- if (nvenc->NvEncodeAPIGetMaxSupportedVersion(&nvenc_ver) == NV_ENC_SUCCESS) {
- printf("nvenc_ver=%d.%d\n", nvenc_ver >> 4, nvenc_ver & 0xf);
- } else {
- printf("reason=no_nvenc_version\n");
- return false;
- }
- device_infos.resize(cuda_devices);
- for (int idx = 0; idx < cuda_devices; idx++) {
- if (get_adapter_caps(idx, caps, device_infos[idx], nvml, session_limit))
- nvenc_devices++;
- }
- if (session_limit) {
- printf("reason=session_limit\n");
- return false;
- }
- if (nvenc_ver < NVENC_CONFIGURED_VERSION) {
- printf("reason=outdated_driver\n");
- return false;
- }
- printf("nvenc_devices=%d\n", nvenc_devices);
- if (!nvenc_devices) {
- printf("reason=no_supported_devices\n");
- return false;
- }
- uint32_t latest_architecture = 0;
- string_view architecture = "Unknown";
- for (auto &info : device_infos)
- latest_architecture = std::max(info.architecture, latest_architecture);
- if (arch_to_name.count(latest_architecture))
- architecture = arch_to_name.at(latest_architecture);
- printf("latest_architecture=%u\n"
- "latest_architecture_name=%s\n",
- latest_architecture, architecture.data());
- return true;
- }
- int check_thread()
- {
- int ret = 0;
- codec_caps_map caps;
- vector<device_info> device_infos;
- caps["h264"]["codec_supported"] = 0;
- caps["hevc"]["codec_supported"] = 0;
- caps["av1"]["codec_supported"] = 0;
- printf("[general]\n");
- if (nvenc_checks(caps, device_infos)) {
- printf("nvenc_supported=true\n");
- } else {
- printf("nvenc_supported=false\n");
- ret = 1;
- }
- /* Global capabilities, based on highest supported across all devices */
- for (const auto &[codec, codec_caps] : caps) {
- printf("\n[%s]\n", codec.c_str());
- for (const auto &[name, value] : codec_caps) {
- printf("%s=%d\n", name.c_str(), value);
- }
- }
- /* Per-device info (mostly for debugging) */
- for (size_t idx = 0; idx < device_infos.size(); idx++) {
- const auto &info = device_infos[idx];
- string_view architecture = "Unknown";
- if (arch_to_name.count(info.architecture))
- architecture = arch_to_name.at(info.architecture);
- printf("\n[device.%zu]\n"
- "pci_id=%s\n"
- "nvml_uuid=%s\n"
- "cuda_uuid=%s\n"
- "name=%s\n"
- "architecture=%u\n"
- "architecture_name=%s\n"
- "pcie_link_width=%d\n"
- "pcie_link_gen=%d\n"
- "encoder_sessions=%u\n"
- "utilisation=%u\n"
- "sample_period=%u\n"
- "capacity_h264=%u\n"
- "capacity_hevc=%u\n"
- "capacity_av1=%u\n",
- idx, info.pci_id.c_str(), info.nvml_uuid.c_str(), info.cuda_uuid.c_str(), info.name.c_str(),
- info.architecture, architecture.data(), info.pcie_width, info.pcie_gen, info.encoder_sessions,
- info.utilisation, info.sample_period, info.capacity_h264, info.capacity_hevc, info.capacity_av1);
- for (const auto &[codec, codec_caps] : info.caps) {
- printf("\n[device.%zu.%s]\n", idx, codec.c_str());
- for (const auto &[name, value] : codec_caps) {
- printf("%s=%d\n", name.c_str(), value);
- }
- }
- }
- return ret;
- }
- int main(int, char **)
- {
- future<int> f = async(launch::async, check_thread);
- future_status status = f.wait_for(2.5s);
- if (status == future_status::timeout)
- exit(1);
- return f.get();
- }
|