Browse Source

FindBLAS: Add support for NVPL

Vito Gamberini 8 months ago
parent
commit
9ead7c2129

+ 11 - 2
.gitlab/ci/configure_debian12_aarch64_ninja.cmake

@@ -1,13 +1,22 @@
 set(CMake_TEST_C_STANDARDS "90;99;11;17;23" CACHE STRING "")
 set(CMake_TEST_CXX_STANDARDS "98;11;14;17;20;23" CACHE STRING "")
 
+set(blas_lapack_cases
+  All
+  static=1 Generic
+  model=lp64
+    static=0 thread=SEQ NVPL thread=OMP NVPL thread= NVPL
+  model=ilp64
+    static=0 thread=SEQ NVPL thread=OMP NVPL thread= NVPL
+  )
+
 set(CMake_TEST_CTestUpdate_BZR "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_CVS "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_GIT "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_HG "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_SVN "ON" CACHE BOOL "")
 set(CMake_TEST_FindALSA "ON" CACHE BOOL "")
-set(CMake_TEST_FindBLAS "All;static=1;Generic" CACHE STRING "")
+set(CMake_TEST_FindBLAS "${blas_lapack_cases}" CACHE STRING "")
 set(CMake_TEST_FindBoost "ON" CACHE BOOL "")
 set(CMake_TEST_FindBoost_Python "ON" CACHE BOOL "")
 set(CMake_TEST_FindBZip2 "ON" CACHE BOOL "")
@@ -51,7 +60,7 @@ set(CMake_TEST_FindIntl "ON" CACHE BOOL "")
 set(CMake_TEST_FindJNI "ON" CACHE BOOL "")
 set(CMake_TEST_FindJPEG "ON" CACHE BOOL "")
 set(CMake_TEST_FindJsonCpp "ON" CACHE BOOL "")
-set(CMake_TEST_FindLAPACK "All;static=1;Generic" CACHE STRING "")
+set(CMake_TEST_FindLAPACK "${blas_lapack_cases}" CACHE STRING "")
 set(CMake_TEST_FindLibArchive "ON" CACHE BOOL "")
 set(CMake_TEST_FindLibinput "ON" CACHE BOOL "")
 set(CMake_TEST_FindLibLZMA "ON" CACHE BOOL "")

+ 5 - 0
Help/release/dev/nvpl-support.rst

@@ -0,0 +1,5 @@
+NVPL Support
+------------
+
+* The :module:`FindBLAS` and :module:`FindLAPACK` modules now support the
+  NVIDIA Performance Libraries (NVPL).

+ 76 - 0
Modules/FindBLAS.cmake

@@ -54,6 +54,21 @@ The following variables may be set to influence this module's behavior:
     Search for any BLAS/LAPACK.
     Most likely, a BLAS/LAPACK with 32-bit integer interfaces will be found.
 
+``BLA_THREAD``
+  .. versionadded:: 4.1
+
+  Specify the BLAS/LAPACK threading model:
+
+  ``SEQ``
+    Sequential model
+  ``OMP``
+    OpenMP model
+  ``ANY``
+    Search for any BLAS/LAPACK.  If both models are available, ``OMP`` will
+    most likely be found.
+
+  This is currently only supported by NVIDIA NVPL.
+
 Imported Targets
 ^^^^^^^^^^^^^^^^
 
@@ -169,6 +184,11 @@ BLAS/LAPACK Vendors
 
   A BLAS/LAPACK demuxing library using PLT trampolines
 
+``NVPL``
+  .. versionadded:: 4.1
+
+  NVIDIA Performance Libraries
+
 ``NVHPC``
   .. versionadded:: 3.21
 
@@ -417,6 +437,16 @@ else()
   message(FATAL_ERROR "BLA_SIZEOF_INTEGER can have only <no value>, ANY, 4, or 8 values")
 endif()
 
+# Resolve the threading model; an unset or false BLA_THREAD means "ANY".
+set(_blas_thread "ANY")
+if(BLA_THREAD)
+  if(NOT BLA_THREAD STREQUAL "ANY" AND NOT BLA_THREAD STREQUAL "SEQ"
+     AND NOT BLA_THREAD STREQUAL "OMP")
+    message(FATAL_ERROR "BLA_THREAD can have only <no value>, ANY, SEQ, or OMP values")
+  endif()
+  set(_blas_thread "${BLA_THREAD}")
+endif()
+
 # Implicitly linked BLAS libraries?
 if(BLA_VENDOR STREQUAL "All")
   if(NOT BLAS_LIBRARIES)
@@ -1315,6 +1345,52 @@ if(NOT BLAS_LIBRARIES
   unset(_blas_fjlapack_lib)
 endif()
 
+# nVidia NVPL? (https://developer.nvidia.com/nvpl)
+if(BLA_VENDOR STREQUAL "NVPL" OR BLA_VENDOR STREQUAL "All")
+  # Prefer lp64 unless ilp64 is requested.
+  if((_blas_sizeof_integer EQUAL 4) OR (_blas_sizeof_integer STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_ints "_lp64")
+  endif()
+  if((_blas_sizeof_integer EQUAL 8) OR (_blas_sizeof_integer STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_ints "_ilp64")
+  endif()
+
+  # Prefer OMP if available
+  if((_blas_thread STREQUAL "OMP") OR (_blas_thread STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_threads "_omp")
+  endif()
+  if((_blas_thread STREQUAL "SEQ") OR (_blas_thread STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_threads "_seq")
+  endif()
+
+  if(NOT BLAS_LIBRARIES)
+    # This branch also runs for BLA_VENDOR "All", so NVPL is only an optional
+    # candidate here: stay QUIET when its package configuration is missing.
+    find_package(nvpl QUIET)
+    if(nvpl_FOUND)
+      # Take the first nvpl::blas<int><thread> imported target that exists.
+      # The threading model is the outer loop, so it is the stronger preference.
+      foreach(_nvpl_thread IN LISTS _blas_nvpl_threads)
+        foreach(_nvpl_int IN LISTS _blas_nvpl_ints)
+          set(_blas_lib "nvpl::blas${_nvpl_int}${_nvpl_thread}")
+          if(TARGET ${_blas_lib})
+            set(BLAS_LIBRARIES ${_blas_lib})
+            break()
+          endif()
+        endforeach()
+        # break() only leaves the inner loop; stop the outer one once found.
+        if(BLAS_LIBRARIES)
+          break()
+        endif()
+      endforeach()
+    endif()
+  endif()
+
+  unset(_blas_lib)
+  unset(_blas_nvpl_ints)
+  unset(_blas_nvpl_threads)
+endif()
+
 # BLAS in nVidia HPC SDK? (https://developer.nvidia.com/hpc-sdk)
 if(BLA_VENDOR STREQUAL "NVHPC" OR BLA_VENDOR STREQUAL "All")
   set(_blas_nvhpc_lib "blas")

+ 70 - 0
Modules/FindLAPACK.cmake

@@ -55,6 +55,21 @@ The following variables may be set to influence this module's behavior:
     Search for any BLAS/LAPACK.
     Most likely, a BLAS/LAPACK with 32-bit integer interfaces will be found.
 
+``BLA_THREAD``
+  .. versionadded:: 4.1
+
+  Specify the BLAS/LAPACK threading model:
+
+  ``SEQ``
+    Sequential model
+  ``OMP``
+    OpenMP model
+  ``ANY``
+    Search for any BLAS/LAPACK.  If both models are available, ``OMP`` will
+    most likely be found.
+
+  This is currently only supported by NVIDIA NVPL.
+
 Imported Targets
 ^^^^^^^^^^^^^^^^
 
@@ -278,6 +293,16 @@ else()
   message(FATAL_ERROR "BLA_SIZEOF_INTEGER can have only <no value>, ANY, 4, or 8 values")
 endif()
 
+# Resolve the threading model; an unset or false BLA_THREAD means "ANY".
+set(_lapack_thread "ANY")
+if(BLA_THREAD)
+  if(NOT BLA_THREAD STREQUAL "ANY" AND NOT BLA_THREAD STREQUAL "SEQ"
+     AND NOT BLA_THREAD STREQUAL "OMP")
+    message(FATAL_ERROR "BLA_THREAD can have only <no value>, ANY, SEQ, or OMP values")
+  endif()
+  set(_lapack_thread "${BLA_THREAD}")
+endif()
+
 # Load BLAS
 if(NOT LAPACK_NOT_FOUND_MESSAGE)
   _lapack_find_dependency(BLAS)
@@ -693,6 +718,51 @@ if(NOT LAPACK_NOT_FOUND_MESSAGE)
     endif()
   endif()
 
+  # nVidia NVPL? (https://developer.nvidia.com/nvpl)
+  if(NOT LAPACK_LIBRARIES
+      AND (BLA_VENDOR MATCHES "NVPL" OR BLA_VENDOR STREQUAL "All"))
+    # Prefer lp64 unless ilp64 is requested.
+    if((_lapack_sizeof_integer EQUAL 4) OR (_lapack_sizeof_integer STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_ints "_lp64")
+    endif()
+    if((_lapack_sizeof_integer EQUAL 8) OR (_lapack_sizeof_integer STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_ints "_ilp64")
+    endif()
+
+    # Prefer OMP if available
+    if((_lapack_thread STREQUAL "OMP") OR (_lapack_thread STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_threads "_omp")
+    endif()
+    if((_lapack_thread STREQUAL "SEQ") OR (_lapack_thread STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_threads "_seq")
+    endif()
+
+    # This branch also runs for BLA_VENDOR "All", so NVPL is only an optional
+    # candidate here: stay QUIET when its package configuration is missing.
+    find_package(nvpl QUIET)
+    if(nvpl_FOUND)
+      # Take the first nvpl::lapack<int><thread> imported target that exists.
+      # The threading model is the outer loop, so it is the stronger preference.
+      foreach(_nvpl_thread IN LISTS _lapack_nvpl_threads)
+        foreach(_nvpl_int IN LISTS _lapack_nvpl_ints)
+          set(_lapack_lib "nvpl::lapack${_nvpl_int}${_nvpl_thread}")
+          if(TARGET ${_lapack_lib})
+            set(LAPACK_LIBRARIES ${_lapack_lib})
+            break()
+          endif()
+        endforeach()
+        # break() only leaves the inner loop; stop the outer one once found.
+        if(LAPACK_LIBRARIES)
+          break()
+        endif()
+      endforeach()
+    endif()
+
+    unset(_lapack_lib)
+    unset(_lapack_nvpl_ints)
+    unset(_lapack_nvpl_threads)
+  endif()
+
   # NVHPC Library?
 
   if(NOT LAPACK_LIBRARIES

+ 6 - 1
Tests/FindBLAS/add_BLAS_LAPACK_tests.cmake

@@ -9,12 +9,13 @@ function(add_BLAS_LAPACK_tests var)
   set(compiler "")
   set(model "")
   set(static "")
+  set(thread "")
 
   set(sizeof_int_lp64 4)
   set(sizeof_int_ilp64 8)
 
   foreach(variant IN LISTS ${var})
-    if(variant MATCHES "^(all|compiler|model|static)=(.*)$")
+    if(variant MATCHES "^(all|compiler|model|static|thread)=(.*)$")
       set("${CMAKE_MATCH_1}" "${CMAKE_MATCH_2}")
       continue()
     elseif(variant MATCHES "^([^=]+)=(.*)$")
@@ -39,6 +40,10 @@ function(add_BLAS_LAPACK_tests var)
       string(APPEND variant_name "_Static")
       list(APPEND variant_options "-DBLA_STATIC=ON")
     endif()
+    if(thread)
+      string(APPEND variant_name "_${thread}")
+      list(APPEND variant_options "-DBLA_THREAD=${thread}")
+    endif()
     add_test(NAME Find${package}.Test_${variant_name} COMMAND
       ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
       --build-and-test