nppif.cpp 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. // Comes from
  2. // https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
  3. #ifdef _WIN32
  4. # define EXPORT __declspec(dllexport)
  5. #else
  6. # define EXPORT
  7. #endif
  8. #include <cstdio>
  9. #include <assert.h>
  10. #include <cuda_runtime_api.h>
  11. #include <nppi_filtering_functions.h>
  12. EXPORT int nppif_main()
  13. {
  14. /**
  15. * 8-bit unsigned single-channel 1D row convolution.
  16. */
  17. const int simgrows = 32;
  18. const int simgcols = 32;
  19. Npp8u *d_pSrc, *d_pDst;
  20. const int nMaskSize = 3;
  21. NppiSize oROI;
  22. oROI.width = simgcols - nMaskSize;
  23. oROI.height = simgrows;
  24. const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
  25. const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
  26. const int simgpix = simgrows * simgcols;
  27. const int dimgpix = oROI.width * oROI.height;
  28. const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
  29. const int nDstStep = oROI.width * sizeof(d_pDst[0]);
  30. const int pixval = 1;
  31. const int nDivisor = 1;
  32. const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
  33. Npp32s* d_pKernel;
  34. const Npp32s nAnchor = 2;
  35. cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
  36. if (err != cudaSuccess) {
  37. fprintf(stderr, "Cuda error %d\n", __LINE__);
  38. return 1;
  39. }
  40. err = cudaMalloc((void**)&d_pDst, dimgsize);
  41. if (err != cudaSuccess) {
  42. fprintf(stderr, "Cuda error %d\n", __LINE__);
  43. return 1;
  44. }
  45. err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
  46. if (err != cudaSuccess) {
  47. fprintf(stderr, "Cuda error %d\n", __LINE__);
  48. return 1;
  49. }
  50. // set image to pixval initially
  51. err = cudaMemset(d_pSrc, pixval, simgsize);
  52. if (err != cudaSuccess) {
  53. fprintf(stderr, "Cuda error %d\n", __LINE__);
  54. return 1;
  55. }
  56. err = cudaMemset(d_pDst, 0, dimgsize);
  57. if (err != cudaSuccess) {
  58. fprintf(stderr, "Cuda error %d\n", __LINE__);
  59. return 1;
  60. }
  61. err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
  62. cudaMemcpyHostToDevice);
  63. if (err != cudaSuccess) {
  64. fprintf(stderr, "Cuda error %d\n", __LINE__);
  65. return 1;
  66. }
  67. // copy src to dst
  68. NppStatus ret =
  69. nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
  70. nMaskSize, nAnchor, nDivisor);
  71. assert(ret == NPP_NO_ERROR);
  72. Npp8u* h_imgres = new Npp8u[dimgpix];
  73. err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
  74. if (err != cudaSuccess) {
  75. fprintf(stderr, "Cuda error %d\n", __LINE__);
  76. return 1;
  77. }
  78. // test for filtering
  79. for (int i = 0; i < dimgpix; i++) {
  80. if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
  81. fprintf(stderr, "h_imgres at index %d failed to match\n", i);
  82. return 1;
  83. }
  84. }
  85. return 0;
  86. }