nppif.cpp 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. // Comes from
  2. // https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
  3. #ifdef _WIN32
  4. # define EXPORT __declspec(dllexport)
  5. #else
  6. # define EXPORT
  7. #endif
  8. #include <cstdio>
  9. #include <iostream>
  10. #include <assert.h>
  11. #include <cuda_runtime_api.h>
  12. #include <nppi_filtering_functions.h>
  13. EXPORT int nppif_main()
  14. {
  15. /**
  16. * 8-bit unsigned single-channel 1D row convolution.
  17. */
  18. const int simgrows = 32;
  19. const int simgcols = 32;
  20. Npp8u *d_pSrc, *d_pDst;
  21. const int nMaskSize = 3;
  22. NppiSize oROI;
  23. oROI.width = simgcols - nMaskSize;
  24. oROI.height = simgrows;
  25. const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
  26. const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
  27. const int simgpix = simgrows * simgcols;
  28. const int dimgpix = oROI.width * oROI.height;
  29. const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
  30. const int nDstStep = oROI.width * sizeof(d_pDst[0]);
  31. const int pixval = 1;
  32. const int nDivisor = 1;
  33. const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
  34. Npp32s* d_pKernel;
  35. const Npp32s nAnchor = 2;
  36. cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
  37. if (err != cudaSuccess) {
  38. fprintf(stderr, "Cuda error %d\n", __LINE__);
  39. return 1;
  40. }
  41. err = cudaMalloc((void**)&d_pDst, dimgsize);
  42. if (err != cudaSuccess) {
  43. fprintf(stderr, "Cuda error %d\n", __LINE__);
  44. return 1;
  45. }
  46. err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
  47. if (err != cudaSuccess) {
  48. fprintf(stderr, "Cuda error %d\n", __LINE__);
  49. return 1;
  50. }
  51. // set image to pixval initially
  52. err = cudaMemset(d_pSrc, pixval, simgsize);
  53. if (err != cudaSuccess) {
  54. fprintf(stderr, "Cuda error %d\n", __LINE__);
  55. return 1;
  56. }
  57. err = cudaMemset(d_pDst, 0, dimgsize);
  58. if (err != cudaSuccess) {
  59. fprintf(stderr, "Cuda error %d\n", __LINE__);
  60. return 1;
  61. }
  62. err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
  63. cudaMemcpyHostToDevice);
  64. if (err != cudaSuccess) {
  65. fprintf(stderr, "Cuda error %d\n", __LINE__);
  66. return 1;
  67. }
  68. // copy src to dst
  69. NppStatus ret =
  70. nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
  71. nMaskSize, nAnchor, nDivisor);
  72. assert(ret == NPP_NO_ERROR);
  73. Npp8u* h_imgres = new Npp8u[dimgpix];
  74. err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
  75. if (err != cudaSuccess) {
  76. fprintf(stderr, "Cuda error %d\n", __LINE__);
  77. return 1;
  78. }
  79. // test for filtering
  80. for (int i = 0; i < dimgpix; i++) {
  81. if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
  82. fprintf(stderr, "h_imgres at index %d failed to match\n", i);
  83. return 1;
  84. }
  85. }
  86. return 0;
  87. }