main.c 1020 B

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. #include <stdio.h>
  2. #include <stdlib.h>
  /**
   * Element-wise vector addition: r[i] = a[i] + b[i] for 0 <= i < n.
   *
   * The loop carries an OpenACC `kernels loop` directive. Its `present`
   * clause names r, a and b as data that is already on the device, so the
   * caller is expected to invoke this function from inside a data region
   * that covers all three arrays. A compiler that does not recognise the
   * pragma simply runs the loop on the host.
   *
   * @param r  output vector, at least n elements
   * @param a  first input vector, at least n elements (read only)
   * @param b  second input vector, at least n elements (read only)
   * @param n  number of elements to add; nothing is written when n <= 0
   */
  void vecaddgpu(float* r, const float* a, const float* b, int n)
  {
    /*
     * `independent`: iteration i reads a[i] and b[i] and writes r[i] only,
     * so no iteration depends on another. Without this assertion the
     * compiler has to assume the three pointers may alias and can fall
     * back to sequential execution of the kernel.
     */
  #pragma acc kernels loop independent present(r, a, b)
    for (int i = 0; i < n; ++i) {
      r[i] = a[i] + b[i];
    }
  }
  /**
   * Self-checking driver for vecaddgpu().
   *
   * Fills two input vectors, adds them through vecaddgpu() inside an OpenACC
   * data region, recomputes the sum with a plain host loop, and compares the
   * two results element by element.
   *
   * @return EXIT_SUCCESS when every element matches; EXIT_FAILURE when an
   *         allocation fails or at least one element differs.
   */
  int main(void)
  {
    int n = 100000; /* vector length */
    size_t bytes = (size_t)n * sizeof(float);
    float* a = malloc(bytes); /* input vector 1 */
    float* b = malloc(bytes); /* input vector 2 */
    float* r = malloc(bytes); /* output vector */
    float* e = malloc(bytes); /* expected output values */
    int status = EXIT_FAILURE;
    int errs = 0;
    int i;

    if (a == NULL || b == NULL || r == NULL || e == NULL) {
      fprintf(stderr, "vecadd: cannot allocate %zu bytes per vector\n", bytes);
      goto cleanup;
    }

    for (i = 0; i < n; ++i) {
      a[i] = (float)(i + 1);
      b[i] = (float)(1000 * i);
    }

    /* compute on the GPU: inputs are copied in, the result is copied out */
  #pragma acc data copyin(a[0 : n], b[0 : n]) copyout(r[0 : n])
    {
      vecaddgpu(r, a, b, n);
    }

    /* compute on the host to compare */
    for (i = 0; i < n; ++i) {
      e[i] = a[i] + b[i];
    }

    /* compare results */
    for (i = 0; i < n; ++i) {
      if (r[i] != e[i]) {
        ++errs;
      }
    }

    if (errs != 0) {
      fprintf(stderr, "vecadd: %d of %d elements differ\n", errs, n);
    } else {
      status = EXIT_SUCCESS;
    }

    /*
     * The mismatch count is deliberately not used as the exit status: hosts
     * that keep only the low 8 bits would see a count that is a multiple of
     * 256 as success.
     */
  cleanup:
    free(e);
    free(r);
    free(b);
    free(a);
    return status;
  }