Add element-wise array addition examples using CUDA and the CPU

Signed-off-by: Nigel Barink <nigelbarink@hotmail.com>
2024-09-29 18:41:02 +02:00
commit 8857db8abc
4 changed files with 73 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 *.exp
 *.lib
 *.exe
--- a/README.md
+++ b/README.md
@@ -0,0 +1,4 @@
 # Practicing CUDA 
 ## Doing some parallel compute on the GPU 
--- a/add.cpp
+++ b/add.cpp
@@ -0,0 +1,31 @@
 #include <iostream>
 #include <math.h>
 // Adds x into y element by element on the host: for every k in [0, n),
 // y[k] is overwritten with x[k] + y[k]. x is only read.
 void add (int n, float* x, float* y){
    int k = 0;
    while (k < n) {
        y[k] += x[k];
        ++k;
    }
 }
 // Host-only reference run: fills two arrays with constants, adds them with
 // add(), and prints the largest deviation from the expected value 3.
 int main (void) {
    // 2^20 elements per array.
    const int count = 1<<20;
    float *src = new float [count];
    float *dst = new float [count];

    // Every src element starts at 1 and every dst element at 2.
    for (int k = 0; k != count; ++k){
        src[k] = 1.0f;
        dst[k] = 2.0f;
    }

    // dst[k] becomes src[k] + dst[k].
    add(count, src, dst);

    // Each element should now be 3; keep the largest absolute difference.
    float worst = 0.0f;
    for (const float *p = dst; p != dst + count; ++p){
        worst = fmax(worst, fabs(*p - 3.0f));
    }
    std::cout << "Max error: " << worst << std::endl;

    delete [] dst;
    delete [] src;
    return 0;
 }
--- a/add_cuda.cu
+++ b/add_cuda.cu
@@ -0,0 +1,35 @@
 #include <iostream>
 #include <math.h>
 // Element-wise in-place addition on the device: y[i] = x[i] + y[i] for
 // every i in [0, n).
 //
 // Launch layout: any 1D grid of 1D blocks. Each thread starts at its flat
 // global index and advances by the total number of launched threads, so
 // every element is processed by exactly one thread. A <<<1,1>>> launch
 // therefore still walks the whole array, while a wider launch splits the
 // work instead of having every thread repeat (and race on) the same elements.
 // Shared memory: none.
 // Preconditions: x and y are device-accessible and hold at least n floats.
 __global__
 void add (int n, float* x, float* y){
    // 64-bit index arithmetic so blockIdx.x * blockDim.x and i += stride
    // cannot overflow for large grids.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for(long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
        y[i] = x[i] + y[i];
 }
 // Runs the add kernel over two managed arrays and prints the largest
 // deviation from the expected value 3.
 // Returns 0 on success and 1 if an allocation, the launch, or the kernel
 // execution reports a CUDA error.
 int main (void) {
    int N = 1<<20;   // 2^20 elements per array
    float *x = 0, *y = 0;

    // Managed allocations: the same pointers are written by the host loop
    // below and passed to the kernel. Check the results, because on failure
    // the pointers are unusable and the fill loop would dereference them.
    cudaError_t err = cudaMallocManaged(&x, N*sizeof(float));
    if (err == cudaSuccess)
        err = cudaMallocManaged(&y, N*sizeof(float));
    if (err != cudaSuccess) {
        std::cerr << "cudaMallocManaged failed: " << cudaGetErrorString(err) << std::endl;
        cudaFree(x);   // x is either a valid allocation or still 0 (no-op)
        return 1;
    }

    for (int i = 0; i<N; i++){
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    // NOTE(review): the launch stays at one block of one thread. Widening it
    // is only correct if the kernel partitions indices across threads.
    add<<<1,1>>>(N, x, y );

    // A launch returns no status itself: cudaGetLastError() reports a bad
    // launch, and cudaDeviceSynchronize() waits for the kernel and reports
    // errors raised while it ran. The host must not read y before that wait.
    err = cudaGetLastError();
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        std::cerr << "add kernel failed: " << cudaGetErrorString(err) << std::endl;
        cudaFree(x);
        cudaFree(y);
        return 1;
    }

    // Every element should now be 3; track the largest absolute difference.
    float maxError= 0.0f;
    for (int i =0; i < N; i++){
        maxError = fmax(maxError, fabs(y[i] -3.0f));
    }
    std::cout << "Max error: " << maxError << std::endl;

    cudaFree(x);
    cudaFree(y);
    return 0;
 }
		`@@ -0,0 +1,4 @@`
							`# Practicing CUDA`
							`## Doing some parallel compute on the GPU`