| ²é¿´: 946 | »Ø¸´: 0 | ||
cln116¾èÖú¹ó±ö (ÖªÃû×÷¼Ò)
|
[ÇóÖú]
CUDAºËº¯Êý¼ÆËã½á¹ûÓëC++Ñ»·¼ÆËã½á¹ûÓÖ²î±ð£¬ÇóÖú¡£
|
|
ʹÓÃCUDAʱ¼ä²»³¤£¬Óöµ½Á˼¬ÊÖµÄÎÊÌ⣬ϣÍû¸ßÊÖÖ¸µã£¬Ð»Ð»¡£ ÓÃCUDAºËº¯Êý¼ÆËãµÄ½á¹ûÓëÓÃC++Ñ»·µÄ½á¹ûÓвî±ð£¬Ö÷ÒªÊÇfloat×îºó2λ¡£³ÌÐòÖУ¬h_spec Êý×éÓÃÓÚ´æ´¢C++Ñ»·µÄ¼ÆËã½á¹û£¬d_specÓÃÓÚ´æ´¢ºËº¯Êý¼ÆËã½á¹û£¬½«d_spec¿½±´µ½Êý×éh_d_spec£¬±È½Ïh_specÓëh_d_spec¡£µ±h_specÓëh_d_spec¾ùÓÐ16641(129*129)¸öÔªËØÊ±£¬h_specÓëh_d_specÖв»ÏàµÈµÄÔªËØÊýΪ6723¡£²»ÏàµÈµÄÔªËØ²îÖµºÜС£¬ÀýÈ磺 h_d_spec[0] = 4.3036454e-008, h_spec[0] = 4.3036462e-008, ²îÖµ: -7.1054274e-015 h_d_spec[1] = 4.5101640e-008, h_spec[1] = 4.5101658e-008, ²îÖµ: -1.7763568e-014 ²»ÖªµÀÕâÖÖ´íÎó²úÉúµÄÔÒòµ½µ×ÊÇʲô¡£ÔÒòÒ»£¬¿ÉÄÜÊdzÌÐòдµÃÓÐÎÊÌ⣻ÔÒò¶þ£¬CPUºÍGPU¸¡µã¾«¶ÈÓвî±ð¡£ ³ÌÐòËùÓÃGPU: Geforce GTX 460, Geforce GT430¡£³ÌÐò´úÂëÈçÏ£º // complex_test.cpp #include "stdafx.h" #include <math.h> #include <cuda_runtime.h> #include "iostream" #include "stdlib.h" #define _PI 3.14159265358979323846 #define _PI2 _PI*2.0 #define _GRAVITY 9.81 #define _GRAVITY2 96.2361 extern "C" void DEVICE_spec_func(float * spec, float A, float L, float Speed, float2 Dir_vector, int dim, float damping); float spec_func(float A, float2 Dir, float Speed, float2 K, float reflDamping) { float k2 = K.x * K.x + K.y * K.y; float Dir_len2 = Dir.x * Dir.x + Dir.y *Dir.y; if (k2 == 0.f) return 0.f; float k4 = k2 * k2; float KdotW =K.x * Dir.x + K.y * Dir.y; if (KdotW < 0.f) return 0.0f; float KdotWhat = KdotW*KdotW/(k2*Dir_len2); float Speed4 = pow(Speed, 4.0f); float eterm = exp( -1.0f * _GRAVITY2 / (k2*Speed4) ) / k4; float specResult = A * eterm * KdotWhat ; return specResult; } void test_spec() { float L = 256.0f; float speed = 20.0f; float2 dir_vector; dir_vector.x = cos(_PI/4); dir_vector.x = sin(_PI/4); int _N = 128; float _A = pow(_PI2/L, 2.0) * 3.48/1000.0; float2 K; float * h_spec = (float *) malloc((_N+1)*(_N+1)*sizeof(float)); //host array float * d_spec = NULL; //device array cudaMalloc((void**)&d_spec,(_N+1)*(_N+1)*sizeof(float)); float * h_d_spec = (float *) malloc((_N+1)*(_N+1)*sizeof(float)); //copy device array to the array float * differ = (float *) malloc((_N+1)*(_N+1)*sizeof(float)); //difference int num = 0; int h_nonezero_num = 0; int d_nonezero_num = 0; int h_d_zero = 0; for(int y=0; y<=_N; y++) { K.y = _PI2 / L * (float)(-_N/2 + y); for(int x=0; x<=_N; x++) { K.x = _PI2 / L * (float)(-_N/2 + x); int offset = y * (_N+1) + x; h_spec[offset] = spec_func(_A, dir_vector, speed, K, 0.0); if(h_spec[offset]!=0.0) { h_nonezero_num+=1; } } } DEVICE_spec_func(d_spec, _A, L, speed, dir_vector, _N+1, 0.0f); cudaMemcpy(h_d_spec, d_spec, (_N+1)*(_N+1)*sizeof(float), cudaMemcpyDeviceToHost); for(int y=0; y<=_N; y++) { for(int x=0; x<=_N; x++) { int offset = y * (_N+1) + x; differ[offset] = h_d_spec[offset] - h_spec[offset]; if(h_d_spec[offset]!=0.0) { d_nonezero_num += 1; } if(differ[offset] != 0.0) { num += 1; } if(h_spec[offset]!=0.0 && h_d_spec[offset]==0.0 ) { h_d_zero += 1; } } } printf("Not equal: %d \n", num); } int _tmain(int argc, _TCHAR* argv[]) { test_spec(); return 0; } //kernel.cu #include "cuda_runtime.h" #include "device_functions_decls.h" #include "cufft.h" #include "cufftw.h" #include "curand.h" //#include "math.h" #define _PI 3.14159265358979323846 #define _PI2 _PI*2.0 #define _GRAVITY 9.81 #define _GRAVITY2 96.2361 __device__ float d_spec_func(const float A, const float2 Dir, const float Speed, const float2 K, const float reflDamping) { float k2 = K.x * K.x + K.y * K.y; float Dir_len2 = Dir.x * Dir.x + Dir.y *Dir.y; if (k2 == 0.f) return 0.f; float k4 = k2 * k2; float KdotW =K.x * Dir.x + K.y * Dir.y; if (KdotW < 0.f) return 0.0f; float KdotWhat = KdotW*KdotW/(k2*Dir_len2); float Speed4 = pow(Speed, 4.0f); float eterm = exp( -1.0f * _GRAVITY2 / (k2*Speed4) ) / k4; float specResult = A * eterm * KdotWhat ; //float specResult = KdotWhat; return specResult; } __global__ void kernel_spec(float * spec, float A, float L, float Speed, float2 Dir_vector, int dim, float damping) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; int offset = x + y * dim; float2 K; K.x = _PI2 / L * (float)(-(dim-1)/2 + x); K.y = _PI2 / L * (float)(-(dim-1)/2 + y); spec[offset] = d_spec_func(A, Dir_vector, Speed, K, damping); } extern "C" void DEVICE_spec_func(float * spec, float A, float L, float Speed, float2 Dir_vector, int dim, float damping) { dim3 threads(1,1,1); dim3 blocks(dim, dim, 1); kernel_spec<<<blocks, threads>>>(spec, A, L, Speed, Dir_vector, dim, damping); } |
» ²ÂÄãϲ»¶
ÇëÎʹ²Í¬Í¨Ñ¶ºÍ¹²Í¬Ò»×÷µÄÈϿɶÈÎÊÌâ
ÒѾÓÐ7È˻ظ´
285Çóµ÷¼Á
ÒѾÓÐ7È˻ظ´
һ־Ըɽ¶«´óѧ£¬085600£¬344
ÒѾÓÐ4È˻ظ´
Ò»Ö¾Ô¸±±¾©¿Æ¼¼²ÄÁÏ¿ÆÑ§Ó빤³Ì288·Ö£¬Çóµ÷¼Á
ÒѾÓÐ12È˻ظ´
280Çóµ÷¼Á
ÒѾÓÐ7È˻ظ´
315Çóµ÷¼Á
ÒѾÓÐ5È˻ظ´
26¿¼Ñе÷¼Á
ÒѾÓÐ5È˻ظ´
301Çóµ÷¼Á
ÒѾÓÐ7È˻ظ´
Çóµ÷¼Á22408 288·Ö
ÒѾÓÐ4È˻ظ´
Çóµ÷¼Á
ÒѾÓÐ4È˻ظ´















»Ø¸´´ËÂ¥