Upload
alonso-gidley
View
219
Download
0
Embed Size (px)
Citation preview
GPGPU labor XI.
Monte Carlo szimuláció
Kezdeti teendők
• Tantárgy honlapja, Monte Carlo szimuláció
• A labor kiindulási alapjának letöltése (lab11_base.zip), kitömörítés a GPGPU\Labs könyvtárba
Isosurface raycastingvoid initVolVis(){ // OpenCL kernelek visualizationProgram = createProgram(context, device_id, "visualization.cl"); isosurfaceRaycastingKernel = createKernel(visualizationProgram, "isosurface"); alphaBlendedKernel = createKernel(visualizationProgram, "alphaBlended");
// Térfogati modell loadVolume("head.vox"); if(NULL == volumeData){ exit(-1); }
// Buffer objektum a térfogati modellnek volumeDataGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * volumeSize[0] * volumeSize[1] * volumeSize[2], NULL, NULL); CL_SAFE_CALL( clEnqueueWriteBuffer(commands, volumeDataGPU, CL_TRUE, 0, sizeof(float) * volumeSize[0] * volumeSize[1] * volumeSize[2], volumeData, 0, NULL, NULL));}
Isosurface raycastingvoid isosurface(){ // Kamera -> View mátrix + kamera pozíció cl_float16 clViewDir; float* camMatrix = camera.getViewDirMatrix().getPointer(); for(int i = 0; i < 16; ++i){ clViewDir.s[i] = camMatrix[i]; }
Vector eye = camera.getEye(); clViewDir.s[12] = eye.x; clViewDir.s[13] = eye.y; clViewDir.s[14] = eye.z; clViewDir.s[15] = 1.0f;
// Kernel paraméterek CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 0, sizeof(int), &width) ); // Kép szélesség CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 1, sizeof(int), &height) ); // Kép magasság CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 2, sizeof(cl_mem), &visualizationBufferGPU) ); // Kép buffer CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 3, sizeof(int), &volumeSize[0]) ); // Térfogati adat mérete CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 4, sizeof(cl_mem), &volumeDataGPU) ); // Térfogati adat CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 5, sizeof(float), &isoValue) ); // Iso érték CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 6, sizeof(cl_float16), &clViewDir) ); // Kamera mátrix
// Ray casting size_t visualizationBufferSize[2]; visualizationBufferSize[0] = width; visualizationBufferSize[1] = height;
CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, isosurfaceRaycastingKernel, 2, NULL, visualizationBufferSize, NULL, 0, NULL, NULL) );
clFinish(commands);
// A kép visszaolvasása az OpenCL bufferből CL_SAFE_CALL( clEnqueueReadBuffer(commands, visualizationBufferGPU, CL_TRUE, 0, sizeof(cl_float4) * width * height,
visualizationBufferCPU, 0, NULL, NULL));
// A kép kirajzolása az OpenGL framebufferbe glDrawPixels(width, height, GL_RGBA, GL_FLOAT, visualizationBufferCPU);}
Isosurface raycasting(visualization.cl)
// Isosurface ray-casting kernel: one work-item per pixel.
// Builds an eye ray from the packed camera data, intersects it with the
// [-1, 1]^3 bounding box of the volume and writes white for pixels whose
// ray hits the box, black otherwise. The marching step is left as the lab
// exercise (see TODO); resolution, volumeData and isoValue are not used yet.
__kernel
void isosurface(const int width, const int height, __global float4* visualizationBuffer,
                const int resolution, __global float* volumeData,
                const float isoValue, const float16 invViewMatrix)
{
    // Image-space coordinates, mapped to [-1, 1) on both axes.
    const int2 id = (int2)(get_global_id(0), get_global_id(1));
    const float2 uv = (float2)(2.0f * (id.x / (float)width) - 1.0f,
                               2.0f * (id.y / (float)height) - 1.0f);

    // Bounding box of the volume model.
    const float4 boxMin = (float4)(-1.0f, -1.0f, -1.0f, 1.0f);
    const float4 boxMax = (float4)(1.0f, 1.0f, 1.0f, 1.0f);

    // Ray in world space: the origin is stored in components C..F, the
    // direction is the camera-space direction dotted with components 0..B
    // taken four at a time.
    struct ray eyeRay;
    eyeRay.origin = invViewMatrix.sCDEF;

    const float4 camDir = normalize((float4)(uv.x, uv.y, -2.0f, 0.0f));
    eyeRay.direction = (float4)(dot(camDir, invViewMatrix.s0123),
                                dot(camDir, invViewMatrix.s4567),
                                dot(camDir, invViewMatrix.s89AB),
                                0.0f);

    float4 color = (float4)(0.0f);

    // Ray / bounding-box intersection.
    float tnear, tfar;
    const int hit = intersectBox(eyeRay.origin, eyeRay.direction, boxMin, boxMax, &tnear, &tfar);
    if (hit) {
        // TODO
        color = (float4)(1.0f);
    }

    // Write the colour into the image buffer, guarding against work-items
    // outside the image.
    if (id.x < width && id.y < height) {
        visualizationBuffer[id.y * width + id.x] = color;
    }
}
Isosurface raycasting
• Lépkedjünk tnear és tfar között a sugár mentén
  – Határozzuk meg az aktuális pozíciót a világ térben
  – Olvassuk ki a sűrűséget (getDensityFromVolume)
    • Ha a sűrűség nagyobb mint isoValue, akkor a szín legyen (float4)(1.0f)
    • Lépjünk ki a ciklusból
Isosurface raycasting
• Adjunk hozzá árnyalást!
  – A fény iránya: 0.3, -2.0, 0.0, 0.0
  – A normál vektor a térfogati modellben a getNormalFromVolume függvénnyel kérdezhető le
  – A szín legyen: clamp(dot(lightDir, normal), 0.0, 1.0)
Isosurface raycasting
• Félgömbös megvilágítás
  – A szín legyen: 0.5f + 0.5f * dot(lightDir, normal)
Alpha blended raycastingvoid alphaBlended(){ cl_float16 clViewDir; float* camMatrix = camera.getViewDirMatrix().getPointer(); for(int i = 0; i < 16; ++i){ clViewDir.s[i] = camMatrix[i]; }
Vector eye = camera.getEye(); clViewDir.s[12] = eye.x; clViewDir.s[13] = eye.y; clViewDir.s[14] = eye.z; clViewDir.s[15] = 1.0f;
CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 0, sizeof(int), &width) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 1, sizeof(int), &height) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 2, sizeof(cl_mem), &visualizationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 3, sizeof(int), &volumeSize[0]) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 4, sizeof(cl_mem), &volumeDataGPU) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 5, sizeof(float), &alphaExponent) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 6, sizeof(float), &alphaCenter) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 7, sizeof(cl_float16), &clViewDir) );
size_t visualizationBufferSize[2]; visualizationBufferSize[0] = width; visualizationBufferSize[1] = height;
CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, alphaBlendedKernel, 2, NULL, visualizationBufferSize, NULL, 0, NULL, NULL) );
clFinish(commands);
CL_SAFE_CALL( clEnqueueReadBuffer(commands, visualizationBufferGPU, CL_TRUE, 0, sizeof(cl_float4) * width * height, visualizationBufferCPU, 0, NULL, NULL));
glDrawPixels(width, height, GL_RGBA, GL_FLOAT, visualizationBufferCPU);}
Alpha blended raycasting(visualization.cl)
// Alpha-blended ray-casting kernel: one work-item per pixel.
// Builds an eye ray from the packed camera data, intersects it with the
// [-1, 1]^3 bounding box and writes white for pixels whose ray hits the
// box, black otherwise. This is the starting point of the exercise:
// resolution, volumeData, alphaExponent and alphaCenter are not used yet.
__kernel
void alphaBlended(const int width, const int height, __global float4* visualizationBuffer,
                  const int resolution, __global float* volumeData,
                  const float alphaExponent, const float alphaCenter,
                  const float16 invViewMatrix)
{
    // Pixel coordinates, mapped to [-1, 1) on both axes.
    const int2 id = (int2)(get_global_id(0), get_global_id(1));
    const float2 uv = (float2)(2.0f * (id.x / (float)width) - 1.0f,
                               2.0f * (id.y / (float)height) - 1.0f);

    const float4 boxMin = (float4)(-1.0f, -1.0f, -1.0f, 1.0f);
    const float4 boxMax = (float4)(1.0f, 1.0f, 1.0f, 1.0f);

    // Calculate the eye ray in world space: origin from components C..F,
    // direction from dot products with components 0..B taken four at a time.
    struct ray eyeRay;
    eyeRay.origin = invViewMatrix.sCDEF;

    const float4 camDir = normalize((float4)(uv.x, uv.y, -2.0f, 0.0f));
    eyeRay.direction = (float4)(dot(camDir, invViewMatrix.s0123),
                                dot(camDir, invViewMatrix.s4567),
                                dot(camDir, invViewMatrix.s89AB),
                                0.0f);

    float4 sum = (float4)(0.0f);

    float tnear, tfar;
    const int hit = intersectBox(eyeRay.origin, eyeRay.direction, boxMin, boxMax, &tnear, &tfar);
    if (hit) {
        sum = (float4)(1.0f);
    }

    // Guard against work-items outside the image before storing.
    if (id.x < width && id.y < height) {
        visualizationBuffer[id.y * width + id.x] = (float4)(sum);
    }
}
Alpha blended raycasting
• „X-Ray” kép
  – Lépkedjünk tfar-tól tnear-ig
    • Összegezzük a sűrűséget az elnyelődés figyelembe vételével
  – Alpha = pow(density, alphaExponent) * step
  – Sum = (1-alpha)*sum + alpha * (float4)(1.0)
Alpha blended raycasting
• Árnyalás hozzáadása
  – Lépkedjünk tfar-tól tnear-ig
    • Összegezzük a megvilágítást az elnyelődés figyelembe vételével
  – Sűrűség: getDensityFromVolume
  – Normál vektor: getNormalFromVolume
  – Alpha: clamp(alphaExponent * (density - alphaCenter) + 0.5, 0.0, 1.0)
  – Szín: 0.5f + 0.5f * dot(lightDir, normal)
  – Összegzett szín: (1-alpha) * sum + alpha * color
Alpha blended raycasting
• Átviteli függvény beépítése
  – color *= (float4)(density, density * density, density * density * density, 1.0f) + 0.1f;
Szóródás szimulációvoid initSimulation(){ photonProgram = createProgram(context, device_id, "programs.cl"); simulationKernel = createKernel(photonProgram, "simulation"); visualizationKernel = createKernel(photonProgram, "visualization");
// working set size CL_SAFE_CALL( clGetKernelWorkGroupInfo(simulationKernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,
sizeof(workGroupSize), &workGroupSize, NULL)); maxComputeUnits = *(int*)getDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS); problemSize = workGroupSize * maxComputeUnits;
std::cout << "Working set: " << workGroupSize << " * " << maxComputeUnits << " = " << problemSize << std::endl;
// init random number generator cl_uint4* seedCPU = new cl_uint4[workGroupSize * maxComputeUnits]; for(int i=0; i < workGroupSize * maxComputeUnits; ++i){ seedCPU[i].s[0] = rand(); seedCPU[i].s[1] = rand(); seedCPU[i].s[2] = rand(); seedCPU[i].s[3] = rand(); } seedGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint4) * workGroupSize * maxComputeUnits, NULL, NULL); CL_SAFE_CALL( clEnqueueWriteBuffer(commands, seedGPU,
CL_TRUE, 0, sizeof(cl_uint4) * workGroupSize * maxComputeUnits, seedCPU, 0, NULL, NULL));
// photon buffer photonBufferGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(struct photon) * workGroupSize * maxComputeUnits, NULL, NULL);
// simulation buffer simulationBufferGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * resolution * resolution * resolution, NULL, NULL);
// light source parameters lightSourcePosition.s[0] = 0.6f; lightSourcePosition.s[1] = 0.5f; lightSourcePosition.s[2] = 0.5f; lightSourcePosition.s[3] = 0.0f;}
Szóródás szimulációvoid resetSimulation(int resolution, cl_mem simulationBufferGPU){ cl_kernel resetSimulationKernel = createKernel(photonProgram, "resetSimulation");
CL_SAFE_CALL(clSetKernelArg(resetSimulationKernel, 0, sizeof(int), &resolution)); CL_SAFE_CALL(clSetKernelArg(resetSimulationKernel, 1, sizeof(cl_mem),
&simulationBufferGPU));
size_t workSize[3]; workSize[0] = resolution; workSize[1] = resolution; workSize[2] = resolution;
CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, resetSimulationKernel, 3, NULL, workSize, NULL, 0, NULL, NULL));
clFinish(commands);
clReleaseKernel(resetSimulationKernel);}
Szóródás szimulációvoid simulationStep(){ if(1 == iteration){ resetSimulation(resolution, simulationBufferGPU); }
CL_SAFE_CALL( clSetKernelArg(simulationKernel, 0, sizeof(int), &iteration) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 1, sizeof(cl_mem), &seedGPU) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 2, sizeof(cl_mem),
&photonBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 3, sizeof(int), &resolution) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 4, sizeof(cl_mem),
&simulationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 5, sizeof(cl_float4),
&lightSourcePosition) );
CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, simulationKernel, 1, NULL, &problemSize, &workGroupSize, 0, NULL, NULL) );
clFinish(commands);}
Szóródás szimulációvoid visualizationStep(){ cl_float16 clViewDir; float* camMatrix = camera.getViewDirMatrix().getPointer(); for(int i = 0; i < 16; ++i){ clViewDir.s[i] = camMatrix[i]; }
Vector eye = camera.getEye(); clViewDir.s[12] = eye.x; clViewDir.s[13] = eye.y; clViewDir.s[14] = eye.z; clViewDir.s[15] = 1.0f;
CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 0, sizeof(int), &width) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 1, sizeof(int), &height) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 2, sizeof(cl_mem), &visualizationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 3, sizeof(int), &resolution) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 4, sizeof(cl_mem), &simulationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 5, sizeof(int), &iteration) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 6, sizeof(cl_float16), &clViewDir));
size_t visualizationBufferSize[2]; visualizationBufferSize[0] = width; visualizationBufferSize[1] = height;
CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, visualizationKernel, 2, NULL, visualizationBufferSize, NULL, 0, NULL, NULL) );
clFinish(commands);
CL_SAFE_CALL( clEnqueueReadBuffer(commands, visualizationBufferGPU, CL_TRUE, 0, sizeof(cl_float4) * width * height, visualizationBufferCPU, 0, NULL, NULL));
//saveTGA(iteration, width, height, visualizationBufferCPU); glDrawPixels(width, height, GL_RGBA, GL_FLOAT, visualizationBufferCPU);}
Szóródás szimuláció
void display(){ // .... case 3: iteration++; simulationStep(); visualizationStep(); break;}
Szóródás szimuláció(programs.cl)
// Zeroes one cell of the simulation grid per work-item.
// Launched over a 3D range; the cell is addressed with x fastest, then y,
// then z, using `resolution` as the row and slice stride. There is no
// bounds check, so the global size must not exceed resolution per axis.
__kernel
void resetSimulation(const int resolution, __global float* simulationBuffer)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int z = get_global_id(2);

    const int cell = x + y * resolution + z * resolution * resolution;
    simulationBuffer[cell] = 0.0f;
}
Szóródás szimuláció(programs.cl)
// One simulation step for one photon per work-item.
// Loads this work-item's RNG state, (re)emits the photon from the light
// source when iteration is 0 or its energy dropped below 0.2, picks a new
// random direction, traces the photon, deposits its energy into the
// simulation buffer and stores the RNG state back.
__kernel
void simulation(const int iteration, __global uint4* seed, __global struct photon* photons,
                const int resolution, __global float* simulationBuffer,
                const float4 lightSourcePosition)
{
    const int id = get_global_id(0);

    // Random generator setup: four 32-bit state words per work-item.
    const uint4 state = seed[id];
    uint rng1 = state.s0;
    uint rng2 = state.s1;
    uint rng3 = state.s2;
    uint rng4 = state.s3;

    // Scatter simulation: a fresh or exhausted photon restarts at the light
    // source with full energy; every photon gets a new random direction.
    const bool respawn = (0 == iteration) || (photons[id].energy < 0.2f);
    if (respawn) {
        photons[id].origin = lightSourcePosition;
        photons[id].energy = 1.0f;
    }
    photons[id].direction = getRandomDirection(&rng1, &rng2, &rng3, &rng4);

    const float rnd = getRandom(&rng1, &rng2, &rng3, &rng4);
    tracePhotonRM(&photons[id], rnd);
    storePhoton(&photons[id], resolution, simulationBuffer);

    // Random state store
    seed[id] = (uint4)(rng1, rng2, rng3, rng4);
}
Szóródás szimuláció(programs.cl)
// Returns a random unit direction (w = 0).
// Draws points in [-1, 1]^3 via getRandom() until one has squared length
// at most 1, then normalizes it. A point at exactly the origin is replaced
// by (0, 1, 0) so the normalization never divides by zero.
float4 getRandomDirection(uint* rng1, uint* rng2, uint* rng3, uint* rng4)
{
    float x, y, z;
    float lenSq;

    // Rejection sampling inside the unit ball.
    do {
        x = getRandom(rng1, rng2, rng3, rng4) * 2.0f - 1.0f;
        y = getRandom(rng1, rng2, rng3, rng4) * 2.0f - 1.0f;
        z = getRandom(rng1, rng2, rng3, rng4) * 2.0f - 1.0f;
        lenSq = x * x + y * y + z * z;
    } while (!(lenSq <= 1.0f));

    if (lenSq == 0.0f) {
        x = 0.0f;
        y = 1.0f;
        z = 0.0f;
    }

    const float vlen = sqrt(x * x + y * y + z * z);
    return (float4)(x / vlen, y / vlen, z / vlen, 0);
}
Szóródás szimuláció(programs.cl)
// Adds the photon's energy to the grid cell that contains its origin.
// Photons with energy below 0.1 and photons whose cell index falls outside
// [0, resolution - 1] on any axis are ignored.
void storePhoton(__global struct photon* p, const int resolution, __global float* simulationBuffer)
{
    if (p->energy < 0.1f) {
        return;
    }

    // Scale the origin by `resolution`; float -> int conversion truncates.
    const int x = p->origin.x * resolution;
    const int y = p->origin.y * resolution;
    const int z = p->origin.z * resolution;

    const int last = resolution - 1;
    if (x < 0 || x > last || y < 0 || y > last || z < 0 || z > last) {
        return;
    }

    // NOTE(review): this is a plain read-modify-write on global memory; if
    // two work-items hit the same cell at once an update may be lost —
    // confirm whether that is acceptable for the simulation.
    simulationBuffer[x + y * resolution + z * resolution * resolution] += p->energy;
}
Szóródás szimuláció(programs.cl)
// Analytic density of the scene at position p.
// Checked in order: a ceiling slab with a circular hole, three walls, a
// layer near the floor that gets denser towards y = 0, and finally the
// ambient density 0.5 * densityScale.
float getDensity(float4 p)
{
    // Ceiling with a hole: dense everywhere in the slab 0.78 < y < 0.83
    // except within squared distance 0.001 of (x, z) = (0.5, 0.5).
    const float dx = p.x - 0.5f;
    const float dz = p.z - 0.5f;
    if (p.y > 0.78f && p.y < 0.83f && (dx * dx + dz * dz) > 0.001f) {
        return 100.0f;
    }

    // Walls
    if (p.x < 0.02f || p.y < 0.02f || p.z > 0.98f) {
        return 100.0f;
    }

    // Gets denser towards the bottom
    if (p.y < 0.2f) {
        return (1.0f - p.y) * 5.0f;
    }

    return 0.5f * densityScale;
}
Szóródás szimuláció(programs.cl)
// Moves a photon along its direction by ray marching until the accumulated
// density reaches a randomly chosen threshold.
//
//   p    photon to advance; origin and energy are updated in place
//   rnd  random number used to draw the threshold s = -log(rnd) / densityScale
//
// The march uses a fixed step of 1/256. If a sample position leaves the
// unit cube [0, 1]^3 the photon's energy is set to 0 and the march stops.
// Afterwards the origin is moved to the last marched position and the
// energy is multiplied by albedo. The direction is left unchanged (the
// former `p->direction = p->direction;` self-assignment was a no-op and
// has been removed).
void tracePhotonRM(__global struct photon* p, float rnd)
{
    const float s = -log(rnd) / densityScale;
    const float dt = 1.0f / 256.0f;

    // Origin and direction do not change during the march, so read them
    // from global memory once.
    const float4 origin = p->origin;
    const float4 direction = p->direction;

    float t = 0.0f;
    float sum = 0.0f;

    while (sum < s) {
        const float4 samplePos = origin + t * direction;

        const bool outside = samplePos.x < 0.0f || samplePos.x > 1.0f
                          || samplePos.y < 0.0f || samplePos.y > 1.0f
                          || samplePos.z < 0.0f || samplePos.z > 1.0f;
        if (outside) {
            p->energy = 0.0f;
            break;
        }

        sum += getDensity(samplePos) * dt;
        t += dt;
    }

    p->origin = origin + direction * t;
    p->energy = p->energy * albedo;
}
Szóródás szimuláció
• Bővítsük az analitikus sűrűség függvényünket!
• Bővítsük több hullámhossz kezelésével
  – A sűrűség modellt egészítsük ki az albedó több hullámhosszra való kiterjesztésével
• A pontszerű fényforrást cseréljük le spot vagy irány forrásra