GPGPU labor XI. Monte Carlo szimuláció. Kezdeti teendők Tantárgy honlapja, Monte Carlo szimuláció A labor kiindulási alapjának letöltése (lab11_base.zip),

GPGPU labor XI.

Monte Carlo szimuláció

Kezdeti teendők

• Tantárgy honlapja, Monte Carlo szimuláció
• A labor kiindulási alapjának letöltése (lab11_base.zip), kitömörítés a GPGPU\Labs könyvtárba

Isosurface raycastingvoid initVolVis(){ // OpenCL kernelek visualizationProgram = createProgram(context, device_id, "visualization.cl"); isosurfaceRaycastingKernel = createKernel(visualizationProgram, "isosurface"); alphaBlendedKernel = createKernel(visualizationProgram, "alphaBlended");

// Térfogati modell loadVolume("head.vox"); if(NULL == volumeData){ exit(-1); }

// Buffer objektum a térfogati modellnek volumeDataGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * volumeSize[0] * volumeSize[1] * volumeSize[2], NULL, NULL); CL_SAFE_CALL( clEnqueueWriteBuffer(commands, volumeDataGPU, CL_TRUE, 0, sizeof(float) * volumeSize[0] * volumeSize[1] * volumeSize[2], volumeData, 0, NULL, NULL));}

Isosurface raycastingvoid isosurface(){ // Kamera -> View mátrix + kamera pozíció cl_float16 clViewDir; float* camMatrix = camera.getViewDirMatrix().getPointer(); for(int i = 0; i < 16; ++i){ clViewDir.s[i] = camMatrix[i]; }

Vector eye = camera.getEye(); clViewDir.s[12] = eye.x; clViewDir.s[13] = eye.y; clViewDir.s[14] = eye.z; clViewDir.s[15] = 1.0f;

// Kernel paraméterek CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 0, sizeof(int), &width) ); // Kép szélesség CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 1, sizeof(int), &height) ); // Kép magasság CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 2, sizeof(cl_mem), &visualizationBufferGPU) ); // Kép buffer CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 3, sizeof(int), &volumeSize[0]) ); // Térfogati adat mérete CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 4, sizeof(cl_mem), &volumeDataGPU) ); // Térfogati adat CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 5, sizeof(float), &isoValue) ); // Iso érték CL_SAFE_CALL( clSetKernelArg(isosurfaceRaycastingKernel, 6, sizeof(cl_float16), &clViewDir) ); // Kamera mátrix

// Ray casting size_t visualizationBufferSize[2]; visualizationBufferSize[0] = width; visualizationBufferSize[1] = height;

CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, isosurfaceRaycastingKernel, 2, NULL, visualizationBufferSize, NULL, 0, NULL, NULL) );

clFinish(commands);

// A kép visszaolvasása az OpenCL bufferből CL_SAFE_CALL( clEnqueueReadBuffer(commands, visualizationBufferGPU, CL_TRUE, 0, sizeof(cl_float4) * width * height,

visualizationBufferCPU, 0, NULL, NULL));

// A kép kirajzolása az OpenGL framebufferbe glDrawPixels(width, height, GL_RGBA, GL_FLOAT, visualizationBufferCPU);}

Isosurface raycasting (visualization.cl)

// Isosurface ray-casting kernel skeleton: one work-item per pixel.
// Builds an eye ray from invViewMatrix, intersects it with the [-1,1]^3
// bounding box of the volume and writes white for a hit, black otherwise.
//   width, height        image size in pixels
//   visualizationBuffer  output image, indexed as x + y * width
//   resolution, volumeData, isoValue  not used yet; reserved for the TODO below
//   invViewMatrix        components s0..sB give the direction transform,
//                        sC..sF the ray origin
__kernel
void isosurface(const int width, const int height, __global float4* visualizationBuffer,
                const int resolution, __global float* volumeData,
                const float isoValue, const float16 invViewMatrix){
  // Image-space coordinates
  const int2 id = (int2)(get_global_id(0), get_global_id(1));

  // Work-items outside the image never write, so they can leave right away
  if(!(id.x < width && id.y < height)){
    return;
  }

  // Pixel position mapped to [-1, 1)
  const float2 uv = (float2)( (id.x / (float) width) * 2.0f - 1.0f,
                              (id.y / (float) height) * 2.0f - 1.0f );

  // Bounding box of the volume model
  const float4 boxMin = (float4)(-1.0f, -1.0f, -1.0f, 1.0f);
  const float4 boxMax = (float4)( 1.0f,  1.0f,  1.0f, 1.0f);

  // Ray in world space
  struct ray eyeRay;
  eyeRay.origin = invViewMatrix.sCDEF;

  const float4 viewDir = normalize((float4)(uv.x, uv.y, -2.0f, 0.0f));
  eyeRay.direction = (float4)( dot(viewDir, invViewMatrix.s0123),
                               dot(viewDir, invViewMatrix.s4567),
                               dot(viewDir, invViewMatrix.s89AB),
                               0.0f );

  float4 color = (float4)(0.0f);

  // Ray - bounding box intersection
  float tnear, tfar;
  const int hit = intersectBox(eyeRay.origin, eyeRay.direction, boxMin, boxMax, &tnear, &tfar);
  if(hit){
    // TODO
    color = (float4)(1.0f);
  }

  // Write the color into the image buffer
  visualizationBuffer[id.x + id.y * width] = color;
}

Isosurface raycasting

• Lépkedjünk tnear és tfar között a sugár mentén
  – Határozzuk meg az aktuális pozíciót a világ térben
  – Olvassuk ki a sűrűséget (getDensityFromVolume)
    • Ha a sűrűség nagyobb, mint isoValue, akkor a szín legyen (float4)(1.0f)
    • Lépjünk ki a ciklusból


• Adjunk hozzá árnyalást!
  – A fény iránya: 0.3, -2.0, 0.0, 0.0
  – A normál vektor a térfogati modellben a getNormalFromVolume függvénnyel kérdezhető le
  – A szín legyen: clamp(dot(lightDir, normal), 0.0, 1.0)


• Félgömbös megvilágítás
  – A szín legyen: 0.5f + 0.5f * dot(lightDir, normal)

Alpha blended raycastingvoid alphaBlended(){ cl_float16 clViewDir; float* camMatrix = camera.getViewDirMatrix().getPointer(); for(int i = 0; i < 16; ++i){ clViewDir.s[i] = camMatrix[i]; }


CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 0, sizeof(int), &width) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 1, sizeof(int), &height) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 2, sizeof(cl_mem), &visualizationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 3, sizeof(int), &volumeSize[0]) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 4, sizeof(cl_mem), &volumeDataGPU) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 5, sizeof(float), &alphaExponent) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 6, sizeof(float), &alphaCenter) ); CL_SAFE_CALL( clSetKernelArg(alphaBlendedKernel, 7, sizeof(cl_float16), &clViewDir) );

size_t visualizationBufferSize[2]; visualizationBufferSize[0] = width; visualizationBufferSize[1] = height;

CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, alphaBlendedKernel, 2, NULL, visualizationBufferSize, NULL, 0, NULL, NULL) );

clFinish(commands);

CL_SAFE_CALL( clEnqueueReadBuffer(commands, visualizationBufferGPU, CL_TRUE, 0, sizeof(cl_float4) * width * height, visualizationBufferCPU, 0, NULL, NULL));

glDrawPixels(width, height, GL_RGBA, GL_FLOAT, visualizationBufferCPU);}

Alpha blended raycasting (visualization.cl)

// Alpha-blended ray-casting kernel skeleton: one work-item per pixel.
// Builds an eye ray from invViewMatrix, intersects it with the [-1,1]^3
// bounding box and writes white for a hit, black otherwise.
//   width, height        image size in pixels
//   visualizationBuffer  output image, indexed as x + y * width
//   resolution, volumeData, alphaExponent, alphaCenter
//                        not used by this skeleton
//   invViewMatrix        components s0..sB give the direction transform,
//                        sC..sF the ray origin
__kernel
void alphaBlended(const int width, const int height, __global float4* visualizationBuffer,
                  const int resolution, __global float* volumeData,
                  const float alphaExponent, const float alphaCenter,
                  const float16 invViewMatrix){
  const int2 id = (int2)(get_global_id(0), get_global_id(1));

  // Work-items outside the image never write, so they can leave right away
  if(!(id.x < width && id.y < height)){
    return;
  }

  // Pixel position mapped to [-1, 1)
  const float2 uv = (float2)( (id.x / (float) width) * 2.0f - 1.0f,
                              (id.y / (float) height) * 2.0f - 1.0f );

  const float4 boxMin = (float4)(-1.0f, -1.0f, -1.0f, 1.0f);
  const float4 boxMax = (float4)( 1.0f,  1.0f,  1.0f, 1.0f);

  // Calculate eye ray in world space
  struct ray eyeRay;
  eyeRay.origin = invViewMatrix.sCDEF;

  const float4 viewDir = normalize((float4)(uv.x, uv.y, -2.0f, 0.0f));
  eyeRay.direction = (float4)( dot(viewDir, invViewMatrix.s0123),
                               dot(viewDir, invViewMatrix.s4567),
                               dot(viewDir, invViewMatrix.s89AB),
                               0.0f );

  float4 sum = (float4)(0.0f);

  float tnear, tfar;
  const int hit = intersectBox(eyeRay.origin, eyeRay.direction, boxMin, boxMax, &tnear, &tfar);
  if(hit){
    sum = (float4)(1.0f);
  }

  visualizationBuffer[id.x + id.y * width] = (float4)(sum);
}

Alpha blended raycasting

• „X-Ray” kép
  – Lépkedjünk tfar-tól tnear-ig
    • Összegezzük a sűrűséget az elnyelődés figyelembe vételével
  – Alpha = pow(density, alphaExponent) * step
  – Sum = (1 - alpha) * sum + alpha * (float4)(1.0)


• Árnyalás hozzáadása
  – Lépkedjünk tfar-tól tnear-ig
    • Összegezzük a megvilágítást az elnyelődés figyelembe vételével
  – Sűrűség: getDensityFromVolume
  – Normál vektor: getNormalFromVolume
  – Alpha: clamp(alphaExponent * (density - alphaCenter) + 0.5, 0.0, 1.0)
  – Szín: 0.5f + 0.5f * dot(lightDir, normal)
  – Összegzett szín: (1 - alpha) * sum + alpha * color


• Átviteli függvény beépítése
  – color *= (float4)(density, density * density, density * density * density, 1.0f) + 0.1f;

Szóródás szimulációvoid initSimulation(){ photonProgram = createProgram(context, device_id, "programs.cl"); simulationKernel = createKernel(photonProgram, "simulation"); visualizationKernel = createKernel(photonProgram, "visualization");

// working set size CL_SAFE_CALL( clGetKernelWorkGroupInfo(simulationKernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,

sizeof(workGroupSize), &workGroupSize, NULL)); maxComputeUnits = *(int*)getDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS); problemSize = workGroupSize * maxComputeUnits;

std::cout << "Working set: " << workGroupSize << " * " << maxComputeUnits << " = " << problemSize << std::endl;

// init random number generator cl_uint4* seedCPU = new cl_uint4[workGroupSize * maxComputeUnits]; for(int i=0; i < workGroupSize * maxComputeUnits; ++i){ seedCPU[i].s[0] = rand(); seedCPU[i].s[1] = rand(); seedCPU[i].s[2] = rand(); seedCPU[i].s[3] = rand(); } seedGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint4) * workGroupSize * maxComputeUnits, NULL, NULL); CL_SAFE_CALL( clEnqueueWriteBuffer(commands, seedGPU,

CL_TRUE, 0, sizeof(cl_uint4) * workGroupSize * maxComputeUnits, seedCPU, 0, NULL, NULL));

// photon buffer photonBufferGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(struct photon) * workGroupSize * maxComputeUnits, NULL, NULL);

// simulation buffer simulationBufferGPU = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * resolution * resolution * resolution, NULL, NULL);

// light source parameters lightSourcePosition.s[0] = 0.6f; lightSourcePosition.s[1] = 0.5f; lightSourcePosition.s[2] = 0.5f; lightSourcePosition.s[3] = 0.0f;}

Szóródás szimulációvoid resetSimulation(int resolution, cl_mem simulationBufferGPU){ cl_kernel resetSimulationKernel = createKernel(photonProgram, "resetSimulation");

CL_SAFE_CALL(clSetKernelArg(resetSimulationKernel, 0, sizeof(int), &resolution)); CL_SAFE_CALL(clSetKernelArg(resetSimulationKernel, 1, sizeof(cl_mem),

&simulationBufferGPU));

size_t workSize[3]; workSize[0] = resolution; workSize[1] = resolution; workSize[2] = resolution;

CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, resetSimulationKernel, 3, NULL, workSize, NULL, 0, NULL, NULL));

clFinish(commands);

clReleaseKernel(resetSimulationKernel);}

Szóródás szimulációvoid simulationStep(){ if(1 == iteration){ resetSimulation(resolution, simulationBufferGPU); }

CL_SAFE_CALL( clSetKernelArg(simulationKernel, 0, sizeof(int), &iteration) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 1, sizeof(cl_mem), &seedGPU) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 2, sizeof(cl_mem),

&photonBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 3, sizeof(int), &resolution) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 4, sizeof(cl_mem),

&simulationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(simulationKernel, 5, sizeof(cl_float4),

&lightSourcePosition) );

CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, simulationKernel, 1, NULL, &problemSize, &workGroupSize, 0, NULL, NULL) );

clFinish(commands);}

Szóródás szimulációvoid visualizationStep(){ cl_float16 clViewDir; float* camMatrix = camera.getViewDirMatrix().getPointer(); for(int i = 0; i < 16; ++i){ clViewDir.s[i] = camMatrix[i]; }


CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 0, sizeof(int), &width) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 1, sizeof(int), &height) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 2, sizeof(cl_mem), &visualizationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 3, sizeof(int), &resolution) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 4, sizeof(cl_mem), &simulationBufferGPU) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 5, sizeof(int), &iteration) ); CL_SAFE_CALL( clSetKernelArg(visualizationKernel, 6, sizeof(cl_float16), &clViewDir));

size_t visualizationBufferSize[2]; visualizationBufferSize[0] = width; visualizationBufferSize[1] = height;

CL_SAFE_CALL(clEnqueueNDRangeKernel(commands, visualizationKernel, 2, NULL, visualizationBufferSize, NULL, 0, NULL, NULL) );

clFinish(commands);

CL_SAFE_CALL( clEnqueueReadBuffer(commands, visualizationBufferGPU, CL_TRUE, 0, sizeof(cl_float4) * width * height, visualizationBufferCPU, 0, NULL, NULL));

//saveTGA(iteration, width, height, visualizationBufferCPU); glDrawPixels(width, height, GL_RGBA, GL_FLOAT, visualizationBufferCPU);}

Szóródás szimuláció

void display(){ // .... case 3: iteration++; simulationStep(); visualizationStep(); break;}

Szóródás szimuláció (programs.cl)

// Zeroes one voxel of the simulation buffer per work-item.
// The 3D global id (x, y, z) addresses the element
// x + y * resolution + z * resolution^2. There is no bounds check, so the
// launch grid must not exceed the buffer.
__kernel
void resetSimulation(const int resolution, __global float* simulationBuffer){
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int z = get_global_id(2);

  const int voxelIndex = x + y * resolution + z * resolution * resolution;
  simulationBuffer[voxelIndex] = 0.0f;
}


// One scattering step for one photon per work-item.
//   iteration            step counter supplied by the host
//   seed                 per-work-item random state, read at start and written back at the end
//   photons              per-work-item photon state
//   resolution, simulationBuffer  passed through to storePhoton
//   lightSourcePosition  origin given to (re)emitted photons
// A photon is re-emitted from the light source with energy 1 when
// iteration is 0 or its energy has dropped below 0.2. Every photon gets a
// fresh random direction, is traced by tracePhotonRM and then stored.
// NOTE(review): the host code shown with this kernel increments `iteration`
// before launching, so the `0 == iteration` case may never occur and the
// first step may read unwritten photon energy — confirm the initial value.
__kernel
void simulation(const int iteration, __global uint4* seed, __global struct photon* photons,
                const int resolution, __global float* simulationBuffer,
                const float4 lightSourcePosition){
  const int id = get_global_id(0);

  // Random generator setup: pull the four state words into private memory
  const uint4 storedState = seed[id];
  uint rng1 = storedState.s0;
  uint rng2 = storedState.s1;
  uint rng3 = storedState.s2;
  uint rng4 = storedState.s3;

  // Scatter simulation: restart exhausted photons at the light source
  const bool emitNewPhoton = (0 == iteration) || (photons[id].energy < 0.2f);
  if(emitNewPhoton){
    photons[id].origin = lightSourcePosition;
    photons[id].energy = 1.0f;
  }
  // Both new and surviving photons get a new random direction
  photons[id].direction = getRandomDirection(&rng1, &rng2, &rng3, &rng4);

  tracePhotonRM(&photons[id], getRandom(&rng1, &rng2, &rng3, &rng4));
  storePhoton(&photons[id], resolution, simulationBuffer);

  // Random state store
  seed[id] = (uint4)(rng1, rng2, rng3, rng4);
}


// Returns a random unit direction (w = 0) by rejection sampling: points are
// drawn from the cube [-1,1]^3 until one lies inside the unit sphere, then
// the point is normalized. A sample at exactly the origin is replaced by
// (0, 1, 0) so the normalization never divides by zero.
//   rng1..rng4  random state words, advanced by getRandom
// The squared length is computed once per sample and compared against the
// single-precision literal 0.0f; the previous double literal 0.0 promoted
// the comparison to double, which may not be available on every device.
float4 getRandomDirection(uint* rng1, uint* rng2, uint* rng3, uint* rng4){
  float x, y, z;
  float lengthSquared;

  // Rejection sampling of the unit ball
  do {
    x = getRandom(rng1, rng2, rng3, rng4) * 2.0f - 1.0f;
    y = getRandom(rng1, rng2, rng3, rng4) * 2.0f - 1.0f;
    z = getRandom(rng1, rng2, rng3, rng4) * 2.0f - 1.0f;
    lengthSquared = x*x + y*y + z*z;
  } while(lengthSquared > 1.0f);

  // Degenerate sample: fall back to a fixed unit vector
  if(lengthSquared == 0.0f){
    x = 0.0f;
    y = 1.0f;
    z = 0.0f;
    lengthSquared = 1.0f;
  }

  const float vlen = sqrt(lengthSquared);
  return (float4)(x/vlen, y/vlen, z/vlen, 0.0f);
}


// Atomically adds `value` to the float at `address` in global memory.
// Implemented as a compare-and-swap loop on the 32-bit pattern of the float:
// the add is retried until no other work-item changed the value between the
// read and the swap.
// NOTE(review): atomic_cmpxchg on 32-bit global integers needs OpenCL 1.1 or
// the cl_khr_global_int32_base_atomics extension — confirm the target.
void storePhotonAtomicAdd(volatile __global float* address, const float value){
  union { unsigned int bits; float number; } expected, desired;
  do {
    expected.number = *address;
    desired.number = expected.number + value;
  } while(atomic_cmpxchg((volatile __global unsigned int*)address,
                         expected.bits, desired.bits) != expected.bits);
}

// Accumulates the photon's energy into the voxel that contains its origin.
//   p                 photon to store; ignored when its energy is below 0.1
//   resolution        edge length of the voxel grid; origin components are
//                     scaled by it and truncated to get the voxel coordinates
//   simulationBuffer  grid, indexed as x + y * resolution + z * resolution^2
// Photons whose voxel coordinates fall outside [0, resolution - 1] are dropped.
// Several work-items can hit the same voxel in the same launch, so the
// accumulation is atomic; the plain `+=` it replaces could lose updates.
void storePhoton(__global struct photon* p, const int resolution, __global float* simulationBuffer){
  if(p->energy < 0.1f) return;

  const int x = p->origin.x * resolution;
  const int y = p->origin.y * resolution;
  const int z = p->origin.z * resolution;

  if(x > resolution - 1 || x < 0) return;
  if(y > resolution - 1 || y < 0) return;
  if(z > resolution - 1 || z < 0) return;

  storePhotonAtomicAdd(&simulationBuffer[x + y * resolution + z * resolution * resolution], p->energy);
}


// Analytic density of the scene at point p (only x, y, z are read).
// Returns 100 inside the roof slab and the three walls, a value that grows
// towards the bottom for y < 0.2, and 0.5 * densityScale everywhere else.
float getDensity(float4 p){
  // Roof with a hole: a slab at 0.78 < y < 0.83, open where the squared
  // distance from (0.5, 0.5) in the xz plane is at most 0.001
  const float dx = p.x - 0.5f;
  const float dz = p.z - 0.5f;
  const bool insideRoofSlab = (p.y > 0.78f) && (p.y < 0.83f);
  if(insideRoofSlab && (dx * dx + dz * dz) > 0.001f){
    return 100.0f;
  }

  // Walls
  if(p.x < 0.02f || p.y < 0.02f || p.z > 0.98f){
    return 100.0f;
  }

  // Gets denser towards the bottom
  if(p.y < 0.2f){
    return (1.0f - p.y) * 5.0f;
  }

  return 0.5f * densityScale;
}


// Moves a photon along its direction by ray marching until the accumulated
// density reaches the sampled threshold s = -log(rnd) / densityScale.
//   p    photon to advance; origin and energy are updated in place
//   rnd  random number used to sample the threshold
// The ray is stepped in increments of 1/256. If a sample position leaves the
// unit cube [0,1]^3 the photon's energy is set to 0 and marching stops.
// Afterwards the origin is moved to the reached position and the energy is
// multiplied by albedo.
// Origin and direction are read from global memory once instead of on every
// step, and the former no-op `p->direction = p->direction` is gone.
void tracePhotonRM(__global struct photon* p, float rnd){
  const float s = -log(rnd) / densityScale;

  // Private copies of the ray: neither changes during marching
  const float4 origin = p->origin;
  const float4 direction = p->direction;

  const float dt = 1.0f / 256.0f;
  float t = 0.0f;
  float sum = 0.0f;

  while(sum < s){
    const float4 samplePos = origin + t * direction;
    if(samplePos.x < 0.0f || samplePos.x > 1.0f ||
       samplePos.y < 0.0f || samplePos.y > 1.0f ||
       samplePos.z < 0.0f || samplePos.z > 1.0f){
      // Left the simulated volume: the photon is lost
      p->energy = 0.0f;
      break;
    }
    sum += getDensity(samplePos) * dt;
    t += dt;
  }

  p->origin = origin + direction * t;
  p->energy = p->energy * albedo;
}

Szóródás szimuláció

• Bővítsük az analitikus sűrűség függvényünket!
• Bővítsük több hullámhossz kezelésével
  – A sűrűség modellt egészítsük ki az albedó több hullámhosszra való kiterjesztésével
• A pontszerű fényforrást cseréljük le spot vagy irány forrásra

Documents

GPGPU labor XI. Monte Carlo szimuláció. Kezdeti teendők Tantárgy honlapja, Monte Carlo szimuláció A labor kiindulási alapjának letöltése (lab11_base.zip),