Homework4 #3 (Open)

Rrytel wants to merge 29 commits into main from homework4.
Changes from all commits (29 commits):
10766b4  add source
8d755da  add float3 functionality
4e02b2a  img implementation
6fdf83c  added img output
8eaa9e4  add reduce
dff892b  fix blur
35c3890  clean up code
ff50c63  update reduce
9e1d6f8  touch up
0dcbfc3  fix math
67d0ae7  fix math
37d8b06  fix math again
a55f8fd  clean up
fb456f2  add source
3435ba5  update histo
837f6f2  update histo sum
f63260e  fix histo
318a45b  update histo and data entry
495afe1  update histo
83e9061  update histo
e0a9a82  fix histo memory
b953ebe  add cdf
6258db2  add file output
2367142  update cdf
12cdb26  Delete CImg.h
833db28  Delete Makefile
fae79cd  Delete heat.cpp
108b8ac  Delete mat.cpp
ab45e0b  Delete mat.h
@@ -0,0 +1,336 @@
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <cmath>
#include <hip/hip_runtime.h>
#include <vector>
#include <iterator>
#include <fstream>

#define NUM_BINS 1024

// Normal(x, mu, sig): Gaussian probability density, evaluated on the device.
__device__ float Normal(float x, float mu, float sig) {
  float temp1 = 1.0f / (sqrtf(2.0f * 3.14159265f) * sig);
  float temp2 = 0.5f * (x - mu) * (x - mu) / (sig * sig);
  float val = temp1 * expf(-temp2);
  return val;
}

__global__ void DataGen(float *data, float *x, float xBeg, float deltaX,
                        float mu, float sig, int n) {
  // generate x and data
  // use x for plotting later
  int tId = threadIdx.x + blockDim.x * blockIdx.x;
  if (tId < 1 || tId > n) {
    return;
  }
  float xTemp[2];
  xTemp[0] = xBeg + tId * deltaX;
  xTemp[1] = xBeg + (tId - 1) * deltaX;
  data[tId - 1] = (Normal(xTemp[0], mu, sig) + Normal(xTemp[1], mu, sig)) * deltaX * 0.5f;
  x[tId - 1] = xTemp[0];
}
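// Note: each thread integrates the normal PDF over one bin of width deltaX
// with the trapezoid rule, 0.5f * (f(x_left) + f(x_right)) * deltaX, so the
// n outputs sum to approximately 1 over the sampled range.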

__global__ void BlScan(float *oData, const float *iData, int n)
{
  extern __shared__ float temp[]; // allocated on invocation
  int tId = threadIdx.x;
  int offset = 1;
  temp[2 * tId] = iData[2 * tId]; // load input into shared memory
  temp[2 * tId + 1] = iData[2 * tId + 1];
  for (int d = n >> 1; d > 0; d >>= 1) // build sum in place up the tree
  {
    __syncthreads();
    if (tId < d) {
      int aI = offset * (2 * tId + 1) - 1;
      int bI = offset * (2 * tId + 2) - 1;
      temp[bI] += temp[aI];
    }
    offset *= 2;
  }
  if (tId == 0) {
    temp[n - 1] = 0; // clear the last element
  }
  for (int d = 1; d < n; d *= 2) // traverse down tree & build scan
  {
    offset >>= 1;
    __syncthreads();
    if (tId < d) {
      int aI = offset * (2 * tId + 1) - 1;
      int bI = offset * (2 * tId + 2) - 1;
      float t = temp[aI];
      temp[aI] = temp[bI];
      temp[bI] += t;
    }
  }
  __syncthreads();
  oData[2 * tId] = temp[2 * tId]; // write results to device memory
  oData[2 * tId + 1] = temp[2 * tId + 1];
}
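// BlScan is the work-efficient Blelloch exclusive scan: an up-sweep builds
// partial sums in a binary tree, the root is zeroed, and a down-sweep
// distributes the sums back down. For example, input [3, 1, 7, 0] yields
// [0, 3, 4, 11]: each output is the sum of all elements strictly before it.
// The single-block launch assumes n is a power of two with n / 2 threads.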

__global__ void Ex2In(float *scan, float *iData, int n) {
  extern __shared__ float temp[]; // allocated via kernel config
  int tId = threadIdx.x;
  if (tId >= n)
    return;
  temp[tId] = scan[tId]; // load scan data
  __syncthreads();

  if (tId > 0)
    scan[tId - 1] = temp[tId];

  if (tId == n - 1)
    scan[tId] = temp[tId] + iData[tId]; // last element clean up!
}
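// Ex2In shifts the exclusive scan left by one slot and patches the last
// entry with scan[n - 1] + iData[n - 1], turning the exclusive scan into an
// inclusive one: scan[i] then holds the sum of iData[0..i].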

__global__ void PDF(float histoSum, float *histo, float *outPDF)
{
  int tid = threadIdx.x + blockDim.x * blockIdx.x;
  outPDF[tid] = histo[tid] / histoSum;
}
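// PDF normalizes each bin count by the total count. There is no bounds
// check, so the launch must supply exactly NUM_BINS threads (see the
// <<<1, NUM_BINS>>> call in Histo below).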

__global__ void histogram_smem_atomics(const float *in, float range, float min, float *out)
{
  // temporary accumulation array in shared memory
  extern __shared__ unsigned int smem[];

  // global thread index, and thread index within the block
  int t = threadIdx.x + blockIdx.x * blockDim.x;
  int tid = threadIdx.x;

  if (tid < NUM_BINS)
  {
    smem[tid] = 0;
  }

  __syncthreads();

  int bin = static_cast<int>(((in[t] - min) / range) * (NUM_BINS - 1));
  atomicAdd(&smem[bin], 1);

  __syncthreads();

  if (tid < NUM_BINS)
  {
    atomicAdd(&(out[tid]), smem[tid]);
  }
}
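// The kernel privatizes the histogram: each block accumulates into its own
// shared-memory copy (cheap atomics), then merges into the global histogram
// with one atomicAdd per bin. Two preconditions are implicit: out must be
// zeroed before the first launch, and blockDim.x must be >= NUM_BINS so
// every bin gets initialized and merged.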

__global__ void ShmemReduceKernelSum(float *dOut, float *dIn)
{
  // sData is allocated in the kernel call via dynamic shared memory
  extern __shared__ float sData[];

  int myId = threadIdx.x + blockDim.x * blockIdx.x;
  int tId = threadIdx.x;

  // load shared mem from global mem
  sData[tId] = dIn[myId];
  __syncthreads(); // always sync before using sData

  // do reduction over shared memory
  for (int s = blockDim.x / 2; s > 0; s >>= 1)
  {
    if (tId < s)
    {
      sData[tId] += sData[tId + s];
    }
    __syncthreads(); // make sure all additions are finished
  }

  // only tId 0 writes out the result
  if (tId == 0)
  {
    dOut[blockIdx.x] = sData[0];
  }
}
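// Classic tree reduction in shared memory: the stride s halves each pass,
// so blockDim.x must be a power of two; each block writes one partial sum
// to dOut[blockIdx.x]. There is no bounds check on myId, so the input
// length must be a multiple of blockDim.x.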

__global__ void ShmemReduceKernelMaxMin(float *dOut, const float *dIn, const bool isMax)
{
  // sData is allocated in the kernel call via dynamic shared memory
  extern __shared__ float sData[];

  int myId = threadIdx.x + blockDim.x * blockIdx.x;
  int tId = threadIdx.x;

  // load shared mem from global mem
  sData[tId] = dIn[myId];
  __syncthreads(); // always sync before using sData

  // do reduction over shared memory
  for (int s = blockDim.x / 2; s > 0; s >>= 1)
  {
    if (tId < s)
    {
      if (isMax)
      {
        sData[tId] = max(sData[tId + s], sData[tId]);
      }
      else
      {
        sData[tId] = min(sData[tId + s], sData[tId]);
      }
    }
    __syncthreads(); // make sure all comparisons are finished
  }

  // only tId 0 writes out the result
  if (tId == 0)
  {
    dOut[blockIdx.x] = sData[0];
  }
}
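// Same tree-reduction shape as the sum kernel, but combining with max/min
// instead of +. It shares the power-of-two block-size requirement and the
// assumption that the input length is a multiple of blockDim.x.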

void Histo()
{
  std::vector<float> x(NUM_BINS, 0.f);
  // Initialization
  int n;
  int numElements = 0;
  std::ifstream inputStream("data.txt");
  std::vector<float> numbers;
  std::string line;
  if (inputStream)
  {
    while (std::getline(inputStream, line))
    {
      numbers.push_back(std::stof(line));
      numElements++;
    }
  }
  n = numElements;
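  // data.txt is expected to hold one float per line; n is simply the
  // number of lines read.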

  float value;
  float minValue;
  float maxValue;
  float histoSum;

  float *dData;
  float *dReduc;

  size_t original = n * sizeof(float);
  size_t reduc = n / 1024 * sizeof(float);

  // Allocation
  hipMalloc(&dData, original);
  hipMalloc(&dReduc, reduc);

  // Kernel Parameters
  dim3 blockDim(1024, 1, 1);
  dim3 gridDim(n / blockDim.x, 1, 1);

  size_t size = blockDim.x * sizeof(float);

  // Load array data to the GPU
  hipMemcpy(dData, numbers.data(), original, hipMemcpyHostToDevice);

  // Get Max
  ShmemReduceKernelMaxMin<<<gridDim, blockDim, size>>>(dReduc, dData, true);
  ShmemReduceKernelMaxMin<<<1, gridDim, size>>>(dReduc, dReduc, true);
  hipMemcpy(&value, dReduc, sizeof(float), hipMemcpyDeviceToHost);
  maxValue = value;

  // Get Min
  ShmemReduceKernelMaxMin<<<gridDim, blockDim, size>>>(dReduc, dData, false);
  ShmemReduceKernelMaxMin<<<1, gridDim, size>>>(dReduc, dReduc, false);
  hipMemcpy(&value, dReduc, sizeof(float), hipMemcpyDeviceToHost);
  minValue = value;
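  // The two-stage pattern above reduces n values to n / 1024 per-block
  // results, then reduces those with a single block of gridDim.x threads.
  // This only works when n is a multiple of 1024 and n / 1024 is itself a
  // power of two no larger than 1024.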

  // Histogram of the input data
  float *hHisto = new float[NUM_BINS];
  float *dHisto;
  hipMalloc((void **)&dHisto, NUM_BINS * sizeof(float));
  hipMemset(dHisto, 0, NUM_BINS * sizeof(float)); // bins must start at zero

  dim3 blockDimHisto(1024);
  dim3 gridDimHisto(n / blockDim.x);

  histogram_smem_atomics<<<gridDimHisto, blockDimHisto, NUM_BINS * sizeof(unsigned int)>>>(dData, (maxValue - minValue), minValue, dHisto);

  hipMemcpy(hHisto, dHisto, NUM_BINS * sizeof(float), hipMemcpyDeviceToHost);

  // Sum the histogram. A single block suffices: the histogram has exactly
  // NUM_BINS = 1024 entries. (Reviewer note on this line: "Here we just need ...")
  ShmemReduceKernelSum<<<1, blockDim, size>>>(dReduc, dHisto);
  hipMemcpy(&histoSum, dReduc, sizeof(float), hipMemcpyDeviceToHost);

  // Normalize the histogram into a PDF (one thread per bin)
  float *dPDF;
  float *hPDF = new float[NUM_BINS];
  hipMalloc(&dPDF, NUM_BINS * sizeof(float));
  PDF<<<1, NUM_BINS>>>(histoSum, dHisto, dPDF);
  hipMemcpy(hPDF, dPDF, NUM_BINS * sizeof(float), hipMemcpyDeviceToHost);

  // Sum-scan the PDF to get the CDF
  float *dCDF;
  float *hCDF = new float[NUM_BINS];
  float xBeg = 0.0f - 5.0f * 1.0f; // mu - 5 * sigma
  float xEnd = 0.0f + 5.0f * 1.0f; // mu + 5 * sigma
  float deltaX = (xEnd - xBeg) / (float)NUM_BINS;
  float *dX;
  hipMalloc(&dCDF, NUM_BINS * sizeof(float));
  hipMalloc(&dX, NUM_BINS * sizeof(float));
  // DataGen overwrites dPDF with analytic N(0, 1) bin masses before the scan
  DataGen<<<2, NUM_BINS / 2 + 1>>>(dPDF, dX, xBeg, deltaX, 0.0, 1.0, NUM_BINS);
  BlScan<<<1, NUM_BINS / 2, 2 * NUM_BINS * sizeof(float)>>>(dCDF, dPDF, NUM_BINS);
  Ex2In<<<1, NUM_BINS, NUM_BINS * sizeof(float)>>>(dCDF, dPDF, NUM_BINS);
  hipMemcpy(x.data(), dX, sizeof(float) * NUM_BINS, hipMemcpyDeviceToHost);
  hipMemcpy(hCDF, dCDF, sizeof(float) * NUM_BINS, hipMemcpyDeviceToHost);
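  // Sanity check (optional): since the bin masses from DataGen sum to
  // roughly 1 over +/- 5 sigma, the last CDF entry hCDF[NUM_BINS - 1]
  // should come out close to 1.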

  std::cout << "Histo sum: " << histoSum << std::endl;
  std::cout << "Max: " << maxValue << std::endl;
  std::cout << "Min: " << minValue << std::endl;

  // Write x, PDF, and CDF columns for plotting
  std::ofstream myFile;
  myFile.open("cdf.dat");
  for (int aa = 0; aa < NUM_BINS; aa++) {
    myFile << x[aa] << '\t' << hPDF[aa] << '\t' << hCDF[aa] << "\n";
  }
  myFile << std::endl;

  // destroy file object
  myFile.close();

  // Free memory
  hipFree(dReduc);
  hipFree(dData);
  hipFree(dHisto);
  hipFree(dPDF);
  hipFree(dCDF);
  hipFree(dX);
  delete[] hHisto;
  delete[] hPDF;
  delete[] hCDF;
}

/* Driver for the histogram / CDF computation. */
int main()
{
  Histo();
  return 0;
}
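A minimal way to build and run this, assuming the new file is saved as histo.cpp (the PR page does not show the filename) on a machine with the HIP toolchain:

hipcc histo.cpp -o histo
./histo    # reads data.txt, writes cdf.dat, prints the sum/max/min summary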
Review comment: "Can get rid of these."