Improve code readability
Updated comments and moved the OMP/seq implementation into its own separate function as well
This commit is contained in:
parent
a4d0a5dca8
commit
7bcc724d8e
249
main.cpp
249
main.cpp
@ -173,8 +173,10 @@ void print_usage() {
|
|||||||
std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl;
|
std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void main_opencl(std::string infile, std::string outfile, int num_generations) {
|
void main_opencl(std::string infile, std::string outfile, int num_generations, bool measure) {
|
||||||
// get all platforms (drivers), e.g. NVIDIA
|
Timing *timing = Timing::getInstance();
|
||||||
|
|
||||||
|
// Get all available OpenCL platforms (drivers), e.g. NVIDIA CUDA; the first one is used below
|
||||||
std::vector<cl::Platform> all_platforms;
|
std::vector<cl::Platform> all_platforms;
|
||||||
cl::Platform::get(&all_platforms);
|
cl::Platform::get(&all_platforms);
|
||||||
|
|
||||||
@ -183,28 +185,22 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
cl::Platform default_platform=all_platforms[0];
|
cl::Platform default_platform=all_platforms[0];
|
||||||
std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<"\n";
|
|
||||||
|
|
||||||
// get default device (CPUs, GPUs) of the default platform
|
// Use the first device (in my case, GPU is on this platform)
|
||||||
std::vector<cl::Device> all_devices;
|
std::vector<cl::Device> all_devices;
|
||||||
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
|
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
|
||||||
if(all_devices.size()==0){
|
if(all_devices.size()==0){
|
||||||
std::cout<<" No devices found. Check OpenCL installation!\n";
|
std::cout<<" No devices found. Check OpenCL installation!\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// use device[1] because that's a GPU; device[0] is the CPU
|
|
||||||
cl::Device default_device=all_devices[0];
|
cl::Device default_device=all_devices[0];
|
||||||
std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<"\n";
|
|
||||||
|
|
||||||
// a context is like a "runtime link" to the device and platform;
|
// The context links device and platform
|
||||||
// i.e. communication is possible
|
|
||||||
cl::Context context({default_device});
|
cl::Context context({default_device});
|
||||||
|
|
||||||
// create the program that we want to execute on the device
|
// Load kernel code from file into Sources
|
||||||
cl::Program::Sources sources;
|
cl::Program::Sources sources;
|
||||||
|
|
||||||
// load kernel from file
|
|
||||||
std::ifstream file("gol.cl"); //taking file as inputstream
|
std::ifstream file("gol.cl"); //taking file as inputstream
|
||||||
std::string kernel_code;
|
std::string kernel_code;
|
||||||
|
|
||||||
@ -217,6 +213,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
|
|||||||
}
|
}
|
||||||
sources.push_back({kernel_code.c_str(), kernel_code.length()});
|
sources.push_back({kernel_code.c_str(), kernel_code.length()});
|
||||||
|
|
||||||
|
// Create a program with the previously defined context and (kernel) sources
|
||||||
cl::Program program(context, sources);
|
cl::Program program(context, sources);
|
||||||
if (program.build({default_device}) != CL_SUCCESS) {
|
if (program.build({default_device}) != CL_SUCCESS) {
|
||||||
std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << std::endl;
|
std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << std::endl;
|
||||||
@ -237,6 +234,8 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
|
|||||||
int size_x = std::stoi(x_str);
|
int size_x = std::stoi(x_str);
|
||||||
int size_y = std::stoi(y_str);
|
int size_y = std::stoi(y_str);
|
||||||
|
|
||||||
|
// Two arrays because one will always hold the previous status
|
||||||
|
// For now, we only put data into `world`
|
||||||
bool *world = new bool[size_x * size_y];
|
bool *world = new bool[size_x * size_y];
|
||||||
bool *result = new bool[size_x * size_y];
|
bool *result = new bool[size_x * size_y];
|
||||||
|
|
||||||
@ -255,38 +254,49 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
|
|||||||
|
|
||||||
world_file.close();
|
world_file.close();
|
||||||
|
|
||||||
// Put size into array
|
// Put the size into an array so it can be passed to the kernel
|
||||||
int size[2] = {size_x, size_y};
|
int size[2] = {size_x, size_y};
|
||||||
int n = size_x * size_y;
|
int n = size_x * size_y;
|
||||||
|
|
||||||
// create buffers on device (allocate space on GPU)
|
// Allocate space on the GPU
|
||||||
cl::Buffer buffer_previous(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
|
cl::Buffer buffer_previous(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
|
||||||
cl::Buffer buffer_new(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
|
cl::Buffer buffer_new(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
|
||||||
cl::Buffer buffer_size(context, CL_MEM_READ_WRITE, sizeof(int) * 2);
|
cl::Buffer buffer_size(context, CL_MEM_READ_WRITE, sizeof(int) * 2);
|
||||||
|
|
||||||
// create a queue (a queue of commands that the GPU will execute)
|
// Create queue of commands that the GPU will execute
|
||||||
cl::CommandQueue queue(context, default_device);
|
cl::CommandQueue queue(context, default_device);
|
||||||
|
|
||||||
// push write commands to queue
|
// Push write commands to queue
|
||||||
queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world);
|
queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world);
|
||||||
queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
|
queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
|
||||||
queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size);
|
queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size);
|
||||||
|
|
||||||
// RUN ZE KERNEL
|
// Create the kernel, which uses the `generation` method in our program (which was created from the kernel code)
|
||||||
cl::Kernel gol_kernel(program, "generation");
|
cl::Kernel gol_kernel(program, "generation");
|
||||||
|
|
||||||
|
timing->stopSetup();
|
||||||
|
timing->startComputation();
|
||||||
|
|
||||||
|
// Actually do the generations
|
||||||
for (int i = 0; i < num_generations; i++) {
|
for (int i = 0; i < num_generations; i++) {
|
||||||
|
// Update the arguments in the kernel
|
||||||
gol_kernel.setArg(0, buffer_previous);
|
gol_kernel.setArg(0, buffer_previous);
|
||||||
gol_kernel.setArg(1, buffer_new);
|
gol_kernel.setArg(1, buffer_new);
|
||||||
gol_kernel.setArg(2, buffer_size);
|
gol_kernel.setArg(2, buffer_size);
|
||||||
|
|
||||||
|
// Run it
|
||||||
queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange);
|
queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange);
|
||||||
queue.finish();
|
queue.finish();
|
||||||
|
|
||||||
|
// Swap the previous buffer with the new buffer, as we will want to use our result from this loop
|
||||||
|
// as the input of the next loop (overwriting the previous result, which is not needed anymore)
|
||||||
std::swap(buffer_previous, buffer_new);
|
std::swap(buffer_previous, buffer_new);
|
||||||
}
|
}
|
||||||
queue.finish();
|
queue.finish();
|
||||||
|
|
||||||
|
timing->stopComputation();
|
||||||
|
timing->startFinalization();
|
||||||
|
|
||||||
// Since we swap after every generation, we need to proceed differently depending on
|
// Since we swap after every generation, we need to proceed differently depending on
|
||||||
// whether we're in swapped mode or not at the moment
|
// whether we're in swapped mode or not at the moment
|
||||||
if (num_generations % 2 == 0) {
|
if (num_generations % 2 == 0) {
|
||||||
@ -313,113 +323,17 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
result_file.close();
|
result_file.close();
|
||||||
|
|
||||||
|
timing->stopFinalization();
|
||||||
|
|
||||||
|
if (measure) {
|
||||||
|
std::cout << timing->getResults() << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
void main_classic(std::string infile, std::string outfile, int num_generations, bool measure, Mode mode) {
|
||||||
Timing *timing = Timing::getInstance();
|
Timing *timing = Timing::getInstance();
|
||||||
|
|
||||||
// Setup.
|
|
||||||
timing->startSetup();
|
|
||||||
|
|
||||||
// Parse command line arguments
|
|
||||||
std::string infile;
|
|
||||||
std::string outfile;
|
|
||||||
Mode mode = Mode::SEQ;
|
|
||||||
bool use_gpu = false;
|
|
||||||
int num_generations = 0;
|
|
||||||
int num_threads = 1;
|
|
||||||
bool measure = false;
|
|
||||||
|
|
||||||
if (argc < 8) {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse arguments
|
|
||||||
for (int i = 1; i < argc; i++) {
|
|
||||||
if (std::string(argv[i]) == "--load") {
|
|
||||||
if (i + 1 < argc) {
|
|
||||||
infile = argv[i+1];
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (std::string(argv[i]) == "--save") {
|
|
||||||
if (i + 1 < argc) {
|
|
||||||
outfile = argv[i+1];
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (std::string(argv[i]) == "--mode") {
|
|
||||||
if (i + 1 < argc) {
|
|
||||||
if (std::string(argv[i+1]) == "seq") {
|
|
||||||
mode = Mode::SEQ;
|
|
||||||
} else if (std::string(argv[i+1]) == "omp") {
|
|
||||||
mode = Mode::OMP;
|
|
||||||
} else if (std::string(argv[i+1]) == "ocl") {
|
|
||||||
mode = Mode::OCL;
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (std::string(argv[i]) == "--threads") {
|
|
||||||
if (i + 1 < argc) {
|
|
||||||
num_threads = std::stoi(argv[i+1]);
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (std::string(argv[i]) == "--device") {
|
|
||||||
if (i + 1 < argc) {
|
|
||||||
if (std::string(argv[i+1]) == "cpu") {
|
|
||||||
use_gpu = false;
|
|
||||||
} else if (std::string(argv[i+1]) == "gpu") {
|
|
||||||
use_gpu = true;
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (std::string(argv[i]) == "--generations") {
|
|
||||||
if (i + 1 < argc) {
|
|
||||||
num_generations = std::stoi(argv[i+1]);
|
|
||||||
} else {
|
|
||||||
print_usage();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else if (std::string(argv[i]) == "--measure") {
|
|
||||||
measure = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Just for testing
|
|
||||||
if (use_gpu) {
|
|
||||||
std::cout << "Using GPU" << std::endl;
|
|
||||||
} else {
|
|
||||||
std::cout << "Using CPU" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mode == Mode::SEQ) {
|
|
||||||
std::cout << "Running classic sequential version" << std::endl;
|
|
||||||
} else if (mode == Mode::OMP) {
|
|
||||||
std::cout << "Running OpenMP version with " << num_threads << " threads" << std::endl;
|
|
||||||
} else if (mode == Mode::OCL) {
|
|
||||||
std::cout << "Running OpenCL version" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mode == Mode::OCL) {
|
|
||||||
main_opencl(infile, outfile, num_generations);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read in the start state
|
// Read in the start state
|
||||||
std::ifstream world_file;
|
std::ifstream world_file;
|
||||||
world_file.open(infile);
|
world_file.open(infile);
|
||||||
@ -498,6 +412,103 @@ int main(int argc, char* argv[]) {
|
|||||||
if (measure) {
|
if (measure) {
|
||||||
std::cout << timing->getResults() << std::endl;
|
std::cout << timing->getResults() << std::endl;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]) {
|
||||||
|
Timing *timing = Timing::getInstance();
|
||||||
|
|
||||||
|
// Setup.
|
||||||
|
timing->startSetup();
|
||||||
|
|
||||||
|
// Parse command line arguments
|
||||||
|
std::string infile;
|
||||||
|
std::string outfile;
|
||||||
|
Mode mode = Mode::SEQ;
|
||||||
|
bool use_gpu = false;
|
||||||
|
int num_generations = 0;
|
||||||
|
int num_threads = 1;
|
||||||
|
bool measure = false;
|
||||||
|
|
||||||
|
if (argc < 8) {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse arguments
|
||||||
|
for (int i = 1; i < argc; i++) {
|
||||||
|
if (std::string(argv[i]) == "--load") {
|
||||||
|
if (i + 1 < argc) {
|
||||||
|
infile = argv[i+1];
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else if (std::string(argv[i]) == "--save") {
|
||||||
|
if (i + 1 < argc) {
|
||||||
|
outfile = argv[i+1];
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else if (std::string(argv[i]) == "--mode") {
|
||||||
|
if (i + 1 < argc) {
|
||||||
|
if (std::string(argv[i+1]) == "seq") {
|
||||||
|
mode = Mode::SEQ;
|
||||||
|
} else if (std::string(argv[i+1]) == "omp") {
|
||||||
|
mode = Mode::OMP;
|
||||||
|
} else if (std::string(argv[i+1]) == "ocl") {
|
||||||
|
mode = Mode::OCL;
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else if (std::string(argv[i]) == "--threads") {
|
||||||
|
if (i + 1 < argc) {
|
||||||
|
num_threads = std::stoi(argv[i+1]);
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// TODO: This parameter isn't really needed anymore as we only use the GPU now
|
||||||
|
} else if (std::string(argv[i]) == "--device") {
|
||||||
|
if (i + 1 < argc) {
|
||||||
|
if (std::string(argv[i+1]) == "cpu") {
|
||||||
|
use_gpu = false;
|
||||||
|
} else if (std::string(argv[i+1]) == "gpu") {
|
||||||
|
use_gpu = true;
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else if (std::string(argv[i]) == "--generations") {
|
||||||
|
if (i + 1 < argc) {
|
||||||
|
num_generations = std::stoi(argv[i+1]);
|
||||||
|
} else {
|
||||||
|
print_usage();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else if (std::string(argv[i]) == "--measure") {
|
||||||
|
measure = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If OpenCL was demanded, run that function.
|
||||||
|
if (mode == Mode::OCL) {
|
||||||
|
main_opencl(infile, outfile, num_generations, measure);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
main_classic(infile, outfile, num_generations, measure, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user