Ignore:
Timestamp:
Sep 8, 2016, 3:24:31 PM (3 years ago)
Author:
lindanl
Message:

Add NVPTX Arch to the framework. Fix directory bug in make check.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/CudaDriver.h

    r5129 r5151  
     15  15
     16  16   /// main - Program entry point
     17     - int RunPTX(std::string PTXFilename, char * fileBuffer, ulong filesize) {
         17 + ulong * RunPTX(std::string PTXFilename, char * fileBuffer, ulong filesize, bool CountOnly) {
     18  18
     19  19     CUdevice    device;
     
     37  37     if (devMajor < 2) {
     38  38       std::cerr << "ERROR: Device 0 is not SM 2.0 or greater\n";
     39     -     return 1;
         39 +     exit(-1);
     40  40     }
     41  41
     
     55  55
     56  56     // Get kernel function
     57     -   checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "kernel"));
         57 +   checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "GPU_Main"));
     58  58
     59  59     // Device data
     
     63  63
     64  64     int groupSize = GROUPTHREADS * sizeof(ulong) * 8;
     65     -   int bufferSize = (filesize/groupSize + 1) * groupSize;
         65 +   int groups = filesize/groupSize + 1;
         66 +   int bufferSize = groups * groupSize;
         67 +   int outputSize = 0;
     66  68
     67  69     checkCudaErrors(cuMemAlloc(&devBufferInput, bufferSize));
     68     -   // checkCudaErrors(cuMemsetD8(devBufferInput, 0, bufferSize));
     69  70     checkCudaErrors(cuMemAlloc(&devBufferSize, sizeof(ulong)));
     70     -   checkCudaErrors(cuMemAlloc(&devBufferOutput, sizeof(ulong)*GROUPTHREADS));
         71 +   if (CountOnly){
         72 +     outputSize = sizeof(ulong) * GROUPTHREADS;
         73 +   }
         74 +   else{
         75 +     outputSize = sizeof(ulong) * 2 * GROUPTHREADS * groups;
         76 +   }
         77 +
         78 +   checkCudaErrors(cuMemAlloc(&devBufferOutput, outputSize));
     71  79
     72  80     //Copy from host to device
     
     92 100     // std::cout << "kernel success.\n";
     93 101     // Retrieve device data
     94     -   ulong * matchCount = (ulong *) malloc(sizeof(ulong)*GROUPTHREADS);
     95     -   checkCudaErrors(cuMemcpyDtoH(matchCount, devBufferOutput, sizeof(ulong)*GROUPTHREADS));
     96 102
     97     -   int count = 0;
     98     -   for (unsigned i = 0; i < GROUPTHREADS; ++i) {
     99     -     count += matchCount[i];
    100     -     // std::cout << i << ":" << matchCount[i] << "\n";
        103 +   ulong * matchRslt = (ulong *) malloc(outputSize);
        104 +   checkCudaErrors(cuMemcpyDtoH(matchRslt, devBufferOutput, outputSize));
        105 +   if (CountOnly){
        106 +     int count = 0;
        107 +     for (unsigned i = 0; i < GROUPTHREADS; ++i) {
        108 +       count += matchRslt[i];
        109 +     }
        110 +     std::cout << count << "\n";
    101 111     }
    102     -   std::cout << count << "\n";
        112 +
    103 113
    104 114     // Clean-up
     
    109 119     checkCudaErrors(cuCtxDestroy(context));
    110 120
    111     -   return 0;
        121 +   return matchRslt;
    112 122   }
Note: See TracChangeset for help on using the changeset viewer.