Changeset 5377 for icGREP


Ignore:
Timestamp:
Mar 29, 2017, 2:29:52 PM (2 years ago)
Author:
nmedfort
Message:

Support for stdin. Needs more testing.

Location:
icGREP/icgrep-devel/icgrep
Files:
26 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5373 r5377  
    5858
    5959SET(KERNEL_SRC kernels/kernel.cpp kernels/pipeline.cpp kernels/streamset.cpp kernels/interface.cpp)
    60 SET(KERNEL_SRC ${KERNEL_SRC} kernels/s2p_kernel.cpp kernels/mmap_kernel.cpp kernels/deletion.cpp kernels/swizzle.cpp kernels/p2s_kernel.cpp kernels/stdout_kernel.cpp)
     60SET(KERNEL_SRC ${KERNEL_SRC} kernels/s2p_kernel.cpp kernels/mmap_kernel.cpp kernels/deletion.cpp kernels/swizzle.cpp kernels/p2s_kernel.cpp kernels/stdin_kernel.cpp kernels/stdout_kernel.cpp)
    6161
    6262SET(IDISA_SRC IR_Gen/CBuilder.cpp IR_Gen/idisa_builder.cpp IR_Gen/idisa_avx_builder.cpp IR_Gen/idisa_i64_builder.cpp IR_Gen/idisa_sse_builder.cpp IR_Gen/idisa_nvptx_builder.cpp IR_Gen/idisa_target.cpp)
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5350 r5377  
    1818namespace IDISA {
    1919
    20 VectorType * IDISA_Builder::fwVectorType(unsigned fw) {
    21     int fieldCount = mBitBlockWidth/fw;
    22     return VectorType::get(getIntNTy(fw), fieldCount);
    23 }
    24 
    25 Value * IDISA_Builder::fwCast(unsigned fw, Value * a) {
    26     return a->getType() == fwVectorType(fw) ? a : CreateBitCast(a, fwVectorType(fw));
     20VectorType * IDISA_Builder::fwVectorType(const unsigned fw) {
     21    return VectorType::get(getIntNTy(fw), mBitBlockWidth / fw);
     22}
     23
     24Value * IDISA_Builder::fwCast(const unsigned fw, Value * const a) {
     25    VectorType * const ty = fwVectorType(fw);
     26    assert (a->getType()->canLosslesslyBitCastTo(fwVectorType(fw)));
     27    return CreateBitCast(a, ty);
    2728}
    2829
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5374 r5377  
    2929
    3030    llvm::Value * bitCast(llvm::Value * a) {
    31         return (a->getType() == mBitBlockType) ? a : CreateBitCast(a, mBitBlockType);
     31        return CreateBitCast(a, mBitBlockType);
    3232    }
    3333
     
    5656    void CreateBlockAlignedStore(llvm::Value * const value, llvm::Value * const ptr, std::initializer_list<llvm::Value *> indices);
    5757
    58     llvm::VectorType * fwVectorType(unsigned fw);
     58    llvm::VectorType * fwVectorType(const unsigned fw);
    5959
    6060    llvm::Constant * simd_himask(unsigned fw);
  • icGREP/icgrep-devel/icgrep/array-test.cpp

    r5371 r5377  
    174174    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main, 0));
    175175
    176     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     176    ByteStream.setStreamSetBuffer(inputStream);
    177177    BasisBits.allocateBuffer();
    178178    matches.allocateBuffer();
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5373 r5377  
    8686    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
    8787
    88     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     88    ByteStream.setStreamSetBuffer(inputStream);
    8989    Expanded3_4Out.allocateBuffer();
    9090    Radix64out.allocateBuffer();
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5318 r5377  
    245245    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    246246
    247     ChStream.setStreamSetBuffer(inputStream, fileSize);
     247    ChStream.setStreamSetBuffer(inputStream);
    248248    MatchResults.allocateBuffer();
    249249   
     
    324324    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    325325
    326     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     326    ByteStream.setStreamSetBuffer(inputStream);
    327327    BasisBits.allocateBuffer();
    328     CCResults.setStreamSetBuffer(outputStream, fileSize);
     328    CCResults.setStreamSetBuffer(outputStream);
    329329   
    330330    generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &ccck});
     
    516516
    517517    Value * inputSize = iBuilder->CreateLoad(inputSizePtr);
    518     CCStream.setStreamSetBuffer(inputThreadPtr, inputSize);
    519     ResultStream.setEmptyBuffer(resultStreamPtr);
     518    CCStream.setStreamSetBuffer(inputThreadPtr);
     519    ResultStream.setStreamSetBuffer(resultStreamPtr);
    520520    mmapK.setInitialArguments({inputSize});
    521521
     
    644644    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main,0));
    645645
    646     MatchResults.setStreamSetBuffer(inputStream, fileSize);
     646    MatchResults.setStreamSetBuffer(inputStream);
    647647    mmapK.setInitialArguments({fileSize});
    648648   
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5369 r5377  
    2424#include <kernels/scanmatchgen.h>
    2525#include <kernels/streamset.h>
    26 #include <kernels/interface.h>
     26#include <kernels/stdin_kernel.h>
    2727#include <pablo/pablo_compiler.h>
    2828#include <pablo/pablo_kernel.h>
     
    3434#include <sstream>
    3535#include <cc/multiplex_CCs.h>
     36
     37#include <llvm/Support/raw_ostream.h>
     38
    3639#ifdef CUDA_ENABLED
    3740#include <IR_Gen/CudaDriver.h>
     
    4043#include <util/aligned_allocator.h>
    4144
    42 
    4345using namespace parabix;
    4446using namespace llvm;
     
    5759static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
    5860static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
     61
     62/// INVESTIGATE: icgrep is reporting stdin is not empty even when nothing is being piped into it?
     63static cl::opt<bool> UseStdIn("stdin", cl::desc("Read from standard input."), cl::cat(bGrepOutputOptions));
    5964
    6065bool isUTF_16 = false;
     
    7176#endif
    7277
    73 void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly, bool UTF_16) {
     78void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
    7479    boost::filesystem::path file(fileName);
    7580    if (exists(file)) {
     
    146151}
    147152
     153void GrepEngine::doGrep(const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
     154    if (CountOnly) {
     155        total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
     156    } else {
     157        mGrepFunction(nullptr, 0, fileIdx);
     158    }
     159}
    148160
    149161Function * generateGPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool CountOnly){
     
    223235    const unsigned segmentSize = codegen::SegmentSize;
    224236   
     237    ExternalFileBuffer InputStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
     238    InputStream.setStreamSetBuffer(inputStream);
     239
    225240    ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1));
    226     MatchResults.setStreamSetBuffer(rsltStream, fileSize);
     241    MatchResults.setStreamSetBuffer(rsltStream);
    227242
    228243    kernel::MMapSourceKernel mmapK1(iBuilder, segmentSize);
     
    232247
    233248    ExternalFileBuffer LineBreak(iBuilder, iBuilder->getStreamSetTy(1, 1));
    234     LineBreak.setStreamSetBuffer(lbStream, fileSize);
     249    LineBreak.setStreamSetBuffer(lbStream);
    235250   
    236251    kernel::MMapSourceKernel mmapK2(iBuilder, segmentSize);
     
    239254    mmapK2.setInitialArguments({fileSize});
    240255
    241     kernel::ScanMatchKernel scanMatchK(iBuilder, grepType);
    242     scanMatchK.generateKernel({&MatchResults, &LineBreak}, {});
    243     scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     256    kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, 8);
     257    scanMatchK.generateKernel({&InputStream, &MatchResults, &LineBreak}, {});
     258    scanMatchK.setInitialArguments({fileIdx});
    244259   
    245260    generatePipeline(iBuilder, {&mmapK1, &mmapK2, &scanMatchK});
     
    261276    mGrepType = grepType;
    262277
    263     Type * const size_ty = iBuilder->getSizeTy();
    264     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     278    Type * const sizeTy = iBuilder->getSizeTy();
    265279    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
    266     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    267 
    268     Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
     280    Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
     281
     282    Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
    269283    mainFn->setCallingConv(CallingConv::C);
    270284    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     
    278292    fileIdx->setName("fileIdx");
    279293
    280     ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));   
     294    StreamSetBuffer * byteStream = nullptr;
     295    kernel::KernelBuilder * sourceK = nullptr;
     296//    if (usingStdIn) {
     297//        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     298//        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
     299//        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     300//        sourceK->generateKernel({}, {byteStream});
     301//    } else {
     302        byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     303        cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
     304        sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
     305        sourceK->generateKernel({}, {byteStream});
     306        sourceK->setInitialArguments({fileSize});
     307//    }
     308
    281309    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    282     ByteStream.setStreamSetBuffer(inputStream, fileSize);
    283310    BasisBits.allocateBuffer();
    284    
    285     kernel::MMapSourceKernel mmapK(iBuilder, segmentSize);
    286     mmapK.generateKernel({}, {&ByteStream});
    287     mmapK.setInitialArguments({fileSize});
    288311
    289312    kernel::S2PKernel  s2pk(iBuilder);
    290     s2pk.generateKernel({&ByteStream}, {&BasisBits});
     313    s2pk.generateKernel({byteStream}, {&BasisBits});
    291314   
    292315    std::vector<pablo::PabloKernel *> icgrepKs;
     
    304327
    305328    std::vector<kernel::KernelBuilder *> KernelList;
    306     KernelList.push_back(&mmapK);
     329    KernelList.push_back(sourceK);
    307330    KernelList.push_back(&s2pk);
    308331
     
    335358
    336359    } else {
    337         kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
    338         scanMatchK.generateKernel({&mergedResults, &LineBreakStream}, {});               
    339         scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     360        kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
     361        scanMatchK.generateKernel({byteStream, &mergedResults, &LineBreakStream}, {});
     362        scanMatchK.setInitialArguments({fileIdx});
    340363
    341364        KernelList.push_back(&scanMatchK);
     
    356379    mEngine->finalizeObject();
    357380    delete iBuilder;
    358    
     381    delete sourceK;
     382    delete byteStream;
     383
    359384    if (CountOnly) {
    360385        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
     
    365390}
    366391
    367 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType) {
     392void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
    368393    isUTF_16 = UTF_16;
    369394    int addrSpace = 0;
     
    403428
    404429    Type * const size_ty = iBuilder->getSizeTy();
    405     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    406430    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
    407431    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     
    453477        fileIdx = &*(args++);
    454478        fileIdx->setName("fileIdx");
    455     }
    456        
    457     ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    458    
    459     kernel::MMapSourceKernel mmapK(iBuilder, segmentSize);
    460     mmapK.generateKernel({}, {&ByteStream});
    461     mmapK.setInitialArguments({fileSize});
     479
     480    } 
     481
     482    StreamSetBuffer * byteStream = nullptr;
     483    kernel::KernelBuilder * sourceK = nullptr;
     484    if (usingStdIn) {
     485        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments);
     486        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
     487        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     488        sourceK->generateKernel({}, {byteStream});
     489    } else {
     490        byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     491        cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
     492        sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
     493        sourceK->generateKernel({}, {byteStream});
     494        sourceK->setInitialArguments({fileSize});
     495    }
    462496   
    463497    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    464498
    465499    kernel::S2PKernel  s2pk(iBuilder);
    466     s2pk.generateKernel({&ByteStream}, {&BasisBits});
     500    s2pk.generateKernel({byteStream}, {&BasisBits});
    467501   
    468502    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     
    476510    pablo_function_passes(&icgrepK);
    477511
    478     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     512
    479513    BasisBits.allocateBuffer();
    480514
    481515    if (CountOnly) {
    482516        icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {});
    483         generatePipeline(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
     517        generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    484518        iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
    485519    } else {
     
    487521        if (codegen::NVPTX){
    488522            ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace);
    489             MatchResults.setStreamSetBuffer(outputStream, fileSize);
     523            MatchResults.setStreamSetBuffer(outputStream);
    490524
    491525            icgrepK.generateKernel({&BasisBits, &LineBreakStream},  {&MatchResults});
    492             generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
     526            generatePipelineLoop(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    493527
    494528        }
     
    500534            icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {&MatchResults});
    501535
    502             kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
    503             scanMatchK.generateKernel({&MatchResults, &LineBreakStream}, {});               
    504             scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     536            kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
     537            scanMatchK.generateKernel({byteStream, &MatchResults, &LineBreakStream}, {});
     538            scanMatchK.setInitialArguments({fileIdx});
    505539           
    506             generatePipeline(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
     540            generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
    507541        }
    508542        iBuilder->CreateRetVoid();
     
    537571    mEngine->finalizeObject();
    538572    delete iBuilder;
     573    delete sourceK;
     574    delete byteStream;
    539575   
    540576    if (CountOnly) {
     
    581617static std::vector<std::string> inputFiles;
    582618
    583 void initResult(std::vector<std::string> filenames){
     619void initFileResult(std::vector<std::string> filenames){
    584620    const int n = filenames.size();
    585621    if (n > 1) {
     
    595631}
    596632
    597 extern "C" {
    598     void wrapped_report_match(size_t lineNum, size_t line_start, size_t line_end, const char * buffer, size_t filesize, int fileIdx) {
    599         assert (buffer);
    600 #ifdef CUDA_ENABLED
    601     if (codegen::NVPTX){
    602         while(line_start>startPoints[blockNo]) blockNo++;
    603         line_start -= accumBytes[blockNo-1];
    604         line_end -= accumBytes[blockNo-1];
    605     }
    606 #endif
    607         int index = isUTF_16 ? 2 : 1;
    608         int idx = fileIdx;
    609          
    610         if (ShowFileNames) {
    611             resultStrs[idx] << inputFiles[idx] << ':';
    612         }
    613         if (ShowLineNumbers) {
    614             resultStrs[idx] << lineNum << ":";
    615         }
    616        
    617         if ((!isUTF_16 && buffer[line_start] == 0xA) && (line_start != line_end)) {
    618             // The line "starts" on the LF of a CRLF.  Really the end of the last line.
    619             line_start++;
    620         }
    621         if (((isUTF_16 && buffer[line_start] == 0x0) && buffer[line_start + 1] == 0xA) && (line_start != line_end)) {
    622             // The line "starts" on the LF of a CRLF.  Really the end of the last line.
    623             line_start += 2;
    624         }
    625         if (line_end == filesize) {
    626             // The match position is at end-of-file.   We have a final unterminated line.
    627             resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start) * index);
    628             if (NormalizeLineBreaks) {
    629                 resultStrs[idx] << '\n';  // terminate it
     633template<typename CodeUnit>
     634void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const int fileIdx) {
     635    assert (buffer);
     636    assert (line_start <= line_end);
     637    assert (line_end < filesize);
     638#ifdef CUDA_ENABLED
     639if (codegen::NVPTX){
     640    while(line_start>startPoints[blockNo]) blockNo++;
     641    line_start -= accumBytes[blockNo-1];
     642    line_end -= accumBytes[blockNo-1];
     643}
     644#endif
     645
     646    if (ShowFileNames) {
     647        resultStrs[fileIdx] << inputFiles[fileIdx] << ':';
     648    }
     649    if (ShowLineNumbers) {
     650        resultStrs[fileIdx] << lineNum << ":";
     651    }
     652
     653    // If the line "starts" on the LF of a CRLF, it is actually the end of the last line.
     654    if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
     655        ++line_start;
     656    }
     657
     658    if (LLVM_UNLIKELY(line_end == filesize)) {
     659        // The match position is at end-of-file.   We have a final unterminated line.
     660        resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
     661        if (NormalizeLineBreaks) {
     662            resultStrs[fileIdx] << '\n';  // terminate it
     663        }
     664    } else {
     665        const auto end_byte = buffer[line_end];
     666        if (NormalizeLineBreaks) {
     667            if (LLVM_UNLIKELY(end_byte == 0x85)) {
     668                // Line terminated with NEL, on the second byte.  Back up 1.
     669                line_end -= 1;
     670            } else if (LLVM_UNLIKELY(end_byte > 0xD)) {
     671                // Line terminated with PS or LS, on the third byte.  Back up 2.
     672                line_end -= 2;
    630673            }
    631             return;
    632         }
    633         unsigned char end_byte = (unsigned char)buffer[line_end];
    634         unsigned char penult_byte = (unsigned char)(buffer[line_end - 1]);
    635         if (NormalizeLineBreaks) {
    636             if (end_byte == 0x85) {
    637                 // Line terminated with NEL, on the second byte.  Back up 1.
    638                 line_end--;
    639             } else if (end_byte > 0xD) {
    640                 // Line terminated with PS or LS, on the third byte.  Back up 2.
    641                 isUTF_16 ? line_end-- : line_end -= 2;
    642             }
    643             resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start) * index);
    644             resultStrs[idx] << '\n';
     674            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
     675            resultStrs[fileIdx] << '\n';
    645676        } else {
    646             if ((!isUTF_16 && end_byte == 0x0D) || (isUTF_16 && (end_byte == 0x0D && penult_byte == 0x0))) {
    647                 // Check for line_end on first byte of CRLF;  note that we don't
    648                 // want to access past the end of buffer.
    649                 if (line_end + 1 < filesize) {
    650                     if (!isUTF_16 && buffer[line_end + 1] == 0x0A) {
     677            if (end_byte == 0x0D) {
     678                // Check for line_end on first byte of CRLF; we don't want to access past the end of buffer.
     679                if ((line_end + 1) < filesize) {
     680                    if (buffer[line_end + 1] == 0x0A) {
    651681                        // Found CRLF; preserve both bytes.
    652                         line_end++;
    653                     }
    654                     if (isUTF_16 && buffer[line_end + 1] == 0x0 && buffer[line_end + 2] == 0x0A) {
    655                         // Found CRLF; preserve both bytes.
    656                         line_end += 2;
     682                        ++line_end;
    657683                    }
    658684                }
    659685            }
    660             resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start + 1) * index);
     686            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start + 1) * sizeof(CodeUnit));
    661687        }
    662688    }
     
    684710}
    685711
    686 extern "C" {
    687     void insert_codepoints(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    688         assert (buffer);
    689         re::codepoint_t c = 0;
    690         ssize_t line_pos = line_start;
    691         while (isxdigit(buffer[line_pos])) {
    692             if (isdigit(buffer[line_pos])) {
    693                 c = (c << 4) | (buffer[line_pos] - '0');
    694             }
    695             else {
    696                 c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
    697             }
    698             line_pos++;
    699         }
    700         assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.       
    701         parsedCodePointSet->insert(c);
    702     }
    703 }
    704 
    705 extern "C" {
    706     void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    707         parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
    708     }
     712void insert_codepoints(const size_t lineNum, const size_t line_start, const size_t line_end, const char * const buffer) {
     713    assert (buffer);
     714    assert (line_start <= line_end);
     715    re::codepoint_t c = 0;
     716    size_t line_pos = line_start;
     717    while (isxdigit(buffer[line_pos])) {
     718        assert (line_pos < line_end);
     719        if (isdigit(buffer[line_pos])) {
     720            c = (c << 4) | (buffer[line_pos] - '0');
     721        }
     722        else {
     723            c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
     724        }
     725        line_pos++;
     726    }
     727    assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
     728    parsedCodePointSet->insert(c);
     729}
     730
     731void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
     732    assert (line_start <= line_end);
     733    parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
    709734}
    710735
     
    717742        if (fnName == "process_block_initialize_carries") continue;
    718743       
    719         if (fnName == "wrapped_report_match") {
    720             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
     744        if (fnName == "wrapped_report_match8") {
     745            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match<uint8_t>);
     746        }
     747        if (fnName == "wrapped_report_match16") {
     748            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match<uint16_t>);
    721749        }
    722750        if (fnName == "insert_codepoints") {
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5341 r5377  
    2323    ~GrepEngine();
    2424 
    25     void grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal);
     25    void grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
    2626    void multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal);
    2727     
    28     void doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly, bool UTF_16);
     28    void doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly);
     29
     30    void doGrep(const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly);
    2931   
    3032    re::CC *  grepCodepoints();
     
    5052void setParsedPropertyValues();
    5153
    52 
    53 void initResult(std::vector<std::string> filenames);
     54void initFileResult(std::vector<std::string> filenames);
    5455void PrintResult(bool CountOnly, std::vector<size_t> & total_CountOnly);
    5556
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5368 r5377  
    33cc/cc_compiler.cpp
    44cc/cc_compiler.h
     5cc/multiplex_CCs.cpp
     6cc/multiplex_CCs.h
    57editd/editd.cpp
    68editd/editd_cpu_kernel.cpp
     
    9496pablo/passes/flattenif.cpp
    9597pablo/passes/flattenif.hpp
     98pablo/passes/ssapass.cpp
     99pablo/passes/ssapass.h
    96100pablo/arithmetic.h
    97101pablo/boolean.h
     
    122126pablo/pe_matchstar.h
    123127pablo/pe_ones.h
     128pablo/pe_phi.h
    124129pablo/pe_scanthru.h
    125130pablo/pe_string.h
     
    234239wc.cpp
    235240CMakeLists.txt
    236 pablo/pe_phi.h
    237 pablo/passes/ssapass.h
    238 pablo/passes/ssapass.cpp
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5373 r5377  
    3131#include <util/papi_helper.hpp>
    3232#endif
     33#include <poll.h>
     34
     35inline bool hasInputFromStdIn() {
     36    pollfd stdin_poll;
     37    stdin_poll.fd = STDIN_FILENO;
     38    stdin_poll.events = POLLIN | POLLRDBAND | POLLRDNORM | POLLPRI;
     39    return poll(&stdin_poll, 1, 0) == 1;
     40}
    3341
    3442using namespace llvm;
     
    200208
    201209    while (fileIdx < allFiles.size()){
    202         grepEngine->doGrep(allFiles[fileIdx], fileIdx, CountOnly, total_CountOnly, UTF_16);
     210        grepEngine->doGrep(allFiles[fileIdx], fileIdx, CountOnly, total_CountOnly);
    203211       
    204212        count_mutex.lock();
     
    384392        return 0;   // icgrep is called again, so we need to end this process.
    385393    }
    386    
     394
     395    const bool usingStdIn = hasInputFromStdIn();
     396
    387397    GrepEngine grepEngine;
    388     if(MultiGrepKernels){
     398    if (MultiGrepKernels) {
    389399        grepEngine.multiGrepCodeGen(module_name, RELists, CountOnly, UTF_16);
    390     }
    391     else{
    392         grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16);
    393     }
    394 
    395     allFiles = getFullFileList(inputFiles);
    396    
    397     if (FileNamesOnly && NonMatchingFileNamesOnly) {
    398         // Strange request: print names of all matching files and all non-matching files: i.e., all of them.
    399         // (Although GNU grep prints nothing.)
    400         for (auto & f : allFiles) {
    401             if (boost::filesystem::exists(f)) {
    402                 std::cout << f << "\n";
    403             } else {
    404                 std::cerr << "Error: cannot open " << f << " for processing. Skipped.\n";
    405             }
    406         }
    407         exit(0);
    408     }
    409     if (FileNamesOnly) {
    410         llvm::report_fatal_error("Sorry, -l/-files-with-matches not yet supported\n.");
    411     }
    412     if (NonMatchingFileNamesOnly) {
    413         llvm::report_fatal_error("Sorry, -L/-files-without-match not yet supported\n.");
    414     }
    415    
    416     initResult(allFiles);
    417     for (unsigned i=0; i < allFiles.size(); ++i){
     400    } else {
     401        grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16, GrepType::Normal, usingStdIn);
     402    }
     403
     404    if (usingStdIn)  {
     405
     406        allFiles = { "stdin" };
     407        initFileResult(allFiles);
    418408        total_CountOnly.push_back(0);
    419     }
    420 
    421     if (Threads <= 1) {
    422 
    423         #ifdef PRINT_TIMING_INFORMATION
    424         // PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY
    425         // PAPI_RES_STL, PAPI_BR_MSP, PAPI_LST_INS, PAPI_L1_TCM
    426         papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY});
    427         #endif
    428         for (unsigned i = 0; i != allFiles.size(); ++i) {
     409        grepEngine.doGrep(0, CountOnly, total_CountOnly);
     410
     411    } else {
     412
     413        allFiles = getFullFileList(inputFiles);
     414
     415        if (FileNamesOnly && NonMatchingFileNamesOnly) {
     416            // Strange request: print names of all matching files and all non-matching files: i.e., all of them.
     417            // (Although GNU grep prints nothing.)
     418            for (auto & f : allFiles) {
     419                if (boost::filesystem::exists(f)) {
     420                    std::cout << f << "\n";
     421                } else {
     422                    std::cerr << "Error: cannot open " << f << " for processing. Skipped.\n";
     423                }
     424            }
     425            exit(0);
     426        }
     427
     428        if (FileNamesOnly) {
     429            llvm::report_fatal_error("Sorry, -l/-files-with-matches not yet supported\n.");
     430        }
     431        if (NonMatchingFileNamesOnly) {
     432            llvm::report_fatal_error("Sorry, -L/-files-without-match not yet supported\n.");
     433        }
     434        initFileResult(allFiles);
     435
     436        for (unsigned i=0; i < allFiles.size(); ++i){
     437            total_CountOnly.push_back(0);
     438        }
     439
     440        if (Threads <= 1) {
     441
    429442            #ifdef PRINT_TIMING_INFORMATION
    430             papiCounters.start();
    431             const timestamp_t execution_start = read_cycle_counter();
     443            // PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY
     444            // PAPI_RES_STL, PAPI_BR_MSP, PAPI_LST_INS, PAPI_L1_TCM
     445            papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY});
    432446            #endif
    433             grepEngine.doGrep(allFiles[i], i, CountOnly, total_CountOnly, UTF_16);
    434             #ifdef PRINT_TIMING_INFORMATION
    435             const timestamp_t execution_end = read_cycle_counter();
    436             papiCounters.stop();
    437             std::cerr << "EXECUTION TIME: " << allFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl;
    438             #endif
    439         }       
    440     } else if (Threads > 1) {
    441         const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
    442         pthread_t threads[numOfThreads];
    443 
    444         for(unsigned long i = 0; i < numOfThreads; ++i){
    445             const int rc = pthread_create(&threads[i], nullptr, DoGrep, (void *)&grepEngine);
    446             if (rc) {
    447                 llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
    448             }
    449         }
    450 
    451         for(unsigned i = 0; i < numOfThreads; ++i) {
    452             void * status = nullptr;
    453             const int rc = pthread_join(threads[i], &status);
    454             if (rc) {
    455                 llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
    456             }
    457         }
    458     }
     447            for (unsigned i = 0; i != allFiles.size(); ++i) {
     448                #ifdef PRINT_TIMING_INFORMATION
     449                papiCounters.start();
     450                const timestamp_t execution_start = read_cycle_counter();
     451                #endif
     452                grepEngine.doGrep(allFiles[i], i, CountOnly, total_CountOnly);
     453                #ifdef PRINT_TIMING_INFORMATION
     454                const timestamp_t execution_end = read_cycle_counter();
     455                papiCounters.stop();
     456                std::cerr << "EXECUTION TIME: " << allFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl;
     457                #endif
     458            }
     459        } else if (Threads > 1) {
     460            const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
     461            pthread_t threads[numOfThreads];
     462
     463            for(unsigned long i = 0; i < numOfThreads; ++i){
     464                const int rc = pthread_create(&threads[i], nullptr, DoGrep, (void *)&grepEngine);
     465                if (rc) {
     466                    llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
     467                }
     468            }
     469
     470            for(unsigned i = 0; i < numOfThreads; ++i) {
     471                void * status = nullptr;
     472                const int rc = pthread_join(threads[i], &status);
     473                if (rc) {
     474                    llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
     475                }
     476            }
     477        }
     478
     479    }
     480   
     481
    459482   
    460483    PrintResult(CountOnly, total_CountOnly);
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5375 r5377  
    217217
    218218Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
     219    assert ("instance cannot be null!" && instance);
    219220    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
    220221}
    221222
    222223Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
     224    assert ("instance cannot be null!" && instance);
    223225    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
    224226}
     
    229231
    230232Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
     233    assert ("instance cannot be null!" && instance);
    231234    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
    232235}
    233236
    234237void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
     238    assert ("instance cannot be null!" && instance);
    235239    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
    236240}
    237241
    238242void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
     243    assert ("instance cannot be null!" && instance);
    239244    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
    240245}
    241246
    242247Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
     248    assert ("instance cannot be null!" && instance);
    243249    unsigned ssIdx = getStreamSetIndex(name);
    244250    if (mStreamSetInputs[ssIdx].rate.isExact()) {
    245251        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
    246         if (refSet == "") refSet = mStreamSetInputs[0].name;
     252        if (refSet.empty()) {
     253            refSet = mStreamSetInputs[0].name;
     254        }
    247255        Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
    248256        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
     
    252260
    253261Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
     262    assert ("instance cannot be null!" && instance);
    254263    unsigned ssIdx = getStreamSetIndex(name);
    255264    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
    256265        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
    257266        std::string principalField;
    258         if (refSet == "") {
    259             principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
    260         }
    261         else {
     267        if (refSet.empty()) {
     268            if (mStreamSetInputs.empty()) {
     269                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
     270            } else {
     271                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
     272            }
     273        } else {
    262274            unsigned pfIndex = getStreamSetIndex(refSet);
    263275            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
    264276               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
    265             }
    266             else {
     277            } else {
    267278               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
    268279            }
     
    275286
    276287Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name) const {
     288    assert ("instance cannot be null!" && instance);
    277289    unsigned ssIdx = getStreamSetIndex(name);
    278290    std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
     
    280292        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
    281293        std::string principalField;
    282         if (refSet == "") {
     294        if (refSet.empty()) {
    283295            principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
    284         }
    285         else {
     296        } else {
    286297            unsigned pfIndex = getStreamSetIndex(refSet);
    287298            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
    288299               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
    289             }
    290             else {
     300            } else {
    291301               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
    292302            }
     
    299309
    300310void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
     311    assert ("instance cannot be null!" && instance);
    301312    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
    302313}
    303314
    304315void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
     316    assert ("instance cannot be null!" && instance);
    305317    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
    306318}
    307319
     320llvm::Value * KernelBuilder::reserveItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
     321    assert ("instance cannot be null!" && instance);
     322    Value * itemCount = getProducedItemCount(instance, name);
     323    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
     324    return buf->reserveItemCount(getStreamSetBufferPtr(name), itemCount, value);
     325}
     326
    308327Value * KernelBuilder::getTerminationSignal(Value * instance) const {
     328    assert ("instance cannot be null!" && instance);
    309329    return getScalarField(instance, TERMINATION_SIGNAL);
    310330}
    311331
    312332void KernelBuilder::setTerminationSignal(Value * instance) const {
     333    assert ("instance cannot be null!" && instance);
    313334    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
    314335}
    315336
    316337LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
     338    assert ("instance cannot be null!" && instance);
    317339    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
    318340}
    319341
    320342void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
     343    assert ("instance cannot be null!" && instance);
    321344    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
    322345}
     
    623646            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
    624647            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    625             Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(priorBlock);
     648            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     649            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
    626650            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
    627651            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
     
    629653            iBuilder->SetInsertPoint(copyBack);
    630654            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    631             cb->createCopyBack(getStreamSetBufferPtr(mStreamSetOutputs[i].name), copyItems);
     655            cb->createCopyBack(instance, copyItems);
    632656            iBuilder->CreateBr(done);
    633657            iBuilder->SetInsertPoint(done);
     
    637661            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    638662            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     663            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    639664            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
    640             Value * accessible = cb->getLinearlyAccessibleItems(priorProduced[priorIdx]);
     665            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
    641666            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
    642667            iBuilder->CreateCondBr(wraparound, copyBack, done);
    643668            iBuilder->SetInsertPoint(copyBack);
    644669            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    645             cb->createCopyBack(getStreamSetBufferPtr(mStreamSetOutputs[i].name), copyItems);
     670            cb->createCopyBack(instance, copyItems);
    646671            iBuilder->CreateBr(done);
    647672            iBuilder->SetInsertPoint(done);
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5370 r5377  
    4747
    4848    virtual void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const;
     49
     50    virtual llvm::Value * reserveItemCount(llvm::Value * instance, const std::string & name, llvm::Value * requested) const;
    4951
    5052    bool hasNoTerminateAttribute() { return mNoTerminateAttribute;}
     
    194196    llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    195197
     198    llvm::Value * reserveItemCount(const std::string & name, llvm::Value * requested) const {
     199        return reserveItemCount(getSelf(), name, requested);
     200    }
     201
    196202    llvm::Value * getScalarFieldPtr(const std::string & name) const {
    197203        return getScalarFieldPtr(getSelf(), name);
     
    225231        return setTerminationSignal(getSelf());
    226232    }
    227 
    228233
    229234    llvm::Value * getSelf() const {
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.cpp

    r5325 r5377  
    1616
    1717namespace kernel {
    18            
     18
    1919void MMapSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     20
     21    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    2022    BasicBlock * setTermination = CreateBasicBlock("setTermination");
    2123    BasicBlock * mmapSourceExit = CreateBasicBlock("mmapSourceExit");
     
    2628    }
    2729    Value * produced = getProducedItemCount("sourceBuffer");
    28     Value * nextProduced = iBuilder->CreateAdd(produced, segmentItems);
    29     Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileItems, nextProduced);
    30     produced = iBuilder->CreateSelect(lessThanFullSegment, fileItems, nextProduced);
    31     setProducedItemCount("sourceBuffer", produced);
    32 
     30    produced = iBuilder->CreateAdd(produced, segmentItems);
     31    Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileItems, produced);
    3332    iBuilder->CreateCondBr(lessThanFullSegment, setTermination, mmapSourceExit);
    3433    iBuilder->SetInsertPoint(setTermination);
    3534    setTerminationSignal();
    36     iBuilder->CreateBr(mmapSourceExit);   
     35    iBuilder->CreateBr(mmapSourceExit);
    3736
    3837    iBuilder->SetInsertPoint(mmapSourceExit);
     38
     39    PHINode * itemsRead = iBuilder->CreatePHI(produced->getType(), 2);
     40    itemsRead->addIncoming(produced, entryBlock);
     41    itemsRead->addIncoming(fileItems, setTermination);
     42    setProducedItemCount("sourceBuffer", itemsRead);
    3943}
    4044
     45void MMapSourceKernel::generateInitMethod() {
     46//    Value * fileSize = getScalarField("fileSize");
     47//    fileSize = iBuilder->CreateUDiv(fileSize, iBuilder->getSize(mCodeUnitWidth / 8));
     48//    setProducedItemCount("sourceBuffer", fileSize);
     49}
    4150
    4251MMapSourceKernel::MMapSourceKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment, unsigned codeUnitWidth)
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.h

    r5292 r5377  
    1515   pipeline. */
    1616   
    17 class MMapSourceKernel : public SegmentOrientedKernel {
     17class MMapSourceKernel final: public SegmentOrientedKernel {
    1818public:
    1919    MMapSourceKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8); 
    2020private:
    21     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     21    void generateInitMethod() override;
     22    void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
    2223private:
    2324    const unsigned mSegmentBlocks;
    2425    const unsigned mCodeUnitWidth;
    2526};
     27
    2628}
    2729
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5356 r5377  
    2727        t0 = iBuilder->hsimd_packh_in_lanes(PACK_LANES, 16, x0, x1);
    2828        t1 = iBuilder->hsimd_packl_in_lanes(PACK_LANES, 16, x0, x1);
    29     }
    30     else {
     29    } else {
    3130        t0 = iBuilder->hsimd_packh(16, s0, s1);
    3231        t1 = iBuilder->hsimd_packl(16, s0, s1);
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5350 r5377  
    3838}
    3939       
     40
     41
    4042void ScanMatchKernel::generateDoBlockMethod() {
    4143
    42     auto savePoint = iBuilder->saveIP();
    43     Function * scanWordFunction = generateScanWordRoutine(iBuilder->getModule());
    44     iBuilder->restoreIP(savePoint);
    45 
    46     IntegerType * T = iBuilder->getSizeTy();
    47     const unsigned fieldCount = iBuilder->getBitBlockWidth() / T->getBitWidth();
    48     Type * scanwordVectorType =  VectorType::get(T, fieldCount);
    49     Value * blockNo = getScalarField("BlockNo");
    50     Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));   
    51     Value * recordStart = getScalarField("LineStart");
    52     Value * recordNum = getScalarField("LineNum");
    53     Value * matches = loadInputStreamBlock("matchResult", iBuilder->getInt32(0));
    54     Value * linebreaks = loadInputStreamBlock("lineBreak", iBuilder->getInt32(0));
    55     Value * matchWordVector = iBuilder->CreateBitCast(matches, scanwordVectorType);
    56     Value * breakWordVector = iBuilder->CreateBitCast(linebreaks, scanwordVectorType);
    57     for(unsigned i = 0; i < fieldCount; ++i){
    58         Value * matchWord = iBuilder->CreateExtractElement(matchWordVector, ConstantInt::get(T, i));
    59         Value * recordBreaksWord = iBuilder->CreateExtractElement(breakWordVector, ConstantInt::get(T, i));
    60         Value * wordResult = iBuilder->CreateCall(scanWordFunction, {getSelf(), matchWord, recordBreaksWord, scanwordPos, recordStart, recordNum});
    61         scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, T->getBitWidth()));
    62         recordStart = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({0}));
    63         recordNum = iBuilder->CreateExtractValue(wordResult, std::vector<unsigned>({1}));
    64     }
    65     setScalarField("LineStart", recordStart);
    66     setScalarField("LineNum", recordNum);
    67     setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getSize(1)));
    68 }
    69 
    70    
    71 Function * ScanMatchKernel::generateScanWordRoutine(Module * m) const {
    72     Function * function = m->getFunction("scan_matches_in_scanword");
    73     if (LLVM_UNLIKELY(function != nullptr)) {
    74         return function;
    75     }
    76    
    77     LLVMContext & ctxt = m->getContext();
    78 
    79     IntegerType * T = iBuilder->getSizeTy();
    80     Type * S = PointerType::get(iBuilder->getIntNTy(8), 0);
    81     Type * returnType = StructType::get(ctxt, std::vector<Type *>({T, T}));
    82     FunctionType * functionType = FunctionType::get(returnType, std::vector<Type *>({PointerType::get(mKernelStateType, 0), T, T, T, T, T}), false);
    83    
    84     SmallVector<AttributeSet, 6> Attrs;
    85     Attrs.push_back(AttributeSet::get(ctxt, ~0U, std::vector<Attribute::AttrKind>({ Attribute::NoUnwind, Attribute::UWTable })));
    86     Attrs.push_back(AttributeSet::get(ctxt, 1, std::vector<Attribute::AttrKind>({})));
    87     Attrs.push_back(AttributeSet::get(ctxt, 2, std::vector<Attribute::AttrKind>({})));
    88     Attrs.push_back(AttributeSet::get(ctxt, 3, std::vector<Attribute::AttrKind>({})));
    89     Attrs.push_back(AttributeSet::get(ctxt, 4, std::vector<Attribute::AttrKind>({})));
    90     Attrs.push_back(AttributeSet::get(ctxt, 5, std::vector<Attribute::AttrKind>({})));
    91     AttributeSet AttrSet = AttributeSet::get(ctxt, Attrs);
    92    
    93     function = Function::Create(functionType, GlobalValue::ExternalLinkage, "scan_matches_in_scanword", m);
    94     function->setCallingConv(CallingConv::C);
    95     function->setAttributes(AttrSet);
    96     function->addFnAttr(llvm::Attribute::AlwaysInline);
    97    
    98     Function::arg_iterator args = function->arg_begin();
    99     Value * instance = &*(args++);
    100     instance->setName("this");
    101     Value * matches_input_parm = &*(args++);
    102     matches_input_parm->setName("matches");
    103     Value * record_breaks_input_parm = &*(args++);
    104     record_breaks_input_parm->setName("breaks");
    105     Value * scanwordPos = &*(args++);
    106     scanwordPos->setName("scanwordPos");
    107     Value * recordStart_input_parm = &*(args++);
    108     recordStart_input_parm->setName("pendingLineStart");
    109     Value * recordNum_input_parm = &*(args++);
    110     recordNum_input_parm->setName("lineNum");
    111    
     44    Module * const m = iBuilder->getModule();
     45    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     46    BasicBlock * const scanWordIteration = CreateBasicBlock("ScanWordIteration");
     47    BasicBlock * const matches_test_block = CreateBasicBlock("matches_test_block");
     48    BasicBlock * const processMatchesEntry = CreateBasicBlock("process_matches_loop");
     49    BasicBlock * const prior_breaks_block = CreateBasicBlock("prior_breaks_block");
     50    BasicBlock * const loop_final_block = CreateBasicBlock("loop_final_block");
     51    BasicBlock * const processMatchesExit = CreateBasicBlock("matches_done_block");
     52    BasicBlock * const remaining_breaks_block = CreateBasicBlock("remaining_breaks_block");
     53    BasicBlock * const return_block = CreateBasicBlock("return_block");
     54    BasicBlock * const scanWordExit = CreateBasicBlock("ScanWordExit");
     55    IntegerType * const sizeTy = iBuilder->getSizeTy();
     56    PointerType * const codeUnitTy = iBuilder->getIntNTy(mCodeUnitWidth)->getPointerTo();
     57    const unsigned fieldCount = iBuilder->getBitBlockWidth() / sizeTy->getBitWidth();
     58    VectorType * const scanwordVectorType =  VectorType::get(sizeTy, fieldCount);
     59    Value * const blockNo = getScalarField("BlockNo");
     60    Value * const scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
     61    Value * const lastRecordStart = getScalarField("LineStart");
     62    Value * const lastRecordNum = getScalarField("LineNum");
     63    Value * const inputStream = iBuilder->CreatePointerCast(getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0)), codeUnitTy);
     64
     65    Value * fileSize = iBuilder->CreateAdd(getProcessedItemCount("InputStream"), getScalarField("PendingBytes"));
     66
    11267    Constant * matchProcessor = nullptr;
     68    Value * fileIdx = nullptr;
    11369    switch (mGrepType) {
    11470        case GrepType::Normal:
    115             matchProcessor = m->getOrInsertFunction("wrapped_report_match", iBuilder->getVoidTy(), T, T, T, S, T, T, nullptr);
     71            fileIdx = getScalarField("FileIdx");
     72            matchProcessor = m->getOrInsertFunction("wrapped_report_match" + std::to_string(mCodeUnitWidth), iBuilder->getVoidTy(), sizeTy, sizeTy, sizeTy, codeUnitTy, sizeTy, sizeTy, nullptr);
    11673            break;
    11774        case GrepType::NameExpression:
    118             matchProcessor = m->getOrInsertFunction("insert_codepoints", iBuilder->getVoidTy(), T, T, T, S, nullptr);
     75            matchProcessor = m->getOrInsertFunction("insert_codepoints", iBuilder->getVoidTy(), sizeTy, sizeTy, sizeTy, codeUnitTy, nullptr);
    11976            break;
    12077        case GrepType::PropertyValue:
    121             matchProcessor = m->getOrInsertFunction("insert_property_values", iBuilder->getVoidTy(), T, T, T, S, nullptr);
     78            matchProcessor = m->getOrInsertFunction("insert_property_values", iBuilder->getVoidTy(), sizeTy, sizeTy, sizeTy, codeUnitTy, nullptr);
    12279            break;
    12380        default: llvm_unreachable("unknown grep type");
    12481    }
    125     iBuilder->SetInsertPoint(BasicBlock::Create(ctxt, "entry", function,0));
    126    
    127     BasicBlock * entry_block = iBuilder->GetInsertBlock();
    128     BasicBlock * matches_test_block = BasicBlock::Create(ctxt, "matches_test_block", function, 0);
    129     BasicBlock * process_matches_loop_entry = BasicBlock::Create(ctxt, "process_matches_loop", function, 0);
    130     BasicBlock * prior_breaks_block = BasicBlock::Create(ctxt, "prior_breaks_block", function, 0);
    131     BasicBlock * loop_final_block = BasicBlock::Create(ctxt, "loop_final_block", function, 0);
    132     BasicBlock * matches_done_block = BasicBlock::Create(ctxt, "matches_done_block", function, 0);
    133     BasicBlock * remaining_breaks_block = BasicBlock::Create(ctxt, "remaining_breaks_block", function, 0);
    134     BasicBlock * return_block = BasicBlock::Create(ctxt, "return_block", function, 0);
    135    
    136    
    137     // The match scanner works with a loop involving four variables:
    138     // (a) the bit stream scanword of matches marking the ends of selected records,
    139     // (b) the bit stream scanword of record_breaks marking the ends of all records,
    140     // (c) the integer lastRecordNum indicating the number of records processed so far,
    141     // (d) the index lastRecordStart indicating the file position of the last record.
    142     // We set up a loop structure, in which a set of 4 phi nodes initialize these
    143     // variables from either the input to the scanner or the computed values within
    144     // the loop body.
    145    
    146    
    147     iBuilder->CreateBr(matches_test_block);
    148    
    149     // LOOP Test Block
    150     iBuilder->SetInsertPoint(matches_test_block);
    151     PHINode * matches_phi = iBuilder->CreatePHI(T, 2, "matches");
    152     PHINode * record_breaks_phi = iBuilder->CreatePHI(T, 2, "record_breaks");
    153     PHINode * recordNum_phi = iBuilder->CreatePHI(T, 2, "recordNum");
    154     PHINode * recordStart_phi = iBuilder->CreatePHI(T, 2, "recordStart");
    155     matches_phi->addIncoming(matches_input_parm, entry_block);
    156     record_breaks_phi->addIncoming(record_breaks_input_parm, entry_block);
    157     recordNum_phi->addIncoming(recordNum_input_parm, entry_block);
    158     recordStart_phi->addIncoming(recordStart_input_parm, entry_block);
    159     Value * have_matches_cond = iBuilder->CreateICmpNE(matches_phi, ConstantInt::get(T, 0));
    160     iBuilder->CreateCondBr(have_matches_cond, process_matches_loop_entry, matches_done_block);
    161    
    162     // LOOP BODY
    163     // The loop body is entered if we have more matches to process.
    164     iBuilder->SetInsertPoint(process_matches_loop_entry);
    165     Value * prior_breaks = iBuilder->CreateAnd(generateForwardZeroesMask(iBuilder, matches_phi), record_breaks_phi);
    166     // Within the loop we have a conditional block that is executed if there are any prior
    167     // record breaks.
    168     Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::get(T, 0));
    169     iBuilder->CreateCondBr(prior_breaks_cond, prior_breaks_block, loop_final_block);
    170    
    171     // PRIOR_BREAKS_BLOCK
    172     // If there are prior breaks, we count them and compute the record start position.
    173     iBuilder->SetInsertPoint(prior_breaks_block);
    174     Value * matchRecordNum = iBuilder->CreateAdd(generatePopcount(iBuilder, prior_breaks), recordNum_phi);
    175     Value * reverseDistance = generateCountReverseZeroes(iBuilder, prior_breaks);
    176     Value * width = ConstantInt::get(T, T->getBitWidth());
    177     Value * matchRecordStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseDistance));
    178     iBuilder->CreateBr(loop_final_block);
    179    
    180     // LOOP FINAL BLOCK
    181     // The prior breaks, if any, have been counted.  Set up phi nodes for the recordNum
    182     // and recordStart depending on whether prior_breaks_block was conditionally executed.
    183     iBuilder->SetInsertPoint(loop_final_block);
    184     PHINode * matchRecordNum_phi = iBuilder->CreatePHI(T, 2, "matchRecordNum");
    185     PHINode * matchRecordStart_phi = iBuilder->CreatePHI(T, 2, "matchRecordStart");
    186     matchRecordNum_phi->addIncoming(recordNum_phi, process_matches_loop_entry);
    187     matchRecordNum_phi->addIncoming(matchRecordNum, prior_breaks_block);
    188     matchRecordStart_phi->addIncoming(recordStart_phi, process_matches_loop_entry);
    189     matchRecordStart_phi->addIncoming(matchRecordStart, prior_breaks_block);   
    190     Value * matchRecordEnd = iBuilder->CreateAdd(scanwordPos, generateCountForwardZeroes(iBuilder, matches_phi));
    191    
    192 
    193     Value * fileBuf = getScalarField(instance, "FileBuf");
    194     switch (mGrepType) {
    195         case GrepType::Normal:
    196         {
    197             Value * fileSize = getScalarField(instance, "FileSize");
    198             Value * fileIdx = getScalarField(instance, "FileIdx");
    199             iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf, fileSize, fileIdx}));
    200             break;
    201         }
    202         case GrepType::NameExpression:
    203         case GrepType::PropertyValue:
    204             iBuilder->CreateCall(matchProcessor, std::vector<Value *>({matchRecordNum_phi, matchRecordStart_phi, matchRecordEnd, fileBuf}));
    205             break;
    206         default: llvm_unreachable("unknown grep type");
    207     }
    208    
    209     Value * remaining_matches = generateResetLowestBit(iBuilder, matches_phi);
    210     Value * remaining_breaks = iBuilder->CreateXor(record_breaks_phi, prior_breaks);
    211     matches_phi->addIncoming(remaining_matches, loop_final_block);
    212     record_breaks_phi->addIncoming(remaining_breaks, loop_final_block);
    213     recordNum_phi->addIncoming(matchRecordNum_phi, loop_final_block);
    214     recordStart_phi->addIncoming(matchRecordStart_phi, loop_final_block);
    215     iBuilder->CreateBr(matches_test_block);
    216    
    217    
    218     // LOOP EXIT/MATCHES_DONE
    219     iBuilder->SetInsertPoint(matches_done_block);
    220     // When the matches are done, there may be additional record breaks remaining
    221     Value * more_breaks_cond = iBuilder->CreateICmpNE(record_breaks_phi, ConstantInt::get(T, 0));
    222     iBuilder->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
    223    
    224     // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
    225     iBuilder->SetInsertPoint(remaining_breaks_block);
    226     Value * break_count = generatePopcount(iBuilder, record_breaks_phi);
    227     Value * final_record_num = iBuilder->CreateAdd(recordNum_phi, break_count);
    228     Value * reverseZeroes = generateCountReverseZeroes(iBuilder, record_breaks_phi);
    229     Value * pendingLineStart = iBuilder->CreateAdd(scanwordPos, iBuilder->CreateSub(width, reverseZeroes));
    230     iBuilder->CreateBr(return_block);
    231    
    232     // RETURN block
    233     iBuilder->SetInsertPoint(return_block);
    234     PHINode * finalRecordCount_phi = iBuilder->CreatePHI(T, 2, "finalRecordCount");
    235     PHINode * finalRecordStart_phi = iBuilder->CreatePHI(T, 2, "finalRecordStart");
    236     finalRecordCount_phi->addIncoming(recordNum_phi, matches_done_block);
    237     finalRecordCount_phi->addIncoming(final_record_num, remaining_breaks_block);
    238     finalRecordStart_phi->addIncoming(recordStart_phi, matches_done_block);
    239     finalRecordStart_phi->addIncoming(pendingLineStart, remaining_breaks_block);
    240     Value * retVal = UndefValue::get(returnType);
    241     retVal = iBuilder->CreateInsertValue(retVal, finalRecordStart_phi, 0);
    242     retVal = iBuilder->CreateInsertValue(retVal, finalRecordCount_phi, 1);
    243     iBuilder->CreateRet(retVal);
    244    
    245     return function;
    246 }
    247 
    248 ScanMatchKernel::ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType)
    249 : BlockOrientedKernel(iBuilder, "scanMatch",
    250     {Binding{iBuilder->getStreamSetTy(1, 1), "matchResult"}, Binding{iBuilder->getStreamSetTy(1, 1), "lineBreak"}},
     82    Value * const matchesPtr = getInputStreamBlockPtr("matchResult", iBuilder->getInt32(0));
     83    Value * const matches = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(matchesPtr), scanwordVectorType);
     84
     85    Value * const linebreaksPtr = getInputStreamBlockPtr("lineBreak", iBuilder->getInt32(0));
     86    Value * const linebreaks = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(linebreaksPtr), scanwordVectorType);
     87
     88    iBuilder->CreateBr(scanWordIteration);
     89
     90    iBuilder->SetInsertPoint(scanWordIteration);
     91
     92        // while (phiIndex < words per stride)
     93        PHINode * const phiIndex = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2, "index");
     94        phiIndex->addIncoming(iBuilder->getInt32(0), entryBlock);
     95        PHINode * const phiScanwordPos = iBuilder->CreatePHI(scanwordPos->getType(), 2, "pos");
     96        phiScanwordPos->addIncoming(scanwordPos, entryBlock);
     97        PHINode * const phiLineStart = iBuilder->CreatePHI(lastRecordStart->getType(), 2, "recordstart");
     98        phiLineStart->addIncoming(lastRecordStart, entryBlock);
     99        PHINode * const phiLineNum = iBuilder->CreatePHI(lastRecordNum->getType(), 2, "recordnum");
     100        phiLineNum->addIncoming(lastRecordNum, entryBlock);
     101        Value * const matchWord = iBuilder->CreateExtractElement(matches, phiIndex);
     102        Value * const recordBreaks = iBuilder->CreateExtractElement(linebreaks, phiIndex);
     103
     104        // The match scanner works with a loop involving four variables:
     105        // (a) the bit stream scanword of matches marking the ends of selected records,
     106        // (b) the bit stream scanword of record_breaks marking the ends of all records,
     107        // (c) the integer lastRecordNum indicating the number of records processed so far,
     108        // (d) the index lastRecordStart indicating the file position of the last record.
     109        // We set up a loop structure, in which a set of 4 phi nodes initialize these
     110        // variables from either the input to the scanner or the computed values within
     111        // the loop body.
     112
     113        iBuilder->CreateBr(matches_test_block);
     114
     115        // LOOP Test Block
     116        iBuilder->SetInsertPoint(matches_test_block);
     117        PHINode * phiMatchWord = iBuilder->CreatePHI(sizeTy, 2, "matches");
     118        PHINode * phiRecordBreaks = iBuilder->CreatePHI(sizeTy, 2, "recordbreaks");
     119        PHINode * phiRecordStart = iBuilder->CreatePHI(sizeTy, 2, "recordstart");
     120        PHINode * phiRecordNum = iBuilder->CreatePHI(sizeTy, 2, "recordnum");
     121        phiMatchWord->addIncoming(matchWord, scanWordIteration);
     122        phiRecordBreaks->addIncoming(recordBreaks, scanWordIteration);
     123        phiRecordNum->addIncoming(phiLineNum, scanWordIteration);
     124        phiRecordStart->addIncoming(phiLineStart, scanWordIteration);
     125        Value * anyMatches = iBuilder->CreateICmpNE(phiMatchWord, ConstantInt::getNullValue(sizeTy));
     126        iBuilder->CreateCondBr(anyMatches, processMatchesEntry, processMatchesExit);
     127
     128            // LOOP BODY
     129            // The loop body is entered if we have more matches to process.
     130            iBuilder->SetInsertPoint(processMatchesEntry);
     131            Value * prior_breaks = iBuilder->CreateAnd(generateForwardZeroesMask(iBuilder, phiMatchWord), phiRecordBreaks);
     132            // Within the loop we have a conditional block that is executed if there are any prior record breaks.
     133            Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::getNullValue(sizeTy));
     134            iBuilder->CreateCondBr(prior_breaks_cond, prior_breaks_block, loop_final_block);
     135
     136                // PRIOR_BREAKS_BLOCK
     137                // If there are prior breaks, we count them and compute the record start position.
     138                iBuilder->SetInsertPoint(prior_breaks_block);
     139                Value * matchedRecordNum = iBuilder->CreateAdd(generatePopcount(iBuilder, prior_breaks), phiRecordNum);
     140                Value * reverseDistance = generateCountReverseZeroes(iBuilder, prior_breaks);
     141                Value * width = ConstantInt::get(sizeTy, sizeTy->getBitWidth());
     142                Value * priorRecordStart = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateSub(width, reverseDistance));
     143                iBuilder->CreateBr(loop_final_block);
     144
     145            // LOOP FINAL BLOCK
     146            // The prior breaks, if any, have been counted.  Set up phi nodes for the recordNum
     147            // and recordStart depending on whether prior_breaks_block was conditionally executed.
     148            iBuilder->SetInsertPoint(loop_final_block);
     149            PHINode * matchRecordNum = iBuilder->CreatePHI(sizeTy, 2, "matchRecordNum");
     150            matchRecordNum->addIncoming(phiRecordNum, processMatchesEntry);
     151            matchRecordNum->addIncoming(matchedRecordNum, prior_breaks_block);
     152            phiRecordNum->addIncoming(matchRecordNum, loop_final_block);
     153
     154            PHINode * matchRecordStart = iBuilder->CreatePHI(sizeTy, 2, "matchRecordStart");
     155            matchRecordStart->addIncoming(phiRecordStart, processMatchesEntry);
     156            matchRecordStart->addIncoming(priorRecordStart, prior_breaks_block);
     157            phiRecordStart->addIncoming(matchRecordStart, loop_final_block);
     158
     159            Value * matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, generateCountForwardZeroes(iBuilder, phiMatchWord));
     160            switch (mGrepType) {
     161                case GrepType::Normal:
     162                    iBuilder->CreateCall(matchProcessor, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream, fileSize, fileIdx});
     163                    break;
     164                case GrepType::NameExpression:
     165                case GrepType::PropertyValue:
     166                    iBuilder->CreateCall(matchProcessor, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream});
     167                    break;
     168                default: break;
     169            }
     170
     171            Value * remaining_matches = generateResetLowestBit(iBuilder, phiMatchWord);
     172            phiMatchWord->addIncoming(remaining_matches, loop_final_block);
     173
     174            Value * remaining_breaks = iBuilder->CreateXor(phiRecordBreaks, prior_breaks);
     175            phiRecordBreaks->addIncoming(remaining_breaks, loop_final_block);
     176
     177            iBuilder->CreateBr(matches_test_block);
     178
     179        // LOOP EXIT/MATCHES_DONE
     180        iBuilder->SetInsertPoint(processMatchesExit);
     181        // When the matches are done, there may be additional record breaks remaining
     182        Value * more_breaks_cond = iBuilder->CreateICmpNE(phiRecordBreaks, ConstantInt::getNullValue(sizeTy));
     183        iBuilder->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
     184
     185            // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
     186            iBuilder->SetInsertPoint(remaining_breaks_block);
     187            Value * break_count = generatePopcount(iBuilder, phiRecordBreaks);
     188            Value * final_record_num = iBuilder->CreateAdd(phiRecordNum, break_count);
     189            Value * reverseZeroes = generateCountReverseZeroes(iBuilder, phiRecordBreaks);
     190            Value * pendingLineStart = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateSub(width, reverseZeroes));
     191            iBuilder->CreateBr(return_block);
     192
     193        // RETURN block
     194        iBuilder->SetInsertPoint(return_block);
     195        PHINode * phiFinalRecordNum = iBuilder->CreatePHI(sizeTy, 2, "finalRecordCount");
     196        PHINode * phiFinalRecordStart = iBuilder->CreatePHI(sizeTy, 2, "finalRecordStart");
     197
     198        phiFinalRecordNum->addIncoming(phiRecordNum, processMatchesExit);
     199        phiFinalRecordNum->addIncoming(final_record_num, remaining_breaks_block);
     200        phiLineNum->addIncoming(phiFinalRecordNum, return_block);
     201
     202        phiFinalRecordStart->addIncoming(phiRecordStart, processMatchesExit);
     203        phiFinalRecordStart->addIncoming(pendingLineStart, remaining_breaks_block);
     204        phiLineStart->addIncoming(phiFinalRecordStart, return_block);
     205
     206        Value * nextScanwordPos = iBuilder->CreateAdd(phiScanwordPos, ConstantInt::get(sizeTy, sizeTy->getBitWidth()));
     207        phiScanwordPos->addIncoming(nextScanwordPos, return_block);
     208
     209        Value * nextIndex = iBuilder->CreateAdd(phiIndex, iBuilder->getInt32(1));
     210        phiIndex->addIncoming(nextIndex, return_block);
     211        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(nextIndex, iBuilder->getInt32(fieldCount)), scanWordIteration, scanWordExit);
     212
     213    iBuilder->SetInsertPoint(scanWordExit);
     214    setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, ConstantInt::get(blockNo->getType(), 1)));
     215    setScalarField("LineStart", phiFinalRecordStart);
     216    setScalarField("LineNum", phiFinalRecordNum);
     217}
     218
     219void ScanMatchKernel::generateInitMethod() {
     220    setScalarField("PendingBytes", iBuilder->getSize(iBuilder->getBitBlockWidth() + 2));
     221}
     222
     223void ScanMatchKernel::generateFinalBlockMethod(llvm::Value * remainingItems) {
     224    setScalarField("PendingBytes", remainingItems);
     225    CreateDoBlockMethodCall();
     226}
     227
     228ScanMatchKernel::ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType, const unsigned codeUnitWidth)
     229: BlockOrientedKernel(iBuilder, "scanMatch" + std::to_string(codeUnitWidth),
     230    {Binding{iBuilder->getStreamSetTy(1, 8), "InputStream"}, Binding{iBuilder->getStreamSetTy(1, 1), "matchResult"}, Binding{iBuilder->getStreamSetTy(1, 1), "lineBreak"}},
    251231    {},
    252     {Binding{iBuilder->getInt8PtrTy(), "FileBuf"}, Binding{iBuilder->getSizeTy(), "FileSize"}, Binding{iBuilder->getSizeTy(), "FileIdx"}},
     232    {Binding{iBuilder->getSizeTy(), "FileIdx"}},
    253233    {},
    254     {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineStart"}, Binding{iBuilder->getSizeTy(), "LineNum"}})
    255 , mGrepType(grepType) {
    256 }
    257 
    258 }
     234    {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineStart"}, Binding{iBuilder->getSizeTy(), "LineNum"}, Binding{iBuilder->getSizeTy(), "PendingBytes"}})
     235, mGrepType(grepType)
     236, mCodeUnitWidth(codeUnitWidth) {
     237}
     238
     239}
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5297 r5377  
    66#define SCANMATCHGEN_H
    77
    8 #include "grep_type.h"  // for GrepType
    9 #include "kernel.h"     // for KernelBuilder
    10 namespace IDISA { class IDISA_Builder; }  // lines 16-16
    11 namespace llvm { class Function; }  // lines 14-14
    12 namespace llvm { class Module; }  // lines 14-14
     8#include "grep_type.h"
     9#include "kernel.h"
     10namespace IDISA { class IDISA_Builder; }
     11namespace llvm { class Function; }
     12namespace llvm { class Module; }
    1313
    1414namespace kernel {
    1515   
    16 class ScanMatchKernel : public BlockOrientedKernel {
     16class ScanMatchKernel final : public BlockOrientedKernel {
    1717public:
    18     ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType);
     18    ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType, unsigned codeUnitWidth);
    1919protected:
     20    void generateInitMethod() override;
    2021    void generateDoBlockMethod() override;
     22    void generateFinalBlockMethod(llvm::Value * remainingItems) override;
    2123private:
    22     llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
    23 private:
    24     GrepType mGrepType;
     24    GrepType        mGrepType;
     25    const unsigned  mCodeUnitWidth;
    2526};
    2627}
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5307 r5377  
    1010using namespace llvm;
    1111
     12inline static unsigned ceil_log2(const unsigned v) {
     13    assert ("log2(0) is undefined!" && v != 0);
     14    return 32 - __builtin_clz(v - 1);
     15}
     16
    1217namespace kernel {
    13    
    14 void StdInKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     18
     19void StdInKernel::generateDoSegmentMethod(Value * /* doFinal */, const std::vector<Value *> & /* producerPos */) {
    1520
    1621    BasicBlock * setTermination = CreateBasicBlock("setTermination");
    1722    BasicBlock * stdInExit = CreateBasicBlock("stdInExit");
    18 //    ConstantInt * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
    19     ConstantInt * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    20     ConstantInt * segmentBytes = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth() * mCodeUnitWidth/8);
    21     ConstantInt * stdin_fileno = iBuilder->getInt32(STDIN_FILENO);
    22     Value * produced = getProducedItemCount("codeUnitBuffer");
    23 //    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(produced, blockItems), itemBytes);
    24 //    Value * bytePtr = getRawItemPointer("codeUnitBuffer", iBuilder->getInt32(0), produced);
    25     Value * bytePtr = getOutputStream("codeUnitBuffer", iBuilder->getInt32(0));
     23    ConstantInt * segmentItems = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth() / mCodeUnitWidth);
     24    ConstantInt * segmentItems2 = iBuilder->getSize(2 * mSegmentBlocks * iBuilder->getBitBlockWidth() / mCodeUnitWidth);
     25    // on the first segment, we buffer twice the data necessary to ensure that we can safely check for a non-LF line break
     26    Value * itemsRead = getProducedItemCount("codeUnitBuffer");
     27    Value * isFirst = iBuilder->CreateICmpEQ(itemsRead, iBuilder->getSize(0));
     28    Value * itemsToRead = iBuilder->CreateSelect(isFirst, segmentItems2, segmentItems);
     29
     30    Value * segmentBytes = reserveItemCount("codeUnitBuffer", itemsToRead);
     31    Value * bytePtr =  getOutputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0));
    2632    bytePtr = iBuilder->CreatePointerCast(bytePtr, iBuilder->getInt8PtrTy());
     33    Value * bytesRead = iBuilder->CreateReadCall(iBuilder->getInt32(STDIN_FILENO), bytePtr, segmentBytes);
     34    itemsRead = iBuilder->CreateAdd(itemsRead, iBuilder->CreateUDiv(bytesRead, iBuilder->getSize(mCodeUnitWidth / 8)));
    2735
    28 
    29    
    30     Value * nRead = iBuilder->CreateReadCall(stdin_fileno, bytePtr, segmentBytes);
    31     Value * bytesRead = iBuilder->CreateSelect(iBuilder->CreateICmpSLT(nRead, iBuilder->getSize(0)), iBuilder->getSize(0), nRead);
    32     produced = iBuilder->CreateAdd(produced, iBuilder->CreateUDiv(bytesRead, itemBytes));
    33     setProducedItemCount("codeUnitBuffer", produced);
    34     Value * lessThanFullSegment = iBuilder->CreateICmpULT(bytesRead, segmentBytes);
    35     iBuilder->CreateCondBr(lessThanFullSegment, setTermination, stdInExit);
     36    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(bytesRead, iBuilder->getSize(0)), setTermination, stdInExit);
    3637    iBuilder->SetInsertPoint(setTermination);
    3738    setTerminationSignal();
    3839    iBuilder->CreateBr(stdInExit);
    39    
    4040    iBuilder->SetInsertPoint(stdInExit);
    4141
    42    
     42    setProducedItemCount("codeUnitBuffer", itemsRead);
    4343}
    4444
     
    6363}
    6464   
    65 void FileSource::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
     65void FileSource::generateDoSegmentMethod(Value * /* doFinal */, const std::vector<Value *> & /* producerPos */) {
    6666
    6767    BasicBlock * closeFile = CreateBasicBlock("closeFile");
     
    7070   
    7171    Value * produced = getProducedItemCount("codeUnitBuffer");
    72     Value * bytePtr = getOutputStream("codeUnitBuffer", iBuilder->getInt32(0));
     72    Value * bytePtr = getOutputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0));
    7373    bytePtr = iBuilder->CreatePointerCast(bytePtr, iBuilder->getInt8PtrTy());
    7474
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.h

    r5292 r5377  
    1414namespace kernel {
    1515
    16 class StdInKernel : public SegmentOrientedKernel {
     16class StdInKernel final : public SegmentOrientedKernel {
    1717public:
    1818    StdInKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    1919protected:
    20     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     20    void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
    2121private:
    2222    unsigned mSegmentBlocks;
     
    2525   
    2626
    27 class FileSource : public SegmentOrientedKernel {
     27class FileSource final : public SegmentOrientedKernel {
    2828public:
    2929    FileSource(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    3030protected:
    31     void generateInitMethod() override final;
    32     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     31    void generateInitMethod() override;
     32    void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
    3333private:
    3434    unsigned mSegmentBlocks;
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5317 r5377  
    2929    Value * wraparound = nullptr;
    3030    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
    31         Value * accessible = b->getLinearlyAccessibleItems(processed);
     31        Value * instance = getStreamSetBufferPtr("codeUnitBuffer");
     32        Value * accessible = b->getLinearlyAccessibleItems(instance, processed);
    3233        wraparound = iBuilder->CreateICmpULT(accessible, itemsToDo);
    3334        itemsToDo = iBuilder->CreateSelect(wraparound, accessible, itemsToDo);
     
    99100    Value * wraparound = nullptr;
    100101    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
    101         Value * accessible = b->getLinearlyAccessibleItems(processed);
     102        Value * instance = getStreamSetBufferPtr("codeUnitBuffer");
     103        Value * accessible = b->getLinearlyAccessibleItems(instance, processed);
    102104        wraparound = iBuilder->CreateICmpULT(accessible, itemsToDo);
    103105        itemsToDo = iBuilder->CreateSelect(wraparound, accessible, itemsToDo);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5361 r5377  
    3636Value * StreamSetBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
    3737    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
    38     return iBuilder->CreateGEP(getStreamSetBlockPtr(self, blockIndex), {iBuilder->getInt32(0), streamIndex});
     38    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex});
    3939}
    4040
    4141Value * StreamSetBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
    4242    iBuilder->CreateAssert(iBuilder->CreateICmpULT(streamIndex, getStreamSetCount(self)), "StreamSetBuffer: out-of-bounds stream access");
    43     return iBuilder->CreateGEP(getStreamSetBlockPtr(self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
    44 }
    45 
    46 inline bool StreamSetBuffer::isCapacityGuaranteed(const llvm::Value * const index, const size_t capacity) const {
     43    return iBuilder->CreateGEP(getStreamSetBlockPtr(getBaseAddress(self), blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
     44}
     45
     46inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
    4747    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
    4848        if (LLVM_LIKELY(cast<ConstantInt>(index)->getLimitedValue() < capacity)) {
     
    5353}
    5454
    55 llvm::Value * StreamSetBuffer::getStreamSetCount(Value *) const {
     55Value * StreamSetBuffer::getStreamSetCount(Value *) const {
    5656    uint64_t count = 1;
    5757    if (isa<ArrayType>(mBaseType)) {
     
    6161}
    6262
    63 inline llvm::Value * StreamSetBuffer::modByBufferBlocks(llvm::Value * const offset) const {
     63inline Value * StreamSetBuffer::modByBufferBlocks(Value * const offset) const {
    6464    assert (offset->getType()->isIntegerTy());
    6565    if (isCapacityGuaranteed(offset, mBufferBlocks)) {
     
    8282 */
    8383Value * StreamSetBuffer::getRawItemPointer(Value * self, Value * streamIndex, Value * absolutePosition) const {
    84     Value * ptr = self;
     84    Value * ptr = getBaseAddress(self);
    8585    if (isa<ConstantInt>(streamIndex) && cast<ConstantInt>(streamIndex)->isZero()) {
    8686        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
     
    100100}
    101101
    102 Value * StreamSetBuffer::getLinearlyAccessibleItems(llvm::Value * fromPosition) const {
     102Value * StreamSetBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
    103103    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
    104104        Constant * stride = iBuilder->getSize(iBuilder->getStride());
    105105        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
    106     }
    107     else {
     106    } else {
    108107        Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
    109108        return iBuilder->CreateSub(bufSize, iBuilder->CreateURem(fromPosition, bufSize));
     
    111110}
    112111
    113 Value * StreamSetBuffer::getLinearlyAccessibleBlocks(llvm::Value * fromBlock) const {
     112Value * StreamSetBuffer::getLinearlyAccessibleBlocks(Value * self, Value * fromBlock) const {
    114113    Constant * bufBlocks = iBuilder->getSize(mBufferBlocks);
    115114    return iBuilder->CreateSub(bufBlocks, iBuilder->CreateURem(fromBlock, bufBlocks));
    116115}
    117116
     117Value * StreamSetBuffer::reserveItemCount(Value * self, llvm::Value * position, llvm::Value *requested) const {
     118    report_fatal_error("reserve() can only be used with ExtensibleBuffers");
     119}
     120
     121Value * StreamSetBuffer::getBaseAddress(Value * self) const {
     122    return self;
     123}
    118124
    119125// Single Block Buffer
     
    125131
    126132// External File Buffer
    127 void ExternalFileBuffer::setStreamSetBuffer(Value * ptr, Value * /* fileSize */) {
    128     mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, getPointerType());
    129 }
    130 
    131 void ExternalFileBuffer::setEmptyBuffer(Value * ptr) {   
     133void ExternalFileBuffer::setStreamSetBuffer(Value * ptr) {
    132134    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, getPointerType());
    133135}
     
    137139}
    138140
    139 Value * ExternalFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockNo) const {
    140     return iBuilder->CreateGEP(self, blockNo);
    141 }
    142 
    143 Value * ExternalFileBuffer::getLinearlyAccessibleItems(llvm::Value *) const {
     141Value * ExternalFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
     142    return iBuilder->CreateGEP(self, blockIndex);
     143}
     144
     145Value * ExternalFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
    144146    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
     147}
     148
     149// ExtensibleBuffer
     150Value * ExtensibleBuffer::getLinearlyAccessibleItems(Value * self, Value * fromPosition) const {
     151    Value * capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     152    Value * capacity = iBuilder->CreateLoad(capacityPtr);
     153    return iBuilder->CreateSub(capacity, fromPosition);
     154}
     155
     156void ExtensibleBuffer::allocateBuffer() {
     157    Type * ty = getType();
     158    Value * instance = iBuilder->CreateCacheAlignedAlloca(ty);
     159    ConstantInt * const capacity = iBuilder->getSize(mBufferBlocks);
     160    Value * const capacityPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     161    iBuilder->CreateStore(capacity, capacityPtr);
     162    Constant * const size = ConstantExpr::getMul(ConstantExpr::getSizeOf(ty), capacity);
     163    Value * addr = iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(iBuilder->CreateShl(size, 1), size), iBuilder->getCacheAlignment());
     164    iBuilder->CreateMemZero(addr, size, iBuilder->getCacheAlignment());
     165    Value * const addrPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     166    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
     167    iBuilder->CreateStore(addr, addrPtr);
     168    mStreamSetBufferPtr = instance;
     169}
     170
     171Value * ExtensibleBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
     172    return iBuilder->CreateGEP(self, blockIndex);
     173}
     174
     175Value * ExtensibleBuffer::reserveItemCount(Value * self, llvm::Value * position, llvm::Value * requested) const {
     176
     177    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     178    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
     179    Type * const intTy = capacity->getType();
     180    Constant * const blockSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(getType()->getStructElementType(1)), intTy, false);
     181    Constant * const blockSize2 = ConstantExpr::getMul(blockSize, ConstantInt::get(intTy, 2));
     182
     183    BasicBlock * const entry = iBuilder->GetInsertBlock();
     184    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
     185    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
     186
     187    Value * const reserved = iBuilder->CreateAdd(position, requested);
     188
     189    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(reserved, capacity), resume, expand);
     190
     191    iBuilder->SetInsertPoint(expand);
     192
     193    Value * const currentSize = iBuilder->CreateMul(capacity, blockSize);
     194    Value * const reservedSize = iBuilder->CreateMul(reserved, blockSize2);
     195
     196    Value * newAddr = iBuilder->CreateAlignedMalloc(reservedSize, iBuilder->getCacheAlignment());
     197    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     198    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
     199
     200    iBuilder->CreateMemCpy(newAddr, baseAddr, currentSize, iBuilder->getCacheAlignment());
     201    iBuilder->CreateAlignedFree(baseAddr);
     202    iBuilder->CreateMemZero(iBuilder->CreateGEP(newAddr, currentSize), iBuilder->CreateSub(reservedSize, currentSize), iBuilder->getCacheAlignment());
     203
     204    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
     205
     206    iBuilder->CreateStore(newAddr, baseAddrPtr);
     207    iBuilder->CreateStore(iBuilder->CreateShl(capacity, 1), capacityPtr);
     208
     209    iBuilder->CreateBr(resume);
     210
     211    iBuilder->SetInsertPoint(resume);
     212
     213    return iBuilder->CreateMul(requested, blockSize);
     214}
     215
     216Value * ExtensibleBuffer::getBaseAddress(Value * self) const {
     217    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    145218}
    146219
     
    237310
    238311Value * SwizzledCopybackBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
    239     assert (blockIndex->getType()->isIntegerTy());
    240    
    241     Value * offset = nullptr;
    242     if (mBufferBlocks == 1) {
    243         offset = ConstantInt::getNullValue(iBuilder->getSizeTy());
    244     } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
    245         offset = iBuilder->CreateAnd(blockIndex, ConstantInt::get(blockIndex->getType(), mBufferBlocks - 1));
    246     } else {
    247         offset = iBuilder->CreateURem(blockIndex, ConstantInt::get(blockIndex->getType(), mBufferBlocks));
    248     }
    249     return iBuilder->CreateGEP(self, offset);
    250 }
    251 
    252 SwizzledCopybackBuffer::SwizzledCopybackBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
     312    return iBuilder->CreateGEP(self, modByBufferBlocks(blockIndex));
     313}
     314
     315SwizzledCopybackBuffer::SwizzledCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned fieldwidth, unsigned AddressSpace)
    253316: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
    254317   
    255318}
    256 
    257 
    258319
    259320// Expandable Buffer
     
    272333}
    273334
    274 std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(llvm::Value * self, llvm::Value * streamIndex, Value * blockIndex, const bool readOnly) const {
     335std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
    275336
    276337    // ENTRY
     
    377438}
    378439
    379 llvm::Value * ExpandableBuffer::getStreamBlockPtr(llvm::Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
     440Value * ExpandableBuffer::getStreamBlockPtr(Value * self, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
    380441    Value * ptr, * offset;
    381442    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
     
    383444}
    384445
    385 llvm::Value * ExpandableBuffer::getStreamPackPtr(llvm::Value * self, llvm::Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
     446Value * ExpandableBuffer::getStreamPackPtr(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
    386447    Value * ptr, * offset;
    387448    std::tie(ptr, offset) = getInternalStreamBuffer(self, streamIndex, blockIndex, readOnly);
     
    389450}
    390451
    391 llvm::Value * ExpandableBuffer::getStreamSetCount(llvm::Value * self) const {
     452Value * ExpandableBuffer::getStreamSetCount(Value * self) const {
    392453    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     454}
     455
     456Value * ExpandableBuffer::getBaseAddress(Value * self) const {
     457    return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    393458}
    394459
     
    397462}
    398463
    399 Value * ExpandableBuffer::getLinearlyAccessibleItems(llvm::Value *) const {
     464Value * ExpandableBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
    400465    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
    401466}
    402467
    403468// Constructors
    404 SingleBlockBuffer::SingleBlockBuffer(IDISA::IDISA_Builder * b, llvm::Type * type)
     469SingleBlockBuffer::SingleBlockBuffer(IDISA::IDISA_Builder * b, Type * type)
    405470: StreamSetBuffer(BufferKind::BlockBuffer, b, type, resolveStreamSetType(b, type), 1, 0) {
    406471
    407472}
    408473
    409 ExternalFileBuffer::ExternalFileBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, unsigned AddressSpace)
     474ExternalFileBuffer::ExternalFileBuffer(IDISA::IDISA_Builder * b, Type * type, unsigned AddressSpace)
    410475: StreamSetBuffer(BufferKind::ExternalFileBuffer, b, type, resolveStreamSetType(b, type), 0, AddressSpace) {
    411476
    412477}
    413478
    414 CircularBuffer::CircularBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace)
     479ExtensibleBuffer::ExtensibleBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
     480: StreamSetBuffer(BufferKind::ExtensibleBuffer, b, type, StructType::get(b->getSizeTy(), resolveStreamSetType(b, type)->getPointerTo(), nullptr), bufferBlocks, AddressSpace) {
     481
     482}
     483
     484CircularBuffer::CircularBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
    415485: StreamSetBuffer(BufferKind::CircularBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
    416486
    417487}
    418488
    419 CircularCopybackBuffer::CircularCopybackBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
     489CircularCopybackBuffer::CircularCopybackBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
    420490: StreamSetBuffer(BufferKind::CircularCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
    421491
    422492}
    423493
    424 ExpandableBuffer::ExpandableBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace)
     494ExpandableBuffer::ExpandableBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
    425495: StreamSetBuffer(BufferKind::ExpandableBuffer, b, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
    426496, mInitialCapacity(type->getArrayNumElements()) {
     
    438508
    439509}
     510
     511StreamSetBuffer::~StreamSetBuffer() { }
    440512
    441513// Helper routines
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5355 r5377  
    1919public:
    2020
    21     enum class BufferKind : unsigned {BlockBuffer, ExternalFileBuffer, CircularBuffer, CircularCopybackBuffer, SwizzledCopybackBuffer, ExpandableBuffer};
     21    enum class BufferKind : unsigned {BlockBuffer, ExternalFileBuffer, CircularBuffer, CircularCopybackBuffer, SwizzledCopybackBuffer, ExpandableBuffer, ExtensibleBuffer};
    2222
    2323    BufferKind getBufferKind() const {
     
    4444        return mStreamSetBufferPtr;
    4545    }
    46    
     46
    4747    virtual void allocateBuffer();
    4848
     
    5656
    5757    // The number of items that can be linearly accessed from a given logical stream position.
    58     virtual llvm::Value * getLinearlyAccessibleItems(llvm::Value * fromPosition) const;
    59     virtual llvm::Value * getLinearlyAccessibleBlocks(llvm::Value * fromBlock) const;
     58    virtual llvm::Value * getLinearlyAccessibleItems(llvm::Value * self, llvm::Value * fromPosition) const;
     59    virtual llvm::Value * getLinearlyAccessibleBlocks(llvm::Value * self, llvm::Value * fromBlock) const;
     60
     61    virtual llvm::Value * reserveItemCount(llvm::Value * self, llvm::Value * position, llvm::Value * requested) const;
     62
     63    virtual ~StreamSetBuffer() = 0;
     64
    6065protected:
    6166
     
    6873
    6974    llvm::Value * modByBufferBlocks(llvm::Value * const offset) const;
     75
     76    virtual llvm::Value * getBaseAddress(llvm::Value * self) const;
    7077
    7178protected:
     
    7986};   
    8087
    81 class SingleBlockBuffer : public StreamSetBuffer {
     88class SingleBlockBuffer final : public StreamSetBuffer {
    8289public:
    8390    static inline bool classof(const StreamSetBuffer * b) {
     
    9198};
    9299
    93 class ExternalFileBuffer : public StreamSetBuffer {
     100class ExternalFileBuffer final : public StreamSetBuffer {
    94101public:
    95102    static inline bool classof(const StreamSetBuffer * b) {
     
    99106    ExternalFileBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, unsigned AddressSpace = 0);
    100107
    101     void setStreamSetBuffer(llvm::Value * ptr, llvm::Value * fileSize);
    102 
    103     void setEmptyBuffer(llvm::Value * buffer_ptr);
     108    void setStreamSetBuffer(llvm::Value * ptr);
    104109
    105110    // Can't allocate - raise an error.
    106111    void allocateBuffer() override;
    107112
    108     llvm::Value * getLinearlyAccessibleItems(llvm::Value * fromPosition) const override;
     113    llvm::Value * getLinearlyAccessibleItems(llvm::Value * self, llvm::Value * fromPosition) const override;
    109114   
    110115protected:
    111116    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockNo) const override;
    112117};
    113    
    114 class CircularBuffer : public StreamSetBuffer {
     118
     119class ExtensibleBuffer final : public StreamSetBuffer {
     120public:
     121    static inline bool classof(const StreamSetBuffer * b) {
     122        return b->getBufferKind() == BufferKind::ExtensibleBuffer;
     123    }
     124
     125    ExtensibleBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace = 0);
     126
     127    llvm::Value * getLinearlyAccessibleItems(llvm::Value * self,llvm::Value * fromPosition) const override;
     128
     129    void allocateBuffer() override;
     130
     131    llvm::Value * reserveItemCount(llvm::Value * self, llvm::Value * position, llvm::Value * requested) const override;
     132
     133protected:
     134
     135    llvm::Value * getBaseAddress(llvm::Value * self) const override;
     136
     137    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockNo) const override;
     138
     139};
     140   
     141class CircularBuffer final : public StreamSetBuffer {
    115142public:
    116143    static inline bool classof(const StreamSetBuffer * b) {
     
    132159//  Kernels that read from a CircularCopybackBuffer must not access the overflow area.
    133160//
    134 class CircularCopybackBuffer : public StreamSetBuffer {
     161class CircularCopybackBuffer final : public StreamSetBuffer {
    135162public:
    136163    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::CircularCopybackBuffer;}
     
    142169    // Generate copyback code for the given number of overflowItems.
    143170    void createCopyBack(llvm::Value * self, llvm::Value * overflowItems) const;
    144    
    145 
    146171       
    147        
    148172protected:
    149173    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockIndex) const override;
     
    153177};
    154178   
    155 class SwizzledCopybackBuffer : public StreamSetBuffer {
     179class SwizzledCopybackBuffer final : public StreamSetBuffer {
    156180public:
    157181    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::SwizzledCopybackBuffer;}
     
    174198// within their set whenever the index exceeds its capacity
    175199//
    176 // ExpandableBuffers do not allow access to the base stream set but will automatically increase the number of streams
    177 // within their set whenever the index exceeds its capacity
    178 //
    179 class ExpandableBuffer : public StreamSetBuffer {
     200class ExpandableBuffer final : public StreamSetBuffer {
    180201public:
    181202    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::ExpandableBuffer;}
     
    187208    llvm::Value * getStreamPackPtr(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * blockIndex, llvm::Value * packIndex, const bool readOnly) const override;
    188209
    189     llvm::Value * getLinearlyAccessibleItems(llvm::Value * fromPosition) const override;
     210    llvm::Value * getLinearlyAccessibleItems(llvm::Value * self, llvm::Value * fromPosition) const override;
    190211
    191212    void allocateBuffer() override;
     
    194215
    195216protected:
     217
     218    llvm::Value * getBaseAddress(llvm::Value * self) const override;
    196219
    197220    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockIndex) const override;
  • icGREP/icgrep-devel/icgrep/preprocess.cpp

    r5360 r5377  
    6868
    6969    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    70     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     70    ByteStream.setStreamSetBuffer(inputStream);
    7171    ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    72     MatchResults.setStreamSetBuffer(lineBreak, fileSize);
     72    MatchResults.setStreamSetBuffer(lineBreak);
    7373
    7474    kernel::MMapSourceKernel mmapK(iBuilder, segmentSize);
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5373 r5377  
    183183
    184184    // Use the pass manager to optimize the function.
     185    #ifndef NDEBUG
     186    try {
     187    #endif
    185188    legacy::PassManager PM;
    186189    #ifndef NDEBUG
     
    192195    PM.add(createCFGSimplificationPass());   
    193196    PM.run(*m);
    194 
     197    #ifndef NDEBUG
     198    } catch (...) { m->dump(); throw; }
     199    #endif
    195200    InitializeNativeTarget();
    196201    InitializeNativeTargetAsmPrinter();
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5373 r5377  
    351351    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    352352
    353     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     353    ByteStream.setStreamSetBuffer(inputStream);
    354354    BasisBits.allocateBuffer();
    355355    U8u16Bits.allocateBuffer();
     
    368368
    369369    if (mMapBuffering || memAlignBuffering) {
    370         U16external.setEmptyBuffer(outputStream);
     370        U16external.setStreamSetBuffer(outputStream);
    371371    } else {
    372372        U16out.allocateBuffer();
     
    455455    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    456456   
    457     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     457    ByteStream.setStreamSetBuffer(inputStream);
    458458    BasisBits.allocateBuffer();
    459459    U8u16Bits.allocateBuffer();
     
    463463    DeletionCounts.allocateBuffer();
    464464    if (mMapBuffering || memAlignBuffering) {
    465         U16external.setEmptyBuffer(outputStream);
     465        U16external.setStreamSetBuffer(outputStream);
    466466    } else {
    467467        U16out.allocateBuffer();
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5373 r5377  
    170170    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    171171
    172     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     172    ByteStream.setStreamSetBuffer(inputStream);
    173173    BasisBits.allocateBuffer();
    174174   
Note: See TracChangeset for help on using the changeset viewer.