Changeset 5001


Ignore:
Timestamp:
Apr 6, 2016, 1:03:48 PM (20 months ago)
Author:
nmedfort
Message:

Symbol table work

Location:
icGREP/icgrep-devel/icgrep
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.cpp

    r4995 r5001  
    7878        printRegister = function;
    7979    }
    80     assert (value->getType()->isVectorTy());
     80    assert (value->getType()->canLosslesslyBitCastTo(mBitBlockType));
    8181    CreateCall2(printRegister, geti8StrVal(*mMod, name.c_str(), name), CreateBitCast(value, mBitBlockType));
    8282}
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_target.cpp

    r4984 r5001  
    3939    }
    4040    else if (blockSize == 64)
    41         return new IDISA::IDISA_I64_Builder(mod, bitBlockType); 
     41        return new IDISA::IDISA_I64_Builder(mod, bitBlockType);
    4242    return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
    4343}
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r4986 r5001  
    5353
    5454
    55 bool GrepEngine::finalLineIsUnterminated(char * mFileBuffer, size_t mFileSize) const {
    56     if (mFileSize == 0) return false;
    57     unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
     55bool GrepEngine::finalLineIsUnterminated(const char * const fileBuffer, const size_t fileSize) {
     56    if (fileSize == 0) return false;
     57    unsigned char end_byte = static_cast<unsigned char>(fileBuffer[fileSize-1]);
    5858    // LF through CR are line break characters
    5959    if ((end_byte >= 0xA) && (end_byte <= 0xD)) return false;
    6060    // Other line breaks require at least two bytes.
    61     if (mFileSize == 1) return true;
     61    if (fileSize == 1) return true;
    6262    // NEL
    63     unsigned char penult_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-2]);
     63    unsigned char penult_byte = static_cast<unsigned char>(fileBuffer[fileSize-2]);
    6464    if ((end_byte == 0x85) && (penult_byte == 0xC2)) return false;
    65     if (mFileSize == 2) return true;
     65    if (fileSize == 2) return true;
    6666    // LS and PS
    6767    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
    68     return (static_cast<unsigned char>(mFileBuffer[mFileSize-3]) != 0xE2) || (penult_byte != 0x80);
     68    return (static_cast<unsigned char>(fileBuffer[fileSize-3]) != 0xE2) || (penult_byte != 0x80);
    6969}
    7070
    7171void GrepEngine::doGrep(const std::string & fileName) {
    72     std::string mFileName = fileName;
    73     size_t mFileSize;
    74     char * mFileBuffer;
    75 
    76     const path file(mFileName);
     72    const path file(fileName);
    7773    if (exists(file)) {
    7874        if (is_directory(file)) {
     
    8076        }
    8177    } else {
    82         std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
     78        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    8379        return;
    8480    }
    8581
    86     mFileSize = file_size(file);
    87     mapped_file mFile;
    88     if (mFileSize == 0) {
    89         mFileBuffer = nullptr;
     82    const size_t fileSize = file_size(file);
     83    if (fileSize > 0) {
     84        mapped_file file;
     85        try {
     86            file.open(fileName, mapped_file::priv, fileSize, 0);
     87        } catch (std::ios_base::failure e) {
     88            throw std::runtime_error("Boost mmap error: " + fileName + ": " + e.what());
     89        }
     90        char * const fileBuffer = file.data();
     91        mGrepFunction(fileBuffer, fileSize, fileName.c_str(), finalLineIsUnterminated(fileBuffer, fileSize));
     92        file.close();
    9093    }
    91     else {
    92         try {
    93             mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
    94         } catch (std::ios_base::failure e) {
    95             std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
    96             return;
    97         }
    98         mFileBuffer = mFile.data();
    99     }
    100    
    101     uint64_t finalLineUnterminated = 0;
    102     if(finalLineIsUnterminated(mFileBuffer, mFileSize))
    103         finalLineUnterminated = 1;
    104    
    105     mGrepFunction(mFileBuffer, mFileSize, mFileName.c_str(), finalLineUnterminated);
    106 
    107 
    108     mFile.close();
    109 
    11094}
    11195
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r4986 r5001  
    3131private:
    3232   
    33     bool finalLineIsUnterminated(char * fileBuffer, size_t fileSize) const;
     33    static bool finalLineIsUnterminated(const char * const fileBuffer, const size_t fileSize);
    3434
    3535    GrepFunctionType mGrepFunction;
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5000 r5001  
    914914kernels/lane_s2p_kernel.cpp
    915915kernels/lane_s2p_kernel.h
    916 lane_icgrep.cpp
    917 lane_grep_engine.h
    918 lane_grep_engine.cpp
     916lanes.cpp
    919917kernels/instance.cpp
     918casefold.cpp
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5000 r5001  
    2525, mBlockNoIndex(0) {
    2626    assert (mDefaultBufferSize > 0);
    27     mBlockNoIndex = iBuilder->getInt32(addInternalState(builder->getInt64Ty(), "BlockNo"));
    2827}
    2928
     
    114113        return iBuilder->CreateGEP(inputStreamSet, { iBuilder->getInt32(0), index });
    115114    }
    116     #ifndef NDEBUG
    117     iBuilder->getModule()->dump();
    118     #endif
    119115    throw std::runtime_error("Cannot access the input stream with a non-constant value unless all input stream types are identical!");
    120116}
     
    200196 ** ------------------------------------------------------------------------------------------------------------- */
    201197Function * KernelBuilder::prepareFunction(std::vector<unsigned> && inputStreamOffsets) {
     198
     199    mBlockNoIndex = iBuilder->getInt32(addInternalState(iBuilder->getInt64Ty(), "BlockNo"));
    202200
    203201    mKernelStateType = StructType::create(iBuilder->getContext(), mInternalState, mKernelName);
     
    306304 ** ------------------------------------------------------------------------------------------------------------- */
    307305Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {   
    308     throw std::runtime_error("Not supported!");
    309 //    AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType);
    310 //    unsigned i = 0;
    311 //    for (Value * inputStream : inputStreams) {
    312 //        Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
    313 //        iBuilder->CreateStore(iBuilder->CreatePointerCast(inputStream, ptr);
    314 //    }
    315 //    return instantiate(std::make_pair(inputStruct, 0));
     306    AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType);
     307    unsigned i = 0;
     308    for (Value * inputStream : inputStreams) {
     309        Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
     310        iBuilder->CreateStore(inputStream, ptr);
     311    }
     312    return instantiate(std::make_pair(inputStruct, 0));
    316313}
    317314
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5000 r5001  
    6262    Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);
    6363
    64     Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", Type::getVoidTy(mMod->getContext()), inputType, int64ty, int8PtrTy, int64ty, nullptr));
     64    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", Type::getVoidTy(mMod->getContext()), inputType, int64ty, int8PtrTy, iBuilder->getInt1Ty(), nullptr));
    6565    main->setCallingConv(CallingConv::C);
    6666    Function::arg_iterator args = main->arg_begin();
     
    7676
    7777    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
     78
    7879
    7980    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     
    9899    Instance * scanMatchInstance = mScanMatchKernel->instantiate(icGrepInstance->getResultSet());
    99100
    100     Value * ptr = iBuilder->CreateBitCast(inputStream, int8PtrTy);
    101 
    102     scanMatchInstance->setInternalState("FileBuf", ptr);
     101    scanMatchInstance->setInternalState("FileBuf", iBuilder->CreateBitCast(inputStream, int8PtrTy));
    103102    scanMatchInstance->setInternalState("FileSize", bufferSize);
    104103    scanMatchInstance->setInternalState("FileName", fileName);
     
    167166
    168167    iBuilder->SetInsertPoint(endBlock);
    169     Value * isFinalLineUnterminated = iBuilder->CreateICmpEQ(finalLineUnterminated, ConstantInt::get(int64ty, 0));
     168    Value * isFinalLineUnterminated = iBuilder->CreateICmpEQ(finalLineUnterminated, ConstantInt::getNullValue(finalLineUnterminated->getType()));
    170169    iBuilder->CreateCondBr(isFinalLineUnterminated, exitBlock, unterminatedBlock);
    171170   
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.cpp

    r5000 r5001  
    1212#include <kernels/s2p_kernel.h>
    1313#include <kernels/instance.h>
    14 #include <kernels/stdout_kernel.h>
    1514
    1615#include <pablo/function.h>
     
    181180    const unsigned vectorWidth = iBuilder->getBitBlockWidth() / 32;
    182181    const unsigned gatherCount = vectorWidth * 4;
    183     Type * const transposedVectorType = VectorType::get(iBuilder->getInt8Ty(), iBuilder->getBitBlockWidth() / 8);
    184182
    185183    Type * startArrayType = ArrayType::get(iBuilder->getInt32Ty(), iBuilder->getBitBlockWidth() + gatherCount);
     
    230228    unsigned minKeyLength = 0;
    231229    for (unsigned maxKeyLength : endpoints) {
    232         Function * f = generateGatherFunction(minKeyLength, maxKeyLength, transposedVectorType);
     230        Function * f = generateGatherFunction(minKeyLength, maxKeyLength);
    233231        mGatherFunction.push_back(f);
    234232        iBuilder->CreateStore(f, iBuilder->CreateGEP(gatherFunctionPtrArray, iBuilder->getInt32(i++)));
     
    279277    PHINode * startBlockOffset = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
    280278    startBlockOffset->addIncoming(blockPos, groupBody);
    281     PHINode * startIndexPhi1 = iBuilder->CreatePHI(startIndex->getType(), 2);
     279    PHINode * startIndexPhi1 = iBuilder->CreatePHI(startIndex->getType(), 2, "startIndexPhi1");
    282280    startIndexPhi1->addIncoming(startIndex, groupBody);
    283281    PHINode * startIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     
    295293    // START INNER COND
    296294    iBuilder->SetInsertPoint(startInnerCond);
    297     PHINode * startIndexPhi3 = iBuilder->CreatePHI(startIndex->getType(), 2);
    298     startIndexPhi3->addIncoming(startIndexPhi1, startOuterBody);
    299     startIndexPhi1->addIncoming(startIndexPhi3, startInnerCond);
     295    PHINode * startIndexPhi2 = iBuilder->CreatePHI(startIndex->getType(), 2, "startIndexPhi2");
     296    startIndexPhi2->addIncoming(startIndexPhi1, startOuterBody);
     297    startIndexPhi1->addIncoming(startIndexPhi2, startInnerCond);
    300298    PHINode * startFieldPhi = iBuilder->CreatePHI(intScanWordTy, 2);
    301299    startFieldPhi->addIncoming(startField, startOuterBody);
     
    308306    startFieldPhi->addIncoming(generateResetLowestBit(iBuilder, startFieldPhi), startInnerBody);
    309307    startPos = iBuilder->CreateTruncOrBitCast(iBuilder->CreateOr(startPos, startBlockOffset), iBuilder->getInt32Ty());
    310     iBuilder->CreateStore(startPos, iBuilder->CreateGEP(startArray, {iBuilder->getInt32(0), startIndexPhi3}));
    311     startIndexPhi3->addIncoming(iBuilder->CreateAdd(startIndexPhi3, ConstantInt::get(startIndexPhi3->getType(), 1)), startInnerBody);
     308    iBuilder->CreateStore(startPos, iBuilder->CreateGEP(startArray, {iBuilder->getInt32(0), startIndexPhi2}));
     309    startIndexPhi2->addIncoming(iBuilder->CreateAdd(startIndexPhi2, ConstantInt::get(startIndexPhi2->getType(), 1)), startInnerBody);
    312310    iBuilder->CreateBr(startInnerCond);
    313311
     
    318316    PHINode * endIndexPhi1 = iBuilder->CreatePHI(endIndex->getType(), 2);
    319317    endIndexPhi1->addIncoming(endIndex, startOuterCond);
    320     PHINode * startIndexPhi2 = iBuilder->CreatePHI(startIndex->getType(), 2);
    321     startIndexPhi2->addIncoming(startIndexPhi1, startOuterCond);
     318    PHINode * startIndexPhi3 = iBuilder->CreatePHI(startIndex->getType(), 2, "startIndexPhi3");
     319    startIndexPhi3->addIncoming(startIndexPhi1, startOuterCond);
    322320    PHINode * endIV = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
    323321    endIV->addIncoming(iBuilder->getInt64(0), startOuterCond);
     
    334332    // END POINT INNER COND
    335333    iBuilder->SetInsertPoint(endInnerCond);
    336     startIndexPhi3 = iBuilder->CreatePHI(startIndexPhi2->getType(), 3);
    337     startIndexPhi3->addIncoming(startIndexPhi2, endOuterBody);
    338     startIndexPhi3->addIncoming(startIndexPhi3, endInnerBody);
    339     startIndexPhi2->addIncoming(startIndexPhi3, endInnerCond);
     334    PHINode * startIndexPhi4 = iBuilder->CreatePHI(startIndexPhi3->getType(), 3, "startIndexPhi4");
     335    startIndexPhi4->addIncoming(startIndexPhi3, endOuterBody);
     336    startIndexPhi4->addIncoming(startIndexPhi4, endInnerBody);
     337    startIndexPhi3->addIncoming(startIndexPhi4, endInnerCond);
    340338    PHINode * endIndexPhi2 = iBuilder->CreatePHI(endIndex->getType(), 3);
    341339    endIndexPhi2->addIncoming(endIndexPhi1, endOuterBody);
     
    370368
    371369    Value * remainingArrayPtr = iBuilder->CreateGEP(startArrayPtr, iBuilder->getInt32(gatherCount));
    372     Value * remainingCount = iBuilder->CreateSub(startIndexPhi3, iBuilder->getInt32(gatherCount));
    373     iBuilder->CreateMemCpy(startArrayPtr, remainingArrayPtr, remainingCount, 4);
    374     startIndexPhi3->addIncoming(remainingCount, gather);
     370    Value * remainingCount = iBuilder->CreateSub(startIndexPhi4, iBuilder->getInt32(gatherCount));
     371    Value * remainingBytes = iBuilder->CreateMul(remainingCount, iBuilder->getInt32(4));
     372    iBuilder->CreateMemMove(startArrayPtr, remainingArrayPtr, remainingBytes, 4);
     373    startIndexPhi4->addIncoming(remainingCount, gather);
    375374    iBuilder->CreateBr(endInnerCond);
    376375
    377376    // NEXT GROUP
    378377    iBuilder->SetInsertPoint(nextGroup);
    379     iBuilder->CreateStore(startIndexPhi2, startIndexPtr);
     378    iBuilder->CreateStore(startIndexPhi3, startIndexPtr);
    380379    iBuilder->CreateStore(endIndexPhi1, endIndexPtr);
    381380    groupIV->addIncoming(iBuilder->CreateAdd(groupIV, ConstantInt::get(groupIV->getType(), 1)), nextGroup);
     
    389388 * @brief generateGatherFunction
    390389 ** ------------------------------------------------------------------------------------------------------------- */
    391 Function * SymbolTableBuilder::generateGatherFunction(const unsigned minKeyLength, const unsigned maxKeyLength, Type * const resultType) {
     390Function * SymbolTableBuilder::generateGatherFunction(const unsigned minKeyLength, const unsigned maxKeyLength) {
    392391
    393392    assert (minKeyLength < maxKeyLength);
     
    404403        const unsigned vectorWidth = iBuilder->getBitBlockWidth() / 32;
    405404        Type * const gatherVectorType =  VectorType::get(iBuilder->getInt32Ty(), vectorWidth);
    406         Type * const gatherVectorArrayType = ArrayType::get(gatherVectorType, maxCount);
     405        const unsigned gatherByteWidth = gatherVectorType->getPrimitiveSizeInBits() / 8;
     406        Type * const transposedVectorType = VectorType::get(iBuilder->getInt8Ty(), iBuilder->getBitBlockWidth() / 8);
     407        const unsigned transposedByteWidth = transposedVectorType->getPrimitiveSizeInBits() / 8;
     408
    407409
    408410        Type * const int32PtrTy = PointerType::get(iBuilder->getInt32Ty(), 0);
     
    440442        // ENTRY
    441443        iBuilder->SetInsertPoint(entry);
    442         AllocaInst * const buffer = iBuilder->CreateAlloca(resultType, iBuilder->getInt32(maxCount * 4), "buffer");
    443         iBuilder->CreateStore(Constant::getNullValue(buffer->getAllocatedType()), buffer);
    444         AllocaInst * const untransposedBuffer = iBuilder->CreateAlloca(gatherVectorArrayType, iBuilder->getInt32(4), "tmp");
    445         iBuilder->CreateStore(Constant::getNullValue(untransposedBuffer->getAllocatedType()), untransposedBuffer);
     444
     445        AllocaInst * const buffer = iBuilder->CreateAlloca(gatherVectorType, iBuilder->getInt32(maxCount * 4), "buffer");
     446        Value * end = iBuilder->CreateGEP(buffer, {iBuilder->getInt32(maxCount * 4)});
     447        Value * size = iBuilder->CreateSub(iBuilder->CreatePtrToInt(end, iBuilder->getInt64Ty()), iBuilder->CreatePtrToInt(buffer, iBuilder->getInt64Ty()));
     448        iBuilder->CreateMemSet(buffer, iBuilder->getInt8(0), size, 4);
     449        Value * const transposed = iBuilder->CreateBitCast(buffer, transposedVectorType->getPointerTo(), "transposed");
     450
     451        startArray = iBuilder->CreateBitCast(startArray, gatherVectorType->getPointerTo());
     452        endArray = iBuilder->CreateBitCast(endArray, gatherVectorType->getPointerTo());
     453
     454        iBuilder->CallPrintInt(functionName + ".numOfKeys", numOfKeys);
     455
    446456        iBuilder->CreateBr(gatherCond);
    447457
     
    450460        PHINode * remainingLanes = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    451461        remainingLanes->addIncoming(numOfKeys, entry);
     462
    452463        PHINode * gatherIV = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    453464        gatherIV->addIncoming(iBuilder->getInt32(0), entry);
    454         Value * gatherLoopTest = iBuilder->CreateICmpNE(gatherIV, iBuilder->getInt32(4));
    455         iBuilder->CreateCondBr(gatherLoopTest, partialGatherCond, transposeCond);
     465
     466        Value * gatherLoopTest = iBuilder->CreateICmpSGE(remainingLanes, iBuilder->getInt32(vectorWidth));
     467        iBuilder->CreateCondBr(gatherLoopTest, gatherBody, partialGatherCond);
    456468
    457469        // PARTIAL GATHER COND
    458470        iBuilder->SetInsertPoint(partialGatherCond);
    459         Value * partialGatherLoopTest = iBuilder->CreateICmpUGE(remainingLanes, iBuilder->getInt32(vectorWidth));
    460         iBuilder->CreateCondBr(partialGatherLoopTest, gatherBody, partialGatherBody);
     471        Value * partialGatherLoopTest = iBuilder->CreateICmpSLE(remainingLanes, iBuilder->getInt32(0));
     472        iBuilder->CreateCondBr(partialGatherLoopTest, transposeCond, partialGatherBody);
    461473
    462474        // PARTIAL GATHER BODY
    463475        iBuilder->SetInsertPoint(partialGatherBody);
    464         iBuilder->CallPrintInt(functionName + ".remainingLanes", remainingLanes);
    465476        Type * registerType = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    466         Value * maskedLanes = iBuilder->CreateSub(iBuilder->getInt32(vectorWidth), remainingLanes);
     477        Value * maskedLanes = iBuilder->CreateSub(iBuilder->getInt32(vectorWidth), remainingLanes);       
    467478        maskedLanes = iBuilder->CreateMul(maskedLanes, iBuilder->getInt32(32));
    468479        maskedLanes = iBuilder->CreateZExt(maskedLanes, registerType);
    469480        maskedLanes = iBuilder->CreateLShr(Constant::getAllOnesValue(registerType), maskedLanes);
    470481        maskedLanes = iBuilder->CreateBitCast(maskedLanes, gatherVectorType);
    471 
    472482        iBuilder->CreateBr(gatherBody);
    473483
     
    475485        iBuilder->SetInsertPoint(gatherBody);
    476486        PHINode * activeLanes = iBuilder->CreatePHI(gatherVectorType, 2, "activeLanes");
    477         activeLanes->addIncoming(Constant::getAllOnesValue(gatherVectorType), partialGatherCond);
     487        activeLanes->addIncoming(Constant::getAllOnesValue(gatherVectorType), gatherCond);
    478488        activeLanes->addIncoming(maskedLanes, partialGatherBody);
    479489
    480         iBuilder->CallPrintRegister(functionName + ".activeLanes", activeLanes);
    481 
    482         startArray = iBuilder->CreateBitCast(startArray, PointerType::get(gatherVectorType, 0));
     490
    483491        Value * startPos = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(startArray, gatherIV), 4);
     492        Value * const endPos = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(endArray, gatherIV), 4);
     493
    484494        for (unsigned blockCount = 0; blockCount < minCount; ++blockCount) {
    485495            Value * tokenData = generateMaskedGather(base, startPos, activeLanes);
     496            Value * ptr = iBuilder->CreateOr(buffer, iBuilder->CreateOr(gatherIV, iBuilder->getInt32(blockCount * 4)));
     497            iBuilder->CreateAlignedStore(tokenData, ptr, transposedByteWidth);
    486498            startPos = iBuilder->CreateAdd(startPos, four);
    487             iBuilder->CreateAlignedStore(tokenData, iBuilder->CreateGEP(untransposedBuffer, {iBuilder->getInt32(blockCount), gatherIV}), 4);
    488499        }
    489500
    490         endArray = iBuilder->CreateBitCast(endArray, PointerType::get(gatherVectorType, 0));
    491         Value * const endPos = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(endArray, gatherIV), 4);
    492501        for (unsigned blockCount = minCount; blockCount < maxCount; ++blockCount) {
    493502
    494503            // if we have not fully gathered the data for this key
    495             Value * atLeastOneByte = iBuilder->CreateSExt(iBuilder->CreateICmpULT(startPos, endPos), startPos->getType());
    496             atLeastOneByte = iBuilder->CreateAnd(atLeastOneByte, activeLanes);
     504            Value * atLeastOneByte = iBuilder->CreateSExt(iBuilder->CreateICmpSLT(startPos, endPos), startPos->getType());
     505            atLeastOneByte = iBuilder->CreateAnd(atLeastOneByte, activeLanes, "atLeastOneByte");
    497506
    498507            // gather it ...
    499508            Value * tokenData = generateMaskedGather(base, startPos, atLeastOneByte);
     509
    500510            // and compute how much data is remaining.
    501511            Value * remaining = iBuilder->CreateSub(endPos, startPos);
    502512
    503513            // if this token only has 1 to 3 bytes remaining ...
    504             Value * atLeastFourBytes = iBuilder->CreateSExt(iBuilder->CreateICmpUGE(remaining, four), remaining->getType());
     514            Value * atLeastFourBytes = iBuilder->CreateSExt(iBuilder->CreateICmpUGE(remaining, four), remaining->getType(), "atLeastFourBytes");
    505515
    506516            // determine how many bits do *not* belong to the token
     
    511521            Value * partialTokenMask = iBuilder->CreateLShr(ConstantInt::getAllOnesValue(remaining->getType()), remaining);
    512522            partialTokenMask = iBuilder->CreateOr(partialTokenMask, atLeastFourBytes);
    513 
    514523            tokenData = iBuilder->CreateAnd(partialTokenMask, tokenData);
    515             Value * untransposedBufferPtr = iBuilder->CreateGEP(untransposedBuffer, {iBuilder->getInt32(blockCount), gatherIV});
    516             iBuilder->CreateAlignedStore(tokenData, untransposedBufferPtr, 4);
    517             if (blockCount < (maxCount - 1)) {
    518                 startPos = iBuilder->CreateAdd(startPos, four);
    519             }
     524            Value * ptr = iBuilder->CreateGEP(buffer, iBuilder->CreateOr(gatherIV, iBuilder->getInt32(blockCount * 4)));
     525            iBuilder->CreateAlignedStore(tokenData, ptr, transposedByteWidth);
     526
     527            startPos = iBuilder->CreateAdd(startPos, four);
    520528        }
     529
    521530        gatherIV->addIncoming(iBuilder->CreateAdd(gatherIV, iBuilder->getInt32(1)), gatherBody);
    522531        remainingLanes->addIncoming(iBuilder->CreateSub(remainingLanes, iBuilder->getInt32(vectorWidth)), gatherBody);
     
    526535        iBuilder->SetInsertPoint(transposeCond);
    527536        PHINode * transposeIV = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    528         transposeIV->addIncoming(iBuilder->getInt32(0), gatherCond);
     537        transposeIV->addIncoming(iBuilder->getInt32(0), partialGatherCond);
    529538        Value * transposeLoopTest = iBuilder->CreateICmpNE(transposeIV, iBuilder->getInt32(maxCount));
    530539        iBuilder->CreateCondBr(transposeLoopTest, transposeBody, exit);
     
    533542        iBuilder->SetInsertPoint(transposeBody);
    534543
     544        Value * offset = iBuilder->CreateMul(transposeIV, iBuilder->getInt32(4));
     545
    535546        Value * value[4];
    536         Value * temporary[4];
    537547        for (unsigned i = 0; i < 4; ++i) {
    538             Value * const ptr = iBuilder->CreateGEP(untransposedBuffer, {transposeIV, iBuilder->getInt32(i)});
    539             value[i] = iBuilder->CreateAlignedLoad(ptr, 4);
     548            Value * const ptr = iBuilder->CreateGEP(buffer, iBuilder->CreateAdd(offset, iBuilder->getInt32(i)));
     549            value[i] = iBuilder->CreateLoad(ptr);
    540550        }
    541         for (unsigned fieldWidth = 16; fieldWidth != 4; fieldWidth /= 2) {
     551
     552        for (unsigned byteWidth = 2; byteWidth; --byteWidth) {
     553            const unsigned fieldWidth = (byteWidth * 8);
    542554            const unsigned fieldCount = iBuilder->getBitBlockWidth() / fieldWidth;
    543             VectorType * const vecType = VectorType::get(IntegerType::get(mMod->getContext(), fieldWidth), fieldCount);
    544             std::vector<Constant *> lowFields(fieldCount);
    545             std::vector<Constant *> highFields(fieldCount);
     555            VectorType * const type = VectorType::get(Type::getIntNTy(iBuilder->getContext(), fieldWidth), fieldCount);
     556            std::vector<Constant *> even(fieldCount);
     557            std::vector<Constant *> odd(fieldCount);
    546558            for (unsigned j = 0; j < fieldCount; ++j) {
    547                 lowFields[j] = iBuilder->getInt32(j * 2);
    548                 highFields[j] = iBuilder->getInt32(j * 2 + 1);
     559                even[j] = iBuilder->getInt32(j * 2);
     560                odd[j] = iBuilder->getInt32(j * 2 + 1);
    549561            }
    550             Constant * const lowVector = ConstantVector::get(lowFields);
    551             Constant * const highVector = ConstantVector::get(highFields);
     562            Constant * const evenVector = ConstantVector::get(even);
     563            Constant * const oddVector = ConstantVector::get(odd);
     564            Value * result[4];
    552565            for (unsigned i = 0; i < 4; i += 2) {
    553                 value[i] = iBuilder->CreateBitCast(value[i], vecType);
    554                 value[i + 1] = iBuilder->CreateBitCast(value[i + 1], vecType);
    555                 temporary[i / 2] = iBuilder->CreateShuffleVector(value[i], value[i + 1], lowVector);
    556                 temporary[(i / 2) + 2] = iBuilder->CreateShuffleVector(value[i], value[i + 1], highVector);
     566                value[i] = iBuilder->CreateBitCast(value[i], type);
     567                value[i + 1] = iBuilder->CreateBitCast(value[i + 1], type);
     568                result[(i / byteWidth)] = iBuilder->CreateShuffleVector(value[i], value[i + 1], evenVector);
     569                result[(i / byteWidth) + byteWidth] = iBuilder->CreateShuffleVector(value[i], value[i + 1], oddVector);
    557570            }
    558             std::swap(value, temporary);
     571            for (unsigned i = 0; i < 4; ++i) {
     572                value[i] = result[i];
     573            }
    559574        }
    560         Value * offset = iBuilder->CreateShl(transposeIV, ConstantInt::get(transposeIV->getType(), 2));
     575
     576        for (unsigned i = 0; i < 4; ++i) {
     577            Value * ptr = iBuilder->CreateGEP(transposed, iBuilder->CreateAdd(offset, iBuilder->getInt32(i)));
     578            iBuilder->CreateAlignedStore(value[i], ptr, gatherByteWidth);
     579        }
     580
    561581        transposeIV->addIncoming(iBuilder->CreateAdd(transposeIV, iBuilder->getInt32(1)), transposeBody);
    562 
    563         for (unsigned i = 0; i < 4; ++i) {
    564             Value * index = offset;
    565             if (i) {
    566                 index = iBuilder->CreateAdd(offset, iBuilder->getInt32(i));
    567             }
    568             iBuilder->CallPrintRegister(functionName, value[i]);
    569             iBuilder->CreateAlignedStore(value[i], iBuilder->CreateGEP(buffer, index), 4);
    570         }
    571 
    572         Value * emptyGatherTest = iBuilder->CreateICmpUGT(remainingLanes, iBuilder->getInt32(0));
    573         iBuilder->CreateCondBr(emptyGatherTest, transposeCond, exit);
     582        iBuilder->CreateBr(transposeCond);
    574583
    575584        // EXIT
     
    578587        // ... call hashing function ...
    579588
     589        for (unsigned i = 0; i < maxKeyLength; ++i) {
     590            Value * ptr = iBuilder->CreateGEP(transposed, iBuilder->getInt32(i));
     591            Value * value = iBuilder->CreateAlignedLoad(ptr, gatherByteWidth);
     592            iBuilder->CallPrintRegister(functionName + ".output" + std::to_string(i), value);
     593        }
    580594
    581595        iBuilder->CreateRetVoid();
     596
     597        function->dump();
    582598
    583599        iBuilder->restoreIP(ip);
     
    594610
    595611    std::vector<unsigned> endpoints;
    596     endpoints.push_back(1);
    597     endpoints.push_back(2);
    598     endpoints.push_back(4);
    599612    endpoints.push_back(8);
    600     endpoints.push_back(16);
     613    endpoints.push_back(17);
     614    endpoints.push_back(27);
    601615
    602616    PabloCompiler pablo_compiler(mMod, iBuilder);
     
    610624    mSortingKernel = new KernelBuilder(iBuilder, "sorting", bufferSize);
    611625    mGatherKernel = new KernelBuilder(iBuilder, "gathering", 1);
    612     mStdOutKernel = new KernelBuilder(iBuilder, "stddout", 1);
    613626
    614627    generateS2PKernel(mMod, iBuilder, mS2PKernel);
     
    625638
    626639    generateGatherKernel(mGatherKernel, endpoints, 64);
    627     generateStdOutKernel(mMod, iBuilder, mStdOutKernel);
    628640}
    629641
     
    666678    Instance * sortingInstance = mSortingKernel->instantiate(leadingInstance->getResultSet());
    667679    Instance * gatheringInstance = mGatherKernel->instantiate(sortingInstance->getResultSet());
    668     Instance * stdOutInstance = mStdOutKernel->instantiate(gatheringInstance->getResultSet());
    669680
    670681    gatheringInstance->setInternalState("Base", iBuilder->CreateBitCast(inputStream, iBuilder->getInt8PtrTy()));
    671 
    672     stdOutInstance->setInternalState("RemainingBytes", bufferSize);  // The total number of bytes to be sent to stdout.
    673682
    674683    const unsigned leadingBlocks = (mLongestLookahead + iBuilder->getBitBlockWidth() - 1) / iBuilder->getBitBlockWidth();
     
    720729    sortingInstance->CreateDoBlockCall();
    721730    gatheringInstance->CreateDoBlockCall();
    722 //    stdOutInstance->CreateDoBlockCall();
     731
    723732    remainingBytes2->addIncoming(iBuilder->CreateSub(remainingBytes2, blockSize), regularBodyBlock);
    724733    iBuilder->CreateBr(regularTestBlock);
     
    739748    sortingInstance->CreateDoBlockCall();
    740749    gatheringInstance->CreateDoBlockCall();
    741 //    stdOutInstance->CreateDoBlockCall();
     750
    742751    iBuilder->CreateBr(finalTestBlock);
    743752
     
    755764    sortingInstance->CreateDoBlockCall();
    756765    gatheringInstance->CreateDoBlockCall();
    757 //    stdOutInstance->CreateDoBlockCall();
     766
    758767    remainingFullBlocks->addIncoming(iBuilder->CreateSub(remainingFullBlocks, iBuilder->getInt64(1)), finalBodyBlock);
    759768
     
    800809    delete mSortingKernel;
    801810    delete mGatherKernel;
    802     delete mStdOutKernel;
    803 }
    804 
    805 
    806 }
     811}
     812
     813
     814}
  • icGREP/icgrep-devel/icgrep/kernels/symboltablepipeline.h

    r5000 r5001  
    3333
    3434    void generateGatherKernel(KernelBuilder * kBuilder, const std::vector<unsigned> & endpoints, const unsigned scanWordBitWidth = 64);
    35     Function * generateGatherFunction(const unsigned minKeyLength, const unsigned maxKeyLength, Type * const resultType);
     35    Function * generateGatherFunction(const unsigned minKeyLength, const unsigned maxKeyLength);
    3636
    3737    Value * generateMaskedGather(Value * const base, Value * const vindex, Value * const mask);
     
    4444    KernelBuilder *                     mSortingKernel;
    4545    KernelBuilder *                     mGatherKernel;
    46     KernelBuilder *                     mStdOutKernel;
    4746
    4847    unsigned                            mLongestLookahead;
Note: See TracChangeset for help on using the changeset viewer.