Changeset 6189 for icGREP


Ignore:
Timestamp:
Nov 4, 2018, 5:32:24 PM (6 months ago)
Author:
nmedfort
Message:

Bug fixes for 32-bit builds

Location:
icGREP/icgrep-devel/icgrep
Files:
17 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r6184 r6189  
    291291}
    292292
    293 void CBuilder::CallPrintIntCond(const std::string & name, Value * const value, Value * const cond, const STD_FD fd) {
     293void CBuilder::CallPrintIntCond(StringRef name, Value * const value, Value * const cond, const STD_FD fd) {
    294294    BasicBlock * const insertBefore = GetInsertBlock()->getNextNode();
    295295    BasicBlock* const callBlock = CreateBasicBlock("callBlock", insertBefore);
     
    302302}
    303303
    304 void CBuilder::CallPrintInt(const std::string & name, Value * const value, const STD_FD fd) {
     304void CBuilder::CallPrintInt(StringRef name, Value * const value, const STD_FD fd) {
    305305    Module * const m = getModule();
    306306    Constant * printRegister = m->getFunction("print_int");
     307    IntegerType * const int64Ty = getInt64Ty();
    307308    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    308         FunctionType *FT = FunctionType::get(getVoidTy(), { getInt32Ty(), PointerType::get(getInt8Ty(), 0), getSizeTy() }, false);
     309        FunctionType *FT = FunctionType::get(getVoidTy(), { getInt32Ty(), getInt8PtrTy(), int64Ty }, false);
    309310        Function * function = Function::Create(FT, Function::InternalLinkage, "print_int", m);
    310311        auto arg = function->arg_begin();
    311         const char * out = "%-40s = %" PRIx64 "\n";
    312312        BasicBlock * entry = BasicBlock::Create(getContext(), "entry", function);
    313313        IRBuilder<> builder(entry);
    314         std::vector<Value *> args;
    315314        Value * const fdInt = &*(arg++);
    316         args.push_back(fdInt);
    317         args.push_back(GetString(out));
     315        fdInt->setName("fd");
    318316        Value * const name = &*(arg++);
    319317        name->setName("name");
    320         args.push_back(name);
    321318        Value * value = &*arg;
    322         value->setName("value");
    323         args.push_back(value);
     319        value->setName("value");       
     320        std::vector<Value *> args(4);
     321        args[0] = fdInt;
     322        args[1] = GetString("%-40s = %" PRIx64 "\n");
     323        args[2] = name;
     324        args[3] = value;
    324325        builder.CreateCall(GetDprintf(), args);
    325326        builder.CreateRetVoid();
     
    328329    Value * num = nullptr;
    329330    if (value->getType()->isPointerTy()) {
    330         num = CreatePtrToInt(value, getSizeTy());
     331        num = CreatePtrToInt(value, int64Ty);
    331332    } else {
    332         num = CreateZExtOrBitCast(value, getSizeTy());
     333        num = CreateZExt(value, int64Ty);
    333334    }
    334335    assert (num->getType()->isIntegerTy());
    335     CreateCall(printRegister, {getInt32(static_cast<uint32_t>(fd)), GetString(name.c_str()), num});
     336    CreateCall(printRegister, {getInt32(static_cast<uint32_t>(fd)), GetString(name), num});
    336337}
    337338
     
    14261427}
    14271428
    1428 struct RemoveRedundantAssertionsPass : public llvm::ModulePass {
     1429struct RemoveRedundantAssertionsPass : public ModulePass {
    14291430    static char ID;
    14301431    RemoveRedundantAssertionsPass() : ModulePass(ID) { }
    14311432
    1432     virtual bool runOnModule(llvm::Module &M) override;
     1433    virtual bool runOnModule(Module &M) override;
    14331434};
    14341435
    1435 llvm::ModulePass * createRemoveRedundantAssertionsPass() {
     1436ModulePass * createRemoveRedundantAssertionsPass() {
    14361437    return new RemoveRedundantAssertionsPass();
    14371438}
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r6184 r6189  
    225225    };
    226226
    227     void CallPrintIntCond(const std::string & name, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd = STD_FD::STD_ERR);
    228 
    229     void CallPrintInt(const std::string & name, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
     227    void CallPrintIntCond(llvm::StringRef name, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd = STD_FD::STD_ERR);
     228
     229    void CallPrintInt(llvm::StringRef name, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
    230230       
    231231    llvm::Value * GetString(llvm::StringRef Str);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6184 r6189  
    3636
    3737void IDISA_Builder::UnsupportedFieldWidthError(const unsigned fw, std::string op_name) {
    38     llvm::report_fatal_error(op_name + ": Unsupported field width: " +  std::to_string(fw));
    39 }
    40 
    41 void IDISA_Builder::CallPrintRegisterCond(const std::string & regName, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd) {
     38    report_fatal_error(op_name + ": Unsupported field width: " +  std::to_string(fw));
     39}
     40
     41void IDISA_Builder::CallPrintRegisterCond(StringRef regName, Value * const value, Value * const cond, const STD_FD fd) {
    4242    BasicBlock * const insertBefore = GetInsertBlock()->getNextNode();
    4343    BasicBlock* const callBlock = CreateBasicBlock("callBlock", insertBefore);
     
    4949}
    5050
    51 void IDISA_Builder::CallPrintRegister(const std::string & name, Value * const value, const STD_FD fd) {
     51void IDISA_Builder::CallPrintRegister(StringRef name, Value * const value, const STD_FD fd) {
    5252    Module * const m = getModule();
    5353    Constant * printRegister = m->getFunction("print_register");
     
    6565        BasicBlock * entry = BasicBlock::Create(m->getContext(), "entry", function);
    6666        IRBuilder<> builder(entry);
    67         std::vector<Value *> args;
    6867        Value * const fdInt = &*(arg++);
    69         args.push_back(fdInt);
    70         args.push_back(GetString(out.str().c_str()));
    7168        Value * const name = &*(arg++);
    7269        name->setName("name");
    73         args.push_back(name);
    7470        Value * value = &*arg;
    7571        value->setName("value");
    7672        Type * const byteVectorType = VectorType::get(getInt8Ty(), (mBitBlockWidth / 8));
    7773        value = builder.CreateBitCast(value, byteVectorType);
     74
     75        std::vector<Value *> args;
     76        args.push_back(fdInt);
     77        args.push_back(GetString(out.str()));
     78        args.push_back(name);
    7879        for(unsigned i = (getBitBlockWidth() / 8); i != 0; --i) {
    7980            args.push_back(builder.CreateZExt(builder.CreateExtractElement(value, builder.getInt32(i - 1)), builder.getInt32Ty()));
     
    8384        printRegister = function;
    8485    }
    85     CreateCall(printRegister, {getInt32(static_cast<uint32_t>(fd)), GetString(name.c_str()), CreateBitCast(value, getBitBlockType())});
     86    CreateCall(printRegister, {getInt32(static_cast<uint32_t>(fd)), GetString(name), CreateBitCast(value, getBitBlockType())});
    8687}
    8788
     
    288289    IntegerType * const intTy = getIntNTy(vecTy->getBitWidth());
    289290    Constant * const FIELD_WIDTH = ConstantInt::get(shift->getType(), fw);
    290     Constant * const BLOCK_WIDTH = ConstantInt::get(shift->getType(), vecTy->getBitWidth());
     291//    Constant * const BLOCK_WIDTH = ConstantInt::get(shift->getType(), vecTy->getBitWidth());
    291292    shift = CreateMul(shift, FIELD_WIDTH);
    292293//    if (LLVM_UNLIKELY(safe && codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     
    329330    IntegerType * const intTy = getIntNTy(vecTy->getBitWidth());
    330331    Constant * const FIELD_WIDTH = ConstantInt::get(shift->getType(), fw);
    331     Constant * const BLOCK_WIDTH = ConstantInt::get(shift->getType(), vecTy->getBitWidth());
     332//    Constant * const BLOCK_WIDTH = ConstantInt::get(shift->getType(), vecTy->getBitWidth());
    332333    shift = CreateMul(shift, FIELD_WIDTH);
    333334//    if (LLVM_UNLIKELY(safe && codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     
    724725   
    725726
    726 llvm::Value * IDISA_Builder::mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) {
     727Value * IDISA_Builder::mvmd_compress(unsigned fw, Value * a, Value * select_mask) {
    727728    UnsupportedFieldWidthError(fw, "mvmd_compress");
    728729}
     
    913914Constant * IDISA_Builder::bit_interleave_byteshuffle_table(unsigned fw) {
    914915    const unsigned fieldCount = mNativeBitBlockWidth/8;
    915     if (fw > 2) llvm::report_fatal_error("bit_interleave_byteshuffle_table requires fw == 1 or fw == 2");
     916    if (fw > 2) report_fatal_error("bit_interleave_byteshuffle_table requires fw == 1 or fw == 2");
    916917    // Bit interleave using shuffle.
    917918    // Make a shuffle table that translates the lower 4 bits of each byte in
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r6184 r6189  
    207207    }
    208208
    209     void CallPrintRegisterCond(const std::string & regName, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd = STD_FD::STD_ERR);
    210     void CallPrintRegister(const std::string & regName, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
     209    void CallPrintRegisterCond(llvm::StringRef regName, llvm::Value * const value, llvm::Value * const cond, const STD_FD fd = STD_FD::STD_ERR);
     210    void CallPrintRegister(llvm::StringRef regName, llvm::Value * const value, const STD_FD fd = STD_FD::STD_ERR);
    211211
    212212protected:
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6186 r6189  
    219219}
    220220   
    221 StreamCompressKernel::StreamCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & kb
     221StreamCompressKernel::StreamCompressKernel(const std::unique_ptr<kernel::KernelBuilder> & b
    222222                                           , StreamSet * source
    223223                                           #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     
    239239, mCompressedFieldWidth(FieldWidth)
    240240, mStreamCount(source->getNumElements()) {
    241     addInternalScalar(kb->getSizeTy(), "pendingItemCount");
    242     for (unsigned i = 0; i < mStreamCount; i++) {
    243         addInternalScalar(kb->getBitBlockType(), "pendingOutputBlock_" + std::to_string(i));
     241    Type * const fwTy = b->getIntNTy(mCompressedFieldWidth);
     242    addInternalScalar(fwTy, "pendingItemCount");
     243    for (unsigned i = 0; i < mStreamCount; i++) {
     244        addInternalScalar(b->getBitBlockType(), "pendingOutputBlock_" + std::to_string(i));
    244245    }
    245246
     
    247248   
    248249void StreamCompressKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfBlocks) {
    249     const unsigned fw = mCompressedFieldWidth;
    250     Type * fwTy = b->getIntNTy(fw);
    251     Type * sizeTy = b->getSizeTy();
    252     const unsigned numFields = b->getBitBlockWidth()/fw;
    253     Constant * zeroSplat = Constant::getNullValue(b->fwVectorType(fw));
     250    IntegerType * const fwTy = b->getIntNTy(mCompressedFieldWidth);
     251    IntegerType * const sizeTy = b->getSizeTy();
     252    const unsigned numFields = b->getBitBlockWidth() / mCompressedFieldWidth;
     253    Constant * zeroSplat = Constant::getNullValue(b->fwVectorType(mCompressedFieldWidth));
    254254    Constant * oneSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, 1));
    255     Constant * fwSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, fw));
    256     Constant * numFieldConst = ConstantInt::get(sizeTy, numFields);
    257     Constant * fwMaskSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, fw-1));
    258     Constant * bitBlockWidthConst = ConstantInt::get(sizeTy, b->getBitBlockWidth());
     255    Constant * CFW = ConstantInt::get(fwTy, mCompressedFieldWidth);
     256    Constant * fwSplat = ConstantVector::getSplat(numFields, CFW);
     257    Constant * numFieldConst = ConstantInt::get(fwTy, numFields);
     258    Constant * fwMaskSplat = ConstantVector::getSplat(numFields, ConstantInt::get(fwTy, mCompressedFieldWidth - 1));
    259259    BasicBlock * entry = b->GetInsertBlock();
    260260    BasicBlock * segmentLoop = b->CreateBasicBlock("segmentLoop");
     
    262262    BasicBlock * finalWrite = b->CreateBasicBlock("finalWrite");
    263263    BasicBlock * updateProducedCount = b->CreateBasicBlock("updateProducedCount");
    264     Constant * const ZERO = b->getSize(0);
     264    Constant * const ZERO = ConstantInt::get(sizeTy, 0);
     265    Constant * const ONE = ConstantInt::get(sizeTy, 1);
     266
    265267
    266268    Value * pendingItemCount = b->getScalarField("pendingItemCount");
     
    273275    // Main Loop
    274276    b->SetInsertPoint(segmentLoop);
    275     PHINode * blockOffsetPhi = b->CreatePHI(b->getSizeTy(), 2);
    276     PHINode * outputBlockPhi = b->CreatePHI(b->getSizeTy(), 2);
    277     PHINode * pendingItemsPhi = b->CreatePHI(b->getSizeTy(), 2);
     277    PHINode * blockOffsetPhi = b->CreatePHI(sizeTy, 2);
     278    PHINode * outputBlockPhi = b->CreatePHI(sizeTy, 2);
     279    PHINode * pendingItemsPhi = b->CreatePHI(fwTy, 2);
    278280    PHINode * pendingDataPhi[mStreamCount];
    279281    blockOffsetPhi->addIncoming(ZERO, entry);
     
    284286        pendingDataPhi[i]->addIncoming(pendingData[i], entry);
    285287    }
    286 #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     288    #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
    287289    Value * fieldPopCounts = b->simd_popcount(fw, b->loadInputStreamBlock("extractionMask", ZERO, blockOffsetPhi));
    288 #else
     290    #else
    289291    Value * fieldPopCounts = b->loadInputStreamBlock("unitCounts", ZERO, blockOffsetPhi);
    290 #endif
     292    #endif
    291293    // For each field determine the (partial) sum popcount of all fields up to and
    292294    // including the current field.
    293295    Value * partialSum = fieldPopCounts;
    294296    for (unsigned i = 1; i < numFields; i *= 2) {
    295         partialSum = b->simd_add(fw, partialSum, b->mvmd_slli(fw, partialSum, i));
    296     }
    297     Value * blockPopCount = b->CreateZExtOrTrunc(b->mvmd_extract(fw, partialSum, numFields-1), sizeTy);
     297        partialSum = b->simd_add(mCompressedFieldWidth, partialSum, b->mvmd_slli(mCompressedFieldWidth, partialSum, i));
     298    }
     299    // Value * blockPopCount = b->CreateZExtOrTrunc(b->mvmd_extract(mCompressedFieldWidth, partialSum, numFields - 1), fwTy);
     300
     301    Value * blockPopCount = b->mvmd_extract(mCompressedFieldWidth, partialSum, numFields - 1);
     302
    298303    //
    299304    // Now determine for each source field the output offset of the first bit.
    300305    // Note that this depends on the number of pending bits.
    301306    //
    302     Value * pendingOffset = b->CreateURem(pendingItemsPhi, ConstantInt::get(sizeTy, fw));
    303     Value * splatPending = b->simd_fill(fw, b->CreateZExtOrTrunc(pendingOffset, fwTy));
    304     Value * pendingFieldIdx = b->CreateUDiv(pendingItemsPhi, ConstantInt::get(sizeTy, fw));
    305     Value * offsets = b->simd_add(fw, b->mvmd_slli(fw, partialSum, 1), splatPending);
     307    Value * pendingOffset = b->CreateURem(pendingItemsPhi, CFW);
     308    Value * splatPending = b->simd_fill(mCompressedFieldWidth, b->CreateZExtOrTrunc(pendingOffset, fwTy));
     309    Value * pendingFieldIdx = b->CreateUDiv(pendingItemsPhi, CFW);
     310    Value * offsets = b->simd_add(mCompressedFieldWidth, b->mvmd_slli(mCompressedFieldWidth, partialSum, 1), splatPending);
    306311    offsets = b->simd_and(offsets, fwMaskSplat); // parallel URem fw
    307312   //
     
    310315    // be immediately combined into the current pending data field, so we calculate
    311316    // field numbers for all subsequent fields, (the fields that receive overflow bits).
    312     Value * fieldNo = b->simd_srli(fw, b->simd_add(fw, partialSum, splatPending), std::log2(fw));
     317    Value * pendingSum = b->simd_add(mCompressedFieldWidth, partialSum, splatPending);
     318    Value * fieldNo = b->simd_srli(mCompressedFieldWidth, pendingSum, std::log2(mCompressedFieldWidth));
    313319  //
    314320    // Now process the input data block of each stream in the input stream set.
     
    324330    std::vector<Value *> pendingOutput(mStreamCount);
    325331    std::vector<Value *> outputFields(mStreamCount);
    326     Value * backShift = b->simd_sub(fw, fwSplat, offsets);
    327     for (unsigned i = 0; i < mStreamCount; i++) {
    328         Value * currentFieldBits = b->simd_sllv(fw, sourceBlock[i], offsets);
    329         Value * nextFieldBits = b->simd_srlv(fw, sourceBlock[i], backShift);
    330         Value * firstField = b->mvmd_extract(fw, currentFieldBits, 0);
     332    Value * backShift = b->simd_sub(mCompressedFieldWidth, fwSplat, offsets);
     333    for (unsigned i = 0; i < mStreamCount; i++) {
     334        Value * currentFieldBits = b->simd_sllv(mCompressedFieldWidth, sourceBlock[i], offsets);
     335        Value * nextFieldBits = b->simd_srlv(mCompressedFieldWidth, sourceBlock[i], backShift);
     336        Value * firstField = b->mvmd_extract(mCompressedFieldWidth, currentFieldBits, 0);
    331337        Value * vec1 = b->CreateInsertElement(zeroSplat, firstField, pendingFieldIdx);
    332338        pendingOutput[i] = b->simd_or(pendingDataPhi[i], vec1);
    333339        // shift back currentFieldBits to combine with nextFieldBits.
    334         outputFields[i] = b->simd_or(b->mvmd_srli(fw, currentFieldBits, 1), nextFieldBits);
     340        outputFields[i] = b->simd_or(b->mvmd_srli(mCompressedFieldWidth, currentFieldBits, 1), nextFieldBits);
    335341    }
    336342    // Now combine forward all fields with the same field number.  This may require
    337343    // up to log2 numFields steps.
    338344    for (unsigned j = 1; j < numFields; j*=2) {
    339         Value * select = b->simd_eq(fw, fieldNo, b->mvmd_slli(fw, fieldNo, j));
     345        Value * select = b->simd_eq(mCompressedFieldWidth, fieldNo, b->mvmd_slli(mCompressedFieldWidth, fieldNo, j));
    340346        for (unsigned i = 0; i < mStreamCount; i++) {
    341             Value * fields_fwd = b->mvmd_slli(fw, outputFields[i], j);
     347            Value * fields_fwd = b->mvmd_slli(mCompressedFieldWidth, outputFields[i], j);
    342348            outputFields[i] = b->simd_or(outputFields[i], b->simd_and(select, fields_fwd));
    343349       }
     
    347353    // But it may be that last field number is 0 which will compare equal to a 0 shifted in.
    348354    // So we add 1 to field numbers first.
    349     Value * nonZeroFieldNo = b->simd_add(fw, fieldNo, oneSplat);
    350     Value * eqNext = b->simd_eq(fw, nonZeroFieldNo, b->mvmd_srli(fw, nonZeroFieldNo, 1));
    351     Value * compressMask = b->hsimd_signmask(fw, b->simd_not(eqNext));
    352     for (unsigned i = 0; i < mStreamCount; i++) {
    353         outputFields[i] = b->mvmd_compress(fw, outputFields[i], compressMask);
     355    Value * nonZeroFieldNo = b->simd_add(mCompressedFieldWidth, fieldNo, oneSplat);
     356    Value * eqNext = b->simd_eq(mCompressedFieldWidth, nonZeroFieldNo, b->mvmd_srli(mCompressedFieldWidth, nonZeroFieldNo, 1));
     357    Value * compressMask = b->hsimd_signmask(mCompressedFieldWidth, b->simd_not(eqNext));
     358    for (unsigned i = 0; i < mStreamCount; i++) {
     359        outputFields[i] = b->mvmd_compress(mCompressedFieldWidth, outputFields[i], compressMask);
    354360    }
    355361    //
     
    364370    // of fields held in register.  mvmd_sll may not handle this if it
    365371    // translates to an LLVM shl.
    366     Value * increment = b->CreateZExtOrTrunc(b->mvmd_extract(fw, fieldNo, 0), sizeTy);
     372    Value * increment = b->CreateZExtOrTrunc(b->mvmd_extract(mCompressedFieldWidth, fieldNo, 0), fwTy);
    367373    pendingFieldIdx = b->CreateAdd(pendingFieldIdx, increment);
    368374    Value * const pendingSpaceFilled = b->CreateICmpEQ(pendingFieldIdx, numFieldConst);
    369375    Value * shftBack = b->CreateSub(numFieldConst, pendingFieldIdx);
    370376    for (unsigned i = 0; i < mStreamCount; i++) {
    371         Value * shiftedField = b->mvmd_sll(fw, outputFields[i], pendingFieldIdx);
    372 
    373         Value * outputFwd = b->fwCast(fw, shiftedField);
     377        Value * shiftedField = b->mvmd_sll(mCompressedFieldWidth, outputFields[i], pendingFieldIdx);
     378        Value * outputFwd = b->fwCast(mCompressedFieldWidth, shiftedField);
    374379        shiftedField = b->CreateSelect(pendingSpaceFilled, zeroSplat, outputFwd);
    375 
    376380        pendingOutput[i] = b->simd_or(pendingOutput[i], shiftedField);
    377         outputFields[i] = b->mvmd_srl(fw, outputFields[i], shftBack);
     381        outputFields[i] = b->mvmd_srl(mCompressedFieldWidth, outputFields[i], shftBack);
    378382    }
    379383    //
     
    386390    // the pending data all fits within the pendingOutput.
    387391    Value * newPending = b->CreateAdd(pendingItemsPhi, blockPopCount);
    388     Value * doesFit = b->CreateICmpULT(newPending, bitBlockWidthConst);
    389     newPending = b->CreateSelect(doesFit, newPending, b->CreateSub(newPending, bitBlockWidthConst));
     392    Constant * BLOCK_WIDTH = ConstantInt::get(fwTy, b->getBitBlockWidth());
     393    Value * doesFit = b->CreateICmpULT(newPending, BLOCK_WIDTH);
     394    newPending = b->CreateSelect(doesFit, newPending, b->CreateSub(newPending, BLOCK_WIDTH));
    390395    //
    391396    // Prepare Phi nodes for the next iteration.
    392397    //
    393     Value * nextBlk = b->CreateAdd(blockOffsetPhi, b->getSize(1));
     398    Value * nextBlk = b->CreateAdd(blockOffsetPhi, ONE);
    394399    blockOffsetPhi->addIncoming(nextBlk, segmentLoop);
    395     Value * nextOutputBlk = b->CreateAdd(outputBlockPhi, b->getSize(1));
     400    Value * nextOutputBlk = b->CreateAdd(outputBlockPhi, ONE);
    396401    // But don't advance the output if all the data does fit into pendingOutput.
    397402    nextOutputBlk = b->CreateSelect(doesFit, outputBlockPhi, nextOutputBlk);
     
    400405
    401406    for (unsigned i = 0; i < mStreamCount; i++) {
    402         pendingOutput[i] = b->CreateSelect(doesFit, b->fwCast(fw, pendingOutput[i]), b->fwCast(fw, outputFields[i]));
     407        pendingOutput[i] = b->CreateSelect(doesFit, b->fwCast(mCompressedFieldWidth, pendingOutput[i]), b->fwCast(mCompressedFieldWidth, outputFields[i]));
    403408        pendingDataPhi[i]->addIncoming(b->bitCast(pendingOutput[i]), segmentLoop);
    404409    }
     
    415420    }
    416421    b->CreateCondBr(mIsFinal, finalWrite, updateProducedCount);
     422
    417423    b->SetInsertPoint(finalWrite);
    418424    for (unsigned i = 0; i < mStreamCount; i++) {
    419         //Value * pending = b->getScalarField("pendingOutputBlock_" + std::to_string(i));
    420425        Value * pending = b->bitCast(pendingOutput[i]);
    421426        b->storeOutputStreamBlock("compressedOutput", b->getInt32(i), nextOutputBlk, pending);
    422427    }
    423428    b->CreateBr(updateProducedCount);
     429
    424430    b->SetInsertPoint(updateProducedCount);
    425431    Value * produced = b->getProducedItemCount("compressedOutput");
    426     produced = b->CreateAdd(produced, b->CreateMul(nextOutputBlk, bitBlockWidthConst));
     432    Value * const blockOffset = b->CreateMul(nextOutputBlk, b->getSize(b->getBitBlockWidth()));
     433    produced = b->CreateAdd(produced, blockOffset);
     434    newPending = b->CreateZExtOrTrunc(newPending, sizeTy);
    427435    produced = b->CreateSelect(mIsFinal, b->CreateAdd(produced, newPending), produced);
    428436    b->setProducedItemCount("compressedOutput", produced);
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r6184 r6189  
    115115class DeleteByPEXTkernel final : public BlockOrientedKernel {
    116116public:
    117     DeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned fw, unsigned streamCount, unsigned PEXT_width = 64);
     117    DeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned fw, unsigned streamCount, unsigned PEXT_width = sizeof(size_t) * 8);
    118118    bool isCachable() const override { return true; }
    119119    bool hasSignature() const override { return false; }
     
    131131class SwizzledBitstreamCompressByCount final : public BlockOrientedKernel {
    132132public:
    133     SwizzledBitstreamCompressByCount(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned fieldWidth = 64);
     133    SwizzledBitstreamCompressByCount(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned fieldWidth = sizeof(size_t) * 8);
    134134    bool isCachable() const override { return true; }
    135135    bool hasSignature() const override { return false; }
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r6184 r6189  
    151151    Value * blockIndex = CreateLShr(processed, std::log2(getBitBlockWidth()));
    152152    if (blockOffset) {
    153         assert (blockOffset->getType() == blockIndex->getType());
    154         blockIndex = CreateAdd(blockIndex, blockOffset);
     153        blockIndex = CreateAdd(blockIndex, CreateZExtOrTrunc(blockOffset, blockIndex->getType()));
    155154    }
    156155    return buf->getStreamBlockPtr(this, streamIndex, blockIndex);
     
    162161    Value * blockIndex = CreateLShr(processed, std::log2(getBitBlockWidth()));
    163162    if (blockOffset) {
    164         assert (blockOffset->getType() == blockIndex->getType());
    165         blockIndex = CreateAdd(blockIndex, blockOffset);
     163        blockIndex = CreateAdd(blockIndex, CreateZExtOrTrunc(blockOffset, blockIndex->getType()));
    166164    }
    167165    return buf->getStreamPackPtr(this, streamIndex, blockIndex, packIndex);
     
    186184    Value * blockIndex = CreateLShr(produced, std::log2(getBitBlockWidth()));
    187185    if (blockOffset) {
    188         assert (blockOffset->getType() == blockIndex->getType());
    189         blockIndex = CreateAdd(blockIndex, blockOffset);
     186        blockIndex = CreateAdd(blockIndex, CreateZExtOrTrunc(blockOffset, blockIndex->getType()));
    190187    }
    191188    return buf->getStreamBlockPtr(this, streamIndex, blockIndex);
     
    197194    Value * blockIndex = CreateLShr(produced, std::log2(getBitBlockWidth()));
    198195    if (blockOffset) {
    199         assert (blockOffset->getType() == blockIndex->getType());
    200         blockIndex = CreateAdd(blockIndex, blockOffset);
     196        blockIndex = CreateAdd(blockIndex, CreateZExtOrTrunc(blockOffset, blockIndex->getType()));
    201197    }
    202198    return buf->getStreamPackPtr(this, streamIndex, blockIndex, packIndex);
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r6187 r6189  
    4646               
    4747void P2SKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
    48     const auto numOfStreams = getStreamBinding("basisBits").getNumElements();
     48    const auto numOfStreams = getInputStreamSet("basisBits")->getNumElements();
    4949    Value * p_bitblock[8];
    5050    // todo: generalize this to the nearest pow 2?
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.h

    r6184 r6189  
    6969class FieldDepositKernel final : public MultiBlockKernel {
    7070public:
    71     FieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * mask, StreamSet * input, StreamSet * output, const unsigned fieldWidth = 64);
     71    FieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * mask, StreamSet * input, StreamSet * output, const unsigned fieldWidth = sizeof(size_t) * 8);
    7272    bool isCachable() const override { return true; }
    7373    bool hasSignature() const override { return false; }
     
    8181class PDEPFieldDepositKernel final : public MultiBlockKernel {
    8282public:
    83     PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * mask, StreamSet * expanded, StreamSet * outputs, const unsigned fieldWidth = 64);
     83    PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * mask, StreamSet * expanded, StreamSet * outputs, const unsigned fieldWidth = sizeof(size_t) * 8);
    8484    bool isCachable() const override { return true; }
    8585    bool hasSignature() const override { return false; }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6184 r6189  
    8383    if (pc.partialSumArray == nullptr) {
    8484        IntegerType * const sizeTy = b->getSizeTy();
    85         Constant * sizeOfSizeTy = ConstantExpr::getSizeOf(sizeTy);
    86         // TODO: this initial value should be more intelligently chosen
    87         Constant * maxNumOfStrides = ConstantInt::get(sizeOfSizeTy->getType(), codegen::SegmentSize / mKernel->getStride());
    88         pc.strideCapacity = b->CreateAlloca(maxNumOfStrides->getType());
     85        Constant * maxNumOfStrides = b->getSize(ceiling(mBufferGraph[mKernelIndex].upper));
     86        pc.strideCapacity = b->CreateAlloca(sizeTy);
    8987        b->CreateStore(maxNumOfStrides, pc.strideCapacity);
     88
     89        Constant * const sizeOfSizeTy = ConstantExpr::getTrunc(ConstantExpr::getSizeOf(sizeTy), sizeTy, true);
    9090        Constant * const arraySize = ConstantExpr::getMul(maxNumOfStrides, sizeOfSizeTy);
    9191        PointerType * const sizePtrTy = sizeTy->getPointerTo();
     92        Value * const ptr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(arraySize), sizePtrTy);
     93
    9294        pc.partialSumArray = b->CreateAlloca(sizePtrTy);
    93         Value * const ptr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(arraySize), sizePtrTy);
    9495        b->CreateStore(ptr, pc.partialSumArray);
     96
    9597        const Binding & input = mKernel->getInputStreamSetBinding(index);
    9698        if (input.hasAttribute(negated ? AttrId::RequiresNegatedPopCountArray : AttrId::RequiresPopCountArray)) {
     
    182184    // by preceeding kernels.
    183185    b->SetInsertPoint(popCountExpand);
    184     IntegerType * const sizeTy = b->getSizeTy();
    185     Constant * sizeOfSizeTy = ConstantExpr::getSizeOf(sizeTy);
    186186    Value * const newStrideCapacity = b->CreateRoundUp(mNumOfLinearStrides, strideCapacity);
    187187    b->CreateStore(newStrideCapacity, pc.strideCapacity);
     188    IntegerType * const sizeTy = b->getSizeTy();
     189    Constant * const sizeOfSizeTy = ConstantExpr::getTrunc(ConstantExpr::getSizeOf(sizeTy), sizeTy, true);
    188190    Value * const newStrideSize = b->CreateMul(newStrideCapacity, sizeOfSizeTy);
    189191    PointerType * const sizePtrTy = sizeTy->getPointerTo();
  • icGREP/icgrep-devel/icgrep/kernels/swizzle.h

    r6184 r6189  
    5252public:
    5353   
    54     SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> &, const std::vector<StreamSet *> & inputs, const std::vector<StreamSet *> & outputs, const unsigned fieldWidth = 64);
     54    SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> &, const std::vector<StreamSet *> & inputs, const std::vector<StreamSet *> & outputs, const unsigned fieldWidth = sizeof(size_t) * 8);
    5555   
    5656protected:
  • icGREP/icgrep-devel/icgrep/pablo/analysis/pabloverifier.cpp

    r5933 r6189  
    259259
    260260            PabloAST * const value = cast<Assign>(stmt)->getValue();
    261             if (LLVM_UNLIKELY(variable->getType() != value->getType())) {
     261
     262            Type * const A = value->getType();
     263            Type * const B = variable->getType();
     264
     265            bool invalid = false;
     266            if (A->isIntegerTy() && B->isIntegerTy()) {
     267                invalid = A->getPrimitiveSizeInBits() > B->getPrimitiveSizeInBits();
     268            } else {
     269                invalid = !A->canLosslesslyBitCastTo(B);
     270            }
     271
     272            if (LLVM_UNLIKELY(invalid)) {
    262273                std::string tmp;
    263274                raw_string_ostream out(tmp);
    264275                out << "invalid assignment: ";
    265276                PabloPrinter::print(stmt, out);
    266                 out << "  --- type of ";
    267                 PabloPrinter::print(variable, out);
    268                 out << " differs from ";
    269                 PabloPrinter::print(value, out);
     277                out << "  --- value cannot fit wthin variable";
    270278                throw std::runtime_error(out.str());
    271279            }
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.cpp

    r5889 r6189  
    3434
    3535#ifndef NDEBUG
    36 inline void checkSameType(const Type * const A, const Type * const B) {
     36inline void __checkSameType(const Type * const A, const Type * const B) {
    3737    assert ("DIFFERING CONTEXTS" && (&(A->getContext()) == &(B->getContext())));
    3838    assert ("DIFFERING TYPES" && (A == B));
    3939}
    40 inline void checkSameType(const PabloAST * const A, const PabloAST * const B) {
    41     checkSameType(A->getType(), B->getType());
    42 }
    43 #define CHECK_SAME_TYPE(A, B) checkSameType(A, B)
     40inline void __checkSameType(const PabloAST * const A, const PabloAST * const B) {
     41    __checkSameType(A->getType(), B->getType());
     42}
     43#define CHECK_SAME_TYPE(A, B) __checkSameType(A, B)
    4444#else
    4545#define CHECK_SAME_TYPE(A, B)
     
    155155
    156156Assign * PabloBlock::createAssign(PabloAST * const var, PabloAST * const value) {
    157     CHECK_SAME_TYPE(var, value);
    158157    Var * test = nullptr;
    159158    if (isa<Extract>(var)) {
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r6184 r6189  
    485485            value = compileExpression(b, cast<Assign>(stmt)->getValue());
    486486            if (isa<Extract>(expr) || (isa<Var>(expr) && cast<Var>(expr)->isKernelParameter())) {
    487                 Value * const ptr = compileExpression(b, expr, false);
    488                 b->CreateAlignedStore(value, ptr, getAlignment(value));
     487                Value * const ptr = compileExpression(b, expr, false);               
     488                Type * const elemTy = ptr->getType()->getPointerElementType();
     489                b->CreateAlignedStore(b->CreateZExt(value, elemTy), ptr, getAlignment(elemTy));
    489490                value = ptr;
    490491            }
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.cpp

    r6187 r6189  
    7272    InitializeNativeTarget();
    7373    InitializeNativeTargetAsmPrinter();
    74     InitializeNativeTargetAsmParser();
     74//    InitializeNativeTargetAsmParser();
    7575    llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
    7676   
     
    162162}
    163163
    164 void CPUDriver::preparePassManager() {
     164inline legacy::PassManager CPUDriver::preparePassManager() {
     165
     166    legacy::PassManager PM;
     167
    165168    PassRegistry * Registry = PassRegistry::getPassRegistry();
    166169    initializeCore(*Registry);
     
    170173    if (LLVM_UNLIKELY(codegen::ShowUnoptimizedIROption != codegen::OmittedOption)) {
    171174        if (LLVM_LIKELY(mIROutputStream == nullptr)) {
    172             if (codegen::ShowUnoptimizedIROption != "") {
     175            if (!codegen::ShowUnoptimizedIROption.empty()) {
    173176                std::error_code error;
    174177                mUnoptimizedIROutputStream = make_unique<raw_fd_ostream>(codegen::ShowUnoptimizedIROption, error, sys::fs::OpenFlags::F_None);
     
    177180            }
    178181        }
    179         mPassManager.add(createPrintModulePass(*mUnoptimizedIROutputStream));
     182        PM.add(createPrintModulePass(*mUnoptimizedIROutputStream));
    180183    }
    181184    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    182         mPassManager.add(createVerifierPass());
    183     }
    184 
    185     mPassManager.add(createDeadCodeEliminationPass());        // Eliminate any trivially dead code
    186     mPassManager.add(createPromoteMemoryToRegisterPass());    // Promote stack variables to constants or PHI nodes
    187     mPassManager.add(createCFGSimplificationPass());          // Remove dead basic blocks and unnecessary branch statements / phi nodes
    188     mPassManager.add(createEarlyCSEPass());                   // Simple common subexpression elimination pass
    189     mPassManager.add(createInstructionCombiningPass());       // Simple peephole optimizations and bit-twiddling.
    190     mPassManager.add(createReassociatePass());                // Canonicalizes commutative expressions
    191     mPassManager.add(createGVNPass());                        // Global value numbering redundant expression elimination pass
    192     mPassManager.add(createCFGSimplificationPass());          // Repeat CFG Simplification to "clean up" any newly found redundant phi nodes
     185        PM.add(createVerifierPass());
     186    }
     187    PM.add(createDeadCodeEliminationPass());        // Eliminate any trivially dead code
     188    PM.add(createPromoteMemoryToRegisterPass());    // Promote stack variables to constants or PHI nodes
     189    PM.add(createCFGSimplificationPass());          // Remove dead basic blocks and unnecessary branch statements / phi nodes
     190    PM.add(createEarlyCSEPass());                   // Simple common subexpression elimination pass
     191    PM.add(createInstructionCombiningPass());       // Simple peephole optimizations and bit-twiddling.
     192    PM.add(createReassociatePass());                // Canonicalizes commutative expressions
     193    PM.add(createGVNPass());                        // Global value numbering redundant expression elimination pass
     194    PM.add(createCFGSimplificationPass());          // Repeat CFG Simplification to "clean up" any newly found redundant phi nodes
    193195    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    194         mPassManager.add(createRemoveRedundantAssertionsPass());
    195         mPassManager.add(createDeadCodeEliminationPass());
    196         mPassManager.add(createCFGSimplificationPass());
    197     }
    198 
     196        PM.add(createRemoveRedundantAssertionsPass());
     197        PM.add(createDeadCodeEliminationPass());
     198        PM.add(createCFGSimplificationPass());
     199    }
    199200    if (LLVM_UNLIKELY(codegen::ShowIROption != codegen::OmittedOption)) {
    200201        if (LLVM_LIKELY(mIROutputStream == nullptr)) {
    201             if (codegen::ShowIROption != "") {
     202            if (!codegen::ShowIROption.empty()) {
    202203                std::error_code error;
    203204                mIROutputStream = make_unique<raw_fd_ostream>(codegen::ShowIROption, error, sys::fs::OpenFlags::F_None);
     
    206207            }
    207208        }
    208         mPassManager.add(createPrintModulePass(*mIROutputStream));
    209     }
    210    
    211 #if LLVM_VERSION_INTEGER >= LLVM_VERSION_CODE(3, 7, 0)
     209        PM.add(createPrintModulePass(*mIROutputStream));
     210    }
     211    #if LLVM_VERSION_INTEGER >= LLVM_VERSION_CODE(3, 7, 0)
    212212    if (LLVM_UNLIKELY(codegen::ShowASMOption != codegen::OmittedOption)) {
    213         if (codegen::ShowASMOption != "") {
     213        if (!codegen::ShowASMOption.empty()) {
    214214            std::error_code error;
    215215            mASMOutputStream = make_unique<raw_fd_ostream>(codegen::ShowASMOption, error, sys::fs::OpenFlags::F_None);
     
    217217            mASMOutputStream = make_unique<raw_fd_ostream>(STDERR_FILENO, false, true);
    218218        }
    219         if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(mPassManager, *mASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
     219        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *mASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    220220            report_fatal_error("LLVM error: could not add emit assembly pass");
    221221        }
    222222    }
    223 #endif
     223    #endif
     224    return PM;
    224225}
    225226
    226227void CPUDriver::generateUncachedKernels() {
    227228    if (mUncachedKernel.empty()) return;
    228     preparePassManager();
     229    auto PM = preparePassManager();
    229230    for (auto & kernel : mUncachedKernel) {
    230231        kernel->prepareKernel(iBuilder);
     
    235236        Module * const module = kernel->getModule(); assert (module);
    236237        module->setTargetTriple(mMainModule->getTargetTriple());
    237         mPassManager.run(*module);
     238        PM.run(*module);
    238239        mCachedKernel.emplace_back(kernel.release());
    239240    }
     
    270271
    271272    iBuilder->setModule(mMainModule);
    272     mPassManager.run(*mMainModule);
    273273    #ifdef ORCJIT
    274274    std::vector<std::unique_ptr<Module>> moduleSet;
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.h

    r6184 r6189  
    5252    std::string getMangledName(std::string s);
    5353   
    54     void preparePassManager();
     54    llvm::legacy::PassManager preparePassManager();
    5555
    5656    llvm::Function * addLinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const override;
     
    6767    std::unique_ptr<llvm::raw_fd_ostream>                   mIROutputStream;
    6868    std::unique_ptr<llvm::raw_fd_ostream>                   mASMOutputStream;
    69     llvm::legacy::PassManager                               mPassManager;
    7069};
    7170
  • icGREP/icgrep-devel/icgrep/u32u8.cpp

    r6184 r6189  
    8686class UTF8fieldDepositMask final : public BlockOrientedKernel {
    8787public:
    88     UTF8fieldDepositMask(const std::unique_ptr<KernelBuilder> & b, StreamSet * u32basis, StreamSet * u8fieldMask, StreamSet * u8unitCounts, unsigned depositFieldWidth = 64);
     88    UTF8fieldDepositMask(const std::unique_ptr<KernelBuilder> & b, StreamSet * u32basis, StreamSet * u8fieldMask, StreamSet * u8unitCounts, unsigned depositFieldWidth = sizeof(size_t) * 8);
    8989private:
    9090    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) override;
     
    343343
    344344    P->CreateKernelCall<UTF8assembly>(deposit18_20, deposit12_17, deposit6_11, deposit0_5,
    345                                      u8initial, u8final, u8mask6_11, u8mask12_17,
    346                                      u8basis);
     345                                      u8initial, u8final, u8mask6_11, u8mask12_17,
     346                                      u8basis);
    347347
    348348    P->CreateKernelCall<P2SKernel>(u8basis, u8bytes);
Note: See TracChangeset for help on using the changeset viewer.