Ignore:
Timestamp:
May 5, 2018, 1:25:24 PM (17 months ago)
Author:
cameron
Message:

Stream deletion bug fixes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6013 r6014  
    146146    BasicBlock * segmentLoop = b->CreateBasicBlock("segmentLoop");
    147147    BasicBlock * segmentDone = b->CreateBasicBlock("segmentDone");
     148    BasicBlock * finalWrite = b->CreateBasicBlock("finalWrite");
     149    BasicBlock * updateProducedCount = b->CreateBasicBlock("updateProducedCount");
    148150    Constant * const ZERO = b->getSize(0);
    149151   
     
    198200    for (unsigned i = 0; i < mStreamCount; i++) {
    199201        sourceBlock[i] = b->loadInputStreamBlock("sourceStreamSet", b->getInt32(i), blockOffsetPhi);
    200 
    201202    }
    202203    // Now separate the bits of each field into ones that go into the current field
     
    205206    std::vector<Value *> pendingOutput(mStreamCount);
    206207    std::vector<Value *> outputFields(mStreamCount);
     208    Value * backShift = b->simd_sub(fw, fwSplat, offsets);
    207209    for (unsigned i = 0; i < mStreamCount; i++) {
    208210        Value * currentFieldBits = b->simd_sllv(fw, sourceBlock[i], offsets);
    209         Value * nextFieldBits = b->simd_srlv(fw, sourceBlock[i], b->simd_sub(fw, fwSplat, offsets));
     211        Value * nextFieldBits = b->simd_srlv(fw, sourceBlock[i], backShift);
    210212        Value * firstField = b->mvmd_extract(fw, currentFieldBits, 0);
    211213        Value * vec1 = b->CreateInsertElement(zeroSplat, firstField, pendingFieldIdx);
     
    214216        outputFields[i] = b->simd_or(b->mvmd_srli(fw, currentFieldBits, 1), nextFieldBits);
    215217    }
    216     // We may have filled the current field of the pendingOutput, so update the
    217     // pendingFieldIndex.
    218     Value * newPendingTotal = b->CreateZExtOrTrunc(b->mvmd_extract(fw, fieldPopCounts, 0), sizeTy);
    219     pendingFieldIdx = b->CreateUDiv(newPendingTotal, ConstantInt::get(sizeTy, fw));
    220218    // Now combine forward all fields with the same field number.  This may require
    221219    // up to log2 numFields steps.
     
    239237    // (a) shift forward outputField data to fill the pendingOutput values.
    240238    // (b) shift back outputField data to clear data added to pendingOutput.
     239    //
     240    // However, we may need to increment pendingFieldIndex if we previously
     241    // filled the field with the extracted firstField value.  The first
     242    // value of the fieldNo vector will be 0 or 1.
     243    // It is possible that pendingFieldIndex will reach the total number
     244    // of fields held in a register.  mvmd_sll may not handle this if it
     245    // translates to an LLVM shl.
     246    Value * increment = b->CreateZExtOrTrunc(b->mvmd_extract(fw, fieldNo, 0), sizeTy);
     247    pendingFieldIdx = b->CreateAdd(pendingFieldIdx, increment);
     248    Value * const pendingSpaceFilled = b->CreateICmpEQ(pendingFieldIdx, numFieldConst);
    241249    Value * shftBack = b->CreateSub(numFieldConst, pendingFieldIdx);
    242250    for (unsigned i = 0; i < mStreamCount; i++) {
    243251        Value * outputFwd = b->mvmd_sll(fw, outputFields[i], pendingFieldIdx);
     252        outputFwd = b->CreateSelect(pendingSpaceFilled, zeroSplat, outputFwd);
    244253        pendingOutput[i] = b->simd_or(pendingOutput[i], outputFwd);
    245254        outputFields[i] = b->mvmd_srl(fw, outputFields[i], shftBack);
    246   }
     255    }
    247256    //
    248257    // Write the pendingOutput data to outputStream.
     
    282291        b->setScalarField("pendingOutputBlock_" + std::to_string(i), b->bitCast(pendingOutput[i]));
    283292    }
    284     Value * produced = b->getProducedItemCount("compressedOutput");
     293    b->CreateCondBr(mIsFinal, finalWrite, updateProducedCount);
     294    b->SetInsertPoint(finalWrite);
     295    for (unsigned i = 0; i < mStreamCount; i++) {
     296        //Value * pending = b->getScalarField("pendingOutputBlock_" + std::to_string(i));
     297        Value * pending = b->bitCast(pendingOutput[i]);
     298        b->storeOutputStreamBlock("compressedOutput", b->getInt32(i), nextOutputBlk, pending);
     299    }
     300    b->CreateBr(updateProducedCount);
     301    b->SetInsertPoint(updateProducedCount);
     302     Value * produced = b->getProducedItemCount("compressedOutput");
    285303    produced = b->CreateAdd(produced, b->CreateMul(nextOutputBlk, bitBlockWidthConst));
    286     produced = b->CreateSelect(mIsFinal, b->CreateAdd(produced, blockPopCount), produced);
     304    produced = b->CreateSelect(mIsFinal, b->CreateAdd(produced, newPending), produced);
    287305    b->setProducedItemCount("compressedOutput", produced);
    288306}
    289    
    290    
    291    
    292307
    293308SwizzledDeleteByPEXTkernel::SwizzledDeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned streamCount, unsigned PEXT_width)
Note: See TracChangeset for help on using the changeset viewer.