Changeset 5277


Ignore:
Timestamp:
Jan 25, 2017, 4:25:24 PM (10 months ago)
Author:
cameron
Message:

radix64/base64 fixes

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5267 r5277  
    5858    fileSize->setName("fileSize");
    5959
    60     //Round up to a multiple of 4.
    61     const unsigned segmentSize = ((codegen::SegmentSize + 3)/4) * 4;
     60    //Round up to a multiple of 3.
     61    const unsigned segmentSize = ((codegen::SegmentSize + 2)/3) * 3;
    6262   
    6363    const unsigned bufferSegments = codegen::BufferSegments;
     
    6565    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    6666
    67     CircularBuffer Expanded3_4Out(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments * 16);
    68     CircularBuffer Radix64out(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments * 16);
    69     LinearCopybackBuffer Base64out(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments * 16 + 2);
     67    CircularBuffer Expanded3_4Out(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * 4/3 * bufferSegments);
     68    CircularBuffer Radix64out(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * 4/3 * bufferSegments);
     69    LinearCopybackBuffer Base64out(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * 4/3 * bufferSegments);
    7070   
    71     MMapSourceKernel mmapK(iBuilder, segmentSize * bufferSegments * 16);
     71    MMapSourceKernel mmapK(iBuilder, segmentSize);
    7272    mmapK.generateKernel({}, {&ByteStream});
    7373    mmapK.setInitialArguments({fileSize});
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5276 r5277  
    6868    BasicBlock * step3store2 = BasicBlock::Create(iBuilder->getContext(), "step3store2", doSegmentFunction, 0);
    6969    BasicBlock * itemsDone = BasicBlock::Create(iBuilder->getContext(), "itemsDone", doSegmentFunction, 0);
    70     BasicBlock * setTermination = BasicBlock::Create(iBuilder->getContext(), "setTermination", doSegmentFunction, 0);
     70    BasicBlock * expand3_4_final = BasicBlock::Create(iBuilder->getContext(), "expand3_4_final", doSegmentFunction, 0);
    7171    BasicBlock * expand3_4_exit = BasicBlock::Create(iBuilder->getContext(), "expand3_4_exit", doSegmentFunction, 0);
    72     BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
    7372   
    7473    // Determine the required shufflevector constants.
     
    9493    Constant * Const3 = iBuilder->getSize(3);
    9594    Constant * Const4 = iBuilder->getSize(4);
     95    Constant * tripleBlockSize = iBuilder->getSize(3 * iBuilder->getStride());
    9696    Constant * stride = iBuilder->getSize(iBuilder->getStride());
    9797    Constant * packSize = iBuilder->getSize(PACK_SIZE);
    98     Constant * loopItemCount = iBuilder->getSize(3 * PACK_SIZE); // 3 packs per loop.
     98    Constant * triplePackSize = iBuilder->getSize(3 * PACK_SIZE); // 3 packs per loop.
    9999    UndefValue * undefPack = UndefValue::get(iBuilder->fwVectorType(8));
    100100   
     
    108108    Value * itemsAvail = iBuilder->CreateSub(producerPos, processed);
    109109   
    110     // Except for the final segment, we always process an integral number of triple blocks.
    111     Value * tripleBlocksToDo = iBuilder->CreateMul(blocksToDo, Const3);
    112     Constant * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
    113     Value * tripleItemMax = iBuilder->CreateMul(tripleBlocksToDo, blockItems);
    114 
    115     Value * lessThanFullSegment = iBuilder->CreateICmpULT(itemsAvail, tripleItemMax);
    116     Value * tripleBlockItems = iBuilder->CreateSelect(lessThanFullSegment, itemsAvail, tripleItemMax);
    117 
    118     Value * endSignal = iBuilder->CreateLoad(mStreamSetInputBuffers[0]->getEndOfInputPtr(streamStructPtr));
    119     Value * inFinalSegment = iBuilder->CreateAnd(endSignal, lessThanFullSegment);
    120     Value * itemsToDo = iBuilder->CreateSelect(inFinalSegment, itemsAvail, tripleBlockItems);
    121 
    122 //    iBuilder->CallPrintInt("itemsToDo", itemsToDo);
     110    //
     111    // The main loop processes 3 packs of data at a time.  For doFinal
     112    // processing, process all the remaining sets of 3 packs, otherwise
     113    // process in multiples of 3 full blocks of data.
     114    //
     115    Value * loopDivisor = iBuilder->CreateSelect(doFinal, triplePackSize, tripleBlockSize);
     116    Value * excessItems = iBuilder->CreateURem(itemsAvail, loopDivisor);
     117    Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
    123118
    124119    Value * blockNo = getScalarField(self, blockNoScalar);
     
    131126    Value * outputPackPtr = getStream(self, "expandedStream", outputBlockNo, iBuilder->getInt32(0), iBuilder->getInt32(0));
    132127
    133     Value * hasFullLoop = iBuilder->CreateICmpUGE(itemsToDo, loopItemCount);
    134 
     128    Value * hasFullLoop = iBuilder->CreateICmpUGE(loopItemsToDo, triplePackSize);
    135129
    136130    iBuilder->CreateCondBr(hasFullLoop, expand_3_4_loop, expand3_4_loop_exit);
     
    142136    loopInput_ptr->addIncoming(sourcePackPtr, expand2_3entry);
    143137    loopOutput_ptr->addIncoming(outputPackPtr, expand2_3entry);
    144     loopItemsRemain->addIncoming(itemsToDo, expand2_3entry);
     138    loopItemsRemain->addIncoming(loopItemsToDo, expand2_3entry);
    145139
    146140    // Step 1 of the main loop.
     
    165159
    166160    Value * loopNextInputPack = iBuilder->CreateGEP(loopInput_ptr, iBuilder->getInt32(3));
    167 
    168 
    169 
    170     Value * remainingItems = iBuilder->CreateSub(loopItemsRemain, loopItemCount);
    171 
    172     Value * loopProcessed = iBuilder->CreateSub(itemsToDo, remainingItems);
    173     loopProcessed = iBuilder->CreateMul(iBuilder->CreateUDiv(loopProcessed, iBuilder->getInt64(3)), iBuilder->getInt64(4));
     161    Value * remainingItems = iBuilder->CreateSub(loopItemsRemain, triplePackSize);
    174162
    175163    Value * loopNextOutputPack;
     
    180168    loopItemsRemain->addIncoming(remainingItems, expand_3_4_loop);
    181169
    182     Value * continueLoop = iBuilder->CreateICmpUGE(remainingItems, loopItemCount);
     170    Value * continueLoop = iBuilder->CreateICmpUGE(remainingItems, triplePackSize);
    183171    iBuilder->CreateCondBr(continueLoop, expand_3_4_loop, expand3_4_loop_exit);
    184 
    185     // Except for the final segment, the number of items remaining is now 0.
    186     // For the final segment, less than loopItemCount items remain.
     172   
    187173    iBuilder->SetInsertPoint(expand3_4_loop_exit);
    188174    PHINode * loopExitInput_ptr = iBuilder->CreatePHI(sourcePackPtr->getType(), 2);
    189175    PHINode * loopExitOutput_ptr = iBuilder->CreatePHI(outputPackPtr->getType(), 2);
    190     PHINode * loopExitItemsRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    191176    loopExitInput_ptr->addIncoming(sourcePackPtr, expand2_3entry);
    192177    loopExitOutput_ptr->addIncoming(outputPackPtr, expand2_3entry);
    193     loopExitItemsRemain->addIncoming(itemsToDo, expand2_3entry);
    194178    loopExitInput_ptr->addIncoming(loopNextInputPack, expand_3_4_loop);
    195179    loopExitOutput_ptr->addIncoming(loopNextOutputPack, expand_3_4_loop);
    196     loopExitItemsRemain->addIncoming(remainingItems, expand_3_4_loop);
     180
     181    // Update the produced and processed items count based on the loopItemsToDo value.
     182    processed = iBuilder->CreateAdd(processed, loopItemsToDo);
     183    setProcessedItemCount(self, "sourceStream", processed);
     184   
     185    setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, stride));
     186    // We have produced 4 output bytes for every 3 input bytes.
     187    Value * totalProduced = iBuilder->CreateMul(iBuilder->CreateUDiv(processed, Const3), Const4);
     188    setProducedItemCount(self, "expandedStream", totalProduced);
     189   
     190    // Except for final segment processing, we are done.
     191    iBuilder->CreateCondBr(doFinal, expand3_4_final, expand3_4_exit);
     192
     193    // Final segment processing.   Less than a triplePack remains.
     194    iBuilder->SetInsertPoint(expand3_4_final);
     195   
    197196    // There may be one or two remaining full packs and/or a partial pack.
    198197    //
     
    205204    // (f) 2N+1 .. 9N/4 remaining items: do Steps 1 and 2, do Step 3 up to the first write only.
    206205    // (g) 9N/4+1 .. 3N - 1 remaining items: do Steps 1, 2 and 3.
    207     Value * condition_a = iBuilder->CreateICmpEQ(loopExitItemsRemain, ConstantInt::getNullValue(iBuilder->getSizeTy()));
     206    Value * condition_a = iBuilder->CreateICmpEQ(excessItems, ConstantInt::getNullValue(iBuilder->getSizeTy()));
    208207    iBuilder->CreateCondBr(condition_a, itemsDone, finalStep1);
    209208    // Final Step1 processing
     
    212211    expand0 = iBuilder->bitCast(iBuilder->CreateShuffleVector(undefPack, pack0, expand_3_4_shuffle[0]));
    213212    iBuilder->CreateAlignedStore(expand0, loopExitOutput_ptr, packAlign);
    214     Value * condition_b = iBuilder->CreateICmpULE(loopExitItemsRemain, iBuilder->getSize(3 * PACK_SIZE/4));
     213    Value * condition_b = iBuilder->CreateICmpULE(excessItems, iBuilder->getSize(3 * PACK_SIZE/4));
    215214    iBuilder->CreateCondBr(condition_b, itemsDone, finalStep2);
    216215    // Final Step 2 processing
    217216    iBuilder->SetInsertPoint(finalStep2);
    218     Value * condition_c = iBuilder->CreateICmpULE(loopExitItemsRemain, packSize);
     217    Value * condition_c = iBuilder->CreateICmpULE(excessItems, packSize);
    219218    iBuilder->CreateCondBr(condition_c, step2store, step2load);
    220219    iBuilder->SetInsertPoint(step2load);
     
    229228    expand1 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack0, pack1phi, expand_3_4_shuffle[1]));
    230229    iBuilder->CreateAlignedStore(expand1, outPack1_ptr, packAlign);
    231     Value * condition_d = iBuilder->CreateICmpULE(loopExitItemsRemain, iBuilder->getSize(6 * PACK_SIZE/4));
     230    Value * condition_d = iBuilder->CreateICmpULE(excessItems, iBuilder->getSize(6 * PACK_SIZE/4));
    232231    iBuilder->CreateCondBr(condition_d, itemsDone, finalStep3);
    233232    // Final Step 3
    234233    iBuilder->SetInsertPoint(finalStep3);
    235     Value * condition_e = iBuilder->CreateICmpULE(loopExitItemsRemain, iBuilder->getSize(2 * PACK_SIZE));
     234    Value * condition_e = iBuilder->CreateICmpULE(excessItems, iBuilder->getSize(2 * PACK_SIZE));
    236235    iBuilder->CreateCondBr(condition_e, step3store, step3load);
    237236    iBuilder->SetInsertPoint(step3load);
     
    246245    expand2 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack1phi, pack2phi, expand_3_4_shuffle[2]));
    247246    iBuilder->CreateAlignedStore(expand2, outPack2_ptr, packAlign);
    248     Value * condition_f = iBuilder->CreateICmpULE(loopExitItemsRemain, iBuilder->getSize(9 * PACK_SIZE/4));
     247    Value * condition_f = iBuilder->CreateICmpULE(excessItems, iBuilder->getSize(9 * PACK_SIZE/4));
    249248    iBuilder->CreateCondBr(condition_f, itemsDone, step3store2);
    250249    iBuilder->SetInsertPoint(step3store2);
     
    255254    //
    256255    iBuilder->SetInsertPoint(itemsDone);
    257 
    258     processed = iBuilder->CreateAdd(processed, itemsToDo);
     256    processed = iBuilder->CreateAdd(processed, excessItems);
    259257    setProcessedItemCount(self, "sourceStream", processed);
    260258
     
    263261    // bytes is not a multiple of 3, then we have one more output byte for each excess
    264262    // input byte.
    265     Value * totalProduced = iBuilder->CreateAdd(iBuilder->CreateMul(iBuilder->CreateUDiv(processed, Const3), Const4), iBuilder->CreateURem(processed, Const3));
     263    totalProduced = iBuilder->CreateAdd(iBuilder->CreateMul(iBuilder->CreateUDiv(processed, Const3), Const4), iBuilder->CreateURem(processed, Const3));
    266264    setProducedItemCount(self, "expandedStream", totalProduced);
    267265   
    268     iBuilder->CreateCondBr(inFinalSegment, setTermination, expand3_4_exit);
    269     iBuilder->SetInsertPoint(setTermination);
    270 #ifndef NDEBUG
    271 //    iBuilder->CallPrintInt(mKernelName + " termination in segment ", segmentNo);
    272 #endif
    273     setTerminationSignal(self);
    274     mStreamSetOutputBuffers[0]->setEndOfInput(ssStructPtr);
    275266    iBuilder->CreateBr(expand3_4_exit);
    276267    iBuilder->SetInsertPoint(expand3_4_exit);
    277     // Must be the last action, for synchronization.
    278     iBuilder->CreateBr(finalExit);
    279    
    280     iBuilder->SetInsertPoint(finalExit);
    281268    iBuilder->CreateRetVoid();
    282269    iBuilder->restoreIP(savePoint);
    283270}
    284271
    285 
    286 // The doBlock method is deprecated.   But in case it is used, just call doSegment with
    287 // 1 as the number of blocks to do.
    288 void expand3_4Kernel::generateDoBlockMethod() const {
    289     auto savePoint = iBuilder->saveIP();
    290     Module * m = iBuilder->getModule();
    291     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    292     Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
    293     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    294     Value * self = getParameter(doBlockFunction, "self");
    295     iBuilder->CreateCall(doSegmentFunction, {self, iBuilder->getSize(1)});
    296     iBuilder->CreateRetVoid();
    297     iBuilder->restoreIP(savePoint);
    298 }
    299272
    300273// Radix 64 determination, converting 3 bytes to 4 6-bit values.
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5267 r5277  
    2525   
    2626private:
    27     void generateDoBlockMethod() const override;
    2827    void generateDoSegmentMethod() const override;
    2928   
Note: See TracChangeset for help on using the changeset viewer.