Ignore:
Timestamp:
Jan 28, 2017, 3:12:03 PM (2 years ago)
Author:
nmedfort
Message:

Start of work to simplify kernel writing. Removed generateDoBlockLogic method.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5283 r5285  
    107107    Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
    108108
    109     Value * blockNo = getScalarField(self, blockNoScalar);
     109    Value * blockNo = getBlockNo(self);
    110110
    111111    // A block is made up of 8 packs.  Get the pointer to the first pack (changes the type of the pointer only).
     
    173173    setProcessedItemCount(self, "sourceStream", processed);
    174174   
    175     setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, stride));
     175    setBlockNo(self, iBuilder->CreateUDiv(processed, stride));
    176176    // We have produced 4 output bytes for every 3 input bytes.
    177177    Value * totalProduced = iBuilder->CreateMul(iBuilder->CreateUDiv(processed, Const3), Const4);
     
    247247    setProcessedItemCount(self, "sourceStream", processed);
    248248
    249     setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, stride));
     249    setBlockNo(self, iBuilder->CreateUDiv(processed, stride));
    250250    // We have produced 4 output bytes for every 3 input bytes.  If the number of input
    251251    // bytes is not a multiple of 3, then we have one more output byte for each excess
     
    271271//                                   ba    bits to move 12 positions left
    272272//    xwvuts|  nlkjzy|  barqpm|  hgfedc    Target
    273 void radix64Kernel::generateDoBlockLogic(Value * self, Value * blockNo) const {
    274 
     273void radix64Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    275274    Value * step_right_6 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00C00000));
    276275    Value * step_left_8 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x003F0000));
     
    279278    Value * step_right_2 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x000000FC));
    280279    Value * step_left_12 = iBuilder->simd_fill(32, ConstantInt::get(iBuilder->getInt32Ty(), 0x00000003));
    281    
    282280    for (unsigned i = 0; i < 8; i++) {
    283281        Value * expandedStream = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    284282        Value * bytepack = iBuilder->CreateBlockAlignedLoad(expandedStream);
    285 
    286283        Value * right_6_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_6), 6);
    287284        Value * right_4_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_4), 4);
     285        Value * mid = iBuilder->simd_or(right_6_result, right_4_result);
    288286        Value * right_2_result = iBuilder->simd_srli(32, iBuilder->simd_and(bytepack, step_right_2), 2);
     287        mid = iBuilder->simd_or(mid, right_2_result);
    289288        Value * left_8_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_8), 8);
     289        mid = iBuilder->simd_or(mid, left_8_result);
    290290        Value * left_10_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_10), 10);
     291        mid = iBuilder->simd_or(mid, left_10_result);
    291292        Value * left_12_result = iBuilder->simd_slli(32, iBuilder->simd_and(bytepack, step_left_12), 12);
    292 
    293         Value * mid = right_6_result;
    294         mid = iBuilder->simd_or(mid, right_4_result);
    295         mid = iBuilder->simd_or(mid, right_2_result);
    296         mid = iBuilder->simd_or(mid, left_8_result);
    297         mid = iBuilder->simd_or(mid, left_10_result);
    298293        mid = iBuilder->simd_or(mid, left_12_result);
    299294        Value * radix64pack = iBuilder->bitCast(mid);
    300 
    301295        Value * radix64stream = getStream(self, "radix64stream",blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
    302296        iBuilder->CreateBlockAlignedStore(radix64pack, radix64stream);
     
    304298    Value * produced = getProducedItemCount(self, "radix64stream");
    305299    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
    306     setProducedItemCount(self, "radix64stream", produced);   
    307 }
    308 
    309 void radix64Kernel::generateFinalBlockMethod() const {
    310     auto savePoint = iBuilder->saveIP();
    311     Module * m = iBuilder->getModule();
    312     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    313     BasicBlock * radix64_fb_entry = BasicBlock::Create(iBuilder->getContext(), "radix64_fb_entry", finalBlockFunction, 0);
    314     iBuilder->SetInsertPoint(radix64_fb_entry);
    315     BasicBlock * radix64_loop = BasicBlock::Create(iBuilder->getContext(), "radix64_loop", finalBlockFunction, 0);
    316     BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", finalBlockFunction, 0);
    317     BasicBlock * handleRemainFirstByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainFirstByte", finalBlockFunction, 0);
    318     BasicBlock * handleRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainSecondByte", finalBlockFunction, 0);
    319     BasicBlock * handleNoRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleNoRemainSecondByte", finalBlockFunction, 0);
    320     BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", finalBlockFunction, 0);
     300    setProducedItemCount(self, "radix64stream", produced);
     301}
     302
     303void radix64Kernel::generateFinalBlockMethod(Function * function, Value *self, Value * remainingBytes, Value * blockNo) const {
     304
     305    BasicBlock * entry = iBuilder->GetInsertBlock();
     306    BasicBlock * radix64_loop = BasicBlock::Create(iBuilder->getContext(), "radix64_loop", function, 0);
     307    BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", function, 0);
     308    BasicBlock * handleRemainFirstByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainFirstByte", function, 0);
     309    BasicBlock * handleRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleRemainSecondByte", function, 0);
     310    BasicBlock * handleNoRemainSecondByte = BasicBlock::Create(iBuilder->getContext(), "handleNoRemainSecondByte", function, 0);
     311    BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", function, 0);
    321312    // Final Block arguments: self, remaining.
    322     Function::arg_iterator args = finalBlockFunction->arg_begin();
    323     Value * self = &*(args++);
    324     Value * remainingBytes = &*(args++);
    325313    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
    326314
    327315    const unsigned PACK_SIZE = iBuilder->getStride()/8;
    328316    Constant * packSize = iBuilder->getSize(PACK_SIZE);
    329     Value * blockNo = getScalarField(self, blockNoScalar);
    330317
    331318    Value * step_right_6 = iBuilder->simd_fill(32, iBuilder->getInt32(0x00C00000));
     
    336323    Value * step_left_12 = iBuilder->simd_fill(32, iBuilder->getInt32(0x00000003));
    337324
    338 
    339325    // Enter the loop only if there is at least one byte remaining to process.
    340326    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, radix64_loop);
     
    343329    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    344330    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    345     idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), radix64_fb_entry);
    346     loopRemain->addIncoming(remainingBytes, radix64_fb_entry);
     331    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
     332    loopRemain->addIncoming(remainingBytes, entry);
    347333
    348334    Value * expandedStreamLoopPtr = getStream(self, "expandedStream", blockNo, iBuilder->getInt32(0), idx);
     
    426412    Value * produced = iBuilder->CreateAdd(getProducedItemCount(self, "radix64stream"), iBuilder->CreateAdd(remainingBytes, outputNumberAdd));
    427413    setProducedItemCount(self, "radix64stream", produced);
    428 
    429     iBuilder->CreateRetVoid();
    430     iBuilder->restoreIP(savePoint);
    431 }
    432 
    433 void radix64Kernel::generateDoBlockMethod() const {
    434     auto savePoint = iBuilder->saveIP();
    435 
    436     Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
    437 
    438     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    439 
    440     Value * self = getParameter(doBlockFunction, "self");
    441     Value * blockNo = getScalarField(self, blockNoScalar);
    442 
    443     generateDoBlockLogic(self, blockNo);
    444 
    445     iBuilder->CreateRetVoid();
    446     iBuilder->restoreIP(savePoint);
    447 }
    448 
    449 void base64Kernel::generateDoBlockLogic(Value * self, Value * blockNo) const {       
     414}
     415
     416void base64Kernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
    450417    for (unsigned i = 0; i < 8; i++) {
    451418        Value * radix64stream_ptr = getStream(self, "radix64stream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
     
    474441}
    475442
    476 
    477443// Special processing for the base 64 format.   The output must always contain a multiple
    478444// of 4 bytes.   When the number of radix 64 values is not a multiple of 4,
    479445// padding bytes are appended so that the output length reaches the next multiple of 4.
    480 void base64Kernel::generateFinalBlockMethod() const {
    481     auto savePoint = iBuilder->saveIP();
    482     Module * m = iBuilder->getModule();
    483     Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    484     BasicBlock * base64_fb_entry = BasicBlock::Create(iBuilder->getContext(), "base64_fb_entry", finalBlockFunction, 0);
    485     iBuilder->SetInsertPoint(base64_fb_entry);
    486     BasicBlock * base64_loop = BasicBlock::Create(iBuilder->getContext(), "base64_loop", finalBlockFunction, 0);
    487     BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", finalBlockFunction, 0);
    488     BasicBlock * doPadding = BasicBlock::Create(iBuilder->getContext(), "doPadding", finalBlockFunction, 0);
    489     BasicBlock * doPadding2 = BasicBlock::Create(iBuilder->getContext(), "doPadding2", finalBlockFunction, 0);
    490     BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", finalBlockFunction, 0);
    491     // Final Block arguments: self, remaining.
    492     Function::arg_iterator args = finalBlockFunction->arg_begin();
    493     Value * self = &*(args++);
    494     Value * remainingBytes = &*(args++);
     446void base64Kernel::generateFinalBlockMethod(Function * function, Value * self, Value * remainingBytes, Value * blockNo) const {
     447
     448    BasicBlock * entry = iBuilder->GetInsertBlock();
     449    BasicBlock * base64_loop = BasicBlock::Create(iBuilder->getContext(), "base64_loop", function, 0);
     450    BasicBlock * loopExit = BasicBlock::Create(iBuilder->getContext(), "loopExit", function, 0);
     451    BasicBlock * doPadding = BasicBlock::Create(iBuilder->getContext(), "doPadding", function, 0);
     452    BasicBlock * doPadding2 = BasicBlock::Create(iBuilder->getContext(), "doPadding2", function, 0);
     453    BasicBlock * fbExit = BasicBlock::Create(iBuilder->getContext(), "fbExit", function, 0);
     454
    495455    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
    496456    Value * padBytes = iBuilder->CreateSub(iBuilder->getSize(4), remainMod4);
     
    498458
    499459    Constant * packSize = iBuilder->getSize(iBuilder->getStride() / 8);
    500     Value * blockNo = getScalarField(self, blockNoScalar);
    501460
    502461    // Enter the loop only if there is at least one byte remaining to process.
    503462    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, base64_loop);
    504    
     463
    505464    iBuilder->SetInsertPoint(base64_loop);
    506465    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    507466    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    508     idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), base64_fb_entry);
    509     loopRemain->addIncoming(remainingBytes, base64_fb_entry);
     467    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
     468    loopRemain->addIncoming(remainingBytes, entry);
    510469    Value * radix64streamPtr = getStream(self, "radix64stream", blockNo, iBuilder->getInt32(0), idx);
    511470    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64streamPtr);
     
    542501    Value * produced = iBuilder->CreateAdd(getProducedItemCount(self, "base64stream"), iBuilder->CreateAdd(remainingBytes, padBytes));
    543502    setProducedItemCount(self, "base64stream", produced);
    544     iBuilder->CreateRetVoid();
    545     iBuilder->restoreIP(savePoint);
    546 }
    547 
    548 void base64Kernel::generateDoBlockMethod() const {
    549     auto savePoint = iBuilder->saveIP();
    550 
    551     Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
    552 
    553     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    554 
    555     Value * self = getParameter(doBlockFunction, "self");
    556     Value * blockNo = getScalarField(self, blockNoScalar);
    557 
    558     generateDoBlockLogic(self, blockNo);
    559 
    560     iBuilder->CreateRetVoid();
    561     iBuilder->restoreIP(savePoint);
    562503}
    563504
Note: See TracChangeset for help on using the changeset viewer.