Ignore:
Timestamp:
Jan 14, 2017, 3:49:56 PM (2 years ago)
Author:
nmedfort
Message:

Changes working towards simplifying access to stream elements, plus some modifications to simplify includes / forward declarations within the CodeGen library.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5247 r5260  
    33#include "IR_Gen/idisa_builder.h"
    44#include <llvm/IR/Type.h>
     5#include <llvm/IR/Module.h>
    56#include <iostream>
    67#include <stdint.h>
     
    89#include <llvm/Support/raw_ostream.h>
    910
    10 
     11using namespace llvm;
    1112
    1213namespace kernel{
     
    4041}
    4142               
    42 void p2sKernel::generateDoBlockMethod() const {
     43void P2SKernel::generateDoBlockMethod() const {
    4344    auto savePoint = iBuilder->saveIP();
    4445    Module * m = iBuilder->getModule();
     
    5051    Value * self = getParameter(doBlockFunction, "self");
    5152    Value * blockNo = getScalarField(self, blockNoScalar);
    52     Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
    53     Value * byteStreamBlock_ptr = getStreamSetBlockPtr(self, "byteStream", blockNo);
    54 
    5553    Value * p_bitblock[8];
    5654    for (unsigned i = 0; i < 8; i++) {
    57         p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
     55        Value * ptr = getStream(self, "basisBits", blockNo, iBuilder->getInt32(i));
     56        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(ptr);
    5857    }
    5958    Value * s_bytepack[8];
    6059    p2s(iBuilder, p_bitblock, s_bytepack);
    6160    for (unsigned j = 0; j < 8; ++j) {
    62         iBuilder->CreateBlockAlignedStore(s_bytepack[j], byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
    63     }
    64     iBuilder->CreateRetVoid();
    65     iBuilder->restoreIP(savePoint);
    66 }
    67        
    68 void p2sKernel_withCompressedOutput::generateDoBlockMethod() const {
    69     auto savePoint = iBuilder->saveIP();
    70     Module * m = iBuilder->getModule();
    71     Type * i8PtrTy = iBuilder->getInt8PtrTy();
    72     Type * i32 = iBuilder->getIntNTy(32);
    73     Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0);
    74    
    75     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    76    
     61        Value * ptr = getStream(self, "byteStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j));
     62        iBuilder->CreateBlockAlignedStore(s_bytepack[j], ptr);
     63    }
     64    iBuilder->CreateRetVoid();
     65    iBuilder->restoreIP(savePoint);
     66}
     67
     68void P2SKernelWithCompressedOutput::generateDoBlockMethod() const {
     69    auto savePoint = iBuilder->saveIP();
     70    Module * m = iBuilder->getModule();
     71    Type * i8PtrTy = iBuilder->getInt8PtrTy();
     72    Type * i32 = iBuilder->getIntNTy(32);
     73    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0);
     74
     75    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     76
    7777    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    7878    Value * self = getParameter(doBlockFunction, "self");
    7979    Value * blockNo = getScalarField(self, blockNoScalar);
    80     Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
    81     Value * delCountBlock_ptr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
    82     Value * byteStreamBlock_ptr = getStreamSetBlockPtr(self, "byteStream", blockNo);
    83    
     80
     81
     82
    8483    Value * basisBits[8];
    8584    for (unsigned i = 0; i < 8; i++) {
    86         basisBits[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
     85        Value * basisBitsBlock_ptr = getStream(self, "basisBits", blockNo, iBuilder->getInt32(i));
     86        basisBits[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr);
    8787    }
    8888    Value * bytePack[8];
    8989    p2s(iBuilder, basisBits, bytePack);
    90    
     90
    9191    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
    92    
    93     Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    94    
    95     Value * output_ptr = iBuilder->CreateBitCast(byteStreamBlock_ptr, i8PtrTy);
    96     Value * offset = ConstantInt::get(i32, 0);
    97    
     92    Value * delCountBlock_ptr = getStream(self, "deletionCounts", blockNo, iBuilder->getInt32(0));
     93    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
     94
     95    Value * output_ptr = getStreamView(i8PtrTy, self, "byteStream", blockNo, iBuilder->getInt32(0));
     96    Value * offset = iBuilder->getInt32(0);
    9897    for (unsigned j = 0; j < 8; ++j) {
    9998        iBuilder->CreateAlignedStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy), 1);
     
    104103}
    105104
    106 void p2s_16Kernel::generateDoBlockMethod() const {
    107     auto savePoint = iBuilder->saveIP();
    108     Module * m = iBuilder->getModule();
    109    
    110     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    111    
    112     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    113     Value * self = getParameter(doBlockFunction, "self");
    114     Value * blockNo = getScalarField(self, blockNoScalar);
    115     Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
    116     Value * i16StreamBlock_ptr = getStreamSetBlockPtr(self, "i16Stream", blockNo);
     105void P2S16Kernel::generateDoBlockMethod() const {
     106    auto savePoint = iBuilder->saveIP();
     107    Module * m = iBuilder->getModule();
     108   
     109    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     110   
     111    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     112    Value * self = getParameter(doBlockFunction, "self");
     113    Value * blockNo = getScalarField(self, blockNoScalar);   
    117114   
    118115    Value * hi_input[8];
    119116    for (unsigned j = 0; j < 8; ++j) {
    120         hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
     117        Value * ptr = getStream(self, "basisBits", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j));
     118        hi_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
    121119    }
    122120    Value * hi_bytes[8];
     
    125123    Value * lo_input[8];
    126124    for (unsigned j = 0; j < 8; ++j) {
    127         lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
     125        Value * ptr = getStream(self, "basisBits", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j + 8));
     126        lo_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
    128127    }
    129128    Value * lo_bytes[8];
     
    133132        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    134133        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    135         // iBuilder->getInt32(0),
    136         iBuilder->CreateBlockAlignedStore(merge0, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2*j)});
    137         iBuilder->CreateBlockAlignedStore(merge1, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2*j+1)});
    138     }
    139     iBuilder->CreateRetVoid();
    140     iBuilder->restoreIP(savePoint);
    141 }
    142 
    143 void p2s_16Kernel_withCompressedOutput::generateDoBlockMethod() const {
     134        Value * ptr0 = getStream(self, "i16Stream", blockNo, iBuilder->getInt32(2 * j));
     135        iBuilder->CreateBlockAlignedStore(merge0, ptr0);
     136        Value * ptr1 = getStream(self, "i16Stream", blockNo, iBuilder->getInt32(2 * j + 1));
     137        iBuilder->CreateBlockAlignedStore(merge1, ptr1);
     138    }
     139    iBuilder->CreateRetVoid();
     140    iBuilder->restoreIP(savePoint);
     141}
     142
     143void P2S16KernelWithCompressedOutput::generateDoBlockMethod() const {
    144144    auto savePoint = iBuilder->saveIP();
    145145    Module * m = iBuilder->getModule();
     
    150150
    151151    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    152     Constant * stride = iBuilder->getSize(iBuilder->getStride());
    153152
    154153    Value * self = getParameter(doBlockFunction, "self");
    155154    Value * blockNo = getScalarField(self, blockNoScalar);
    156     Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
    157     Value * delCountBlock_ptr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
     155
     156    Value * hi_input[8];
     157    for (unsigned j = 0; j < 8; ++j) {
     158        Value * ptr = getStream(self, "basisBits", blockNo, iBuilder->getInt32(j));
     159        hi_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
     160    }
     161    Value * hi_bytes[8];
     162    p2s(iBuilder, hi_input, hi_bytes);
     163
     164    Value * lo_input[8];
     165    for (unsigned j = 0; j < 8; ++j) {
     166        Value * ptr = getStream(self, "basisBits", blockNo, iBuilder->getInt32(j + 8));
     167        lo_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
     168    }
     169    Value * lo_bytes[8];
     170    p2s(iBuilder, lo_input, lo_bytes);
     171
     172    Value * delCountBlock_ptr = getStream(self, "deletionCounts", blockNo, iBuilder->getInt32(0));
     173    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
     174
     175    PointerType * int16PtrTy = PointerType::get(iBuilder->getInt16Ty(), 0);
     176    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    158177    Value * i16UnitsGenerated = getProducedItemCount(self, "i16Stream"); // units generated to buffer
    159178    Value * i16BlockNo = iBuilder->CreateUDiv(i16UnitsGenerated, stride);
    160 
    161     Value * i16StreamBase_ptr = iBuilder->CreateBitCast(getStreamSetBlockPtr(self, "i16Stream", i16BlockNo), PointerType::get(iBuilder->getInt16Ty(), 0));
    162 
    163     Value * u16_output_ptr = iBuilder->CreateGEP(i16StreamBase_ptr, iBuilder->CreateURem(i16UnitsGenerated, stride));
    164 
    165 
    166     Value * hi_input[8];
    167     for (unsigned j = 0; j < 8; ++j) {
    168         hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    169     }
    170     Value * hi_bytes[8];
    171     p2s(iBuilder, hi_input, hi_bytes);
    172 
    173     Value * lo_input[8];
    174     for (unsigned j = 0; j < 8; ++j) {
    175         lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
    176     }
    177     Value * lo_bytes[8];
    178     p2s(iBuilder, lo_input, lo_bytes);
    179 
    180     const auto UTF_16_units_per_register = iBuilder->getBitBlockWidth() / 16;
    181 
    182     Value * unit_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    183 
     179    Value * u16_output_ptr = getStreamView(int16PtrTy, self, "i16Stream", i16BlockNo, iBuilder->CreateURem(i16UnitsGenerated, stride));
    184180    Value * offset = ConstantInt::get(i32, 0);
    185 
    186181    for (unsigned j = 0; j < 8; ++j) {
    187182        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    188183        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    189184        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
    190         offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j)), i32);
     185        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j)), i32);
    191186        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
    192         offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j+1)), i32);
    193     }
    194 
     187        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j + 1)), i32);
     188    }
    195189    i16UnitsGenerated = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    196190    setProducedItemCount(self, "i16Stream", i16UnitsGenerated);
     
    199193}
    200194
    201 void p2s_16Kernel_withCompressedOutput::generateFinalBlockMethod() const {
     195void P2S16KernelWithCompressedOutput::generateFinalBlockMethod() const {
    202196    auto savePoint = iBuilder->saveIP();
    203197    Module * m = iBuilder->getModule();
     
    214208    }
    215209    Value * i16UnitsGenerated = getProducedItemCount(self, "i16Stream"); // units generated to buffer
    216 
    217210    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    218211    i16UnitsGenerated = getProducedItemCount(self, "i16Stream"); // units generated to buffer
     
    226219}
    227220   
    228    
    229 }
     221}
Note: See TracChangeset for help on using the changeset viewer.