Changeset 6078


Ignore:
Timestamp:
Jun 10, 2018, 6:05:40 PM (10 days ago)
Author:
cameron
Message:

Interim bug fix for StreamExpandKernel

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6071 r6078  
    185185    b->SetInsertPoint(expandLoop);
    186186    PHINode * blockNoPhi = b->CreatePHI(b->getSizeTy(), 2);
    187     PHINode * pendingItemsPhi = b->CreatePHI(b->getSizeTy(), 2);
     187    PHINode * pendingOffsetPhi = b->CreatePHI(b->getSizeTy(), 2);
    188188    PHINode * pendingDataPhi[mSelectedStreamCount];
    189189    blockNoPhi->addIncoming(ZERO, entry);
    190     pendingItemsPhi->addIncoming(sourceOffset, entry);
     190    pendingOffsetPhi->addIncoming(sourceOffset, entry);
    191191    for (unsigned i = 0; i < mSelectedStreamCount; i++) {
    192192        pendingDataPhi[i] = b->CreatePHI(b->getBitBlockType(), 2);
     
    197197    // has been saved in the kernel state, determine the next full block number
    198198    // for loading source streams.
    199     Value * pendingBlockEnd = b->CreateAdd(pendingItemsPhi, bw_sub1Const);
     199    Value * pendingBlockEnd = b->CreateAdd(pendingOffsetPhi, bw_sub1Const);
    200200    Value * srcBlockNo = b->CreateUDiv(pendingBlockEnd, bwConst);
    201201   
    202202    // Calculate the field values and offsets we need for assembling a
    203203    // full block of source bits.  Assembly will use the following operations.
    204     // A = b->simd_srl(fw, b->mvmd_dsll(fw, source, pending, field_offset_lo), bit_offset);
    205     // B = b->simd_sll(fw, b->mvmd_dsll(fw, source, pending, field_offset_hi), shift_fwd);
     204    // A = b->simd_srlv(fw, b->mvmd_dsll(fw, source, pending, field_offset_lo), bit_offset);
     205    // B = b->simd_sllv(fw, b->mvmd_dsll(fw, source, pending, field_offset_hi), shift_fwd);
    206206    // all_source_bits = simd_or(A, B);
    207     Value * pendingOffset = b->CreateURem(pendingItemsPhi, bwConst);
    208     Value * field_offset_lo =  b->CreateUDiv(pendingOffset, fwConst);
     207    Value * pendingOffset = b->CreateURem(pendingOffsetPhi, bwConst);
     208    Value * pendingItems = b->CreateURem(b->CreateSub(bwConst, pendingOffset), bwConst);
     209    Value * field_offset_lo = b->CreateUDiv(b->CreateAdd(pendingItems, fw_sub1Const), fwConst);
    209210    Value * bit_offset = b->simd_fill(fw, b->CreateURem(pendingOffset, fwConst));
    210211    // Carefully avoid a shift by the full field width (which gives a poison value).
    211212    // field_offset_lo + 1 unless the bit_offset is 0, in which case it is just field_offset_lo.
    212     Value * field_offset_hi =  b->CreateUDiv(b->CreateAdd(pendingOffset, fw_sub1Const), fwConst);
     213    Value * field_offset_hi =  b->CreateUDiv(pendingItems, fwConst);
    213214    // fw - bit_offset, unless bit_offset is 0, in which case, the shift_fwd is 0.
    214215    Value * shift_fwd = b->CreateURem(b->CreateSub(fwSplat, bit_offset), fwSplat);
     
    235236
    236237    // Now load and process source streams.
     238    Value * source[mSelectedStreamCount];
    237239    for (unsigned i = 0; i < mSelectedStreamCount; i++) {
    238         Value * source = b->loadInputStreamBlock("source", b->getInt32(mSelectedStreamBase + i), srcBlockNo);
    239         Value * A = b->simd_srlv(fw, b->mvmd_dsll(fw, source, pendingDataPhi[i], field_offset_lo), bit_offset);
    240         Value * B = b->simd_sllv(fw, b->mvmd_dsll(fw, source, pendingDataPhi[i], field_offset_hi), shift_fwd);
     240        source[i] = b->loadInputStreamBlock("source", b->getInt32(mSelectedStreamBase + i), srcBlockNo);
     241        Value * A = b->simd_srlv(fw, b->mvmd_dsll(fw, source[i], pendingDataPhi[i], field_offset_lo), bit_offset);
     242        Value * B = b->simd_sllv(fw, b->mvmd_dsll(fw, source[i], pendingDataPhi[i], field_offset_hi), shift_fwd);
    241243        Value * full_source_block = b->simd_or(A, B);
    242244        Value * C = b->simd_srlv(fw, b->mvmd_shuffle(fw, full_source_block, source_field_lo), source_shift_lo);
     
    244246        Value * output = b->bitCast(b->simd_or(C, D));
    245247        b->storeOutputStreamBlock("output", b->getInt32(i), blockNoPhi, output);
    246         pendingDataPhi[i]->addIncoming(source, expandLoop);
     248        pendingDataPhi[i]->addIncoming(source[i], expandLoop);
    247249    }
    248250    //
     
    251253    Value * nextBlk = b->CreateAdd(blockNoPhi, b->getSize(1));
    252254    blockNoPhi->addIncoming(nextBlk, expandLoop);
    253     Value * newPending = b->CreateAdd(pendingItemsPhi, blockPopCount);
    254     pendingItemsPhi->addIncoming(newPending, expandLoop);
     255    Value * newPending = b->CreateAdd(pendingOffsetPhi, blockPopCount);
     256    pendingOffsetPhi->addIncoming(newPending, expandLoop);
    255257    //
    256258    // Now continue the loop if there are more blocks to process.
Note: See TracChangeset for help on using the changeset viewer.