Ignore:
Timestamp:
May 16, 2018, 10:02:54 PM (15 months ago)
Author:
cameron
Message:

Some fixes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6045 r6046  
    210210    // Calculate the field values and offsets we need for assembling a
    211211    // full block of source bits.  Assembly will use the following operations.
    212     // A = b->simd_srli(fw, b->mvmd_dslli(fw, source, pending, field_offset_lo), bit_offset);
    213     // B = b->simd_slli(fw, b->mvmd_dslli(fw, source, pending, field_offset_hi), shift_fwd);
     212    // A = b->simd_srl(fw, b->mvmd_dsll(fw, source, pending, field_offset_lo), bit_offset);
     213    // B = b->simd_sll(fw, b->mvmd_dsll(fw, source, pending, field_offset_hi), shift_fwd);
    214214    // all_source_bits = simd_or(A, B);
    215215    Value * pendingOffset = b->CreateURem(pendingBlockEnd, bwConst);
    216     Value * field_offset_lo =  b->simd_fill(fw, b->CreateUDiv(pendingOffset, fwConst));
     216    Value * field_offset_lo =  b->CreateUDiv(pendingOffset, fwConst);
    217217    Value * bit_offset = b->simd_fill(fw, b->CreateURem(pendingOffset, fwConst));
    218218   
    219219    // Carefully avoid a shift by the full field width (which gives a poison value).
    220220    // field_offset_lo + 1 unless the bit_offset is 0, in which case it is just field_offset_lo.
    221     Value * field_offset_hi =  b->simd_fill(fw, b->CreateUDiv(b->CreateAdd(pendingOffset, fw_sub1Const), fwConst));
     221    Value * field_offset_hi =  b->CreateUDiv(b->CreateAdd(pendingOffset, fw_sub1Const), fwConst);
    222222    // fw - bit_offset, unless bit_offset is 0, in which case, the shift_fwd is 0.
    223223    Value * shift_fwd = b->CreateURem(b->CreateSub(fwSplat, bit_offset), fwSplat);
     
    273273        b->setScalarField("pendingSourceBlock_" + std::to_string(i), b->bitCast(pendingDataPhi[i]));
    274274    }
    275     b->getModule()->dump();
    276275}
    277276
     
    338337    Value * extractionMaskPtr = kb->getInputStreamBlockPtr("depositMask", ZERO, blockOffsetPhi);
    339338    extractionMaskPtr = kb->CreatePointerCast(extractionMaskPtr, fieldPtrTy);
     339    for (unsigned i = 0; i < fieldsPerBlock; i++) {
     340        mask[i] = kb->CreateLoad(kb->CreateGEP(extractionMaskPtr, kb->getInt32(i)));
     341    }
    340342    for (unsigned j = 0; j < mStreamCount; ++j) {
    341343        Value * inputPtr = kb->getInputStreamBlockPtr("inputStreamSet", kb->getInt32(j), blockOffsetPhi);
Note: See TracChangeset for help on using the changeset viewer.