Changeset 5827


Ignore:
Timestamp:
Jan 11, 2018, 4:39:05 PM (8 months ago)
Author:
nmedfort
Message:

Compilation bug fix for CreateStreamCpy. Needs more testing.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5793 r5827  
    195195    const auto fieldWidth = getFieldWidth(itemWidth * itemAlignment, blockWidth);
    196196    const auto alignment = (fieldWidth + 7) / 8;
     197
    197198    if (LLVM_LIKELY(itemWidth < fieldWidth)) {
    198199        const auto factor = fieldWidth / itemWidth;
     
    237238
    238239    Value * const n = buf->getStreamSetCount(this, getStreamHandle(name));
     240
    239241    if (isConstantOne(n) || fieldWidth == blockWidth || (isConstantZero(targetOffset) && isConstantZero(sourceOffset))) {
    240242        if (isConstantOne(n)) {
     
    262264        PointerType * const blockPtrTy = blockTy->getPointerTo();
    263265
    264         target = CreatePointerCast(target, blockPtrTy);
    265         source = CreatePointerCast(source, blockPtrTy);
     266        target = CreatePointerCast(target, blockPtrTy, "target");
     267        source = CreatePointerCast(source, blockPtrTy, "source");
    266268
    267269        assert ((blockWidth % fieldWidth) == 0);
     
    269271        VectorType * const shiftTy = VectorType::get(fieldWidthTy, blockWidth / fieldWidth);
    270272        Constant * const width = getSize(blockWidth / itemWidth);
     273        Constant * const ZERO = getSize(0);
     274        Constant * const ONE = getSize(1);
    271275        BasicBlock * const entry = GetInsertBlock();
    272276
     
    290294            Value * const blocksToCopy = CreateMul(CreateUDiv(itemsToCopy, width), n);
    291295            Value * const offset = CreateURem(sourceOffset, width);
     296            Value * const offsetVector = simd_fill(fieldWidth, CreateTrunc(offset, fieldWidthTy));
    292297            Value * const remaining = CreateSub(width, offset);
    293             Value * const trailing = CreateURem(CreateAdd(sourceOffset, itemsToCopy), width);
    294 
    295             BasicBlock * const streamCopy = CreateBasicBlock(name + "StreamCopy");
    296             BasicBlock * const streamCopyRemaining = CreateBasicBlock(name + "StreamCopyRemaining");
    297             BasicBlock * const streamCopyEnd = CreateBasicBlock(name + "StreamCopyEnd");
    298 
    299             CreateCondBr(CreateICmpNE(blocksToCopy, getSize(0)), streamCopy, streamCopyRemaining);
     298            Value * const remainingVector = simd_fill(fieldWidth, CreateTrunc(remaining, fieldWidthTy));
     299
     300            BasicBlock * const streamCopy = CreateBasicBlock(name + "PullCopy");
     301            BasicBlock * const streamCopyRemaining = CreateBasicBlock(name + "PullCopyRemaining");
     302            BasicBlock * const streamCopyEnd = CreateBasicBlock(name + "PullCopyEnd");
     303
     304            CreateCondBr(CreateICmpNE(blocksToCopy, ZERO), streamCopy, streamCopyRemaining);
    300305
    301306            SetInsertPoint(streamCopy);
     
    303308            i->addIncoming(n, entry);
    304309            Value * prior = CreateAlignedLoad(CreateGEP(source, CreateSub(i, n)), alignment);
    305             prior = CreateLShr(CreateBitCast(prior, shiftTy), offset);
     310            prior = CreateBitCast(CreateLShr(CreateBitCast(prior, shiftTy), offsetVector), blockTy);
    306311            Value * value = CreateAlignedLoad(CreateGEP(source, i), alignment);
    307             value = CreateShl(CreateBitCast(value, shiftTy), remaining);
    308             Value * const result = CreateBitCast(CreateOr(value, prior), blockTy);
    309             CreateAlignedStore(result, CreateGEP(target, i), alignment);
    310             Value * const next_i = CreateAdd(i, getSize(1));
     312            value = CreateBitCast(CreateShl(CreateBitCast(value, shiftTy), remainingVector), blockTy);
     313            CreateAlignedStore(CreateOr(value, prior), CreateGEP(target, i), alignment);
     314            Value * const next_i = CreateAdd(i, ONE);
    311315            i->addIncoming(next_i, streamCopy);
    312316            CreateCondBr(CreateICmpNE(next_i, blocksToCopy), streamCopy, streamCopyRemaining);
     
    314318            SetInsertPoint(streamCopyRemaining);
    315319            PHINode * const j = CreatePHI(getSizeTy(), 2);
    316             j->addIncoming(getSize(0), streamCopy);
    317             Value * k = CreateAdd(blocksToCopy, j);
    318             Value * final = CreateAlignedLoad(CreateGEP(source, k), alignment);
    319             final = CreateLShr(CreateBitCast(prior, shiftTy), trailing);
    320             CreateAlignedStore(final, CreateGEP(target, k), alignment);
    321             Value * const next_j = CreateAdd(i, getSize(1));
    322             i->addIncoming(next_j, streamCopyRemaining);
    323             CreateCondBr(CreateICmpNE(next_j, n), streamCopyRemaining, streamCopyEnd);
     320            j->addIncoming(blocksToCopy, entry);
     321            j->addIncoming(blocksToCopy, streamCopy);
     322            Value * final = CreateAlignedLoad(CreateGEP(source, j), alignment);
     323            final = CreateBitCast(CreateLShr(CreateBitCast(final, shiftTy), offsetVector), blockTy);
     324            CreateAlignedStore(final, CreateGEP(target, j), alignment);
     325            Value * const next_j = CreateAdd(j, ONE);
     326            j->addIncoming(next_j, streamCopyRemaining);
     327            CreateCondBr(CreateICmpNE(next_j, CreateAdd(blocksToCopy, n)), streamCopyRemaining, streamCopyEnd);
    324328
    325329            SetInsertPoint(streamCopyEnd);
     
    343347            */
    344348
    345             BasicBlock * const streamCopy = CreateBasicBlock(name + "StreamCopy");
    346             BasicBlock * const streamCopyRemainingCond = CreateBasicBlock(name + "StreamCopyRemainingCond");
    347             BasicBlock * const streamCopyRemaining = CreateBasicBlock(name + "StreamCopyRemaining");
    348             BasicBlock * const streamCopyEnd = CreateBasicBlock(name + "StreamCopyEnd");
    349 
    350             Value * const offset = CreateURem(targetOffset, width);
    351             Value * const copied = CreateSub(width, offset);
    352             Value * const mask = CreateLShr(Constant::getAllOnesValue(shiftTy), copied);
     349            BasicBlock * const streamCopy = CreateBasicBlock(name + "PushCopy");
     350            BasicBlock * const streamCopyRemainingCond = CreateBasicBlock(name + "PushCopyRemainingCond");
     351            BasicBlock * const streamCopyRemaining = CreateBasicBlock(name + "PushCopyRemaining");
     352            BasicBlock * const streamCopyEnd = CreateBasicBlock(name + "PushCopyEnd");
     353
     354            Value * const pos = CreateURem(targetOffset, width);
     355            Value * const copied = CreateSub(width, pos);
     356            Value * const copiedVector = simd_fill(fieldWidth, CreateTrunc(copied, fieldWidthTy));
     357            Value * const mask = CreateLShr(Constant::getAllOnesValue(shiftTy), copiedVector);
     358            Value * const offsetVector = simd_fill(fieldWidth, CreateTrunc(pos, fieldWidthTy));
     359
     360            CreateBr(streamCopy);
    353361
    354362            SetInsertPoint(streamCopy);
    355363            PHINode * const i = CreatePHI(getSizeTy(), 2);
    356             i->addIncoming(getSize(0), entry);
    357             Value * targetValue = CreateAlignedLoad(CreateGEP(target, i), alignment);
    358             targetValue = CreateAnd(CreateBitCast(targetValue, shiftTy), mask);
     364            i->addIncoming(ZERO, entry);
     365            Value * priorTargetValue = CreateAlignedLoad(CreateGEP(target, i), alignment);
     366            priorTargetValue = CreateBitCast(CreateAnd(CreateBitCast(priorTargetValue, shiftTy), mask), blockTy);
    359367            Value * sourceValue = CreateAlignedLoad(CreateGEP(source, i), alignment);
    360             sourceValue = CreateShl(CreateBitCast(sourceValue, shiftTy), offset);
    361             CreateAlignedStore(CreateOr(sourceValue, targetValue), CreateGEP(source, i), alignment);
    362             Value * const next_i = CreateAdd(i, getSize(1));
     368            sourceValue = CreateBitCast(CreateShl(CreateBitCast(sourceValue, shiftTy), offsetVector), blockTy);
     369            CreateAlignedStore(CreateOr(sourceValue, priorTargetValue), CreateGEP(target, i), alignment);
     370            Value * const next_i = CreateAdd(i, ONE);
    363371            i->addIncoming(next_i, streamCopy);
    364372            CreateCondBr(CreateICmpNE(next_i, n), streamCopy, streamCopyRemainingCond);
     
    370378            SetInsertPoint(streamCopyRemaining);
    371379            PHINode * const j = CreatePHI(getSizeTy(), 2);
    372             j->addIncoming(n, entry);
     380            j->addIncoming(n, streamCopyRemainingCond);
    373381            Value * prior = CreateAlignedLoad(CreateGEP(source, CreateSub(j, n)), alignment);
    374             prior = CreateShl(CreateBitCast(prior, shiftTy), offset);
     382            prior = CreateBitCast(CreateShl(CreateBitCast(prior, shiftTy), offsetVector), blockTy);
    375383            Value * value = CreateAlignedLoad(CreateGEP(source, j), alignment);
    376             value = CreateLShr(CreateBitCast(value, shiftTy), copied);
    377             Value * const result = CreateBitCast(CreateOr(value, prior), blockTy);
    378             CreateAlignedStore(result, CreateGEP(target, j), alignment);
    379             Value * const next_j = CreateAdd(j, getSize(1));
    380             j->addIncoming(next_j, streamCopy);
     384            value = CreateBitCast(CreateLShr(CreateBitCast(value, shiftTy), copiedVector), blockTy);
     385            CreateAlignedStore(CreateOr(value, prior), CreateGEP(target, j), alignment);
     386            Value * const next_j = CreateAdd(j, ONE);
     387            j->addIncoming(next_j, streamCopyRemaining);
    381388            CreateCondBr(CreateICmpNE(next_j, blocksToCopy), streamCopyRemaining, streamCopyEnd);
    382389
    383390            SetInsertPoint(streamCopyEnd);
    384391        }
    385 
    386392    }
    387393}
Note: See TracChangeset for help on using the changeset viewer.