Changeset 5852


Ignore:
Timestamp:
Jan 30, 2018, 1:51:06 AM (10 months ago)
Author:
xwa163
Message:
  1. Use MemCpy instead of streamCpy when handling buffer CopyBack
  2. Roll back the change to kernel_builder
  3. Fix a bug in SwizzledDeleteByPEXTkernel when the input data file is large
  4. Add large test cases for character_deletion
Location:
icGREP/icgrep-devel
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/QA/character_deletion/character_deletion_test.xml

    r5848 r5852  
    6565    <random-testcase name="random9000" random-deletion="true" length="9000" />
    6666    <random-testcase name="random11390" random-deletion="true" length="11390" />
     67    <random-testcase name="random18390" random-deletion="true" length="18390" />
     68    <random-testcase name="random38390" random-deletion="true" length="38390" />
     69    <random-testcase name="random118390" random-deletion="true" length="118390" />
    6770</character-deletion-test>
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5848 r5852  
    8484
    8585void SwizzledDeleteByPEXTkernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder, Value * remainingBytes) {
     86    const auto originalProducedItemCount = iBuilder->getProducedItemCount("outputSwizzle0");
    8687    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    8788    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
     
    9091    const auto masks = get_PEXT_masks(iBuilder, delMask);
    9192    generateProcessingLoop(iBuilder, masks, delMask);
     93
     94    const auto newProducedItemCount = iBuilder->getProducedItemCount("outputSwizzle0");
    9295    Constant * blockOffsetMask = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
    9396    Constant * outputIndexShift = iBuilder->getSize(std::log2(mDelCountFieldWidth));
     
    9699    Value * producedOffset = iBuilder->CreateAnd(outputProduced, blockOffsetMask);
    97100    Value * outputIndex = iBuilder->CreateLShr(producedOffset, outputIndexShift);
     101
     102    const auto deltaOutputIndex = iBuilder->CreateSub(
     103            iBuilder->CreateUDiv(newProducedItemCount, iBuilder->getSize(iBuilder->getBitBlockWidth())),
     104            iBuilder->CreateUDiv(originalProducedItemCount, iBuilder->getSize(iBuilder->getBitBlockWidth()))
     105    );
     106    outputIndex = iBuilder->CreateAdd(outputIndex, iBuilder->CreateMul(deltaOutputIndex, iBuilder->getSize(iBuilder->getBitBlockWidth() / mDelCountFieldWidth)));
     107
    98108    Value * pendingOffset = iBuilder->getScalarField("pendingOffset");
    99109
     
    102112        Value * pendingData = iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i));
    103113        Value * outputStreamPtr = iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
    104                 // TODO it seems that we do not need to store pending data here
    105         // iBuilder->CreateBlockAlignedStore(pendingData, iBuilder->CreateGEP(outputStreamPtr, outputIndex));
     114        iBuilder->CreateBlockAlignedStore(pendingData, iBuilder->CreateGEP(outputStreamPtr, outputIndex));
    106115    }
    107116    iBuilder->setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5841 r5852  
    11141114            b->SetInsertPoint(copyBack);
    11151115            Value * const baseAddress = b->getBaseAddress(name);
    1116             const auto copyAlignment = getItemAlignment(mStreamSetOutputs[i]);
    1117             b->CreateStreamCpy(name, baseAddress, ZERO, baseAddress, bufferSize, current, copyAlignment);
     1116            const StreamSetBuffer * const buf = this->getAnyStreamSetBuffer(name);
     1117            const auto numOfStreams = buf->getNumOfStreams();
     1118            const auto itemWidth = getItemWidth(this->getBinding(name));
     1119
     1120            const auto sizeByBit = b->CreateMul(b->CreateMul(b->getSize(itemWidth), bufferSize), b->getSize(numOfStreams));
     1121            const auto sizeByByte = b->CreateUDiv(sizeByBit, b->getSize(8));
     1122            const auto sourcePtr = b->CreateGEP(b->CreatePointerCast(baseAddress, b->getInt8PtrTy()), sizeByByte);
     1123            const auto targetPtr = b->CreatePointerCast(baseAddress, b->getInt8PtrTy());
     1124
     1125            const auto itemsToBeCopyByBit = b->CreateMul(b->CreateMul(b->getSize(itemWidth), current), b->getSize(numOfStreams));
     1126            const auto itemsToBeCopyByByte = b->CreateUDiv(itemsToBeCopyByBit, b->getSize(8));
     1127            b->CreateMemCpy(targetPtr, sourcePtr, itemsToBeCopyByByte, 8);
     1128
    11181129            b->CreateBr(done);
    11191130
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5846 r5852  
    237237    Type * const fieldWidthTy = getIntNTy(fieldWidth);
    238238
    239     Value * n = buf->getStreamSetCount(this, getStreamHandle(name));
     239    Value * const n = buf->getStreamSetCount(this, getStreamHandle(name));
    240240
    241241    if (isConstantOne(n) || fieldWidth == blockWidth || (isConstantZero(targetOffset) && isConstantZero(sourceOffset))) {
     
    260260
    261261    } else { // either the target offset or source offset is non-zero but not both
    262         auto t = getIntNTy(fieldWidth * buf->getNumOfStreams());
    263         PointerType * const ptrTy = t->getPointerTo();
    264         target = CreateGEP(CreatePointerCast(target, ptrTy), targetOffset);
    265         source = CreateGEP(CreatePointerCast(source, ptrTy), sourceOffset);
    266         n = this->CreateUDiv(n, this->getSize(buf->getNumOfStreams()));
    267 
    268262        VectorType * const blockTy = getBitBlockType();
    269263        PointerType * const blockPtrTy = blockTy->getPointerTo();
Note: See TracChangeset for help on using the changeset viewer.