Changeset 6014


Ignore:
Timestamp:
May 5, 2018, 1:25:24 PM (5 months ago)
Author:
cameron
Message:

Stream deletion bug fixes

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6012 r6014  
    292292        Type * v8xi32Ty = VectorType::get(getInt32Ty(), 8);
    293293        Type * v8xi1Ty = VectorType::get(getInt1Ty(), 8);
     294        Constant * mask0000000Fsplaat = ConstantVector::getSplat(8, ConstantInt::get(getInt32Ty(), 0xF));
    294295        Value * shuf32Func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_permd);
    295296        Value * PEXT_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_bmi_pext_32);
     
    311312            Shifts[i] = getInt32(i*4);
    312313        }
    313         Value * compress = CreateCall(shuf32Func, {a, CreateLShr(bdcst, ConstantVector::get({Shifts, 8}))});
    314         Value * selectf = CreateBitCast(CreateTrunc(CreateSub(CreateShl(getInt32(1), field_count), getInt32(1)), getInt8Ty()), v8xi1Ty);
    315         return CreateSelect(selectf, ConstantVector::getNullValue(v8xi32Ty), compress);
     314        Value * shuf = CreateAnd(CreateLShr(bdcst, ConstantVector::get({Shifts, 8})), mask0000000Fsplaat);
     315        Value * compress = CreateCall(shuf32Func, {a, shuf});
     316        Value * field_mask = CreateTrunc(CreateSub(CreateShl(getInt32(1), field_count), getInt32(1)), getInt8Ty());
     317        Value * selectf = CreateBitCast(field_mask, v8xi1Ty);
     318        Value * result = CreateSelect(selectf, compress, ConstantVector::getNullValue(v8xi32Ty));
     319        return result;
    316320    }
    317321    return IDISA_Builder::mvmd_compress(fw, a, select_mask);
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6013 r6014  
    146146    BasicBlock * segmentLoop = b->CreateBasicBlock("segmentLoop");
    147147    BasicBlock * segmentDone = b->CreateBasicBlock("segmentDone");
     148    BasicBlock * finalWrite = b->CreateBasicBlock("finalWrite");
     149    BasicBlock * updateProducedCount = b->CreateBasicBlock("updateProducedCount");
    148150    Constant * const ZERO = b->getSize(0);
    149151   
     
    198200    for (unsigned i = 0; i < mStreamCount; i++) {
    199201        sourceBlock[i] = b->loadInputStreamBlock("sourceStreamSet", b->getInt32(i), blockOffsetPhi);
    200 
    201202    }
    202203    // Now separate the bits of each field into ones that go into the current field
     
    205206    std::vector<Value *> pendingOutput(mStreamCount);
    206207    std::vector<Value *> outputFields(mStreamCount);
     208    Value * backShift = b->simd_sub(fw, fwSplat, offsets);
    207209    for (unsigned i = 0; i < mStreamCount; i++) {
    208210        Value * currentFieldBits = b->simd_sllv(fw, sourceBlock[i], offsets);
    209         Value * nextFieldBits = b->simd_srlv(fw, sourceBlock[i], b->simd_sub(fw, fwSplat, offsets));
     211        Value * nextFieldBits = b->simd_srlv(fw, sourceBlock[i], backShift);
    210212        Value * firstField = b->mvmd_extract(fw, currentFieldBits, 0);
    211213        Value * vec1 = b->CreateInsertElement(zeroSplat, firstField, pendingFieldIdx);
     
    214216        outputFields[i] = b->simd_or(b->mvmd_srli(fw, currentFieldBits, 1), nextFieldBits);
    215217    }
    216     // We may have filled the current field of the pendingOutput update the
    217     // pendingFieldIndex.
    218     Value * newPendingTotal = b->CreateZExtOrTrunc(b->mvmd_extract(fw, fieldPopCounts, 0), sizeTy);
    219     pendingFieldIdx = b->CreateUDiv(newPendingTotal, ConstantInt::get(sizeTy, fw));
    220218    // Now combine forward all fields with the same field number.  This may require
    221219    // up to log2 numFields steps.
     
    239237    // (a) shift forward outputField data to fill the pendingOutput values.
    240238    // (b) shift back outputField data to clear data added to pendingOutput.
     239    //
     240    // However, we may need to increment pendingFieldIndex if we previously
     241    // filled the field with the extracted firstField value.  The first
     242    // value of the fieldNo vector will be 0 or 1.
     243    // It is possible that pendingFieldIndex will reach the total number
     244    // of fields held in a register.  mvmd_sll may not handle this if it
     245    // translates to an LLVM shl.
     246    Value * increment = b->CreateZExtOrTrunc(b->mvmd_extract(fw, fieldNo, 0), sizeTy);
     247    pendingFieldIdx = b->CreateAdd(pendingFieldIdx, increment);
     248    Value * const pendingSpaceFilled = b->CreateICmpEQ(pendingFieldIdx, numFieldConst);
    241249    Value * shftBack = b->CreateSub(numFieldConst, pendingFieldIdx);
    242250    for (unsigned i = 0; i < mStreamCount; i++) {
    243251        Value * outputFwd = b->mvmd_sll(fw, outputFields[i], pendingFieldIdx);
     252        outputFwd = b->CreateSelect(pendingSpaceFilled, zeroSplat, outputFwd);
    244253        pendingOutput[i] = b->simd_or(pendingOutput[i], outputFwd);
    245254        outputFields[i] = b->mvmd_srl(fw, outputFields[i], shftBack);
    246   }
     255    }
    247256    //
    248257    // Write the pendingOutput data to outputStream.
     
    282291        b->setScalarField("pendingOutputBlock_" + std::to_string(i), b->bitCast(pendingOutput[i]));
    283292    }
    284     Value * produced = b->getProducedItemCount("compressedOutput");
     293    b->CreateCondBr(mIsFinal, finalWrite, updateProducedCount);
     294    b->SetInsertPoint(finalWrite);
     295    for (unsigned i = 0; i < mStreamCount; i++) {
     296        //Value * pending = b->getScalarField("pendingOutputBlock_" + std::to_string(i));
     297        Value * pending = b->bitCast(pendingOutput[i]);
     298        b->storeOutputStreamBlock("compressedOutput", b->getInt32(i), nextOutputBlk, pending);
     299    }
     300    b->CreateBr(updateProducedCount);
     301    b->SetInsertPoint(updateProducedCount);
     302     Value * produced = b->getProducedItemCount("compressedOutput");
    285303    produced = b->CreateAdd(produced, b->CreateMul(nextOutputBlk, bitBlockWidthConst));
    286     produced = b->CreateSelect(mIsFinal, b->CreateAdd(produced, blockPopCount), produced);
     304    produced = b->CreateSelect(mIsFinal, b->CreateAdd(produced, newPending), produced);
    287305    b->setProducedItemCount("compressedOutput", produced);
    288306}
    289    
    290    
    291    
    292307
    293308SwizzledDeleteByPEXTkernel::SwizzledDeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned streamCount, unsigned PEXT_width)
Note: See TracChangeset for help on using the changeset viewer.