Changeset 5636


Ignore:
Timestamp:
Sep 13, 2017, 12:53:27 PM (9 days ago)
Author:
cameron
Message:

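Fixes to the PDEP kernel (pdep_kernel.cpp): dereference the argument iterator when reading sourceItemsAvail; pass both operands to CreateAdd when summing the mask popcounts into total_count; replace CreateUGE with CreateICmpULE in the processBlock/terminate branch test; remove the debugging CallPrintRegister calls; and compute the PDEPmarkerStream processed item count from the block offset, capped at itemsToDo.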

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r5635 r5636  
    3535    Value * itemsToDo = &*(args++); // Since PDEP marker stream is a bit stream, this is the number of PDEP marker bits to process
    3636    // Get pointer to start of the StreamSetBlock containing unprocessed input items.
    37     Value * sourceItemsAvail = args++;
     37    Value * sourceItemsAvail =  &*(args++);
    3838    Value * PDEPStrmPtr = &*(args++);
    3939    Value * inputSwizzlesPtr = &*(args++);
    40 
    4140    // Get pointer to start of the output StreamSetBlock we're currently writing to
    4241    Value * outputStreamPtr = &*(args);
     
    7675    Value * updatedSourceItems = sourceItemsRemaining;
    7776    Value * PDEP_ms_blk = kb->CreateBlockAlignedLoad(kb->CreateGEP(PDEPStrmPtr, {blockOffsetPhi, kb->getInt32(0)}));
    78     kb->CallPrintRegister("PDEP_ms_blk", PDEP_ms_blk);
    7977
    8078    const auto PDEP_masks = get_PDEP_masks(kb, PDEP_ms_blk, mPDEPWidth);   
     
    8381    Value * total_count = mask_popcounts[0];
    8482    for (unsigned j = 1; j < mask_popcounts.size(); j++) {
    85         total_count = kb->CreateAdd(mask_popcounts[j]);
     83        total_count = kb->CreateAdd(total_count, mask_popcounts[j]);
    8684    }
    87     kb->CreateCondBr(kb->CreateUGE(total_count, sourceItemsRemaining), processBlock, terminate);
     85    kb->CreateCondBr(kb->CreateICmpULE(total_count, sourceItemsRemaining), processBlock, terminate);
    8886    kb->SetInsertPoint(processBlock);
    8987
     
    123121        // Store the result
    124122        kb->CreateBlockAlignedStore(result_swizzle, kb->CreateGEP(outputStreamPtr, {blockOffsetPhi, kb->getSize(i)}));
    125                                     kb->CallPrintRegister("result_swizzle", result_swizzle);
    126123        updatedProcessedBits = kb->CreateAdd(updatedProcessedBits, mask_popcounts[i]);
    127124        updatedSourceItems = kb->CreateSub(updatedSourceItems, mask_popcounts[i]);
     
    135132
    136133    kb->SetInsertPoint(terminate);
    137    
    138     kb->setProcessedItemCount("PDEPmarkerStream", updatedProcessedBitsPhi);   
     134    Value * itemsDone = kb->CreateMul(blockOffsetPhi, blockWidth);
     135    itemsDone = kb->CreateSelect(kb->CreateICmpULT(itemsToDo, itemsDone), itemsToDo, itemsDone);
     136    kb->setProcessedItemCount("PDEPmarkerStream", kb->CreateAdd(itemsDone, processedSourceBits));   
    139137    kb->setProcessedItemCount("sourceStreamSet", updatedProcessedBitsPhi);   
    140138}
Note: See TracChangeset for help on using the changeset viewer.