Changeset 6041


Ignore:
Timestamp:
May 16, 2018, 11:51:22 AM (5 months ago)
Author:
xwa163
Message:

Use simd_pdep in the swizzled versions of the pdep kernels

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r5985 r6041  
    121 121
    122 122        // Apply PDEP to each element of the combined swizzle using the current PDEP mask
    123         Value * result = UndefValue::get(buffer->getType());
    124 123        Value * const mask = b->CreateExtractElement(selectors, i);
    125         for (unsigned j = 0; j < mSwizzleFactor; j++) {
    126             Value * source_field = b->CreateExtractElement(buffer, j);
    127             Value * PDEP_field = b->CreateCall(pdep, {source_field, mask});
    128             result = b->CreateInsertElement(result, PDEP_field, j);
    129         }
     124        Value* result = b->simd_pdep(pdepWidth, buffer, b->simd_fill(pdepWidth, mask));
    130 125
    131 126        // Store the result
  • icGREP/icgrep-devel/icgrep/kernels/swizzled_multiple_pdep_kernel.cpp

    r6026 r6041  
    144 144        Value * const usedShift = b->simd_fill(pdepWidth, required);
    145 145        for (int iStreamSetIndex = 0; iStreamSetIndex < mNumberOfStreamSet; iStreamSetIndex++) {
    146             Value * result = UndefValue::get(bufferArray[iStreamSetIndex]->getType());
    147             for (unsigned j = 0; j < mSwizzleFactor; j++) {
    148                 Value * source_field = b->CreateExtractElement(bufferArray[iStreamSetIndex], j);
    149                 Value * PDEP_field = b->CreateCall(pdep, {source_field, mask});
    150                 result = b->CreateInsertElement(result, PDEP_field, j);
    151             }
     146            Value* result = b->simd_pdep(pdepWidth, bufferArray[iStreamSetIndex], b->simd_fill(pdepWidth, mask));
    152 147            // Store the result
    153 148            Value * const outputStreamPtr = b->getOutputStreamBlockPtr("output" + std::to_string(iStreamSetIndex), b->getSize(i), strideIndex);
Note: See TracChangeset for help on using the changeset viewer.