Ignore:
Timestamp:
Mar 13, 2017, 10:38:00 PM (2 years ago)
Author:
cameron
Message:

Adam's changes to add a swizzling option to the DeleteByPEXT kernel

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5355 r5362  
    259259    const unsigned segmentSize = codegen::SegmentSize;
    260260    const unsigned bufferSegments = codegen::ThreadNum+1;
    261    
     261
    262262    assert (iBuilder);
    263263
     
    305305    u8u16_pablo(&u8u16k);
    306306    u8u16k.generateKernel({&BasisBits}, {&U8u16Bits, &DelMask, &ErrorMask});
    307    
    308    
     307
    309308    // Apply a deletion algorithm to discard all but the final position of the UTF-8
    310     // sequences for each UTF-16 code unit.
    311     CircularBuffer u16CompressedInFields(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
    312     CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
    313 
    314     DeleteByPEXTkernel delK(iBuilder, 64, 16);
    315     delK.generateKernel({&U8u16Bits, &DelMask}, {&u16CompressedInFields, &DeletionCounts});
    316    
    317     // Swizzle for sequential compression within SIMD lanes.
     309    // sequences for each UTF-16 code unit. Swizzle the results.
    318310    CircularBuffer SwizzleFields0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
    319311    CircularBuffer SwizzleFields1(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
    320312    CircularBuffer SwizzleFields2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
    321313    CircularBuffer SwizzleFields3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
    322     SwizzleGenerator swizzleK(iBuilder, 16, 4, 1);
    323     swizzleK.generateKernel({&u16CompressedInFields}, {&SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3});
    324    
     314    CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     315
     316    DeleteByPEXTkernel delK(iBuilder, 64, 16, true);
     317    delK.generateKernel({&U8u16Bits, &DelMask}, {&SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3, &DeletionCounts});
     318;
    325319    //  Produce fully compressed swizzled UTF-16 bit streams
    326320    SwizzledCopybackBuffer u16Swizzle0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     
    328322    SwizzledCopybackBuffer u16Swizzle2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
    329323    SwizzledCopybackBuffer u16Swizzle3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
    330     //
     324
    331325    SwizzledBitstreamCompressByCount compressK(iBuilder, 16);
    332326    compressK.generateKernel({&DeletionCounts, &SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3},
    333327                             {&u16Swizzle0, &u16Swizzle1, &u16Swizzle2, &u16Swizzle3});
    334     
     328 
    335329    // Produce unswizzled UTF-16 bit streams
    336     //
    337330    CircularBuffer u16bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
    338331    SwizzleGenerator unSwizzleK(iBuilder, 16, 1, 4);
     
    364357    DelMask.allocateBuffer();
    365358    ErrorMask.allocateBuffer();
    366     u16CompressedInFields.allocateBuffer();
    367359    DeletionCounts.allocateBuffer();
    368360    SwizzleFields0.allocateBuffer();
     
    375367    u16Swizzle3.allocateBuffer();
    376368    u16bits.allocateBuffer();
     369
    377370    if (mMapBuffering || memAlignBuffering) {
    378371        U16external.setEmptyBuffer(outputStream);
     
    384377
    385378    if (segmentPipelineParallel){
    386         generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &swizzleK, &compressK, &unSwizzleK, &p2sk, &outK});
    387     } else {
    388         generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &swizzleK, &compressK, &unSwizzleK, &p2sk, &outK});
     379        generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &compressK, &unSwizzleK, &p2sk, &outK});
     380    } else {
     381        generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &compressK, &unSwizzleK, &p2sk, &outK});
    389382    }
    390383
Note: See TracChangeset for help on using the changeset viewer.