Ignore:
Timestamp:
Mar 2, 2017, 12:18:43 PM (2 years ago)
Author:
cameron
Message:

Swizzled bitstream deletion and -enable-AVX-deletion in u8u16

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5310 r5355  
    99#include <cc/cc_compiler.h>                        // for CC_Compiler
    1010#include <kernels/deletion.h>                      // for DeletionKernel
     11#include <kernels/swizzle.h>                      // for SwizzleGenerator
    1112#include <kernels/mmap_kernel.h>                   // for MMapSourceKernel
    1213#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
     
    4647static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"),  cl::Required, cl::cat(u8u16Options));
    4748static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(u8u16Options));
     49static cl::opt<bool> enableAVXdel("enable-AVX-deletion", cl::desc("Enable AVX2 deletion algorithms."), cl::cat(u8u16Options));
    4850static cl::opt<bool> mMapBuffering("mmap-buffering", cl::desc("Enable mmap buffering."), cl::cat(u8u16Options));
    4951static cl::opt<bool> memAlignBuffering("memalign-buffering", cl::desc("Enable posix_memalign buffering."), cl::cat(u8u16Options));
     
    253255}
    254256
    255 Function * u8u16Pipeline(Module * mod, IDISA::IDISA_Builder * iBuilder) {
     257Function * u8u16PipelineAVX2(Module * mod, IDISA::IDISA_Builder * iBuilder) {
    256258
    257259    const unsigned segmentSize = codegen::SegmentSize;
     
    277279    fileSize->setName("fileSize");
    278280
     281    // File data from mmap
    279282    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    280283
     284    MMapSourceKernel mmapK(iBuilder, segmentSize);
     285    mmapK.generateKernel({}, {&ByteStream});
     286    mmapK.setInitialArguments({fileSize});
     287   
     288    // Transposed bits from s2p
    281289    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    282290
     291    S2PKernel s2pk(iBuilder);
     292    s2pk.generateKernel({&ByteStream}, {&BasisBits});
     293   
     294    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
    283295    CircularBuffer U8u16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
    284296    CircularBuffer DelMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
    285297    CircularBuffer ErrorMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
    286298
     299    PabloKernel u8u16k(iBuilder, "u8u16",
     300                       {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
     301                       {Binding{iBuilder->getStreamSetTy(16, 1), "u16bit"},
     302                           Binding{iBuilder->getStreamSetTy(1, 1), "delMask"},
     303                           Binding{iBuilder->getStreamSetTy(1, 1), "errMask"}}, {});
     304   
     305    u8u16_pablo(&u8u16k);
     306    u8u16k.generateKernel({&BasisBits}, {&U8u16Bits, &DelMask, &ErrorMask});
     307   
     308   
     309    // Apply a deletion algorithm to discard all but the final position of the UTF-8
     310    // sequences for each UTF-16 code unit.
     311    CircularBuffer u16CompressedInFields(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
     312    CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     313
     314    DeleteByPEXTkernel delK(iBuilder, 64, 16);
     315    delK.generateKernel({&U8u16Bits, &DelMask}, {&u16CompressedInFields, &DeletionCounts});
     316   
     317    // Swizzle for sequential compression within SIMD lanes.
     318    CircularBuffer SwizzleFields0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
     319    CircularBuffer SwizzleFields1(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
     320    CircularBuffer SwizzleFields2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
     321    CircularBuffer SwizzleFields3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
     322    SwizzleGenerator swizzleK(iBuilder, 16, 4, 1);
     323    swizzleK.generateKernel({&u16CompressedInFields}, {&SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3});
     324   
     325    //  Produce fully compressed swizzled UTF-16 bit streams
     326    SwizzledCopybackBuffer u16Swizzle0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     327    SwizzledCopybackBuffer u16Swizzle1(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     328    SwizzledCopybackBuffer u16Swizzle2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     329    SwizzledCopybackBuffer u16Swizzle3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     330    //
     331    SwizzledBitstreamCompressByCount compressK(iBuilder, 16);
     332    compressK.generateKernel({&DeletionCounts, &SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3},
     333                             {&u16Swizzle0, &u16Swizzle1, &u16Swizzle2, &u16Swizzle3});
     334   
     335    // Produce unswizzled UTF-16 bit streams
     336    //
     337    CircularBuffer u16bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
     338    SwizzleGenerator unSwizzleK(iBuilder, 16, 1, 4);
     339    unSwizzleK.setName("unswizzle");
     340    unSwizzleK.generateKernel({&u16Swizzle0, &u16Swizzle1, &u16Swizzle2, &u16Swizzle3}, {&u16bits});
     341   
     342    // Different choices for the output buffer depending on chosen option.
     343    ExternalFileBuffer U16external(iBuilder, iBuilder->getStreamSetTy(1, 16));
     344    CircularBuffer U16out(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments);
     345
     346    P2S16Kernel p2sk(iBuilder);
     347
     348    //P2S16KernelWithCompressedOutput p2sk(iBuilder);
     349
     350    FileSink outK(iBuilder, 16);
     351    if (mMapBuffering || memAlignBuffering) {
     352        p2sk.generateKernel({&u16bits}, {&U16external});
     353        outK.generateKernel({&U16external}, {});
     354    } else {
     355        p2sk.generateKernel({&u16bits}, {&U16out});
     356        outK.generateKernel({&U16out}, {});
     357    }
     358   
     359    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
     360
     361    ByteStream.setStreamSetBuffer(inputStream, fileSize);
     362    BasisBits.allocateBuffer();
     363    U8u16Bits.allocateBuffer();
     364    DelMask.allocateBuffer();
     365    ErrorMask.allocateBuffer();
     366    u16CompressedInFields.allocateBuffer();
     367    DeletionCounts.allocateBuffer();
     368    SwizzleFields0.allocateBuffer();
     369    SwizzleFields1.allocateBuffer();
     370    SwizzleFields2.allocateBuffer();
     371    SwizzleFields3.allocateBuffer();
     372    u16Swizzle0.allocateBuffer();
     373    u16Swizzle1.allocateBuffer();
     374    u16Swizzle2.allocateBuffer();
     375    u16Swizzle3.allocateBuffer();
     376    u16bits.allocateBuffer();
     377    if (mMapBuffering || memAlignBuffering) {
     378        U16external.setEmptyBuffer(outputStream);
     379    } else {
     380        U16out.allocateBuffer();
     381    }
     382    Value * fName = iBuilder->CreatePointerCast(iBuilder->CreateGlobalString(outputFile.c_str()), iBuilder->getInt8PtrTy());
     383    outK.setInitialArguments({fName});
     384
     385    if (segmentPipelineParallel){
     386        generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &swizzleK, &compressK, &unSwizzleK, &p2sk, &outK});
     387    } else {
     388        generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &swizzleK, &compressK, &unSwizzleK, &p2sk, &outK});
     389    }
     390
     391    iBuilder->CreateRetVoid();
     392    return main;
     393}
     394
     395
     396Function * u8u16Pipeline(Module * mod, IDISA::IDISA_Builder * iBuilder) {
     397   
     398    const unsigned segmentSize = codegen::SegmentSize;
     399    const unsigned bufferSegments = codegen::ThreadNum+1;
     400   
     401    assert (iBuilder);
     402   
     403    Type * const size_ty = iBuilder->getSizeTy();
     404    Type * const voidTy = iBuilder->getVoidTy();
     405    Type * const bitBlockType = iBuilder->getBitBlockType();
     406    Type * const inputType = ArrayType::get(ArrayType::get(bitBlockType, 8), 1)->getPointerTo();
     407    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
     408   
     409    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
     410    main->setCallingConv(CallingConv::C);
     411    Function::arg_iterator args = main->arg_begin();
     412   
     413    Value * const inputStream = &*(args++);
     414    inputStream->setName("inputStream");
     415    Value * const outputStream = &*(args++);
     416    outputStream->setName("outputStream");
     417    Value * const fileSize = &*(args++);
     418    fileSize->setName("fileSize");
     419   
     420    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
     421   
     422    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
     423   
     424    CircularBuffer U8u16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
     425    CircularBuffer DelMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     426    CircularBuffer ErrorMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     427   
    287428    CircularBuffer U16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
    288429   
    289430    CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
    290 
     431   
    291432    // Different choices for the output buffer depending on chosen option.
    292433    ExternalFileBuffer U16external(iBuilder, iBuilder->getStreamSetTy(1, 16));
    293434    CircularCopybackBuffer U16out(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments, 1 /*overflow block*/);
    294 
     435   
    295436    MMapSourceKernel mmapK(iBuilder, segmentSize);
    296437    mmapK.generateKernel({}, {&ByteStream});
     
    298439   
    299440    S2PKernel s2pk(iBuilder);
    300 
     441   
    301442    s2pk.generateKernel({&ByteStream}, {&BasisBits});
    302 
     443   
    303444    PabloKernel u8u16k(iBuilder, "u8u16",
    304445                       {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
    305446                       {Binding{iBuilder->getStreamSetTy(16, 1), "u16bit"},
    306                         Binding{iBuilder->getStreamSetTy(1, 1), "delMask"},
    307                         Binding{iBuilder->getStreamSetTy(1, 1), "errMask"}}, {});
    308 
     447                           Binding{iBuilder->getStreamSetTy(1, 1), "delMask"},
     448                           Binding{iBuilder->getStreamSetTy(1, 1), "errMask"}}, {});
     449   
    309450    u8u16_pablo(&u8u16k);
    310 
     451   
    311452    u8u16k.generateKernel({&BasisBits}, {&U8u16Bits, &DelMask, &ErrorMask});
    312 
     453   
    313454    DeletionKernel delK(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
    314455    delK.generateKernel({&U8u16Bits, &DelMask}, {&U16Bits, &DeletionCounts});
    315 
     456   
    316457    P2S16KernelWithCompressedOutput p2sk(iBuilder);
    317 
     458   
    318459    FileSink outK(iBuilder, 16);
    319460    if (mMapBuffering || memAlignBuffering) {
     
    325466    }
    326467    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    327 
     468   
    328469    ByteStream.setStreamSetBuffer(inputStream, fileSize);
    329470    BasisBits.allocateBuffer();
     
    340481    Value * fName = iBuilder->CreatePointerCast(iBuilder->CreateGlobalString(outputFile.c_str()), iBuilder->getInt8PtrTy());
    341482    outK.setInitialArguments({fName});
    342 
     483   
    343484    if (segmentPipelineParallel){
    344485        generateSegmentParallelPipeline(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &p2sk, &outK});
     
    346487        generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &p2sk, &outK});
    347488    }
    348 
     489   
    349490    iBuilder->CreateRetVoid();
    350491    return main;
    351492}
    352 
    353 
    354493
    355494
     
    364503    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    365504
    366     llvm::Function * main_IR = u8u16Pipeline(M, idb);
    367    
     505    llvm::Function * main_IR = (enableAVXdel && AVX2_available() && codegen::BlockSize==256) ? u8u16PipelineAVX2(M, idb) : u8u16Pipeline(M, idb);
     506
    368507    verifyModule(*M, &dbgs());
    369508    u8u16Engine = JIT_to_ExecutionEngine(M);   
Note: See TracChangeset for help on using the changeset viewer.