Changeset 6085


Ignore:
Timestamp:
Jun 13, 2018, 6:03:20 AM (4 months ago)
Author:
cameron
Message:

u32u8 progress

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

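Legend (the viewer's color coding is lost in this text rendering; read the line-number columns instead):

Unmodified: both the old and the new line number are shown, run together (e.g. "104105" is old line 104, new line 105)
Added: only the new (r6085) line number is shown
Removed: only the old (r6055 / r6075) line number is shown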
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6055 r6085  
    102102        kb->storeOutputStreamBlock("outputStreamSet", kb->getInt32(j), blockOffsetPhi, output);
    103103    }
     104#ifndef STREAM_COMPRESS_USING_EXTRACTION_MASK
    104105    Value * unitCount = kb->simd_popcount(mCompressFieldWidth, extractionMask);
    105106    kb->storeOutputStreamBlock("unitCounts", kb->getInt32(0), blockOffsetPhi, kb->bitCast(unitCount));
     107#endif
    106108    Value * nextBlk = kb->CreateAdd(blockOffsetPhi, kb->getSize(1));
    107109    blockOffsetPhi->addIncoming(nextBlk, processBlock);
     
    115117                      {Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"},
    116118                          Binding{kb->getStreamSetTy(), "extractionMask"}},
    117                       {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"},
    118                           Binding{kb->getStreamSetTy(), "unitCounts", FixedRate(), RoundUpTo(kb->getBitBlockWidth())}},
     119#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     120                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
     121#else
     122                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"},
     123                       Binding{kb->getStreamSetTy(), "unitCounts", FixedRate(), RoundUpTo(kb->getBitBlockWidth())}},
     124#endif
    119125                      {}, {}, {})
    120126, mCompressFieldWidth(fieldWidth)
     
    144150    Value * extractionMaskPtr = kb->getInputStreamBlockPtr("extractionMask", ZERO, blockOffsetPhi);
    145151    extractionMaskPtr = kb->CreatePointerCast(extractionMaskPtr, fieldPtrTy);
     152#ifndef STREAM_COMPRESS_USING_EXTRACTION_MASK
    146153    Value * unitCountPtr = kb->getOutputStreamBlockPtr("unitCounts", ZERO, blockOffsetPhi);
    147154    unitCountPtr = kb->CreatePointerCast(unitCountPtr, fieldPtrTy);
     
    151158        kb->CreateStore(popc, kb->CreateGEP(unitCountPtr, kb->getInt32(i)));
    152159    }
     160#else
     161    for (unsigned i = 0; i < fieldsPerBlock; i++) {
     162        mask[i] = kb->CreateLoad(kb->CreateGEP(extractionMaskPtr, kb->getInt32(i)));
     163    }
     164#endif
    153165    for (unsigned j = 0; j < mStreamCount; ++j) {
    154166        Value * inputPtr = kb->getInputStreamBlockPtr("inputStreamSet", kb->getInt32(j), blockOffsetPhi);
     
    173185                   {Binding{kb->getStreamSetTy(streamCount), "inputStreamSet"},
    174186                       Binding{kb->getStreamSetTy(), "extractionMask"}},
     187#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     188                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
     189#else
    175190                   {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"},
    176191                       Binding{kb->getStreamSetTy(), "unitCounts", FixedRate(), RoundUpTo(kb->getBitBlockWidth())}},
     192#endif
    177193                   {}, {}, {})
    178194, mPEXTWidth(fieldWidth)
     
    184200: MultiBlockKernel("streamCompress" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
    185201                   {Binding{kb->getStreamSetTy(streamCount), "sourceStreamSet"},
    186                        Binding{kb->getStreamSetTy(), "unitCounts"}},
     202#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     203                       Binding{kb->getStreamSetTy(), "extractionMask"}},
     204#else
     205                   Binding{kb->getStreamSetTy(), "unitCounts"}},
     206#endif
    187207                   {Binding{kb->getStreamSetTy(streamCount), "compressedOutput", BoundedRate(0, 1)}},
    188208                   {}, {}, {})
     
    234254        pendingDataPhi[i]->addIncoming(pendingData[i], entry);
    235255    }
     256#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     257    Value * fieldPopCounts = b->simd_popcount(fw, b->loadInputStreamBlock("extractionMask", ZERO, blockOffsetPhi));
     258#else
    236259    Value * fieldPopCounts = b->loadInputStreamBlock("unitCounts", ZERO, blockOffsetPhi);
     260#endif
    237261    // For each field determine the (partial) sum popcount of all fields up to and
    238262    // including the current field.
     
    241265        partialSum = b->simd_add(fw, partialSum, b->mvmd_slli(fw, partialSum, i));
    242266    }
    243     Value * blockPopCount = b->CreateZExtOrTrunc(b->CreateExtractElement(partialSum, numFields-1), sizeTy);
     267    Value * blockPopCount = b->CreateZExtOrTrunc(b->mvmd_extract(fw, partialSum, numFields-1), sizeTy);
    244268    //
    245269    // Now determine for each source field the output offset of the first bit.
     
    366390    b->CreateBr(updateProducedCount);
    367391    b->SetInsertPoint(updateProducedCount);
    368      Value * produced = b->getProducedItemCount("compressedOutput");
     392    Value * produced = b->getProducedItemCount("compressedOutput");
    369393    produced = b->CreateAdd(produced, b->CreateMul(nextOutputBlk, bitBlockWidthConst));
    370394    produced = b->CreateSelect(mIsFinal, b->CreateAdd(produced, newPending), produced);
     
    802826    }
    803827    parabix::StreamSetBuffer * compressedFields = mDriver.addBuffer<parabix::StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(N), mBufferBlocks);
     828#ifndef STREAM_COMPRESS_USING_EXTRACTION_MASK
    804829    parabix::StreamSetBuffer * unitCounts = mDriver.addBuffer<parabix::StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), mBufferBlocks);
    805830   
     
    808833    Kernel * streamK = mDriver.addKernelInstance<StreamCompressKernel>(iBuilder, mIntraFieldCompressionWidth, N);
    809834    mDriver.makeKernelCall(streamK, {compressedFields, unitCounts}, {outputs});
    810 }
    811 
    812 
    813 }
     835#else
     836    mDriver.makeKernelCall(compressK, {inputs, mask}, {compressedFields});
     837   
     838    Kernel * streamK = mDriver.addKernelInstance<StreamCompressKernel>(iBuilder, mIntraFieldCompressionWidth, N);
     839    mDriver.makeKernelCall(streamK, {compressedFields, mask}, {outputs});
     840#endif
     841}
     842
     843
     844}
  • icGREP/icgrep-devel/icgrep/u32u8.cpp

    r6075 r6085  
    2424#include <toolchain/cpudriver.h>
    2525#include <kernels/streamset.h>
     26#include <kernels/hex_convert.h>
    2627#include <llvm/ADT/StringRef.h>
    2728#include <llvm/IR/CallingConv.h>
     
    9495: BlockOrientedKernel("u8depositMask",
    9596            {Binding{b->getStreamSetTy(1, 21), "basis"}},
    96             {Binding{b->getStreamSetTy(1, 1), "fieldDepositMask", FixedRate(4)},
     97#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     98            {Binding{b->getStreamSetTy(1, 1), "fieldDepositMask", FixedRate(4)},
     99                      Binding{b->getStreamSetTy(1, 1), "extractionMask", FixedRate(4)}},
     100#else
     101            {Binding{b->getStreamSetTy(1, 1), "fieldDepositMask", FixedRate(4)},
    97102                Binding{b->getStreamSetTy(1, 1), "codeUnitCounts", FixedRate(4), RoundUpTo(b->getBitBlockWidth())}},
    98             {}, {}, {Binding{b->getBitBlockType(), "EOFmask"}}), mDepositFieldWidth(depositFieldWidth) {
     103#endif
     104                {}, {}, {Binding{b->getBitBlockType(), "EOFmask"}}), mDepositFieldWidth(depositFieldWidth) {
    99105}
    100106
    101107
    102108void UTF8fieldDepositMask::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
     109    Value * fileExtentMask = b->CreateNot(b->getScalarField("EOFmask"));
    103110    // If any of bits 16 through 20 are 1, a four-byte UTF-8 sequence is required.
    104111    Value * u8len4 = b->loadInputStreamBlock("basis", b->getSize(16), b->getSize(0));
     
    107114    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(19), b->getSize(0)));
    108115    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(20), b->getSize(0)), "u8len4");
     116    u8len4 = b->CreateAnd(u8len4, fileExtentMask);
    109117    Value * u8len34 = u8len4;
    110118    // Otherwise, if any of bits 11 through 15 are 1, a three-byte UTF-8 sequence is required.
     
    114122    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(14), b->getSize(0)));
    115123    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(15), b->getSize(0)));
     124    u8len34 = b->CreateAnd(u8len34, fileExtentMask);
    116125    Value * nonASCII = u8len34;
    117126    // Otherwise, if any of bits 7 through 10 are 1, a two-byte UTF-8 sequence is required.
     
    120129    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(9), b->getSize(0)));
    121130    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(10), b->getSize(0)), "nonASCII");
     131    nonASCII = b->CreateAnd(nonASCII, fileExtentMask);
    122132    //
    123133    //  UTF-8 sequence length:    1     2     3       4
     
    125135    //  interleave u8len3|u8len4, allOnes() for bits 1, 3:  x..., ..x.
    126136    //  interleave prefix4, u8len2|u8len3|u8len4 for bits 0, 2:  .x.., ...x
    127     Value * fileExtentMask = b->CreateNot(b->getScalarField("EOFmask"));
    128137   
    129138    Value * maskA_lo = b->esimd_mergel(1, u8len34, fileExtentMask);
     
    140149    for (unsigned j = 0; j < 4; ++j) {
    141150        Value * deposit_mask = b->simd_pext(mDepositFieldWidth, mask1000, extraction_mask[j]);
     151        b->storeOutputStreamBlock("fieldDepositMask", b->getSize(0), b->getSize(j), deposit_mask);
     152#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     153        b->storeOutputStreamBlock("extractionMask", b->getSize(0), b->getSize(j), extraction_mask[j]);
     154#else
    142155        Value * unit_counts = b->simd_popcount(mDepositFieldWidth, extraction_mask[j]);
    143         b->storeOutputStreamBlock("fieldDepositMask", b->getSize(0), b->getSize(j), deposit_mask);
    144156        b->storeOutputStreamBlock("codeUnitCounts", b->getSize(0), b->getSize(j), unit_counts);
     157#endif
    145158    }
    146159}
     
    183196    // Eliminate lookahead positions that are the final position of the prior unit.
    184197    PabloAST * secondLast = pb.createAnd(lookAheadFinal, nonFinal);
    185     PabloAST * u8mask6_11 = pb.createOr(secondLast, ASCII, "u8mask6_11");
     198    PabloAST * u8mask6_11 = pb.createInFile(pb.createOr(secondLast, ASCII, "u8mask6_11"));
    186199    PabloAST * prefix2 = pb.createAnd(secondLast, initial);
    187200    PabloAST * lookAhead2 = pb.createLookahead(u8final, 2, "lookahead2");
    188201    PabloAST * thirdLast = pb.createAnd(pb.createAnd(lookAhead2, nonFinal), pb.createNot(secondLast));
    189     PabloAST * u8mask12_17 = pb.createOr(thirdLast, pb.createOr(prefix2, ASCII), "u8mask12_17");
     202    PabloAST * u8mask12_17 = pb.createInFile(pb.createOr(thirdLast, pb.createOr(prefix2, ASCII), "u8mask12_17"));
    190203    pb.createAssign(pb.createExtract(getOutputStreamVar("u8initial"), pb.getInteger(0)), initial);
    191204    pb.createAssign(pb.createExtract(getOutputStreamVar("u8mask6_11"), pb.getInteger(0)), u8mask6_11);
     
    261274
    262275    const unsigned u32buffersize = codegen::SegmentSize * codegen::ThreadNum;
    263         const unsigned u8buffersize = 4 * u32buffersize;
     276    const unsigned u8buffersize = 4 * (u32buffersize + 1);
     277    const unsigned u8buffersize2 = u8buffersize + 1;
     278    const unsigned u8buffersize3 = u8buffersize2 + 4;
    264279
    265280    Type * const voidTy = idb->getVoidTy();
     
    274289    idb->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    275290   
    276     // File data from mmap
     291    // Source data
    277292    StreamSetBuffer * codeUnitStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 32));
    278293   
    279     Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(idb, 32);
    280     mmapK->setInitialArguments({fileDecriptor});
    281     pxDriver.makeKernelCall(mmapK, {}, {codeUnitStream});
     294    Kernel * sourceK = pxDriver.addKernelInstance<FDSourceKernel>(idb, 32);
     295    sourceK->setInitialArguments({idb->getInt8(0), fileDecriptor});
     296    pxDriver.makeKernelCall(sourceK, {}, {codeUnitStream});
    282297   
    283298    // Source buffers for transposed UTF-32 basis bits.
     
    287302    pxDriver.makeKernelCall(s2p21K, {codeUnitStream}, {u32basis});
    288303
    289     StreamSetBuffer * u8unitCounts = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
    290304
    291305        // Buffers for calculated deposit masks.
    292306    StreamSetBuffer * u8fieldMask = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
    293     StreamSetBuffer * u8final = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
    294     StreamSetBuffer * u8initial = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
    295     StreamSetBuffer * u8mask12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
    296     StreamSetBuffer * u8mask6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
     307    StreamSetBuffer * u8final = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
     308    StreamSetBuffer * u8initial = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
     309    StreamSetBuffer * u8mask12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
     310    StreamSetBuffer * u8mask6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
    297311
    298312    // Intermediate buffers for deposited bits
    299     StreamSetBuffer * deposit18_20 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(3), u8buffersize);
    300     StreamSetBuffer * deposit12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize);
    301     StreamSetBuffer * deposit6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize);
    302     StreamSetBuffer * deposit0_5 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize);
     313    StreamSetBuffer * deposit18_20 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(3), u8buffersize3);
     314    StreamSetBuffer * deposit12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize3);
     315    StreamSetBuffer * deposit6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize3);
     316    StreamSetBuffer * deposit0_5 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize3);
    303317
    304318    // Final buffers for computed UTF-8 basis bits and byte stream.
    305     StreamSetBuffer * u8basis = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), u8buffersize);
    306     StreamSetBuffer * u8bytes = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), u8buffersize);
     319    StreamSetBuffer * u8basis = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), u8buffersize3);
     320    StreamSetBuffer * u8bytes = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), u8buffersize3);
    307321
    308322    // Calculate the u8final deposit mask.
     323#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
     324    StreamSetBuffer * extractionMask = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
     325    kernel::Kernel * fieldDepositMaskK = pxDriver.addKernelInstance<UTF8fieldDepositMask>(idb);
     326    pxDriver.makeKernelCall(fieldDepositMaskK, {u32basis}, {u8fieldMask, extractionMask});
     327    kernel::Kernel * streamK = pxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, 1);
     328    pxDriver.makeKernelCall(streamK, {u8fieldMask, extractionMask}, {u8final});
     329#else
     330    StreamSetBuffer * u8unitCounts = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
    309331    kernel::Kernel * fieldDepositMaskK = pxDriver.addKernelInstance<UTF8fieldDepositMask>(idb);
    310332    pxDriver.makeKernelCall(fieldDepositMaskK, {u32basis}, {u8fieldMask, u8unitCounts});
    311333    kernel::Kernel * streamK = pxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, 1);
    312334    pxDriver.makeKernelCall(streamK, {u8fieldMask, u8unitCounts}, {u8final});
    313 
     335#endif
     336/*    kernel::Kernel * hexConvert =  pxDriver.addKernelInstance<BinaryToHex>(idb);
     337    pxDriver.makeKernelCall(hexConvert, {u8final}, {u8bytes});
     338*/
    314339    kernel::Kernel * maskK = pxDriver.addKernelInstance<UTF8_DepositMasks>(idb);
    315340    pxDriver.makeKernelCall(maskK, {u8final}, {u8initial, u8mask12_17, u8mask6_11});
Note: See TracChangeset for help on using the changeset viewer.