Ignore:
Timestamp:
Dec 3, 2017, 12:40:40 PM (20 months ago)
Author:
nmedfort
Message:

Bug fixes and simplified MultiBlockKernel logic

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5605 r5755  
    290290   
    291291    // File data from mmap
    292     StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    293    
    294     Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder, segmentSize));
     292    StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     293   
     294    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder, segmentSize);
    295295    mmapK->setInitialArguments({fileDecriptor});
    296296    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    297297   
    298298    // Transposed bits from s2p
    299     StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
    300    
    301     Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
     299    StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
     300   
     301    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
    302302    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    303303   
    304304    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
    305     StreamSetBuffer * U8u16Bits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments));
    306     StreamSetBuffer * DelMask = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments));
    307     StreamSetBuffer * ErrorMask = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments));
    308    
    309     Kernel * u8u16k = pxDriver.addKernelInstance(make_unique<U8U16Kernel>(iBuilder));
     305    StreamSetBuffer * U8u16Bits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
     306    StreamSetBuffer * DelMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     307    StreamSetBuffer * ErrorMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     308   
     309    Kernel * u8u16k = pxDriver.addKernelInstance<U8U16Kernel>(iBuilder);
    310310    pxDriver.makeKernelCall(u8u16k, {BasisBits}, {U8u16Bits, DelMask, ErrorMask});
    311311   
    312312    // Allocate space for fully compressed swizzled UTF-16 bit streams
    313     StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer(make_unique<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1));
    314     StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer(make_unique<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1));
    315     StreamSetBuffer * u16Swizzle2 = pxDriver.addBuffer(make_unique<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1));
    316     StreamSetBuffer * u16Swizzle3 = pxDriver.addBuffer(make_unique<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1));
     313    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     314    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     315    StreamSetBuffer * u16Swizzle2 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
     316    StreamSetBuffer * u16Swizzle3 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
    317317   
    318318    // Apply a deletion algorithm to discard all but the final position of the UTF-8
    319319    // sequences (bit streams) for each UTF-16 code unit. Also compresses and swizzles the result.
    320     Kernel * delK = pxDriver.addKernelInstance(make_unique<SwizzledDeleteByPEXTkernel>(iBuilder, 64, 16));
     320    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 64, 16);
    321321    pxDriver.makeKernelCall(delK, {U8u16Bits, DelMask}, {u16Swizzle0, u16Swizzle1, u16Swizzle2, u16Swizzle3});
    322322
    323323    // Produce unswizzled UTF-16 bit streams
    324     StreamSetBuffer * u16bits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments));
    325    
    326     Kernel * unSwizzleK = pxDriver.addKernelInstance(make_unique<SwizzleGenerator>(iBuilder, 16, 1, 4));
     324    StreamSetBuffer * u16bits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
     325   
     326    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 16, 1, 4);
    327327    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1, u16Swizzle2, u16Swizzle3}, {u16bits});
    328328   
    329     Kernel * p2sk = pxDriver.addKernelInstance(make_unique<P2S16Kernel>(iBuilder));
    330    
    331     Kernel * outK = pxDriver.addKernelInstance(make_unique<FileSink>(iBuilder, 16));
     329    Kernel * p2sk = pxDriver.addKernelInstance<P2S16Kernel>(iBuilder);
     330   
     331    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 16);
    332332    Value * fName = iBuilder->CreatePointerCast(iBuilder->GetString(outputFile.c_str()), iBuilder->getInt8PtrTy());
    333333    outK->setInitialArguments({fName});
     
    336336    StreamSetBuffer * U16out = nullptr;
    337337    if (mMapBuffering || memAlignBuffering) {
    338         U16out = pxDriver.addExternalBuffer(make_unique<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), outputStream));
     338        U16out = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), outputStream);
    339339    } else {
    340         U16out = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments));
     340        U16out = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments);
    341341    }
    342342    pxDriver.makeKernelCall(p2sk, {u16bits}, {U16out});
     
    375375
    376376    // File data from mmap
    377     StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    378    
    379     Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder, segmentSize));
     377    StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     378   
     379    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder, segmentSize);
    380380    mmapK->setInitialArguments({fileDecriptor});
    381381    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    382382   
    383383    // Transposed bits from s2p
    384     StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
    385    
    386     Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
     384    StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
     385   
     386    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
    387387    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    388388   
    389389    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
    390     StreamSetBuffer * U8u16Bits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments));
    391     StreamSetBuffer * DelMask = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments));
    392     StreamSetBuffer * ErrorMask = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments));
    393    
    394     Kernel * u8u16k = pxDriver.addKernelInstance(make_unique<U8U16Kernel>(iBuilder));
     390    StreamSetBuffer * U8u16Bits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
     391    StreamSetBuffer * DelMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     392    StreamSetBuffer * ErrorMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     393   
     394    Kernel * u8u16k = pxDriver.addKernelInstance<U8U16Kernel>(iBuilder);
    395395    pxDriver.makeKernelCall(u8u16k, {BasisBits}, {U8u16Bits, DelMask, ErrorMask});
    396396   
    397     StreamSetBuffer * U16Bits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments));
    398    
    399     StreamSetBuffer * DeletionCounts = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments));
    400 
    401     Kernel * delK = pxDriver.addKernelInstance(make_unique<DeletionKernel>(iBuilder, iBuilder->getBitBlockWidth()/16, 16));
     397    StreamSetBuffer * U16Bits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
     398   
     399    StreamSetBuffer * DeletionCounts = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
     400
     401    Kernel * delK = pxDriver.addKernelInstance<DeletionKernel>(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
    402402    pxDriver.makeKernelCall(delK, {U8u16Bits, DelMask}, {U16Bits, DeletionCounts});
    403403   
    404     Kernel * p2sk = pxDriver.addKernelInstance(make_unique<P2S16KernelWithCompressedOutput>(iBuilder));
     404    Kernel * p2sk = pxDriver.addKernelInstance<P2S16KernelWithCompressedOutput>(iBuilder);
    405405 
    406406    Kernel * outK = nullptr;
    407407    if (outputFile=="") {
    408         outK = pxDriver.addKernelInstance(make_unique<StdOutKernel>(iBuilder, 16));
     408        outK = pxDriver.addKernelInstance<StdOutKernel>(iBuilder, 16);
    409409    }
    410410    else {
    411         outK = pxDriver.addKernelInstance(make_unique<FileSink>(iBuilder, 16));
     411        outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 16);
    412412        Value * fName = iBuilder->CreatePointerCast(iBuilder->GetString(outputFile.c_str()), iBuilder->getInt8PtrTy());
    413413        outK->setInitialArguments({fName});
     
    416416    StreamSetBuffer * U16out = nullptr;
    417417    if (mMapBuffering || memAlignBuffering) {
    418         U16out = pxDriver.addExternalBuffer(make_unique<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), outputStream));
     418        U16out = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), outputStream);
    419419    } else {
    420         U16out = pxDriver.addBuffer(make_unique<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments, 1 /*overflow block*/));
     420        U16out = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments, 1);
    421421    }
    422422    pxDriver.makeKernelCall(p2sk, {U16Bits, DeletionCounts}, {U16out});
Note: See TracChangeset for help on using the changeset viewer.