Ignore:
Timestamp:
Feb 3, 2018, 12:02:14 PM (16 months ago)
Author:
cameron
Message:

Use DirectCC builder updates; speed up wc -l

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5847 r5861  
    4242             CL_ENUM_VAL_SENTINEL), cl::cat(wcFlags), cl::Grouping);
    4343                                                 
    44 
     44static std::string wc_modes = "";
    4545
    4646static int defaultFieldWidth = 7;  // default field width
     
    8383class WordCountKernel final: public pablo::PabloKernel {
    8484public:
    85     WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
     85    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b, Binding && inputStreamSet);
    8686    bool isCachable() const override { return true; }
    8787    bool hasSignature() const override { return false; }
     
    9090};
    9191
    92 WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
    93 : PabloKernel(b, "wc",
    94     {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
     92WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b, Binding && inputStreamSet)
     93: PabloKernel(b, "wc_" + wc_modes,
     94    {inputStreamSet},
    9595    {},
    9696    {},
     
    101101void WordCountKernel::generatePabloMethod() {
    102102    PabloBuilder pb(getEntryScope());
    103     //  input: 8 basis bit streams
    104     std::vector<PabloAST *> u8_bits = getInputStreamSet("u8bit");
     103    std::unique_ptr<cc::CC_Compiler> ccc;
     104    if (CountWords || CountChars) {
     105        ccc = make_unique<cc::Parabix_CC_Compiler>(this, getInputStreamSet("u8bit"));
     106    } else {
     107        ccc = make_unique<cc::Direct_CC_Compiler>(this, pb.createExtract(getInput(0), pb.getInteger(0)));
     108    }
     109
    105110    //  output: 3 counters
    106 
    107     cc::Parabix_CC_Compiler ccc(this, u8_bits);
    108 
    109111    Var * lc = getOutputScalarVar("lineCount");
    110112    Var * wc = getOutputScalarVar("wordCount");
     
    112114
    113115    if (CountLines) {
    114         PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
     116        PabloAST * LF = ccc->compileCC(re::makeByte(0x0A));
    115117        pb.createAssign(lc, pb.createCount(LF));
    116118    }
    117119    if (CountWords) {
    118         PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
     120        PabloAST * WS = ccc->compileCC(re::makeCC(re::makeByte(0x09, 0x0D), re::makeByte(0x20)));
    119121        PabloAST * wordChar = pb.createNot(WS);
    120122        // WS_follow_or_start = 1 past WS or at start of file
     
    128130        // not UTF-8, or is not valid?
    129131        //
    130         PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
     132        PabloAST * u8Begin = ccc->compileCC(re::makeCC(re::makeByte(0, 0x7F), re::makeByte(0xC2, 0xF4)));
    131133        pb.createAssign(cc, pb.createCount(u8Begin));
    132134    }
     
    163165    StreamSetBuffer * const ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    164166
    165     StreamSetBuffer * const BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
    166167
    167168    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
    168169    mmapK->setInitialArguments({fileDecriptor});
    169170    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    170 
    171     Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
    172     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    173    
    174     Kernel * wck = pxDriver.addKernelInstance<WordCountKernel>(iBuilder);
    175     pxDriver.makeKernelCall(wck, {BasisBits}, {});
     171   
     172    Kernel * wck  = nullptr;
     173    if (CountWords || CountChars) {
     174        StreamSetBuffer * const BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
     175        Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
     176        pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     177       
     178        wck = pxDriver.addKernelInstance<WordCountKernel>(iBuilder, Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"});
     179        pxDriver.makeKernelCall(wck, {BasisBits}, {});
     180
     181
     182    } else {
     183        wck = pxDriver.addKernelInstance<WordCountKernel>(iBuilder, Binding{iBuilder->getStreamSetTy(1, 8), "u8byte"});
     184        pxDriver.makeKernelCall(wck, {ByteStream}, {});
     185    }
    176186
    177187    pxDriver.generatePipelineIR();
     
    217227                case WordOption: CountWords = true; break;
    218228                case LineOption: CountLines = true; break;
    219                 case CharOption: CountBytes = true; CountChars = false; break;
    220                 case ByteOption: CountChars = true; CountBytes = false; break;
     229                case CharOption: CountChars = true; CountBytes = false; break;
     230                case ByteOption: CountBytes = true; CountChars = false; break;
    221231            }
    222232        }
    223233    }
    224    
     234    if (CountLines) wc_modes += "l";
     235    if (CountWords) wc_modes += "w";
     236    if (CountChars) wc_modes += "m";
     237    if (CountBytes) wc_modes += "c";
     238
    225239    ParabixDriver pxDriver("wc");
    226240    wcPipelineGen(pxDriver);
     
    243257    if (CountBytes) maxCount = TotalBytes;
    244258   
     259   
     260   
    245261    int fieldWidth = std::to_string(maxCount).size() + 1;
    246262    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
Note: See TracChangeset for help on using the changeset viewer.