source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5772

Last change on this file since 5772 was 5755, checked in by nmedfort, 19 months ago

Bug fixes and simplified MultiBlockKernel logic

File size: 9.7 KB
RevLine 
[5019]1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
[5425]10#include <toolchain/toolchain.h>
[5019]11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/Support/CommandLine.h>
14#include <llvm/Support/raw_ostream.h>
15#include <cc/cc_compiler.h>
[5063]16#include <pablo/pablo_kernel.h>
[5436]17#include <kernels/kernel_builder.h>
[5238]18#include <IR_Gen/idisa_target.h>
[5100]19#include <kernels/streamset.h>
[5429]20#include <kernels/source_kernel.h>
[5019]21#include <kernels/s2p_kernel.h>
22#include <pablo/pablo_compiler.h>
23#include <pablo/pablo_toolchain.h>
[5464]24#include <toolchain/cpudriver.h>
[5418]25#include <fcntl.h>
[5019]26
[5267]27using namespace llvm;
28
[5026]29static cl::OptionCategory wcFlags("Command Flags", "wc options");
[5019]30
[5026]31static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
[5019]32
[5029]33enum CountOptions {
34    LineOption, WordOption, CharOption, ByteOption
35};
[5019]36
[5030]37static cl::list<CountOptions> wcOptions(
[5029]38  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
39             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
40             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
[5732]41             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m).")
42             CL_ENUM_VAL_SENTINEL), cl::cat(wcFlags), cl::Grouping);
[5029]43                                                 
[5019]44
45
// Minimum width of each printed count column.
static int defaultFieldWidth{7};

// Which counts were requested; set by main() from the command-line flags.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the position of the file in inputFiles.
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running totals accumulated over every file processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
63
[5217]64using namespace pablo;
65using namespace kernel;
66using namespace parabix;
[5029]67
68//  The callback routine that records counts in progress.
[5019]69//
70extern "C" {
[5029]71    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
[5020]72        lineCount[fileIdx] = lines;
73        wordCount[fileIdx] = words;
74        charCount[fileIdx] = chars;
75        byteCount[fileIdx] = bytes;
76        TotalLines += lines;
77        TotalWords += words;
78        TotalChars += chars;
79        TotalBytes += bytes;
[5019]80    }
81}
82
[5435]83class WordCountKernel final: public pablo::PabloKernel {
84public:
[5436]85    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
[5435]86    bool isCachable() const override { return true; }
[5464]87    bool hasSignature() const override { return false; }
[5436]88protected:
89    void generatePabloMethod() override;
[5435]90};
[5019]91
[5436]92WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
[5435]93: PabloKernel(b, "wc",
94    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
95    {},
96    {},
97    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
98
99}
100
[5436]101void WordCountKernel::generatePabloMethod() {
[5435]102
[5019]103    //  input: 8 basis bit streams
[5435]104    const auto u8bitSet = getInputStreamVar("u8bit");
[5063]105    //  output: 3 counters
[5435]106
107    cc::CC_Compiler ccc(this, u8bitSet);
108
[5202]109    PabloBuilder & pb = ccc.getBuilder();
[5019]110
[5435]111    Var * lc = getOutputScalarVar("lineCount");
112    Var * wc = getOutputScalarVar("wordCount");
113    Var * cc = getOutputScalarVar("charCount");
[5202]114
[5019]115    if (CountLines) {
[5202]116        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
117        pb.createAssign(lc, pb.createCount(LF));
[5019]118    }
119    if (CountWords) {
[5202]120        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
121        PabloAST * wordChar = pb.createNot(WS);
[5019]122        // WS_follow_or_start = 1 past WS or at start of file
[5202]123        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
124        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
125        pb.createAssign(wc, pb.createCount(wordStart));
[5019]126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
[5435]132        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
[5202]133        pb.createAssign(cc, pb.createCount(u8Begin));
[5019]134    }
135}
136
[5418]137typedef void (*WordCountFunctionType)(uint32_t fd, size_t fileIdx);
[5391]138
139void wcPipelineGen(ParabixDriver & pxDriver) {
140
[5435]141    auto & iBuilder = pxDriver.getBuilder();
[5391]142    Module * m = iBuilder->getModule();
[5457]143    const unsigned segmentSize = codegen::SegmentSize;
144    const unsigned bufferSegments = codegen::ThreadNum+1;
145
146   
[5418]147    Type * const int32Ty = iBuilder->getInt32Ty();
148    Type * const sizeTy = iBuilder->getSizeTy();
[5230]149    Type * const voidTy = iBuilder->getVoidTy();
[5418]150
151    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
152    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
153
154    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
155    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
[5019]156    main->setCallingConv(CallingConv::C);
[5418]157    Function::arg_iterator args = main->arg_begin();   
158    Value * const fileDecriptor = &*(args++);
159    fileDecriptor->setName("fileDecriptor");
[5019]160    Value * const fileIdx = &*(args++);
161    fileIdx->setName("fileIdx");
[5418]162
[5391]163    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
164
[5755]165    StreamSetBuffer * const ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
[5409]166
[5755]167    StreamSetBuffer * const BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
[5409]168
[5755]169    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
[5418]170    mmapK->setInitialArguments({fileDecriptor});
[5414]171    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
[5391]172
[5755]173    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
[5414]174    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
[5300]175   
[5755]176    Kernel * wck = pxDriver.addKernelInstance<WordCountKernel>(iBuilder);
[5414]177    pxDriver.makeKernelCall(wck, {BasisBits}, {});
[5249]178
[5391]179    pxDriver.generatePipelineIR();
[5019]180   
[5440]181    iBuilder->setKernel(mmapK);
182    Value * const fileSize = iBuilder->getAccumulator("fileSize");
183    iBuilder->setKernel(wck);
184    Value * const lineCount = iBuilder->getAccumulator("lineCount");
185    Value * const wordCount = iBuilder->getAccumulator("wordCount");
186    Value * const charCount = iBuilder->getAccumulator("charCount");
[5035]187
[5418]188    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
[5597]189    pxDriver.deallocateBuffers();
[5019]190    iBuilder->CreateRetVoid();
[5401]191
[5474]192    pxDriver.finalizeObject();
[5019]193}
194
[5418]195void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
[5019]196    std::string fileName = inputFiles[fileIdx];
[5418]197    const int fd = open(fileName.c_str(), O_RDONLY);
198    if (LLVM_UNLIKELY(fd == -1)) {
199        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
[5019]200    } else {
[5418]201        fn_ptr(fd, fileIdx);
202        close(fd);
[5019]203    }
204}
205
206int main(int argc, char *argv[]) {
[5486]207    codegen::ParseCommandLineOptions(argc, argv, {&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
[5029]208    if (wcOptions.size() == 0) {
[5019]209        CountLines = true;
210        CountWords = true;
211        CountBytes = true;
[5418]212    } else {
[5029]213        CountLines = false;
214        CountWords = false;
215        CountBytes = false;
216        CountChars = false;
217        for (unsigned i = 0; i < wcOptions.size(); i++) {
218            switch (wcOptions[i]) {
219                case WordOption: CountWords = true; break;
220                case LineOption: CountLines = true; break;
221                case CharOption: CountBytes = true; CountChars = false; break;
222                case ByteOption: CountChars = true; CountBytes = false; break;
223            }
224        }
225    }
[5019]226   
[5474]227    ParabixDriver pxDriver("wc");
228    wcPipelineGen(pxDriver);
229    auto wordCountFunctionPtr = reinterpret_cast<WordCountFunctionType>(pxDriver.getMain());
[5019]230
[5418]231    const auto fileCount = inputFiles.size();
[5020]232    lineCount.resize(fileCount);
233    wordCount.resize(fileCount);
234    charCount.resize(fileCount);
235    byteCount.resize(fileCount);
236   
[5418]237    for (unsigned i = 0; i < fileCount; ++i) {
238        wc(wordCountFunctionPtr, i);
[5019]239    }
240   
[5021]241    size_t maxCount = 0;
[5020]242    if (CountLines) maxCount = TotalLines;
243    if (CountWords) maxCount = TotalWords;
244    if (CountChars) maxCount = TotalChars;
245    if (CountBytes) maxCount = TotalBytes;
246   
247    int fieldWidth = std::to_string(maxCount).size() + 1;
248    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
249
250    for (unsigned i = 0; i < inputFiles.size(); ++i) {
[5029]251        std::cout << std::setw(fieldWidth-1);
[5020]252        if (CountLines) {
[5029]253            std::cout << lineCount[i] << std::setw(fieldWidth);
[5020]254        }
255        if (CountWords) {
[5029]256            std::cout << wordCount[i] << std::setw(fieldWidth);
[5020]257        }
258        if (CountChars) {
[5029]259            std::cout << charCount[i] << std::setw(fieldWidth);
[5020]260        }
261        if (CountBytes) {
[5029]262            std::cout << byteCount[i];
[5020]263        }
264        std::cout << " " << inputFiles[i] << std::endl;
265    }
[5019]266    if (inputFiles.size() > 1) {
[5029]267        std::cout << std::setw(fieldWidth-1);
[5019]268        if (CountLines) {
[5029]269            std::cout << TotalLines << std::setw(fieldWidth);
[5019]270        }
271        if (CountWords) {
[5029]272            std::cout << TotalWords << std::setw(fieldWidth);
[5019]273        }
274        if (CountChars) {
[5029]275            std::cout << TotalChars << std::setw(fieldWidth);
[5019]276        }
277        if (CountBytes) {
[5029]278            std::cout << TotalBytes;
[5019]279        }
280        std::cout << " total" << std::endl;
281    }
282
283    return 0;
284}
Note: See TracBrowser for help on using the repository browser.