source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5842

Last change on this file since 5842 was 5842, checked in by cameron, 15 months ago

Decoupling PabloKernels from CC_compiler

File size: 9.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/Support/CommandLine.h>
14#include <llvm/Support/raw_ostream.h>
15#include <cc/cc_compiler.h>
16#include <pablo/pablo_kernel.h>
17#include <kernels/kernel_builder.h>
18#include <IR_Gen/idisa_target.h>
19#include <kernels/streamset.h>
20#include <kernels/source_kernel.h>
21#include <kernels/s2p_kernel.h>
22#include <pablo/pablo_compiler.h>
23#include <pablo/pablo_toolchain.h>
24#include <toolchain/cpudriver.h>
25#include <fcntl.h>
26
27using namespace llvm;
28
29static cl::OptionCategory wcFlags("Command Flags", "wc options");
30
31static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
32
33enum CountOptions {
34    LineOption, WordOption, CharOption, ByteOption
35};
36
37static cl::list<CountOptions> wcOptions(
38  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
39             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
40             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
41             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m).")
42             CL_ENUM_VAL_SENTINEL), cl::cat(wcFlags), cl::Grouping);
43                                                 
44
45
// Minimum column width used when printing counts.
static int defaultFieldWidth = 7;

// For each metric: whether it was requested, its per-file results (indexed
// by position of the file on the command line) and its running total.

// Lines.
bool CountLines = false;
std::vector<uint64_t> lineCount;
uint64_t TotalLines = 0;

// Words.
bool CountWords = false;
std::vector<uint64_t> wordCount;
uint64_t TotalWords = 0;

// Characters.
bool CountChars = false;
std::vector<uint64_t> charCount;
uint64_t TotalChars = 0;

// Bytes.
bool CountBytes = false;
std::vector<uint64_t> byteCount;
uint64_t TotalBytes = 0;
63
64using namespace pablo;
65using namespace kernel;
66using namespace parabix;
67
68//  The callback routine that records counts in progress.
69//
70extern "C" {
71    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
72        lineCount[fileIdx] = lines;
73        wordCount[fileIdx] = words;
74        charCount[fileIdx] = chars;
75        byteCount[fileIdx] = bytes;
76        TotalLines += lines;
77        TotalWords += words;
78        TotalChars += chars;
79        TotalBytes += bytes;
80    }
81}
82
83class WordCountKernel final: public pablo::PabloKernel {
84public:
85    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
86    bool isCachable() const override { return true; }
87    bool hasSignature() const override { return false; }
88protected:
89    void generatePabloMethod() override;
90};
91
92WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
93: PabloKernel(b, "wc",
94    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
95    {},
96    {},
97    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
98
99}
100
101void WordCountKernel::generatePabloMethod() {
102    PabloBuilder pb(getEntryScope());
103    //  input: 8 basis bit streams
104    const auto u8bitSet = getInputStreamVar("u8bit");
105    //  output: 3 counters
106
107    cc::CC_Compiler ccc(this, u8bitSet);
108
109    Var * lc = getOutputScalarVar("lineCount");
110    Var * wc = getOutputScalarVar("wordCount");
111    Var * cc = getOutputScalarVar("charCount");
112
113    if (CountLines) {
114        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
115        pb.createAssign(lc, pb.createCount(LF));
116    }
117    if (CountWords) {
118        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
119        PabloAST * wordChar = pb.createNot(WS);
120        // WS_follow_or_start = 1 past WS or at start of file
121        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
122        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
123        pb.createAssign(wc, pb.createCount(wordStart));
124    }
125    if (CountChars) {
126        //
127        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
128        // not UTF-8, or is not valid?
129        //
130        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
131        pb.createAssign(cc, pb.createCount(u8Begin));
132    }
133}
134
// Signature of the JIT-compiled entry point: takes an open file descriptor
// and the index of the file on the command line.
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
136
137void wcPipelineGen(ParabixDriver & pxDriver) {
138
139    auto & iBuilder = pxDriver.getBuilder();
140    Module * m = iBuilder->getModule();
141    const unsigned segmentSize = codegen::SegmentSize;
142    const unsigned bufferSegments = codegen::ThreadNum+1;
143
144   
145    Type * const int32Ty = iBuilder->getInt32Ty();
146    Type * const sizeTy = iBuilder->getSizeTy();
147    Type * const voidTy = iBuilder->getVoidTy();
148
149    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
150    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
151
152    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
153    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
154    main->setCallingConv(CallingConv::C);
155    Function::arg_iterator args = main->arg_begin();   
156    Value * const fileDecriptor = &*(args++);
157    fileDecriptor->setName("fileDecriptor");
158    Value * const fileIdx = &*(args++);
159    fileIdx->setName("fileIdx");
160
161    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
162
163    StreamSetBuffer * const ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
164
165    StreamSetBuffer * const BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
166
167    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
168    mmapK->setInitialArguments({fileDecriptor});
169    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
170
171    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
172    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
173   
174    Kernel * wck = pxDriver.addKernelInstance<WordCountKernel>(iBuilder);
175    pxDriver.makeKernelCall(wck, {BasisBits}, {});
176
177    pxDriver.generatePipelineIR();
178   
179    iBuilder->setKernel(mmapK);
180    Value * const fileSize = iBuilder->getAccumulator("fileSize");
181    iBuilder->setKernel(wck);
182    Value * const lineCount = iBuilder->getAccumulator("lineCount");
183    Value * const wordCount = iBuilder->getAccumulator("wordCount");
184    Value * const charCount = iBuilder->getAccumulator("charCount");
185
186    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
187    pxDriver.deallocateBuffers();
188    iBuilder->CreateRetVoid();
189
190    pxDriver.finalizeObject();
191}
192
193void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
194    std::string fileName = inputFiles[fileIdx];
195    const int fd = open(fileName.c_str(), O_RDONLY);
196    if (LLVM_UNLIKELY(fd == -1)) {
197        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
198    } else {
199        fn_ptr(fd, fileIdx);
200        close(fd);
201    }
202}
203
204int main(int argc, char *argv[]) {
205    codegen::ParseCommandLineOptions(argc, argv, {&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
206    if (wcOptions.size() == 0) {
207        CountLines = true;
208        CountWords = true;
209        CountBytes = true;
210    } else {
211        CountLines = false;
212        CountWords = false;
213        CountBytes = false;
214        CountChars = false;
215        for (unsigned i = 0; i < wcOptions.size(); i++) {
216            switch (wcOptions[i]) {
217                case WordOption: CountWords = true; break;
218                case LineOption: CountLines = true; break;
219                case CharOption: CountBytes = true; CountChars = false; break;
220                case ByteOption: CountChars = true; CountBytes = false; break;
221            }
222        }
223    }
224   
225    ParabixDriver pxDriver("wc");
226    wcPipelineGen(pxDriver);
227    auto wordCountFunctionPtr = reinterpret_cast<WordCountFunctionType>(pxDriver.getMain());
228
229    const auto fileCount = inputFiles.size();
230    lineCount.resize(fileCount);
231    wordCount.resize(fileCount);
232    charCount.resize(fileCount);
233    byteCount.resize(fileCount);
234   
235    for (unsigned i = 0; i < fileCount; ++i) {
236        wc(wordCountFunctionPtr, i);
237    }
238   
239    size_t maxCount = 0;
240    if (CountLines) maxCount = TotalLines;
241    if (CountWords) maxCount = TotalWords;
242    if (CountChars) maxCount = TotalChars;
243    if (CountBytes) maxCount = TotalBytes;
244   
245    int fieldWidth = std::to_string(maxCount).size() + 1;
246    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
247
248    for (unsigned i = 0; i < inputFiles.size(); ++i) {
249        std::cout << std::setw(fieldWidth-1);
250        if (CountLines) {
251            std::cout << lineCount[i] << std::setw(fieldWidth);
252        }
253        if (CountWords) {
254            std::cout << wordCount[i] << std::setw(fieldWidth);
255        }
256        if (CountChars) {
257            std::cout << charCount[i] << std::setw(fieldWidth);
258        }
259        if (CountBytes) {
260            std::cout << byteCount[i];
261        }
262        std::cout << " " << inputFiles[i] << std::endl;
263    }
264    if (inputFiles.size() > 1) {
265        std::cout << std::setw(fieldWidth-1);
266        if (CountLines) {
267            std::cout << TotalLines << std::setw(fieldWidth);
268        }
269        if (CountWords) {
270            std::cout << TotalWords << std::setw(fieldWidth);
271        }
272        if (CountChars) {
273            std::cout << TotalChars << std::setw(fieldWidth);
274        }
275        if (CountBytes) {
276            std::cout << TotalBytes;
277        }
278        std::cout << " total" << std::endl;
279    }
280
281    return 0;
282}
Note: See TracBrowser for help on using the repository browser.