source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5474

Last change on this file since 5474 was 5474, checked in by nmedfort, 2 years ago

Eliminated ExecutionEngine memory leak. Intentionally broke compatibility with prior versions to ensure that projects not yet checked in are restructured.

File size: 9.9 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13// #include <llvm/ExecutionEngine/ExecutionEngine.h>
14// #include <llvm/Linker/Linker.h>
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <kernels/kernel_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/source_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <toolchain/cpudriver.h>
27#include <fcntl.h>
28
29using namespace llvm;
30
31static cl::OptionCategory wcFlags("Command Flags", "wc options");
32
33static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
34
// Counters that can be requested on the command line; wcOptions binds each
// enumerator to its flag.
enum CountOptions {
    LineOption,
    WordOption,
    CharOption,
    ByteOption
};
38
39static cl::list<CountOptions> wcOptions(
40  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
41             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
42             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
43             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
44             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
45                                                 
46
47
// Minimum width of a printed count column; main() widens the columns when the
// largest total needs more digits. Never modified, hence const.
static const int defaultFieldWidth = 7;
49
50
// Which counters were requested; set by main() after option parsing and read
// when the kernel is generated and when the results are printed.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in inputFiles.
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running totals over all processed files.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
65
66using namespace pablo;
67using namespace kernel;
68using namespace parabix;
69
70//  The callback routine that records counts in progress.
71//
72extern "C" {
73    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
74        lineCount[fileIdx] = lines;
75        wordCount[fileIdx] = words;
76        charCount[fileIdx] = chars;
77        byteCount[fileIdx] = bytes;
78        TotalLines += lines;
79        TotalWords += words;
80        TotalChars += chars;
81        TotalBytes += bytes;
82    }
83}
84
85class WordCountKernel final: public pablo::PabloKernel {
86public:
87    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
88    bool isCachable() const override { return true; }
89    bool hasSignature() const override { return false; }
90protected:
91    void generatePabloMethod() override;
92};
93
94WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
95: PabloKernel(b, "wc",
96    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
97    {},
98    {},
99    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
100
101}
102
103void WordCountKernel::generatePabloMethod() {
104
105    //  input: 8 basis bit streams
106    const auto u8bitSet = getInputStreamVar("u8bit");
107    //  output: 3 counters
108
109    cc::CC_Compiler ccc(this, u8bitSet);
110
111    PabloBuilder & pb = ccc.getBuilder();
112
113    Var * lc = getOutputScalarVar("lineCount");
114    Var * wc = getOutputScalarVar("wordCount");
115    Var * cc = getOutputScalarVar("charCount");
116
117    if (CountLines) {
118        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
119        pb.createAssign(lc, pb.createCount(LF));
120    }
121    if (CountWords) {
122        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
123        PabloAST * wordChar = pb.createNot(WS);
124        // WS_follow_or_start = 1 past WS or at start of file
125        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
126        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
127        pb.createAssign(wc, pb.createCount(wordStart));
128    }
129    if (CountChars) {
130        //
131        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
132        // not UTF-8, or is not valid?
133        //
134        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
135        pb.createAssign(cc, pb.createCount(u8Begin));
136    }
137}
138
// Signature of the JIT-compiled entry point: (file descriptor, index of the file in inputFiles).
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
140
141void wcPipelineGen(ParabixDriver & pxDriver) {
142
143    auto & iBuilder = pxDriver.getBuilder();
144    Module * m = iBuilder->getModule();
145    const unsigned segmentSize = codegen::SegmentSize;
146    const unsigned bufferSegments = codegen::ThreadNum+1;
147
148   
149    Type * const int32Ty = iBuilder->getInt32Ty();
150    Type * const sizeTy = iBuilder->getSizeTy();
151    Type * const voidTy = iBuilder->getVoidTy();
152
153    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
154    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
155
156    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
157    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
158    main->setCallingConv(CallingConv::C);
159    Function::arg_iterator args = main->arg_begin();   
160    Value * const fileDecriptor = &*(args++);
161    fileDecriptor->setName("fileDecriptor");
162    Value * const fileIdx = &*(args++);
163    fileIdx->setName("fileIdx");
164
165    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
166
167    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
168
169    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
170
171    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
172    mmapK->setInitialArguments({fileDecriptor});
173    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
174
175    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
176    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
177   
178    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
179    pxDriver.makeKernelCall(wck, {BasisBits}, {});
180
181    pxDriver.generatePipelineIR();
182   
183    iBuilder->setKernel(mmapK);
184    Value * const fileSize = iBuilder->getAccumulator("fileSize");
185    iBuilder->setKernel(wck);
186    Value * const lineCount = iBuilder->getAccumulator("lineCount");
187    Value * const wordCount = iBuilder->getAccumulator("wordCount");
188    Value * const charCount = iBuilder->getAccumulator("charCount");
189
190    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
191   
192    iBuilder->CreateRetVoid();
193
194    pxDriver.finalizeObject();
195}
196
197void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
198    std::string fileName = inputFiles[fileIdx];
199    const int fd = open(fileName.c_str(), O_RDONLY);
200    if (LLVM_UNLIKELY(fd == -1)) {
201        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
202    } else {
203        fn_ptr(fd, fileIdx);
204        close(fd);
205    }
206}
207
208int main(int argc, char *argv[]) {
209    AddParabixVersionPrinter();
210    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
211    cl::ParseCommandLineOptions(argc, argv);
212    if (wcOptions.size() == 0) {
213        CountLines = true;
214        CountWords = true;
215        CountBytes = true;
216    } else {
217        CountLines = false;
218        CountWords = false;
219        CountBytes = false;
220        CountChars = false;
221        for (unsigned i = 0; i < wcOptions.size(); i++) {
222            switch (wcOptions[i]) {
223                case WordOption: CountWords = true; break;
224                case LineOption: CountLines = true; break;
225                case CharOption: CountBytes = true; CountChars = false; break;
226                case ByteOption: CountChars = true; CountBytes = false; break;
227            }
228        }
229    }
230   
231    ParabixDriver pxDriver("wc");
232    wcPipelineGen(pxDriver);
233    auto wordCountFunctionPtr = reinterpret_cast<WordCountFunctionType>(pxDriver.getMain());
234
235    const auto fileCount = inputFiles.size();
236    lineCount.resize(fileCount);
237    wordCount.resize(fileCount);
238    charCount.resize(fileCount);
239    byteCount.resize(fileCount);
240   
241    for (unsigned i = 0; i < fileCount; ++i) {
242        wc(wordCountFunctionPtr, i);
243    }
244   
245    size_t maxCount = 0;
246    if (CountLines) maxCount = TotalLines;
247    if (CountWords) maxCount = TotalWords;
248    if (CountChars) maxCount = TotalChars;
249    if (CountBytes) maxCount = TotalBytes;
250   
251    int fieldWidth = std::to_string(maxCount).size() + 1;
252    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
253
254    for (unsigned i = 0; i < inputFiles.size(); ++i) {
255        std::cout << std::setw(fieldWidth-1);
256        if (CountLines) {
257            std::cout << lineCount[i] << std::setw(fieldWidth);
258        }
259        if (CountWords) {
260            std::cout << wordCount[i] << std::setw(fieldWidth);
261        }
262        if (CountChars) {
263            std::cout << charCount[i] << std::setw(fieldWidth);
264        }
265        if (CountBytes) {
266            std::cout << byteCount[i];
267        }
268        std::cout << " " << inputFiles[i] << std::endl;
269    }
270    if (inputFiles.size() > 1) {
271        std::cout << std::setw(fieldWidth-1);
272        if (CountLines) {
273            std::cout << TotalLines << std::setw(fieldWidth);
274        }
275        if (CountWords) {
276            std::cout << TotalWords << std::setw(fieldWidth);
277        }
278        if (CountChars) {
279            std::cout << TotalChars << std::setw(fieldWidth);
280        }
281        if (CountBytes) {
282            std::cout << TotalBytes;
283        }
284        std::cout << " total" << std::endl;
285    }
286
287    return 0;
288}
Note: See TracBrowser for help on using the repository browser.