source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5464

Last change on this file since 5464 was 5464, checked in by nmedfort, 2 years ago

Restructuring work for the Driver classes. Start of work to eliminate the memory leaks with the ExecutionEngine. Replaced custom AlignedMalloc with backend call to std::aligned_malloc. Salvaged some work on DistributionPass for reevaluation.

File size: 10.0 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13// #include <llvm/ExecutionEngine/ExecutionEngine.h>
14// #include <llvm/Linker/Linker.h>
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <kernels/kernel_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/source_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <toolchain/cpudriver.h>
27#include <fcntl.h>
28
29using namespace llvm;
30
31static cl::OptionCategory wcFlags("Command Flags", "wc options");
32
33static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
34
35enum CountOptions {
36    LineOption, WordOption, CharOption, ByteOption
37};
38
39static cl::list<CountOptions> wcOptions(
40  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
41             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
42             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
43             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
44             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
45                                                 
46
47
// Minimum width of a numeric column in the report.
static int defaultFieldWidth{7};

// Which counts are reported; main() sets these from the command-line flags.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in inputFiles.
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running sums over every file processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
65
66using namespace pablo;
67using namespace kernel;
68using namespace parabix;
69
70//  The callback routine that records counts in progress.
71//
72extern "C" {
73    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
74        lineCount[fileIdx] = lines;
75        wordCount[fileIdx] = words;
76        charCount[fileIdx] = chars;
77        byteCount[fileIdx] = bytes;
78        TotalLines += lines;
79        TotalWords += words;
80        TotalChars += chars;
81        TotalBytes += bytes;
82    }
83}
84
85class WordCountKernel final: public pablo::PabloKernel {
86public:
87    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
88    bool isCachable() const override { return true; }
89    bool hasSignature() const override { return false; }
90protected:
91    void generatePabloMethod() override;
92};
93
94WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
95: PabloKernel(b, "wc",
96    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
97    {},
98    {},
99    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
100
101}
102
103void WordCountKernel::generatePabloMethod() {
104
105    //  input: 8 basis bit streams
106    const auto u8bitSet = getInputStreamVar("u8bit");
107    //  output: 3 counters
108
109    cc::CC_Compiler ccc(this, u8bitSet);
110
111    PabloBuilder & pb = ccc.getBuilder();
112
113    Var * lc = getOutputScalarVar("lineCount");
114    Var * wc = getOutputScalarVar("wordCount");
115    Var * cc = getOutputScalarVar("charCount");
116
117    if (CountLines) {
118        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
119        pb.createAssign(lc, pb.createCount(LF));
120    }
121    if (CountWords) {
122        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
123        PabloAST * wordChar = pb.createNot(WS);
124        // WS_follow_or_start = 1 past WS or at start of file
125        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
126        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
127        pb.createAssign(wc, pb.createCount(wordStart));
128    }
129    if (CountChars) {
130        //
131        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
132        // not UTF-8, or is not valid?
133        //
134        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
135        pb.createAssign(cc, pb.createCount(u8Begin));
136    }
137}
138
// Signature of the JIT-compiled entry point: (file descriptor, index of the file).
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
140
141void wcPipelineGen(ParabixDriver & pxDriver) {
142
143    auto & iBuilder = pxDriver.getBuilder();
144    Module * m = iBuilder->getModule();
145    const unsigned segmentSize = codegen::SegmentSize;
146    const unsigned bufferSegments = codegen::ThreadNum+1;
147
148   
149    Type * const int32Ty = iBuilder->getInt32Ty();
150    Type * const sizeTy = iBuilder->getSizeTy();
151    Type * const voidTy = iBuilder->getVoidTy();
152
153    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
154    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
155
156    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
157    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
158    main->setCallingConv(CallingConv::C);
159    Function::arg_iterator args = main->arg_begin();   
160    Value * const fileDecriptor = &*(args++);
161    fileDecriptor->setName("fileDecriptor");
162    Value * const fileIdx = &*(args++);
163    fileIdx->setName("fileIdx");
164
165    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
166
167    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
168
169    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
170
171    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
172    mmapK->setInitialArguments({fileDecriptor});
173    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
174
175    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
176    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
177   
178    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
179    pxDriver.makeKernelCall(wck, {BasisBits}, {});
180
181    pxDriver.generatePipelineIR();
182   
183    iBuilder->setKernel(mmapK);
184    Value * const fileSize = iBuilder->getAccumulator("fileSize");
185    iBuilder->setKernel(wck);
186    Value * const lineCount = iBuilder->getAccumulator("lineCount");
187    Value * const wordCount = iBuilder->getAccumulator("wordCount");
188    Value * const charCount = iBuilder->getAccumulator("charCount");
189
190    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
191   
192    iBuilder->CreateRetVoid();
193
194    pxDriver.linkAndFinalize();
195}
196
197
198WordCountFunctionType wcCodeGen() {
199    ParabixDriver pxDriver("wc");
200    wcPipelineGen(pxDriver);
201    return reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
202}
203
204void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
205    std::string fileName = inputFiles[fileIdx];
206    const int fd = open(fileName.c_str(), O_RDONLY);
207    if (LLVM_UNLIKELY(fd == -1)) {
208        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
209    } else {
210        fn_ptr(fd, fileIdx);
211        close(fd);
212    }
213}
214
215int main(int argc, char *argv[]) {
216    AddParabixVersionPrinter();
217    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
218    cl::ParseCommandLineOptions(argc, argv);
219    if (wcOptions.size() == 0) {
220        CountLines = true;
221        CountWords = true;
222        CountBytes = true;
223    } else {
224        CountLines = false;
225        CountWords = false;
226        CountBytes = false;
227        CountChars = false;
228        for (unsigned i = 0; i < wcOptions.size(); i++) {
229            switch (wcOptions[i]) {
230                case WordOption: CountWords = true; break;
231                case LineOption: CountLines = true; break;
232                case CharOption: CountBytes = true; CountChars = false; break;
233                case ByteOption: CountChars = true; CountBytes = false; break;
234            }
235        }
236    }
237   
238    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
239
240    const auto fileCount = inputFiles.size();
241    lineCount.resize(fileCount);
242    wordCount.resize(fileCount);
243    charCount.resize(fileCount);
244    byteCount.resize(fileCount);
245   
246    for (unsigned i = 0; i < fileCount; ++i) {
247        wc(wordCountFunctionPtr, i);
248    }
249   
250    size_t maxCount = 0;
251    if (CountLines) maxCount = TotalLines;
252    if (CountWords) maxCount = TotalWords;
253    if (CountChars) maxCount = TotalChars;
254    if (CountBytes) maxCount = TotalBytes;
255   
256    int fieldWidth = std::to_string(maxCount).size() + 1;
257    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
258
259    for (unsigned i = 0; i < inputFiles.size(); ++i) {
260        std::cout << std::setw(fieldWidth-1);
261        if (CountLines) {
262            std::cout << lineCount[i] << std::setw(fieldWidth);
263        }
264        if (CountWords) {
265            std::cout << wordCount[i] << std::setw(fieldWidth);
266        }
267        if (CountChars) {
268            std::cout << charCount[i] << std::setw(fieldWidth);
269        }
270        if (CountBytes) {
271            std::cout << byteCount[i];
272        }
273        std::cout << " " << inputFiles[i] << std::endl;
274    }
275    if (inputFiles.size() > 1) {
276        std::cout << std::setw(fieldWidth-1);
277        if (CountLines) {
278            std::cout << TotalLines << std::setw(fieldWidth);
279        }
280        if (CountWords) {
281            std::cout << TotalWords << std::setw(fieldWidth);
282        }
283        if (CountChars) {
284            std::cout << TotalChars << std::setw(fieldWidth);
285        }
286        if (CountBytes) {
287            std::cout << TotalBytes;
288        }
289        std::cout << " total" << std::endl;
290    }
291
292    return 0;
293}
Note: See TracBrowser for help on using the repository browser.