source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5435

Last change on this file since 5435 was 5435, checked in by nmedfort, 2 years ago

Continued refactoring work.

File size: 9.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/source_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <fcntl.h>
27
28using namespace llvm;
29
30static cl::OptionCategory wcFlags("Command Flags", "wc options");
31
32static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
33
34enum CountOptions {
35    LineOption, WordOption, CharOption, ByteOption
36};
37
38static cl::list<CountOptions> wcOptions(
39  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
40             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
41             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
42             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
43             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
44                                                 
45
46
// Minimum column width used by main() when printing counts.
static int defaultFieldWidth{7};

// Which counts are to be computed and reported; set by main() from the
// command line before the kernel is generated.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position on the command line.
// main() sizes these; record_counts() fills them in.
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running totals over all files processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
64
65using namespace pablo;
66using namespace kernel;
67using namespace parabix;
68
69//  The callback routine that records counts in progress.
70//
71extern "C" {
72    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
73        lineCount[fileIdx] = lines;
74        wordCount[fileIdx] = words;
75        charCount[fileIdx] = chars;
76        byteCount[fileIdx] = bytes;
77        TotalLines += lines;
78        TotalWords += words;
79        TotalChars += chars;
80        TotalBytes += bytes;
81    }
82}
83
84class WordCountKernel final: public pablo::PabloKernel {
85public:
86    WordCountKernel(const std::unique_ptr<IDISA::IDISA_Builder> & b);
87    bool isCachable() const override { return true; }
88    bool moduleIDisSignature() const override { return true; }
89    void prepareKernel() override;
90};
91
92WordCountKernel::WordCountKernel (const std::unique_ptr<IDISA::IDISA_Builder> & b)
93: PabloKernel(b, "wc",
94    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
95    {},
96    {},
97    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
98
99}
100
101void WordCountKernel::prepareKernel() {
102
103    //  input: 8 basis bit streams
104    const auto u8bitSet = getInputStreamVar("u8bit");
105    //  output: 3 counters
106
107    cc::CC_Compiler ccc(this, u8bitSet);
108
109    PabloBuilder & pb = ccc.getBuilder();
110
111    Var * lc = getOutputScalarVar("lineCount");
112    Var * wc = getOutputScalarVar("wordCount");
113    Var * cc = getOutputScalarVar("charCount");
114
115    if (CountLines) {
116        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
117        pb.createAssign(lc, pb.createCount(LF));
118    }
119    if (CountWords) {
120        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
121        PabloAST * wordChar = pb.createNot(WS);
122        // WS_follow_or_start = 1 past WS or at start of file
123        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
124        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
125        pb.createAssign(wc, pb.createCount(wordStart));
126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
133        pb.createAssign(cc, pb.createCount(u8Begin));
134    }
135    pablo_function_passes(this);
136    PabloKernel::prepareKernel();
137}
138
// Signature of the generated entry point: takes the descriptor of the file
// to count and the index under which its results are recorded.
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
140
141void wcPipelineGen(ParabixDriver & pxDriver) {
142
143    auto & iBuilder = pxDriver.getBuilder();
144    Module * m = iBuilder->getModule();
145   
146    Type * const int32Ty = iBuilder->getInt32Ty();
147    Type * const sizeTy = iBuilder->getSizeTy();
148    Type * const voidTy = iBuilder->getVoidTy();
149
150    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
151    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
152
153    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
154    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
155    main->setCallingConv(CallingConv::C);
156    Function::arg_iterator args = main->arg_begin();   
157    Value * const fileDecriptor = &*(args++);
158    fileDecriptor->setName("fileDecriptor");
159    Value * const fileIdx = &*(args++);
160    fileIdx->setName("fileIdx");
161
162    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
163
164    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
165
166    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
167
168    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
169    mmapK->setInitialArguments({fileDecriptor});
170    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
171
172    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
173    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
174   
175    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
176    pxDriver.makeKernelCall(wck, {BasisBits}, {});
177
178    pxDriver.generatePipelineIR();
179   
180    Value * const fileSize = mmapK->getAccumulator("fileSize");
181    Value * const lineCount = wck->getAccumulator("lineCount");
182    Value * const wordCount = wck->getAccumulator("wordCount");
183    Value * const charCount = wck->getAccumulator("charCount");
184
185    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
186   
187    iBuilder->CreateRetVoid();
188
189    pxDriver.linkAndFinalize();
190}
191
192
193WordCountFunctionType wcCodeGen() {
194    ParabixDriver pxDriver("wc");
195    wcPipelineGen(pxDriver);
196    return reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
197}
198
199void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
200    std::string fileName = inputFiles[fileIdx];
201    const int fd = open(fileName.c_str(), O_RDONLY);
202    if (LLVM_UNLIKELY(fd == -1)) {
203        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
204    } else {
205        fn_ptr(fd, fileIdx);
206        close(fd);
207    }
208}
209
210int main(int argc, char *argv[]) {
211    AddParabixVersionPrinter();
212    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
213    cl::ParseCommandLineOptions(argc, argv);
214    if (wcOptions.size() == 0) {
215        CountLines = true;
216        CountWords = true;
217        CountBytes = true;
218    } else {
219        CountLines = false;
220        CountWords = false;
221        CountBytes = false;
222        CountChars = false;
223        for (unsigned i = 0; i < wcOptions.size(); i++) {
224            switch (wcOptions[i]) {
225                case WordOption: CountWords = true; break;
226                case LineOption: CountLines = true; break;
227                case CharOption: CountBytes = true; CountChars = false; break;
228                case ByteOption: CountChars = true; CountBytes = false; break;
229            }
230        }
231    }
232   
233    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
234
235    const auto fileCount = inputFiles.size();
236    lineCount.resize(fileCount);
237    wordCount.resize(fileCount);
238    charCount.resize(fileCount);
239    byteCount.resize(fileCount);
240   
241    for (unsigned i = 0; i < fileCount; ++i) {
242        wc(wordCountFunctionPtr, i);
243    }
244   
245    size_t maxCount = 0;
246    if (CountLines) maxCount = TotalLines;
247    if (CountWords) maxCount = TotalWords;
248    if (CountChars) maxCount = TotalChars;
249    if (CountBytes) maxCount = TotalBytes;
250   
251    int fieldWidth = std::to_string(maxCount).size() + 1;
252    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
253
254    for (unsigned i = 0; i < inputFiles.size(); ++i) {
255        std::cout << std::setw(fieldWidth-1);
256        if (CountLines) {
257            std::cout << lineCount[i] << std::setw(fieldWidth);
258        }
259        if (CountWords) {
260            std::cout << wordCount[i] << std::setw(fieldWidth);
261        }
262        if (CountChars) {
263            std::cout << charCount[i] << std::setw(fieldWidth);
264        }
265        if (CountBytes) {
266            std::cout << byteCount[i];
267        }
268        std::cout << " " << inputFiles[i] << std::endl;
269    }
270    if (inputFiles.size() > 1) {
271        std::cout << std::setw(fieldWidth-1);
272        if (CountLines) {
273            std::cout << TotalLines << std::setw(fieldWidth);
274        }
275        if (CountWords) {
276            std::cout << TotalWords << std::setw(fieldWidth);
277        }
278        if (CountChars) {
279            std::cout << TotalChars << std::setw(fieldWidth);
280        }
281        if (CountBytes) {
282            std::cout << TotalBytes;
283        }
284        std::cout << " total" << std::endl;
285    }
286
287    return 0;
288}
Note: See TracBrowser for help on using the repository browser.