source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5448

Last change on this file since 5448 was 5440, checked in by nmedfort, 2 years ago

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

File size: 9.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <kernels/kernel_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/source_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <fcntl.h>
27
28using namespace llvm;
29
30static cl::OptionCategory wcFlags("Command Flags", "wc options");
31
32static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
33
34enum CountOptions {
35    LineOption, WordOption, CharOption, ByteOption
36};
37
38static cl::list<CountOptions> wcOptions(
39  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
40             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
41             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
42             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
43             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
44                                                 
45
46
// Minimum column width used when printing counts.
static int defaultFieldWidth{7};

// Which counts were requested; assigned in main() from the command line flags.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in the input file list.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals accumulated over every processed file.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
64
65using namespace pablo;
66using namespace kernel;
67using namespace parabix;
68
69//  The callback routine that records counts in progress.
70//
71extern "C" {
72    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
73        lineCount[fileIdx] = lines;
74        wordCount[fileIdx] = words;
75        charCount[fileIdx] = chars;
76        byteCount[fileIdx] = bytes;
77        TotalLines += lines;
78        TotalWords += words;
79        TotalChars += chars;
80        TotalBytes += bytes;
81    }
82}
83
84class WordCountKernel final: public pablo::PabloKernel {
85public:
86    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
87    bool isCachable() const override { return true; }
88    bool moduleIDisSignature() const override { return true; }
89protected:
90    void generatePabloMethod() override;
91};
92
93WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
94: PabloKernel(b, "wc",
95    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
96    {},
97    {},
98    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
99
100}
101
102void WordCountKernel::generatePabloMethod() {
103
104    //  input: 8 basis bit streams
105    const auto u8bitSet = getInputStreamVar("u8bit");
106    //  output: 3 counters
107
108    cc::CC_Compiler ccc(this, u8bitSet);
109
110    PabloBuilder & pb = ccc.getBuilder();
111
112    Var * lc = getOutputScalarVar("lineCount");
113    Var * wc = getOutputScalarVar("wordCount");
114    Var * cc = getOutputScalarVar("charCount");
115
116    if (CountLines) {
117        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
118        pb.createAssign(lc, pb.createCount(LF));
119    }
120    if (CountWords) {
121        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
122        PabloAST * wordChar = pb.createNot(WS);
123        // WS_follow_or_start = 1 past WS or at start of file
124        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
125        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
126        pb.createAssign(wc, pb.createCount(wordStart));
127    }
128    if (CountChars) {
129        //
130        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
131        // not UTF-8, or is not valid?
132        //
133        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
134        pb.createAssign(cc, pb.createCount(u8Begin));
135    }
136}
137
// Signature of the generated entry point: (file descriptor, index of the file in the input list).
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
139
140void wcPipelineGen(ParabixDriver & pxDriver) {
141
142    auto & iBuilder = pxDriver.getBuilder();
143    Module * m = iBuilder->getModule();
144   
145    Type * const int32Ty = iBuilder->getInt32Ty();
146    Type * const sizeTy = iBuilder->getSizeTy();
147    Type * const voidTy = iBuilder->getVoidTy();
148
149    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
150    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
151
152    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
153    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
154    main->setCallingConv(CallingConv::C);
155    Function::arg_iterator args = main->arg_begin();   
156    Value * const fileDecriptor = &*(args++);
157    fileDecriptor->setName("fileDecriptor");
158    Value * const fileIdx = &*(args++);
159    fileIdx->setName("fileIdx");
160
161    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
162
163    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
164
165    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
166
167    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
168    mmapK->setInitialArguments({fileDecriptor});
169    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
170
171    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
172    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
173   
174    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
175    pxDriver.makeKernelCall(wck, {BasisBits}, {});
176
177    pxDriver.generatePipelineIR();
178   
179    iBuilder->setKernel(mmapK);
180    Value * const fileSize = iBuilder->getAccumulator("fileSize");
181    iBuilder->setKernel(wck);
182    Value * const lineCount = iBuilder->getAccumulator("lineCount");
183    Value * const wordCount = iBuilder->getAccumulator("wordCount");
184    Value * const charCount = iBuilder->getAccumulator("charCount");
185
186    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
187   
188    iBuilder->CreateRetVoid();
189
190    pxDriver.linkAndFinalize();
191}
192
193
194WordCountFunctionType wcCodeGen() {
195    ParabixDriver pxDriver("wc");
196    wcPipelineGen(pxDriver);
197    return reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
198}
199
200void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
201    std::string fileName = inputFiles[fileIdx];
202    const int fd = open(fileName.c_str(), O_RDONLY);
203    if (LLVM_UNLIKELY(fd == -1)) {
204        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
205    } else {
206        fn_ptr(fd, fileIdx);
207        close(fd);
208    }
209}
210
211int main(int argc, char *argv[]) {
212    AddParabixVersionPrinter();
213    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
214    cl::ParseCommandLineOptions(argc, argv);
215    if (wcOptions.size() == 0) {
216        CountLines = true;
217        CountWords = true;
218        CountBytes = true;
219    } else {
220        CountLines = false;
221        CountWords = false;
222        CountBytes = false;
223        CountChars = false;
224        for (unsigned i = 0; i < wcOptions.size(); i++) {
225            switch (wcOptions[i]) {
226                case WordOption: CountWords = true; break;
227                case LineOption: CountLines = true; break;
228                case CharOption: CountBytes = true; CountChars = false; break;
229                case ByteOption: CountChars = true; CountBytes = false; break;
230            }
231        }
232    }
233   
234    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
235
236    const auto fileCount = inputFiles.size();
237    lineCount.resize(fileCount);
238    wordCount.resize(fileCount);
239    charCount.resize(fileCount);
240    byteCount.resize(fileCount);
241   
242    for (unsigned i = 0; i < fileCount; ++i) {
243        wc(wordCountFunctionPtr, i);
244    }
245   
246    size_t maxCount = 0;
247    if (CountLines) maxCount = TotalLines;
248    if (CountWords) maxCount = TotalWords;
249    if (CountChars) maxCount = TotalChars;
250    if (CountBytes) maxCount = TotalBytes;
251   
252    int fieldWidth = std::to_string(maxCount).size() + 1;
253    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
254
255    for (unsigned i = 0; i < inputFiles.size(); ++i) {
256        std::cout << std::setw(fieldWidth-1);
257        if (CountLines) {
258            std::cout << lineCount[i] << std::setw(fieldWidth);
259        }
260        if (CountWords) {
261            std::cout << wordCount[i] << std::setw(fieldWidth);
262        }
263        if (CountChars) {
264            std::cout << charCount[i] << std::setw(fieldWidth);
265        }
266        if (CountBytes) {
267            std::cout << byteCount[i];
268        }
269        std::cout << " " << inputFiles[i] << std::endl;
270    }
271    if (inputFiles.size() > 1) {
272        std::cout << std::setw(fieldWidth-1);
273        if (CountLines) {
274            std::cout << TotalLines << std::setw(fieldWidth);
275        }
276        if (CountWords) {
277            std::cout << TotalWords << std::setw(fieldWidth);
278        }
279        if (CountChars) {
280            std::cout << TotalChars << std::setw(fieldWidth);
281        }
282        if (CountBytes) {
283            std::cout << TotalBytes;
284        }
285        std::cout << " total" << std::endl;
286    }
287
288    return 0;
289}
Note: See TracBrowser for help on using the repository browser.