source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5436

Last change on this file since 5436 was 5436, checked in by nmedfort, 2 years ago

Continued refactoring work. PabloKernel is now an abstract base type with a 'generatePabloMethod' hook to generate Pablo code.

File size: 9.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <kernels/kernel_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/source_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <fcntl.h>
27
28using namespace llvm;
29
// Option category under which the wc-specific command-line flags are registered.
static cl::OptionCategory wcFlags("Command Flags", "wc options");

// Positional arguments: the input files to count (at least one is required).
static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));

// The kinds of count that can be requested on the command line.
enum CountOptions {
    LineOption, WordOption, CharOption, ByteOption
};

// The count flags (-l, -w, -m, -c) in the order given on the command line.
// cl::Grouping allows the single-letter flags to be combined (e.g. -lw).
static cl::list<CountOptions> wcOptions(
  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
44                                                 
45
46
// Minimum width of each numeric column in the printed report.
static int defaultFieldWidth = 7;  // default field width


// Which counts are enabled for this run.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

// Per-file results, indexed by the position of the file in inputFiles.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals over all files processed so far.
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;
64
65using namespace pablo;
66using namespace kernel;
67using namespace parabix;
68
69//  The callback routine that records counts in progress.
70//
71extern "C" {
72    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
73        lineCount[fileIdx] = lines;
74        wordCount[fileIdx] = words;
75        charCount[fileIdx] = chars;
76        byteCount[fileIdx] = bytes;
77        TotalLines += lines;
78        TotalWords += words;
79        TotalChars += chars;
80        TotalBytes += bytes;
81    }
82}
83
84class WordCountKernel final: public pablo::PabloKernel {
85public:
86    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
87    bool isCachable() const override { return true; }
88    bool moduleIDisSignature() const override { return true; }
89protected:
90    void generatePabloMethod() override;
91};
92
93WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
94: PabloKernel(b, "wc",
95    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
96    {},
97    {},
98    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
99
100}
101
102void WordCountKernel::generatePabloMethod() {
103
104    //  input: 8 basis bit streams
105    const auto u8bitSet = getInputStreamVar("u8bit");
106    //  output: 3 counters
107
108    cc::CC_Compiler ccc(this, u8bitSet);
109
110    PabloBuilder & pb = ccc.getBuilder();
111
112    Var * lc = getOutputScalarVar("lineCount");
113    Var * wc = getOutputScalarVar("wordCount");
114    Var * cc = getOutputScalarVar("charCount");
115
116    if (CountLines) {
117        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
118        pb.createAssign(lc, pb.createCount(LF));
119    }
120    if (CountWords) {
121        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
122        PabloAST * wordChar = pb.createNot(WS);
123        // WS_follow_or_start = 1 past WS or at start of file
124        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
125        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
126        pb.createAssign(wc, pb.createCount(wordStart));
127    }
128    if (CountChars) {
129        //
130        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
131        // not UTF-8, or is not valid?
132        //
133        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
134        pb.createAssign(cc, pb.createCount(u8Begin));
135    }
136}
137
138typedef void (*WordCountFunctionType)(uint32_t fd, size_t fileIdx);
139
140void wcPipelineGen(ParabixDriver & pxDriver) {
141
142    auto & iBuilder = pxDriver.getBuilder();
143    Module * m = iBuilder->getModule();
144   
145    Type * const int32Ty = iBuilder->getInt32Ty();
146    Type * const sizeTy = iBuilder->getSizeTy();
147    Type * const voidTy = iBuilder->getVoidTy();
148
149    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
150    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
151
152    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
153    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
154    main->setCallingConv(CallingConv::C);
155    Function::arg_iterator args = main->arg_begin();   
156    Value * const fileDecriptor = &*(args++);
157    fileDecriptor->setName("fileDecriptor");
158    Value * const fileIdx = &*(args++);
159    fileIdx->setName("fileIdx");
160
161    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
162
163    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
164
165    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
166
167    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
168    mmapK->setInitialArguments({fileDecriptor});
169    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
170
171    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
172    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
173   
174    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
175    pxDriver.makeKernelCall(wck, {BasisBits}, {});
176
177    pxDriver.generatePipelineIR();
178   
179    Value * const fileSize = mmapK->getAccumulator("fileSize");
180    Value * const lineCount = wck->getAccumulator("lineCount");
181    Value * const wordCount = wck->getAccumulator("wordCount");
182    Value * const charCount = wck->getAccumulator("charCount");
183
184    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
185   
186    iBuilder->CreateRetVoid();
187
188    pxDriver.linkAndFinalize();
189}
190
191
192WordCountFunctionType wcCodeGen() {
193    ParabixDriver pxDriver("wc");
194    wcPipelineGen(pxDriver);
195    return reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
196}
197
198void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
199    std::string fileName = inputFiles[fileIdx];
200    const int fd = open(fileName.c_str(), O_RDONLY);
201    if (LLVM_UNLIKELY(fd == -1)) {
202        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
203    } else {
204        fn_ptr(fd, fileIdx);
205        close(fd);
206    }
207}
208
209int main(int argc, char *argv[]) {
210    AddParabixVersionPrinter();
211    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
212    cl::ParseCommandLineOptions(argc, argv);
213    if (wcOptions.size() == 0) {
214        CountLines = true;
215        CountWords = true;
216        CountBytes = true;
217    } else {
218        CountLines = false;
219        CountWords = false;
220        CountBytes = false;
221        CountChars = false;
222        for (unsigned i = 0; i < wcOptions.size(); i++) {
223            switch (wcOptions[i]) {
224                case WordOption: CountWords = true; break;
225                case LineOption: CountLines = true; break;
226                case CharOption: CountBytes = true; CountChars = false; break;
227                case ByteOption: CountChars = true; CountBytes = false; break;
228            }
229        }
230    }
231   
232    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
233
234    const auto fileCount = inputFiles.size();
235    lineCount.resize(fileCount);
236    wordCount.resize(fileCount);
237    charCount.resize(fileCount);
238    byteCount.resize(fileCount);
239   
240    for (unsigned i = 0; i < fileCount; ++i) {
241        wc(wordCountFunctionPtr, i);
242    }
243   
244    size_t maxCount = 0;
245    if (CountLines) maxCount = TotalLines;
246    if (CountWords) maxCount = TotalWords;
247    if (CountChars) maxCount = TotalChars;
248    if (CountBytes) maxCount = TotalBytes;
249   
250    int fieldWidth = std::to_string(maxCount).size() + 1;
251    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
252
253    for (unsigned i = 0; i < inputFiles.size(); ++i) {
254        std::cout << std::setw(fieldWidth-1);
255        if (CountLines) {
256            std::cout << lineCount[i] << std::setw(fieldWidth);
257        }
258        if (CountWords) {
259            std::cout << wordCount[i] << std::setw(fieldWidth);
260        }
261        if (CountChars) {
262            std::cout << charCount[i] << std::setw(fieldWidth);
263        }
264        if (CountBytes) {
265            std::cout << byteCount[i];
266        }
267        std::cout << " " << inputFiles[i] << std::endl;
268    }
269    if (inputFiles.size() > 1) {
270        std::cout << std::setw(fieldWidth-1);
271        if (CountLines) {
272            std::cout << TotalLines << std::setw(fieldWidth);
273        }
274        if (CountWords) {
275            std::cout << TotalWords << std::setw(fieldWidth);
276        }
277        if (CountChars) {
278            std::cout << TotalChars << std::setw(fieldWidth);
279        }
280        if (CountBytes) {
281            std::cout << TotalBytes;
282        }
283        std::cout << " total" << std::endl;
284    }
285
286    return 0;
287}
Note: See TracBrowser for help on using the repository browser.