source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5457

Last change on this file since 5457 was 5457, checked in by cameron, 2 years ago

Deprecating SingleBlockBuffer?

File size: 10.0 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <kernels/kernel_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/source_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <fcntl.h>
27
28using namespace llvm;
29
30static cl::OptionCategory wcFlags("Command Flags", "wc options");
31
32static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
33
34enum CountOptions {
35    LineOption, WordOption, CharOption, ByteOption
36};
37
38static cl::list<CountOptions> wcOptions(
39  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
40             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
41             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
42             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
43             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
44                                                 
45
46
// Minimum column width used when printing the report.
static int defaultFieldWidth{7};

// Which counts are enabled; main() sets these from the command-line flags.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in the input list.
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running totals across all files processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
64
65using namespace pablo;
66using namespace kernel;
67using namespace parabix;
68
69//  The callback routine that records counts in progress.
70//
71extern "C" {
72    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
73        lineCount[fileIdx] = lines;
74        wordCount[fileIdx] = words;
75        charCount[fileIdx] = chars;
76        byteCount[fileIdx] = bytes;
77        TotalLines += lines;
78        TotalWords += words;
79        TotalChars += chars;
80        TotalBytes += bytes;
81    }
82}
83
84class WordCountKernel final: public pablo::PabloKernel {
85public:
86    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
87    bool isCachable() const override { return true; }
88    bool moduleIDisSignature() const override { return true; }
89protected:
90    void generatePabloMethod() override;
91};
92
93WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
94: PabloKernel(b, "wc",
95    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
96    {},
97    {},
98    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
99
100}
101
102void WordCountKernel::generatePabloMethod() {
103
104    //  input: 8 basis bit streams
105    const auto u8bitSet = getInputStreamVar("u8bit");
106    //  output: 3 counters
107
108    cc::CC_Compiler ccc(this, u8bitSet);
109
110    PabloBuilder & pb = ccc.getBuilder();
111
112    Var * lc = getOutputScalarVar("lineCount");
113    Var * wc = getOutputScalarVar("wordCount");
114    Var * cc = getOutputScalarVar("charCount");
115
116    if (CountLines) {
117        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
118        pb.createAssign(lc, pb.createCount(LF));
119    }
120    if (CountWords) {
121        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
122        PabloAST * wordChar = pb.createNot(WS);
123        // WS_follow_or_start = 1 past WS or at start of file
124        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
125        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
126        pb.createAssign(wc, pb.createCount(wordStart));
127    }
128    if (CountChars) {
129        //
130        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
131        // not UTF-8, or is not valid?
132        //
133        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
134        pb.createAssign(cc, pb.createCount(u8Begin));
135    }
136}
137
// Signature of the generated entry point: takes a file descriptor and the
// index of the file being processed.
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
139
140void wcPipelineGen(ParabixDriver & pxDriver) {
141
142    auto & iBuilder = pxDriver.getBuilder();
143    Module * m = iBuilder->getModule();
144    const unsigned segmentSize = codegen::SegmentSize;
145    const unsigned bufferSegments = codegen::ThreadNum+1;
146
147   
148    Type * const int32Ty = iBuilder->getInt32Ty();
149    Type * const sizeTy = iBuilder->getSizeTy();
150    Type * const voidTy = iBuilder->getVoidTy();
151
152    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
153    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
154
155    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
156    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
157    main->setCallingConv(CallingConv::C);
158    Function::arg_iterator args = main->arg_begin();   
159    Value * const fileDecriptor = &*(args++);
160    fileDecriptor->setName("fileDecriptor");
161    Value * const fileIdx = &*(args++);
162    fileIdx->setName("fileIdx");
163
164    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
165
166    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
167
168    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
169
170    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
171    mmapK->setInitialArguments({fileDecriptor});
172    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
173
174    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
175    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
176   
177    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
178    pxDriver.makeKernelCall(wck, {BasisBits}, {});
179
180    pxDriver.generatePipelineIR();
181   
182    iBuilder->setKernel(mmapK);
183    Value * const fileSize = iBuilder->getAccumulator("fileSize");
184    iBuilder->setKernel(wck);
185    Value * const lineCount = iBuilder->getAccumulator("lineCount");
186    Value * const wordCount = iBuilder->getAccumulator("wordCount");
187    Value * const charCount = iBuilder->getAccumulator("charCount");
188
189    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
190   
191    iBuilder->CreateRetVoid();
192
193    pxDriver.linkAndFinalize();
194}
195
196
197WordCountFunctionType wcCodeGen() {
198    ParabixDriver pxDriver("wc");
199    wcPipelineGen(pxDriver);
200    return reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
201}
202
203void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
204    std::string fileName = inputFiles[fileIdx];
205    const int fd = open(fileName.c_str(), O_RDONLY);
206    if (LLVM_UNLIKELY(fd == -1)) {
207        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
208    } else {
209        fn_ptr(fd, fileIdx);
210        close(fd);
211    }
212}
213
214int main(int argc, char *argv[]) {
215    AddParabixVersionPrinter();
216    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
217    cl::ParseCommandLineOptions(argc, argv);
218    if (wcOptions.size() == 0) {
219        CountLines = true;
220        CountWords = true;
221        CountBytes = true;
222    } else {
223        CountLines = false;
224        CountWords = false;
225        CountBytes = false;
226        CountChars = false;
227        for (unsigned i = 0; i < wcOptions.size(); i++) {
228            switch (wcOptions[i]) {
229                case WordOption: CountWords = true; break;
230                case LineOption: CountLines = true; break;
231                case CharOption: CountBytes = true; CountChars = false; break;
232                case ByteOption: CountChars = true; CountBytes = false; break;
233            }
234        }
235    }
236   
237    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
238
239    const auto fileCount = inputFiles.size();
240    lineCount.resize(fileCount);
241    wordCount.resize(fileCount);
242    charCount.resize(fileCount);
243    byteCount.resize(fileCount);
244   
245    for (unsigned i = 0; i < fileCount; ++i) {
246        wc(wordCountFunctionPtr, i);
247    }
248   
249    size_t maxCount = 0;
250    if (CountLines) maxCount = TotalLines;
251    if (CountWords) maxCount = TotalWords;
252    if (CountChars) maxCount = TotalChars;
253    if (CountBytes) maxCount = TotalBytes;
254   
255    int fieldWidth = std::to_string(maxCount).size() + 1;
256    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
257
258    for (unsigned i = 0; i < inputFiles.size(); ++i) {
259        std::cout << std::setw(fieldWidth-1);
260        if (CountLines) {
261            std::cout << lineCount[i] << std::setw(fieldWidth);
262        }
263        if (CountWords) {
264            std::cout << wordCount[i] << std::setw(fieldWidth);
265        }
266        if (CountChars) {
267            std::cout << charCount[i] << std::setw(fieldWidth);
268        }
269        if (CountBytes) {
270            std::cout << byteCount[i];
271        }
272        std::cout << " " << inputFiles[i] << std::endl;
273    }
274    if (inputFiles.size() > 1) {
275        std::cout << std::setw(fieldWidth-1);
276        if (CountLines) {
277            std::cout << TotalLines << std::setw(fieldWidth);
278        }
279        if (CountWords) {
280            std::cout << TotalWords << std::setw(fieldWidth);
281        }
282        if (CountChars) {
283            std::cout << TotalChars << std::setw(fieldWidth);
284        }
285        if (CountBytes) {
286            std::cout << TotalBytes;
287        }
288        std::cout << " total" << std::endl;
289    }
290
291    return 0;
292}
Note: See TracBrowser for help on using the repository browser.