source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5609

Last change on this file since 5609 was 5597, checked in by nmedfort, 23 months ago

Modified stream set buffers to use heap memory.

File size: 9.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13// #include <llvm/ExecutionEngine/ExecutionEngine.h>
14// #include <llvm/Linker/Linker.h>
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <kernels/kernel_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/source_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <toolchain/cpudriver.h>
27#include <fcntl.h>
28
29using namespace llvm;
30
// Option category under which all of this tool's own command-line options are grouped.
static cl::OptionCategory wcFlags("Command Flags", "wc options");

// Positional arguments: the input files to process. At least one is required (cl::OneOrMore).
static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));

// The kinds of count that can be requested on the command line.
enum CountOptions {
    LineOption, WordOption, CharOption, ByteOption
};

// The count-selection flags (-l, -w, -m, -c). Each occurrence on the command line appends
// the corresponding CountOptions value to this list; main() walks the list to decide what
// to count. cl::Grouping is requested so that single-letter flags may be combined.
static cl::list<CountOptions> wcOptions(
  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
45                                                 
46
47
// Minimum width of an output column; main() widens the columns beyond this when the
// largest total needs more digits.
static int defaultFieldWidth = 7;  // default field width


// Which counts were requested. Set by main() from the command line, read by
// WordCountKernel::generatePabloMethod() when building the kernel and by main() when
// printing the report.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

// Per-file results, indexed by the file's position in inputFiles. main() sizes these
// vectors before processing; record_counts() fills them in.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals over all processed files, accumulated by record_counts().
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;
65
66using namespace pablo;
67using namespace kernel;
68using namespace parabix;
69
70//  The callback routine that records counts in progress.
71//
72extern "C" {
73    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
74        lineCount[fileIdx] = lines;
75        wordCount[fileIdx] = words;
76        charCount[fileIdx] = chars;
77        byteCount[fileIdx] = bytes;
78        TotalLines += lines;
79        TotalWords += words;
80        TotalChars += chars;
81        TotalBytes += bytes;
82    }
83}
84
85class WordCountKernel final: public pablo::PabloKernel {
86public:
87    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
88    bool isCachable() const override { return true; }
89    bool hasSignature() const override { return false; }
90protected:
91    void generatePabloMethod() override;
92};
93
94WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
95: PabloKernel(b, "wc",
96    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
97    {},
98    {},
99    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
100
101}
102
103void WordCountKernel::generatePabloMethod() {
104
105    //  input: 8 basis bit streams
106    const auto u8bitSet = getInputStreamVar("u8bit");
107    //  output: 3 counters
108
109    cc::CC_Compiler ccc(this, u8bitSet);
110
111    PabloBuilder & pb = ccc.getBuilder();
112
113    Var * lc = getOutputScalarVar("lineCount");
114    Var * wc = getOutputScalarVar("wordCount");
115    Var * cc = getOutputScalarVar("charCount");
116
117    if (CountLines) {
118        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
119        pb.createAssign(lc, pb.createCount(LF));
120    }
121    if (CountWords) {
122        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
123        PabloAST * wordChar = pb.createNot(WS);
124        // WS_follow_or_start = 1 past WS or at start of file
125        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
126        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
127        pb.createAssign(wc, pb.createCount(wordStart));
128    }
129    if (CountChars) {
130        //
131        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
132        // not UTF-8, or is not valid?
133        //
134        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
135        pb.createAssign(cc, pb.createCount(u8Begin));
136    }
137}
138
// Signature of the JIT-compiled entry point: (file descriptor, index of the file in inputFiles).
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
140
141void wcPipelineGen(ParabixDriver & pxDriver) {
142
143    auto & iBuilder = pxDriver.getBuilder();
144    Module * m = iBuilder->getModule();
145    const unsigned segmentSize = codegen::SegmentSize;
146    const unsigned bufferSegments = codegen::ThreadNum+1;
147
148   
149    Type * const int32Ty = iBuilder->getInt32Ty();
150    Type * const sizeTy = iBuilder->getSizeTy();
151    Type * const voidTy = iBuilder->getVoidTy();
152
153    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
154    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
155
156    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
157    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
158    main->setCallingConv(CallingConv::C);
159    Function::arg_iterator args = main->arg_begin();   
160    Value * const fileDecriptor = &*(args++);
161    fileDecriptor->setName("fileDecriptor");
162    Value * const fileIdx = &*(args++);
163    fileIdx->setName("fileIdx");
164
165    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
166
167    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
168
169    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
170
171    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
172    mmapK->setInitialArguments({fileDecriptor});
173    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
174
175    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
176    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
177   
178    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
179    pxDriver.makeKernelCall(wck, {BasisBits}, {});
180
181    pxDriver.generatePipelineIR();
182   
183    iBuilder->setKernel(mmapK);
184    Value * const fileSize = iBuilder->getAccumulator("fileSize");
185    iBuilder->setKernel(wck);
186    Value * const lineCount = iBuilder->getAccumulator("lineCount");
187    Value * const wordCount = iBuilder->getAccumulator("wordCount");
188    Value * const charCount = iBuilder->getAccumulator("charCount");
189
190    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
191    pxDriver.deallocateBuffers();
192    iBuilder->CreateRetVoid();
193
194    pxDriver.finalizeObject();
195}
196
197void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
198    std::string fileName = inputFiles[fileIdx];
199    const int fd = open(fileName.c_str(), O_RDONLY);
200    if (LLVM_UNLIKELY(fd == -1)) {
201        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
202    } else {
203        fn_ptr(fd, fileIdx);
204        close(fd);
205    }
206}
207
208int main(int argc, char *argv[]) {
209    codegen::ParseCommandLineOptions(argc, argv, {&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
210    if (wcOptions.size() == 0) {
211        CountLines = true;
212        CountWords = true;
213        CountBytes = true;
214    } else {
215        CountLines = false;
216        CountWords = false;
217        CountBytes = false;
218        CountChars = false;
219        for (unsigned i = 0; i < wcOptions.size(); i++) {
220            switch (wcOptions[i]) {
221                case WordOption: CountWords = true; break;
222                case LineOption: CountLines = true; break;
223                case CharOption: CountBytes = true; CountChars = false; break;
224                case ByteOption: CountChars = true; CountBytes = false; break;
225            }
226        }
227    }
228   
229    ParabixDriver pxDriver("wc");
230    wcPipelineGen(pxDriver);
231    auto wordCountFunctionPtr = reinterpret_cast<WordCountFunctionType>(pxDriver.getMain());
232
233    const auto fileCount = inputFiles.size();
234    lineCount.resize(fileCount);
235    wordCount.resize(fileCount);
236    charCount.resize(fileCount);
237    byteCount.resize(fileCount);
238   
239    for (unsigned i = 0; i < fileCount; ++i) {
240        wc(wordCountFunctionPtr, i);
241    }
242   
243    size_t maxCount = 0;
244    if (CountLines) maxCount = TotalLines;
245    if (CountWords) maxCount = TotalWords;
246    if (CountChars) maxCount = TotalChars;
247    if (CountBytes) maxCount = TotalBytes;
248   
249    int fieldWidth = std::to_string(maxCount).size() + 1;
250    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
251
252    for (unsigned i = 0; i < inputFiles.size(); ++i) {
253        std::cout << std::setw(fieldWidth-1);
254        if (CountLines) {
255            std::cout << lineCount[i] << std::setw(fieldWidth);
256        }
257        if (CountWords) {
258            std::cout << wordCount[i] << std::setw(fieldWidth);
259        }
260        if (CountChars) {
261            std::cout << charCount[i] << std::setw(fieldWidth);
262        }
263        if (CountBytes) {
264            std::cout << byteCount[i];
265        }
266        std::cout << " " << inputFiles[i] << std::endl;
267    }
268    if (inputFiles.size() > 1) {
269        std::cout << std::setw(fieldWidth-1);
270        if (CountLines) {
271            std::cout << TotalLines << std::setw(fieldWidth);
272        }
273        if (CountWords) {
274            std::cout << TotalWords << std::setw(fieldWidth);
275        }
276        if (CountChars) {
277            std::cout << TotalChars << std::setw(fieldWidth);
278        }
279        if (CountBytes) {
280            std::cout << TotalBytes;
281        }
282        std::cout << " total" << std::endl;
283    }
284
285    return 0;
286}
Note: See TracBrowser for help on using the repository browser.