source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5733

Last change on this file since 5733 was 5732, checked in by cameron, 18 months ago

More changes in preparation for LLVM 3.9, 4.0

File size: 9.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/Support/CommandLine.h>
14#include <llvm/Support/raw_ostream.h>
15#include <cc/cc_compiler.h>
16#include <pablo/pablo_kernel.h>
17#include <kernels/kernel_builder.h>
18#include <IR_Gen/idisa_target.h>
19#include <kernels/streamset.h>
20#include <kernels/source_kernel.h>
21#include <kernels/s2p_kernel.h>
22#include <pablo/pablo_compiler.h>
23#include <pablo/pablo_toolchain.h>
24#include <toolchain/cpudriver.h>
25#include <fcntl.h>
26
27using namespace llvm;
28
29static cl::OptionCategory wcFlags("Command Flags", "wc options");
30
31static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
32
33enum CountOptions {
34    LineOption, WordOption, CharOption, ByteOption
35};
36
37static cl::list<CountOptions> wcOptions(
38  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
39             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
40             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
41             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m).")
42             CL_ENUM_VAL_SENTINEL), cl::cat(wcFlags), cl::Grouping);
43                                                 
44
45
// Smallest column width used when printing counts; main() widens the
// columns when the totals need more digits.
static int defaultFieldWidth = 7;

// Which counts are reported.  main() sets these from the command line.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

// Per-file results, indexed by position in the input file list.
// Resized in main() and filled in by record_counts().
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running sums over all processed files, maintained by record_counts().
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;
63
64using namespace pablo;
65using namespace kernel;
66using namespace parabix;
67
68//  The callback routine that records counts in progress.
69//
70extern "C" {
71    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
72        lineCount[fileIdx] = lines;
73        wordCount[fileIdx] = words;
74        charCount[fileIdx] = chars;
75        byteCount[fileIdx] = bytes;
76        TotalLines += lines;
77        TotalWords += words;
78        TotalChars += chars;
79        TotalBytes += bytes;
80    }
81}
82
83class WordCountKernel final: public pablo::PabloKernel {
84public:
85    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
86    bool isCachable() const override { return true; }
87    bool hasSignature() const override { return false; }
88protected:
89    void generatePabloMethod() override;
90};
91
92WordCountKernel::WordCountKernel (const std::unique_ptr<kernel::KernelBuilder> & b)
93: PabloKernel(b, "wc",
94    {Binding{b->getStreamSetTy(8, 1), "u8bit"}},
95    {},
96    {},
97    {Binding{b->getSizeTy(), "lineCount"}, Binding{b->getSizeTy(), "wordCount"}, Binding{b->getSizeTy(), "charCount"}}) {
98
99}
100
101void WordCountKernel::generatePabloMethod() {
102
103    //  input: 8 basis bit streams
104    const auto u8bitSet = getInputStreamVar("u8bit");
105    //  output: 3 counters
106
107    cc::CC_Compiler ccc(this, u8bitSet);
108
109    PabloBuilder & pb = ccc.getBuilder();
110
111    Var * lc = getOutputScalarVar("lineCount");
112    Var * wc = getOutputScalarVar("wordCount");
113    Var * cc = getOutputScalarVar("charCount");
114
115    if (CountLines) {
116        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
117        pb.createAssign(lc, pb.createCount(LF));
118    }
119    if (CountWords) {
120        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
121        PabloAST * wordChar = pb.createNot(WS);
122        // WS_follow_or_start = 1 past WS or at start of file
123        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
124        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
125        pb.createAssign(wc, pb.createCount(wordStart));
126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
133        pb.createAssign(cc, pb.createCount(u8Begin));
134    }
135}
136
// Signature of the JIT-compiled entry point: takes an open file descriptor
// and the index of the file in the input list, and returns nothing.
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
138
139void wcPipelineGen(ParabixDriver & pxDriver) {
140
141    auto & iBuilder = pxDriver.getBuilder();
142    Module * m = iBuilder->getModule();
143    const unsigned segmentSize = codegen::SegmentSize;
144    const unsigned bufferSegments = codegen::ThreadNum+1;
145
146   
147    Type * const int32Ty = iBuilder->getInt32Ty();
148    Type * const sizeTy = iBuilder->getSizeTy();
149    Type * const voidTy = iBuilder->getVoidTy();
150
151    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
152    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
153
154    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
155    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
156    main->setCallingConv(CallingConv::C);
157    Function::arg_iterator args = main->arg_begin();   
158    Value * const fileDecriptor = &*(args++);
159    fileDecriptor->setName("fileDecriptor");
160    Value * const fileIdx = &*(args++);
161    fileIdx->setName("fileIdx");
162
163    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
164
165    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
166
167    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
168
169    Kernel * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
170    mmapK->setInitialArguments({fileDecriptor});
171    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
172
173    Kernel * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
174    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
175   
176    Kernel * wck = pxDriver.addKernelInstance(make_unique<WordCountKernel>(iBuilder));
177    pxDriver.makeKernelCall(wck, {BasisBits}, {});
178
179    pxDriver.generatePipelineIR();
180   
181    iBuilder->setKernel(mmapK);
182    Value * const fileSize = iBuilder->getAccumulator("fileSize");
183    iBuilder->setKernel(wck);
184    Value * const lineCount = iBuilder->getAccumulator("lineCount");
185    Value * const wordCount = iBuilder->getAccumulator("wordCount");
186    Value * const charCount = iBuilder->getAccumulator("charCount");
187
188    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
189    pxDriver.deallocateBuffers();
190    iBuilder->CreateRetVoid();
191
192    pxDriver.finalizeObject();
193}
194
195void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
196    std::string fileName = inputFiles[fileIdx];
197    const int fd = open(fileName.c_str(), O_RDONLY);
198    if (LLVM_UNLIKELY(fd == -1)) {
199        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
200    } else {
201        fn_ptr(fd, fileIdx);
202        close(fd);
203    }
204}
205
206int main(int argc, char *argv[]) {
207    codegen::ParseCommandLineOptions(argc, argv, {&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
208    if (wcOptions.size() == 0) {
209        CountLines = true;
210        CountWords = true;
211        CountBytes = true;
212    } else {
213        CountLines = false;
214        CountWords = false;
215        CountBytes = false;
216        CountChars = false;
217        for (unsigned i = 0; i < wcOptions.size(); i++) {
218            switch (wcOptions[i]) {
219                case WordOption: CountWords = true; break;
220                case LineOption: CountLines = true; break;
221                case CharOption: CountBytes = true; CountChars = false; break;
222                case ByteOption: CountChars = true; CountBytes = false; break;
223            }
224        }
225    }
226   
227    ParabixDriver pxDriver("wc");
228    wcPipelineGen(pxDriver);
229    auto wordCountFunctionPtr = reinterpret_cast<WordCountFunctionType>(pxDriver.getMain());
230
231    const auto fileCount = inputFiles.size();
232    lineCount.resize(fileCount);
233    wordCount.resize(fileCount);
234    charCount.resize(fileCount);
235    byteCount.resize(fileCount);
236   
237    for (unsigned i = 0; i < fileCount; ++i) {
238        wc(wordCountFunctionPtr, i);
239    }
240   
241    size_t maxCount = 0;
242    if (CountLines) maxCount = TotalLines;
243    if (CountWords) maxCount = TotalWords;
244    if (CountChars) maxCount = TotalChars;
245    if (CountBytes) maxCount = TotalBytes;
246   
247    int fieldWidth = std::to_string(maxCount).size() + 1;
248    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
249
250    for (unsigned i = 0; i < inputFiles.size(); ++i) {
251        std::cout << std::setw(fieldWidth-1);
252        if (CountLines) {
253            std::cout << lineCount[i] << std::setw(fieldWidth);
254        }
255        if (CountWords) {
256            std::cout << wordCount[i] << std::setw(fieldWidth);
257        }
258        if (CountChars) {
259            std::cout << charCount[i] << std::setw(fieldWidth);
260        }
261        if (CountBytes) {
262            std::cout << byteCount[i];
263        }
264        std::cout << " " << inputFiles[i] << std::endl;
265    }
266    if (inputFiles.size() > 1) {
267        std::cout << std::setw(fieldWidth-1);
268        if (CountLines) {
269            std::cout << TotalLines << std::setw(fieldWidth);
270        }
271        if (CountWords) {
272            std::cout << TotalWords << std::setw(fieldWidth);
273        }
274        if (CountChars) {
275            std::cout << TotalChars << std::setw(fieldWidth);
276        }
277        if (CountBytes) {
278            std::cout << TotalBytes;
279        }
280        std::cout << " total" << std::endl;
281    }
282
283    return 0;
284}
Note: See TracBrowser for help on using the repository browser.