source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5418

Last change on this file since 5418 was 5418, checked in by nmedfort, 2 years ago

Removed non-functional CUDA code from icgrep and consolidated grep and multigrep mode into a single function; allowed segment parallel pipeline to utilize process as its initial thread; modified MMapSourceKernel to map and perform mmap directly and advise the OS to drop consumed data streams.

File size: 9.9 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <kernels/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <fcntl.h>
28
29using namespace llvm;
30
31static cl::OptionCategory wcFlags("Command Flags", "wc options");
32
33static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
34
// The kinds of count that can be requested on the command line.
enum CountOptions {
    LineOption,   // -l
    WordOption,   // -w
    CharOption,   // -m
    ByteOption    // -c
};
38
39static cl::list<CountOptions> wcOptions(
40  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
41             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
42             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
43             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
44             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
45                                                 
46
47
// Minimum width of the output columns; main() widens it when the counts need more room.
static int defaultFieldWidth{7};

// Which counts were requested; set by main() from the command line flags.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the position of the file in inputFiles.
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running totals over all files processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
65
66using namespace pablo;
67using namespace kernel;
68using namespace parabix;
69
70//  The callback routine that records counts in progress.
71//
72extern "C" {
73    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
74        lineCount[fileIdx] = lines;
75        wordCount[fileIdx] = words;
76        charCount[fileIdx] = chars;
77        byteCount[fileIdx] = bytes;
78        TotalLines += lines;
79        TotalWords += words;
80        TotalChars += chars;
81        TotalBytes += bytes;
82    }
83}
84
85//
86//
87
88std::unique_ptr<PabloKernel> wc_gen(IDISA::IDISA_Builder * iBuilder) {
89   
90    auto kernel = std::unique_ptr<PabloKernel>(new PabloKernel(iBuilder, "Parabix:wc",
91                    {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
92                    {},
93                    {},
94                    {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}}));
95   
96    //  input: 8 basis bit streams
97    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
98    //  output: 3 counters
99   
100    cc::CC_Compiler ccc(kernel.get(), u8bitSet);
101   
102    PabloBuilder & pb = ccc.getBuilder();
103
104    Var * lc = kernel->getOutputScalarVar("lineCount");
105    Var * wc = kernel->getOutputScalarVar("wordCount");
106    Var * cc = kernel->getOutputScalarVar("charCount");
107
108    if (CountLines) {
109        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
110        pb.createAssign(lc, pb.createCount(LF));
111    }
112    if (CountWords) {
113        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
114        PabloAST * wordChar = pb.createNot(WS);
115        // WS_follow_or_start = 1 past WS or at start of file
116        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
117        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
118        pb.createAssign(wc, pb.createCount(wordStart));
119    }
120    if (CountChars) {
121        //
122        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
123        // not UTF-8, or is not valid?
124        //
125        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
126        pb.createAssign(cc, pb.createCount(u8Begin));
127    }
128    pablo_function_passes(kernel.get());
129    return kernel;
130}
131
132
133
134
// Signature of the generated "Main" entry point: (file descriptor, index of the file).
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
136
137void wcPipelineGen(ParabixDriver & pxDriver) {
138
139    IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
140    Module * m = iBuilder->getModule();
141   
142    Type * const int32Ty = iBuilder->getInt32Ty();
143    Type * const sizeTy = iBuilder->getSizeTy();
144    Type * const voidTy = iBuilder->getVoidTy();
145
146    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
147    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
148
149    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
150    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
151    main->setCallingConv(CallingConv::C);
152    Function::arg_iterator args = main->arg_begin();   
153    Value * const fileDecriptor = &*(args++);
154    fileDecriptor->setName("fileDecriptor");
155    Value * const fileIdx = &*(args++);
156    fileIdx->setName("fileIdx");
157
158    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
159
160    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
161
162    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
163
164    KernelBuilder * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
165    mmapK->setInitialArguments({fileDecriptor});
166    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
167
168    KernelBuilder * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
169    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
170   
171    KernelBuilder * wck = pxDriver.addKernelInstance(wc_gen(iBuilder));
172    pxDriver.makeKernelCall(wck, {BasisBits}, {});
173
174
175    pxDriver.generatePipelineIR();
176   
177    Value * const fileSize = mmapK->getAccumulator("fileSize");
178    Value * const lineCount = wck->getAccumulator("lineCount");
179    Value * const wordCount = wck->getAccumulator("wordCount");
180    Value * const charCount = wck->getAccumulator("charCount");
181
182    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
183   
184    iBuilder->CreateRetVoid();
185
186    pxDriver.linkAndFinalize();
187}
188
189
190WordCountFunctionType wcCodeGen() {
191    Module * M = new Module("wc", getGlobalContext());
192    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
193    ParabixDriver pxDriver(idb);
194   
195    wcPipelineGen(pxDriver);
196
197    WordCountFunctionType main = reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
198    delete idb;
199    return main;
200}
201
202void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
203    std::string fileName = inputFiles[fileIdx];
204    const int fd = open(fileName.c_str(), O_RDONLY);
205    if (LLVM_UNLIKELY(fd == -1)) {
206        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
207    } else {
208        fn_ptr(fd, fileIdx);
209        close(fd);
210    }
211}
212
213int main(int argc, char *argv[]) {
214    AddParabixVersionPrinter();
215    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
216    cl::ParseCommandLineOptions(argc, argv);
217    if (wcOptions.size() == 0) {
218        CountLines = true;
219        CountWords = true;
220        CountBytes = true;
221    } else {
222        CountLines = false;
223        CountWords = false;
224        CountBytes = false;
225        CountChars = false;
226        for (unsigned i = 0; i < wcOptions.size(); i++) {
227            switch (wcOptions[i]) {
228                case WordOption: CountWords = true; break;
229                case LineOption: CountLines = true; break;
230                case CharOption: CountBytes = true; CountChars = false; break;
231                case ByteOption: CountChars = true; CountBytes = false; break;
232            }
233        }
234    }
235   
236    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
237
238    const auto fileCount = inputFiles.size();
239    lineCount.resize(fileCount);
240    wordCount.resize(fileCount);
241    charCount.resize(fileCount);
242    byteCount.resize(fileCount);
243   
244    for (unsigned i = 0; i < fileCount; ++i) {
245        wc(wordCountFunctionPtr, i);
246    }
247   
248    size_t maxCount = 0;
249    if (CountLines) maxCount = TotalLines;
250    if (CountWords) maxCount = TotalWords;
251    if (CountChars) maxCount = TotalChars;
252    if (CountBytes) maxCount = TotalBytes;
253   
254    int fieldWidth = std::to_string(maxCount).size() + 1;
255    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
256
257    for (unsigned i = 0; i < inputFiles.size(); ++i) {
258        std::cout << std::setw(fieldWidth-1);
259        if (CountLines) {
260            std::cout << lineCount[i] << std::setw(fieldWidth);
261        }
262        if (CountWords) {
263            std::cout << wordCount[i] << std::setw(fieldWidth);
264        }
265        if (CountChars) {
266            std::cout << charCount[i] << std::setw(fieldWidth);
267        }
268        if (CountBytes) {
269            std::cout << byteCount[i];
270        }
271        std::cout << " " << inputFiles[i] << std::endl;
272    }
273    if (inputFiles.size() > 1) {
274        std::cout << std::setw(fieldWidth-1);
275        if (CountLines) {
276            std::cout << TotalLines << std::setw(fieldWidth);
277        }
278        if (CountWords) {
279            std::cout << TotalWords << std::setw(fieldWidth);
280        }
281        if (CountChars) {
282            std::cout << TotalChars << std::setw(fieldWidth);
283        }
284        if (CountBytes) {
285            std::cout << TotalBytes;
286        }
287        std::cout << " total" << std::endl;
288    }
289
290    return 0;
291}
Note: See TracBrowser for help on using the repository browser.