source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5292

Last change on this file since 5292 was 5292, checked in by nmedfort, 3 years ago

Removed 'function' and 'self' parameters from generateXXXMethod() functions.

File size: 10.3 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <boost/filesystem.hpp>
28#include <boost/iostreams/device/mapped_file.hpp>
29
30
31using namespace llvm;
32
33static cl::OptionCategory wcFlags("Command Flags", "wc options");
34
35static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
36
37enum CountOptions {
38    LineOption, WordOption, CharOption, ByteOption
39};
40
41static cl::list<CountOptions> wcOptions(
42  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
43             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
44             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
45             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
46             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
47                                                 
48
49
// Minimum width of a printed count column.
static int defaultFieldWidth{7};

// Which counts to compute and report; main() sets these from the parsed options.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in inputFiles.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Sums over every file for which record_counts() was called.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
67
68using namespace pablo;
69using namespace kernel;
70using namespace parabix;
71
72//  The callback routine that records counts in progress.
73//
74extern "C" {
75    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
76        lineCount[fileIdx] = lines;
77        wordCount[fileIdx] = words;
78        charCount[fileIdx] = chars;
79        byteCount[fileIdx] = bytes;
80        TotalLines += lines;
81        TotalWords += words;
82        TotalChars += chars;
83        TotalBytes += bytes;
84    }
85}
86
87//
88//
89
90void wc_gen(PabloKernel * kernel) {
91    //  input: 8 basis bit streams
92    //  output: 3 counters
93   
94    cc::CC_Compiler ccc(kernel);
95   
96    PabloBuilder & pb = ccc.getBuilder();
97
98    Var * lc = kernel->addOutput("lineCount", kernel->getSizeTy());
99    Var * wc = kernel->addOutput("wordCount", kernel->getSizeTy());
100    Var * cc = kernel->addOutput("charCount", kernel->getSizeTy());
101
102    if (CountLines) {
103        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
104        pb.createAssign(lc, pb.createCount(LF));
105    }
106    if (CountWords) {
107        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
108        PabloAST * wordChar = pb.createNot(WS);
109        // WS_follow_or_start = 1 past WS or at start of file
110        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
111        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
112        pb.createAssign(wc, pb.createCount(wordStart));
113    }
114    if (CountChars) {
115        //
116        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
117        // not UTF-8, or is not valid?
118        //
119        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
120        pb.createAssign(cc, pb.createCount(u8Begin));
121    }
122}
123
124Function * pipeline(Module * m, IDISA::IDISA_Builder * iBuilder) {
125    Type * mBitBlockType = iBuilder->getBitBlockType();
126    Constant * record_counts_routine;
127    Type * const size_ty = iBuilder->getSizeTy();
128    Type * const voidTy = iBuilder->getVoidTy();
129    record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
130    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
131   
132    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
133    main->setCallingConv(CallingConv::C);
134    Function::arg_iterator args = main->arg_begin();
135   
136    Value * const inputStream = &*(args++);
137    inputStream->setName("input");
138    Value * const fileSize = &*(args++);
139    fileSize->setName("fileSize");
140    Value * const fileIdx = &*(args++);
141    fileIdx->setName("fileIdx");
142   
143    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
144
145    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
146   
147    MMapSourceKernel mmapK(iBuilder);
148    std::unique_ptr<Module> mmapM = mmapK.createKernelModule({}, {&ByteStream});
149    mmapK.setInitialArguments({fileSize});
150   
151    S2PKernel  s2pk(iBuilder);
152    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
153
154    PabloKernel wck(iBuilder, "wc");
155    wc_gen(&wck);
156    pablo_function_passes(&wck);
157   
158    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
159   
160    mmapK.addKernelDeclarations(m);
161    s2pk.addKernelDeclarations(m);
162    wck.addKernelDeclarations(m);
163   
164    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
165
166    ByteStream.setStreamSetBuffer(inputStream, fileSize);
167    BasisBits.allocateBuffer();
168   
169    generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &wck});
170   
171    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
172    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
173    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
174
175    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
176   
177    iBuilder->CreateRetVoid();
178   
179    Linker L(*m);
180    L.linkInModule(std::move(mmapM));
181    L.linkInModule(std::move(s2pM));
182    L.linkInModule(std::move(wcM));
183   
184    return main;
185}
186
187
188typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
189
190static ExecutionEngine * wcEngine = nullptr;
191
192wcFunctionType wcCodeGen(void) { 
193    Module * M = new Module("wc", getGlobalContext());
194    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
195
196    llvm::Function * main_IR = pipeline(M, idb);
197
198    wcEngine = JIT_to_ExecutionEngine(M);
199   
200    wcEngine->finalizeObject();
201
202    delete idb;
203    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
204}
205
206void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
207    std::string fileName = inputFiles[fileIdx];
208    size_t fileSize;
209    char * fileBuffer;
210   
211    const boost::filesystem::path file(fileName);
212    if (exists(file)) {
213        if (is_directory(file)) {
214            return;
215        }
216    } else {
217        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
218        return;
219    }
220   
221    fileSize = file_size(file);
222    boost::iostreams::mapped_file_source mappedFile;
223    if (fileSize == 0) {
224        fileBuffer = nullptr;
225    }
226    else {
227        try {
228            mappedFile.open(fileName);
229        } catch (std::exception &e) {
230            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
231            return;
232        }
233        fileBuffer = const_cast<char *>(mappedFile.data());
234    }
235    fn_ptr(fileBuffer, fileSize, fileIdx);
236
237    mappedFile.close();
238   
239}
240
241
242
243int main(int argc, char *argv[]) {
244    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
245    cl::ParseCommandLineOptions(argc, argv);
246    if (wcOptions.size() == 0) {
247        CountLines = true;
248        CountWords = true;
249        CountBytes = true;
250    }
251    else {
252        CountLines = false;
253        CountWords = false;
254        CountBytes = false;
255        CountChars = false;
256        for (unsigned i = 0; i < wcOptions.size(); i++) {
257            switch (wcOptions[i]) {
258                case WordOption: CountWords = true; break;
259                case LineOption: CountLines = true; break;
260                case CharOption: CountBytes = true; CountChars = false; break;
261                case ByteOption: CountChars = true; CountBytes = false; break;
262            }
263        }
264    }
265   
266   
267    wcFunctionType fn_ptr = wcCodeGen();
268
269    int fileCount = inputFiles.size();
270    lineCount.resize(fileCount);
271    wordCount.resize(fileCount);
272    charCount.resize(fileCount);
273    byteCount.resize(fileCount);
274   
275    for (unsigned i = 0; i < inputFiles.size(); ++i) {
276        wc(fn_ptr, i);
277    }
278   
279    size_t maxCount = 0;
280    if (CountLines) maxCount = TotalLines;
281    if (CountWords) maxCount = TotalWords;
282    if (CountChars) maxCount = TotalChars;
283    if (CountBytes) maxCount = TotalBytes;
284   
285    int fieldWidth = std::to_string(maxCount).size() + 1;
286    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
287
288    for (unsigned i = 0; i < inputFiles.size(); ++i) {
289        std::cout << std::setw(fieldWidth-1);
290        if (CountLines) {
291            std::cout << lineCount[i] << std::setw(fieldWidth);
292        }
293        if (CountWords) {
294            std::cout << wordCount[i] << std::setw(fieldWidth);
295        }
296        if (CountChars) {
297            std::cout << charCount[i] << std::setw(fieldWidth);
298        }
299        if (CountBytes) {
300            std::cout << byteCount[i];
301        }
302        std::cout << " " << inputFiles[i] << std::endl;
303    }
304    if (inputFiles.size() > 1) {
305        std::cout << std::setw(fieldWidth-1);
306        if (CountLines) {
307            std::cout << TotalLines << std::setw(fieldWidth);
308        }
309        if (CountWords) {
310            std::cout << TotalWords << std::setw(fieldWidth);
311        }
312        if (CountChars) {
313            std::cout << TotalChars << std::setw(fieldWidth);
314        }
315        if (CountBytes) {
316            std::cout << TotalBytes;
317        }
318        std::cout << " total" << std::endl;
319    }
320
321    return 0;
322}
Note: See TracBrowser for help on using the repository browser.