source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5074

Last change on this file since 5074 was 5074, checked in by cameron, 3 years ago

Kernel infrastructure: move common logic into the KernelBuilder base class; demo linking in wc

File size: 12.4 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include "llvm/Linker/Linker.h"
21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <utf_encoding.h>
26#include <re/re_cc.h>
27#include <cc/cc_compiler.h>
28#include <pablo/function.h>
29#include <pablo/pablo_kernel.h>
30#include <IDISA/idisa_builder.h>
31#include <IDISA/idisa_target.h>
32#include <kernels/interface.h>
33#include <kernels/kernel.h>
34#include <kernels/s2p_kernel.h>
35
36#include <pablo/pablo_compiler.h>
37#include <pablo/pablo_toolchain.h>
38
39
40#include <utf_encoding.h>
41
42// mmap system
43#include <boost/filesystem.hpp>
44#include <boost/iostreams/device/mapped_file.hpp>
45using namespace boost::iostreams;
46using namespace boost::filesystem;
47
48#include <fcntl.h>
49static cl::OptionCategory wcFlags("Command Flags", "wc options");
50
51static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
52
53enum CountOptions {
54    LineOption, WordOption, CharOption, ByteOption
55};
56
57static cl::list<CountOptions> wcOptions(
58  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
59             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
60             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
61             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
62             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
63                                                 
64
65
// Minimum width used when printing a column of counts.
static int defaultFieldWidth = 7;

// Which counts were requested; set in main() from the command line flags.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

// Per-file results, indexed by the position of the file in inputFiles.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals over every file recorded so far.
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;

//  Callback that stores the counts for one file and folds them into the totals.
//  It has C linkage so that the generated code can refer to it by the plain
//  symbol name "record_counts".
//
//  fileIdx must be a valid index into the per-file vectors, which therefore
//  have to be sized before the first call; no bounds check is made here.
extern "C" {
    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
        // Accumulate the grand totals.
        TotalLines += lines;
        TotalWords += words;
        TotalChars += chars;
        TotalBytes += bytes;

        // Remember the individual results for this file.
        lineCount[fileIdx] = lines;
        wordCount[fileIdx] = words;
        charCount[fileIdx] = chars;
        byteCount[fileIdx] = bytes;
    }
}
99
100//
101//
102
103pablo::PabloFunction * wc_gen(Encoding encoding) {
104    //  input: 8 basis bit streams
105    //  output: 3 counters
106   
107    pablo::PabloFunction * function = pablo::PabloFunction::Create("wc", 8, 0);
108    cc::CC_Compiler ccc(*function, encoding);
109   
110    pablo::PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
111    const std::vector<pablo::Var *> u8_bits = ccc.getBasisBits();
112
113    if (CountLines) {
114        pablo::PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
115        function->setResultCount(pBuilder.createCount("lineCount", LF));
116    }
117    if (CountWords) {
118        pablo::PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
119       
120        pablo::PabloAST * wordChar = pBuilder.createNot(WS);
121        // WS_follow_or_start = 1 past WS or at start of file
122        pablo::PabloAST * WS_follow_or_start = pBuilder.createNot(pBuilder.createAdvance(wordChar, 1));
123        //
124        pablo::PabloAST * wordStart = pBuilder.createInFile(pBuilder.createAnd(wordChar, WS_follow_or_start));
125        function->setResultCount(pBuilder.createCount("wordCount", wordStart));
126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        pablo::PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
133        function->setResultCount(pBuilder.createCount("charCount", u8Begin));
134    }
135    return function;
136}
137
138using namespace kernel;
139
140
141Function * wcPipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder, pablo::PabloFunction * function) {
142    Type * mBitBlockType = iBuilder->getBitBlockType();
143    unsigned mBlockSize = iBuilder->getBitBlockWidth();
144    s2pKernel  s2pk(iBuilder);
145    std::unique_ptr<Module> s2pM = s2pk.createKernelModule();
146    pablo_function_passes(function);
147    pablo::PabloKernel  wck(iBuilder, "wc", function, {"lineCount", "wordCount", "charCount"});
148    std::unique_ptr<Module> wcM = wck.createKernelModule();
149   
150    s2pk.addKernelDeclarations(mMod);
151    wck.addKernelDeclarations(mMod);
152
153    Constant * record_counts_routine;
154    Type * const int64ty = iBuilder->getInt64Ty();
155    Type * const voidTy = Type::getVoidTy(mMod->getContext());
156    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, int64ty, int64ty, int64ty, int64ty, int64ty, nullptr);
157    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
158   
159    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, int64ty, int64ty, nullptr));
160    main->setCallingConv(CallingConv::C);
161    Function::arg_iterator args = main->arg_begin();
162   
163    Value * const inputStream = &*(args++);
164    inputStream->setName("input");
165    Value * const bufferSize = &*(args++);
166    bufferSize->setName("bufferSize");
167    Value * const fileIdx = &*(args++);
168    fileIdx->setName("fileIdx");
169   
170    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
171   
172    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
173
174    BasicBlock * fullCondBlock = BasicBlock::Create(mMod->getContext(), "fullCond", main, 0);
175    BasicBlock * fullBodyBlock = BasicBlock::Create(mMod->getContext(), "fullBody", main, 0);
176    BasicBlock * finalBlock = BasicBlock::Create(mMod->getContext(), "final", main, 0);
177
178    StreamSetBuffer ByteStream(iBuilder, StreamSetType(1, 8), 0);
179    StreamSetBuffer BasisBits(iBuilder, StreamSetType(8, 1), 1);
180    ByteStream.setStreamSetBuffer(inputStream);
181    Value * basisBits = BasisBits.allocateBuffer();
182
183    Value * s2pInstance = s2pk.createInstance({});
184    Value * wcInstance = wck.createInstance({});
185   
186    Value * initialBufferSize = bufferSize;
187    BasicBlock * initialBlock = entryBlock;
188    Value * initialBlockNo = iBuilder->getInt64(0);
189
190    iBuilder->CreateBr(fullCondBlock);
191
192   
193    iBuilder->SetInsertPoint(fullCondBlock);
194    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
195    remainingBytes->addIncoming(initialBufferSize, initialBlock);
196    PHINode * blockNo = iBuilder->CreatePHI(int64ty, 2, "blockNo");
197    blockNo->addIncoming(initialBlockNo, initialBlock);
198
199    Constant * const step = ConstantInt::get(int64ty, mBlockSize);
200    Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
201    iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock);
202   
203    iBuilder->SetInsertPoint(fullBodyBlock);
204
205    s2pk.createDoBlockCall(s2pInstance, {ByteStream.getBlockPointer(blockNo), basisBits});
206    wck.createDoBlockCall(wcInstance, {basisBits});
207
208    Value * diff = iBuilder->CreateSub(remainingBytes, step);
209
210    remainingBytes->addIncoming(diff, fullBodyBlock);
211    blockNo->addIncoming(iBuilder->CreateAdd(blockNo, iBuilder->getInt64(1)), fullBodyBlock);
212    iBuilder->CreateBr(fullCondBlock);
213   
214    iBuilder->SetInsertPoint(finalBlock);
215    s2pk.createFinalBlockCall(s2pInstance, remainingBytes, {ByteStream.getBlockPointer(blockNo), basisBits});
216    wck.createFinalBlockCall(wcInstance, remainingBytes, {basisBits});
217   
218    Value * lineCount = wck.createGetAccumulatorCall(wcInstance, "lineCount");
219    Value * wordCount = wck.createGetAccumulatorCall(wcInstance, "wordCount");
220    Value * charCount = wck.createGetAccumulatorCall(wcInstance, "charCount");;
221
222    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, bufferSize, fileIdx}));
223   
224    iBuilder->CreateRetVoid();
225   
226    Linker L(*mMod);
227    L.linkInModule(std::move(s2pM));
228    L.linkInModule(std::move(wcM));
229   
230    return main;
231}
232
233
234typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
235
236static ExecutionEngine * wcEngine = nullptr;
237
238wcFunctionType wcCodeGen(void) {
239                           
240    Module * M = new Module("wc", getGlobalContext());
241    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
242
243    Encoding encoding(Encoding::Type::UTF_8, 8);
244    pablo::PabloFunction * function = wc_gen(encoding);
245    llvm::Function * main_IR = wcPipeline(M, idb, function);
246
247    wcEngine = JIT_to_ExecutionEngine(M);
248   
249    wcEngine->finalizeObject();
250
251    delete idb;
252    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
253}
254
255void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
256    std::string fileName = inputFiles[fileIdx];
257    size_t fileSize;
258    char * fileBuffer;
259   
260    const path file(fileName);
261    if (exists(file)) {
262        if (is_directory(file)) {
263            return;
264        }
265    } else {
266        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
267        return;
268    }
269   
270    fileSize = file_size(file);
271    mapped_file_source mappedFile;
272    if (fileSize == 0) {
273        fileBuffer = nullptr;
274    }
275    else {
276        try {
277            mappedFile.open(fileName);
278        } catch (std::exception &e) {
279            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
280            return;
281        }
282        fileBuffer = const_cast<char *>(mappedFile.data());
283    }
284    fn_ptr(fileBuffer, fileSize, fileIdx);
285
286    mappedFile.close();
287   
288}
289
290
291
292int main(int argc, char *argv[]) {
293    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
294    cl::ParseCommandLineOptions(argc, argv);
295    if (wcOptions.size() == 0) {
296        CountLines = true;
297        CountWords = true;
298        CountBytes = true;
299    }
300    else {
301        CountLines = false;
302        CountWords = false;
303        CountBytes = false;
304        CountChars = false;
305        for (unsigned i = 0; i < wcOptions.size(); i++) {
306            switch (wcOptions[i]) {
307                case WordOption: CountWords = true; break;
308                case LineOption: CountLines = true; break;
309                case CharOption: CountBytes = true; CountChars = false; break;
310                case ByteOption: CountChars = true; CountBytes = false; break;
311            }
312        }
313    }
314   
315   
316    wcFunctionType fn_ptr = wcCodeGen();
317
318    int fileCount = inputFiles.size();
319    lineCount.resize(fileCount);
320    wordCount.resize(fileCount);
321    charCount.resize(fileCount);
322    byteCount.resize(fileCount);
323   
324    for (unsigned i = 0; i < inputFiles.size(); ++i) {
325        wc(fn_ptr, i);
326    }
327   
328    delete wcEngine;
329   
330    size_t maxCount = 0;
331    if (CountLines) maxCount = TotalLines;
332    if (CountWords) maxCount = TotalWords;
333    if (CountChars) maxCount = TotalChars;
334    if (CountBytes) maxCount = TotalBytes;
335   
336    int fieldWidth = std::to_string(maxCount).size() + 1;
337    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
338
339    for (unsigned i = 0; i < inputFiles.size(); ++i) {
340        std::cout << std::setw(fieldWidth-1);
341        if (CountLines) {
342            std::cout << lineCount[i] << std::setw(fieldWidth);
343        }
344        if (CountWords) {
345            std::cout << wordCount[i] << std::setw(fieldWidth);
346        }
347        if (CountChars) {
348            std::cout << charCount[i] << std::setw(fieldWidth);
349        }
350        if (CountBytes) {
351            std::cout << byteCount[i];
352        }
353        std::cout << " " << inputFiles[i] << std::endl;
354    }
355    if (inputFiles.size() > 1) {
356        std::cout << std::setw(fieldWidth-1);
357        if (CountLines) {
358            std::cout << TotalLines << std::setw(fieldWidth);
359        }
360        if (CountWords) {
361            std::cout << TotalWords << std::setw(fieldWidth);
362        }
363        if (CountChars) {
364            std::cout << TotalChars << std::setw(fieldWidth);
365        }
366        if (CountBytes) {
367            std::cout << TotalBytes;
368        }
369        std::cout << " total" << std::endl;
370    }
371
372    return 0;
373}
374
375                       
Note: See TracBrowser for help on using the repository browser.