source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5036

Last change on this file since 5036 was 5036, checked in by cameron, 3 years ago

Include codegen options in -help

File size: 15.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20
21#include <llvm/Support/CommandLine.h>
22#include <llvm/Support/raw_ostream.h>
23
24#include <utf_encoding.h>
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
27#include <pablo/function.h>
28#include <IDISA/idisa_builder.h>
29#include <IDISA/idisa_target.h>
30#include <kernels/instance.h>
31#include <kernels/kernel.h>
32#include <kernels/s2p_kernel.h>
33
34#include <pablo/pablo_compiler.h>
35#include <pablo/pablo_toolchain.h>
36
37
38#include <utf_encoding.h>
39
40// mmap system
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43using namespace boost::iostreams;
44using namespace boost::filesystem;
45
46#include <fcntl.h>
// Option category under which the wc-specific command-line flags are registered.
static cl::OptionCategory wcFlags("Command Flags", "wc options");

// Positional arguments: the input files to process (at least one is required).
static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));

// The kinds of count that may be requested on the command line.
enum CountOptions {
    LineOption, WordOption, CharOption, ByteOption
};

// Count-selection flags: -l (lines), -w (words), -m (characters), -c (bytes).
// Declared with cl::Grouping so that single-letter flags can be combined (e.g. -lw).
static cl::list<CountOptions> wcOptions(
  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
61                                                 
62
63
static int defaultFieldWidth = 7;  // minimum column width used by main() when printing counts


// Which counts are reported.  Set by main() from the command-line flags and
// read by wc_gen() to decide which counting logic to generate.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

// Per-file results, indexed by input file index.  Sized by main() and filled
// in by record_counts().
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Totals over all input files, accumulated by record_counts().
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;
81
82
83//  The callback routine that records counts in progress.
84//
85extern "C" {
86    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
87        lineCount[fileIdx] = lines;
88        wordCount[fileIdx] = words;
89        charCount[fileIdx] = chars;
90        byteCount[fileIdx] = bytes;
91        TotalLines += lines;
92        TotalWords += words;
93        TotalChars += chars;
94        TotalBytes += bytes;
95    }
96}
97
98//
99//
100
101pablo::PabloFunction * wc_gen(Encoding encoding) {
102    //  input: 8 basis bit streams
103    //  output: 3 count streams
104   
105    pablo::PabloFunction * function = pablo::PabloFunction::Create("wc", 8, 3);
106    cc::CC_Compiler ccc(*function, encoding);
107   
108    pablo::PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
109    const std::vector<pablo::Var *> u8_bits = ccc.getBasisBits();
110
111    if (CountLines) {
112        pablo::PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
113        function->setResult(0, pBuilder.createAssign("lineCount", pBuilder.createCount(LF)));
114    }
115    else function->setResult(0, pBuilder.createAssign("lineCount", pBuilder.createZeroes()));
116    if (CountWords) {
117        pablo::PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
118       
119        pablo::PabloAST * wordChar = pBuilder.createNot(WS);
120        // WS_follow_or_start = 1 past WS or at start of file
121        pablo::PabloAST * WS_follow_or_start = pBuilder.createNot(pBuilder.createAdvance(wordChar, 1));
122        //
123        pablo::PabloAST * wordStart = pBuilder.createInFile(pBuilder.createAnd(wordChar, WS_follow_or_start));
124        function->setResult(1, pBuilder.createAssign("wordCount", pBuilder.createCount(wordStart)));
125    }
126    else function->setResult(1, pBuilder.createAssign("wordCount", pBuilder.createZeroes()));
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        pablo::PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
133        function->setResult(2, pBuilder.createAssign("charCount", pBuilder.createCount(u8Begin)));
134    }
135    else function->setResult(2, pBuilder.createAssign("charCount", pBuilder.createZeroes()));
136    return function;
137}
138
139using namespace kernel;
140
141
142class wcPipelineBuilder {
143public:
144    wcPipelineBuilder(llvm::Module * m, IDISA::IDISA_Builder * b);
145   
146    ~wcPipelineBuilder();
147   
148    void CreateKernels(pablo::PabloFunction * function);
149    llvm::Function * ExecuteKernels();
150   
151private:
152    llvm::Module *                      mMod;
153    IDISA::IDISA_Builder *              iBuilder;
154    KernelBuilder *                     mS2PKernel;
155    KernelBuilder *                     mWC_Kernel;
156    llvm::Type *                        mBitBlockType;
157    int                                 mBlockSize;
158};
159
160
161using namespace pablo;
162using namespace kernel;
163
164wcPipelineBuilder::wcPipelineBuilder(Module * m, IDISA::IDISA_Builder * b)
165: mMod(m)
166, iBuilder(b)
167, mBitBlockType(b->getBitBlockType())
168, mBlockSize(b->getBitBlockWidth()){
169   
170}
171
172wcPipelineBuilder::~wcPipelineBuilder(){
173    delete mS2PKernel;
174    delete mWC_Kernel;
175}
176
177void wcPipelineBuilder::CreateKernels(PabloFunction * function){
178    mS2PKernel = new KernelBuilder(iBuilder, "s2p", codegen::SegmentSize);
179    mWC_Kernel = new KernelBuilder(iBuilder, "wc", codegen::SegmentSize);
180   
181    generateS2PKernel(mMod, iBuilder, mS2PKernel);
182   
183    pablo_function_passes(function);
184   
185    PabloCompiler pablo_compiler(mMod, iBuilder);
186    try {
187        pablo_compiler.setKernel(mWC_Kernel);
188        pablo_compiler.compile(function);
189        delete function;
190        releaseSlabAllocatorMemory();
191    } catch (std::runtime_error e) {
192        delete function;
193        releaseSlabAllocatorMemory();
194        std::cerr << "Runtime error: " << e.what() << std::endl;
195        exit(1);
196    }
197   
198}
199
200
201
202
// Generates the driver function "Main"(input, bufferSize, fileIdx) in mMod and
// returns it.
//
// The generated code instantiates the s2p kernel on the input and the wc kernel
// on the s2p output, then consumes the buffer in three stages:
//   1. (only when codegen::SegmentSize > 1) whole segments of segmentSize blocks,
//   2. single full blocks,
//   3. the final block, which is either partial or empty.
// Finally it reads element 0 of each of the three wc output streams and calls
// record_counts(lines, words, chars, bufferSize, fileIdx).
Function * wcPipelineBuilder::ExecuteKernels() {
    Constant * record_counts_routine;
    Type * const int64ty = iBuilder->getInt64Ty();
    Type * const voidTy = Type::getVoidTy(mMod->getContext());
    // Declaration of the extern "C" callback: void record_counts(i64, i64, i64, i64, i64).
    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, int64ty, int64ty, int64ty, int64ty, int64ty, nullptr);
    // Input parameter type: pointer to a 1-element array of structs, each holding an array of 8 bit blocks.
    Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);

    // void Main(inputType input, i64 bufferSize, i64 fileIdx)
    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, int64ty, int64ty, nullptr));
    main->setCallingConv(CallingConv::C);
    Function::arg_iterator args = main->arg_begin();

    Value * const inputStream = &*(args++);
    inputStream->setName("input");
    Value * const bufferSize = &*(args++);
    bufferSize->setName("bufferSize");
    Value * const fileIdx = &*(args++);
    fileIdx->setName("fileIdx");

    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));

    BasicBlock * entryBlock = iBuilder->GetInsertBlock();

    // The segment loop blocks exist only when more than one block is processed per segment.
    BasicBlock * segmentCondBlock = nullptr;
    BasicBlock * segmentBodyBlock = nullptr;
    const unsigned segmentSize = codegen::SegmentSize;
    if (segmentSize > 1) {
        segmentCondBlock = BasicBlock::Create(mMod->getContext(), "segmentCond", main, 0);
        segmentBodyBlock = BasicBlock::Create(mMod->getContext(), "segmentBody", main, 0);
    }
    BasicBlock * fullCondBlock = BasicBlock::Create(mMod->getContext(), "fullCond", main, 0);
    BasicBlock * fullBodyBlock = BasicBlock::Create(mMod->getContext(), "fullBody", main, 0);
    BasicBlock * finalBlock = BasicBlock::Create(mMod->getContext(), "final", main, 0);
    BasicBlock * finalPartialBlock = BasicBlock::Create(mMod->getContext(), "partial", main, 0);
    BasicBlock * finalEmptyBlock = BasicBlock::Create(mMod->getContext(), "empty", main, 0);
    BasicBlock * endBlock = BasicBlock::Create(mMod->getContext(), "end", main, 0);

    // Chain the kernels: the wc kernel reads the s2p kernel's output stream buffer.
    Instance * s2pInstance = mS2PKernel->instantiate(inputStream);
    Instance * wcInstance = mWC_Kernel->instantiate(s2pInstance->getOutputStreamBuffer());

    // The value and predecessor block that feed the remainingBytes PHI of the full-block loop.
    Value * initialBufferSize = nullptr;
    BasicBlock * initialBlock = nullptr;

    if (segmentSize > 1) {
        // Segment loop: while remainingBytes >= mBlockSize * segmentSize, run
        // segmentSize s2p block calls followed by segmentSize wc block calls.
        iBuilder->CreateBr(segmentCondBlock);
        iBuilder->SetInsertPoint(segmentCondBlock);
        PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
        remainingBytes->addIncoming(bufferSize, entryBlock);
        Constant * const step = ConstantInt::get(int64ty, mBlockSize * segmentSize);
        Value * segmentCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
        iBuilder->CreateCondBr(segmentCondTest, fullCondBlock, segmentBodyBlock);
        iBuilder->SetInsertPoint(segmentBodyBlock);
        for (unsigned i = 0; i < segmentSize; ++i) {
            s2pInstance->CreateDoBlockCall();
        }
        for (unsigned i = 0; i < segmentSize; ++i) {
            wcInstance->CreateDoBlockCall();
        }
        remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock);
        iBuilder->CreateBr(segmentCondBlock);
        // The full-block loop continues from whatever the segment loop left over.
        initialBufferSize = remainingBytes;
        initialBlock = segmentCondBlock;
    } else {
        initialBufferSize = bufferSize;
        initialBlock = entryBlock;
        iBuilder->CreateBr(fullCondBlock);
    }

    // Full-block loop: while remainingBytes >= mBlockSize, run one s2p block call and one wc block call.
    iBuilder->SetInsertPoint(fullCondBlock);
    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
    remainingBytes->addIncoming(initialBufferSize, initialBlock);

    Constant * const step = ConstantInt::get(int64ty, mBlockSize);
    Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock);

    iBuilder->SetInsertPoint(fullBodyBlock);

    s2pInstance->CreateDoBlockCall();
    wcInstance->CreateDoBlockCall();

    Value * diff = iBuilder->CreateSub(remainingBytes, step);

    remainingBytes->addIncoming(diff, fullBodyBlock);
    iBuilder->CreateBr(fullCondBlock);

    // Final block: fewer than mBlockSize bytes remain.  The mask is all ones shifted
    // left by the remaining byte count, stored as the wc kernel's "EOFmask" state.
    // NOTE(review): the left operand is an mBlockSize-bit integer while remainingBytes
    // is i64 -- confirm that this CreateShl accepts operands of different widths, or
    // whether remainingBytes needs to be zero-extended first.
    iBuilder->SetInsertPoint(finalBlock);
    Value * EOF_mask = iBuilder->CreateShl(Constant::getAllOnesValue(iBuilder->getIntNTy(mBlockSize)), remainingBytes);
        wcInstance->setInternalState("EOFmask", iBuilder->CreateBitCast(EOF_mask, mBitBlockType));

    Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, ConstantInt::get(int64ty, 0));
    iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);


    // Partial final block: run s2p once more over the remaining data.
    // NOTE(review): presumably this reads a whole block from the input even though
    // fewer bytes remain -- confirm the input buffer is readable that far.
    iBuilder->SetInsertPoint(finalPartialBlock);
    s2pInstance->CreateDoBlockCall();

    iBuilder->CreateBr(endBlock);

    // Empty final block: nothing left to transpose, so clear the s2p output instead.
    iBuilder->SetInsertPoint(finalEmptyBlock);
    s2pInstance->clearOutputStreamSet();
    iBuilder->CreateBr(endBlock);

    iBuilder->SetInsertPoint(endBlock);

    // One last wc block call, made on both the partial and the empty path.
    wcInstance->CreateDoBlockCall();

    // Each count is read as element 0 of the corresponding wc output stream block.
    Value * lineCount = iBuilder->CreateExtractElement(iBuilder->CreateBlockAlignedLoad(wcInstance->getOutputStream((int) 0)), iBuilder->getInt32(0));
    Value * wordCount = iBuilder->CreateExtractElement(iBuilder->CreateBlockAlignedLoad(wcInstance->getOutputStream(1)), iBuilder->getInt32(0));
    Value * charCount = iBuilder->CreateExtractElement(iBuilder->CreateBlockAlignedLoad(wcInstance->getOutputStream(2)), iBuilder->getInt32(0));

    // The byte count reported is simply the bufferSize argument.
    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, bufferSize, fileIdx}));

    iBuilder->CreateRetVoid();

    return main;
}
319
320
// Signature through which the JIT-compiled "Main" function is called:
// (pointer to the file data, size of the data in bytes, index of the input file).
typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);

// Execution engine holding the compiled code; created by wcCodeGen() and
// deleted by main() once all files have been processed.
static ExecutionEngine * wcEngine = nullptr;
324
325wcFunctionType wcCodeGen(void) {
326                           
327    Module * M = new Module("wc", getGlobalContext());
328    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
329
330    wcPipelineBuilder pipelineBuilder(M, idb);
331    Encoding encoding(Encoding::Type::UTF_8, 8);
332    pablo::PabloFunction * function = wc_gen(encoding);
333    pipelineBuilder.CreateKernels(function);
334    llvm::Function * main_IR = pipelineBuilder.ExecuteKernels();
335
336    wcEngine = JIT_to_ExecutionEngine(M);
337   
338    wcEngine->finalizeObject();
339
340    delete idb;
341    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
342}
343
344void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
345    std::string fileName = inputFiles[fileIdx];
346    size_t fileSize;
347    char * fileBuffer;
348   
349    const path file(fileName);
350    if (exists(file)) {
351        if (is_directory(file)) {
352            return;
353        }
354    } else {
355        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
356        return;
357    }
358   
359    fileSize = file_size(file);
360    mapped_file_source mappedFile;
361    if (fileSize == 0) {
362        fileBuffer = nullptr;
363    }
364    else {
365        try {
366            mappedFile.open(fileName);
367        } catch (std::exception &e) {
368            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
369            return;
370        }
371        fileBuffer = const_cast<char *>(mappedFile.data());
372    }
373    fn_ptr(fileBuffer, fileSize, fileIdx);
374
375    mappedFile.close();
376   
377}
378
379
380
381
382int main(int argc, char *argv[]) {
383    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
384    cl::ParseCommandLineOptions(argc, argv);
385    if (wcOptions.size() == 0) {
386        CountLines = true;
387        CountWords = true;
388        CountBytes = true;
389    }
390    else {
391        CountLines = false;
392        CountWords = false;
393        CountBytes = false;
394        CountChars = false;
395        for (unsigned i = 0; i < wcOptions.size(); i++) {
396            switch (wcOptions[i]) {
397                case WordOption: CountWords = true; break;
398                case LineOption: CountLines = true; break;
399                case CharOption: CountBytes = true; CountChars = false; break;
400                case ByteOption: CountChars = true; CountBytes = false; break;
401            }
402        }
403    }
404   
405   
406    wcFunctionType fn_ptr = wcCodeGen();
407
408    int fileCount = inputFiles.size();
409    lineCount.resize(fileCount);
410    wordCount.resize(fileCount);
411    charCount.resize(fileCount);
412    byteCount.resize(fileCount);
413   
414    for (unsigned i = 0; i < inputFiles.size(); ++i) {
415        wc(fn_ptr, i);
416    }
417   
418    delete wcEngine;
419   
420    size_t maxCount = 0;
421    if (CountLines) maxCount = TotalLines;
422    if (CountWords) maxCount = TotalWords;
423    if (CountChars) maxCount = TotalChars;
424    if (CountBytes) maxCount = TotalBytes;
425   
426    int fieldWidth = std::to_string(maxCount).size() + 1;
427    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
428
429    for (unsigned i = 0; i < inputFiles.size(); ++i) {
430        std::cout << std::setw(fieldWidth-1);
431        if (CountLines) {
432            std::cout << lineCount[i] << std::setw(fieldWidth);
433        }
434        if (CountWords) {
435            std::cout << wordCount[i] << std::setw(fieldWidth);
436        }
437        if (CountChars) {
438            std::cout << charCount[i] << std::setw(fieldWidth);
439        }
440        if (CountBytes) {
441            std::cout << byteCount[i];
442        }
443        std::cout << " " << inputFiles[i] << std::endl;
444    }
445    if (inputFiles.size() > 1) {
446        std::cout << std::setw(fieldWidth-1);
447        if (CountLines) {
448            std::cout << TotalLines << std::setw(fieldWidth);
449        }
450        if (CountWords) {
451            std::cout << TotalWords << std::setw(fieldWidth);
452        }
453        if (CountChars) {
454            std::cout << TotalChars << std::setw(fieldWidth);
455        }
456        if (CountBytes) {
457            std::cout << TotalBytes;
458        }
459        std::cout << " total" << std::endl;
460    }
461
462    return 0;
463}
464
465                       
Note: See TracBrowser for help on using the repository browser.