source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5217

Last change on this file since 5217 was 5217, checked in by nmedfort, 2 years ago

Merged the PabloFunction and PabloKernel classes. Updated projects where necessary.

File size: 10.4 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include "llvm/Linker/Linker.h"
21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
27#include <pablo/prototype.h>
28#include <pablo/pablo_kernel.h>
29#include <IDISA/idisa_builder.h>
30#include <IDISA/idisa_target.h>
31#include <kernels/streamset.h>
32#include <kernels/interface.h>
33#include <kernels/kernel.h>
34#include <kernels/s2p_kernel.h>
35#include <kernels/pipeline.h>
36
37#include <pablo/pablo_compiler.h>
38#include <pablo/pablo_toolchain.h>
39
40// mmap system
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43
44#include <fcntl.h>
45
46static cl::OptionCategory wcFlags("Command Flags", "wc options");
47
48static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
49
50enum CountOptions {
51    LineOption, WordOption, CharOption, ByteOption
52};
53
54static cl::list<CountOptions> wcOptions(
55  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
56             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
57             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
58             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
59             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
60                                                 
61
62
// Minimum width used for each printed count column.
static int defaultFieldWidth{7};

// Which counts are reported; set in main() from the command-line flags.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in inputFiles.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running sums of the counts recorded for every processed file.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
80
81using namespace pablo;
82using namespace kernel;
83using namespace parabix;
84
85//  The callback routine that records counts in progress.
86//
87extern "C" {
88    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
89        lineCount[fileIdx] = lines;
90        wordCount[fileIdx] = words;
91        charCount[fileIdx] = chars;
92        byteCount[fileIdx] = bytes;
93        TotalLines += lines;
94        TotalWords += words;
95        TotalChars += chars;
96        TotalBytes += bytes;
97    }
98}
99
100//
101//
102
103void wc_gen(PabloKernel * kernel) {
104    //  input: 8 basis bit streams
105    //  output: 3 counters
106   
107    cc::CC_Compiler ccc(kernel);
108   
109    PabloBuilder & pb = ccc.getBuilder();
110
111    Var * lc = kernel->addOutput("lineCount", kernel->getSizeTy());
112    Var * wc = kernel->addOutput("wordCount", kernel->getSizeTy());
113    Var * cc = kernel->addOutput("charCount", kernel->getSizeTy());
114
115    if (CountLines) {
116        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
117        pb.createAssign(lc, pb.createCount(LF));
118    }
119    if (CountWords) {
120        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
121        PabloAST * wordChar = pb.createNot(WS);
122        // WS_follow_or_start = 1 past WS or at start of file
123        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
124        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
125        pb.createAssign(wc, pb.createCount(wordStart));
126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
133        pb.createAssign(cc, pb.createCount(u8Begin));
134    }
135}
136
137Function * wcPipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder) {
138    Type * mBitBlockType = iBuilder->getBitBlockType();
139   
140    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
141
142    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
143
144    s2pKernel  s2pk(iBuilder);
145    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
146   
147    PabloKernel wck(iBuilder, "wc");
148    wc_gen(&wck);
149    pablo_function_passes(&wck);
150   
151    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
152   
153    s2pk.addKernelDeclarations(mMod);
154    wck.addKernelDeclarations(mMod);
155
156    Constant * record_counts_routine;
157    Type * const size_ty = iBuilder->getSizeTy();
158    Type * const voidTy = Type::getVoidTy(mMod->getContext());
159    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
160    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
161   
162    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
163    main->setCallingConv(CallingConv::C);
164    Function::arg_iterator args = main->arg_begin();
165   
166    Value * const inputStream = &*(args++);
167    inputStream->setName("input");
168    Value * const fileSize = &*(args++);
169    fileSize->setName("fileSize");
170    Value * const fileIdx = &*(args++);
171    fileIdx->setName("fileIdx");
172   
173    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
174
175    ByteStream.setStreamSetBuffer(inputStream, fileSize);
176    BasisBits.allocateBuffer();
177   
178    Value * s2pInstance = s2pk.createInstance({});
179    Value * wcInstance = wck.createInstance({});
180   
181    generatePipelineLoop(iBuilder, {&s2pk, &wck}, {s2pInstance, wcInstance}, fileSize);
182   
183    Value * lineCount = wck.createGetAccumulatorCall(wcInstance, "lineCount");
184    Value * wordCount = wck.createGetAccumulatorCall(wcInstance, "wordCount");
185    Value * charCount = wck.createGetAccumulatorCall(wcInstance, "charCount");
186
187    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
188   
189    iBuilder->CreateRetVoid();
190   
191    Linker L(*mMod);
192    L.linkInModule(std::move(s2pM));
193    L.linkInModule(std::move(wcM));
194   
195    return main;
196}
197
198
199typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
200
201static ExecutionEngine * wcEngine = nullptr;
202
203wcFunctionType wcCodeGen(void) { 
204    Module * M = new Module("wc", getGlobalContext());
205    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
206
207    llvm::Function * main_IR = wcPipeline(M, idb);
208
209    wcEngine = JIT_to_ExecutionEngine(M);
210   
211    wcEngine->finalizeObject();
212
213    delete idb;
214    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
215}
216
217void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
218    std::string fileName = inputFiles[fileIdx];
219    size_t fileSize;
220    char * fileBuffer;
221   
222    const boost::filesystem::path file(fileName);
223    if (exists(file)) {
224        if (is_directory(file)) {
225            return;
226        }
227    } else {
228        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
229        return;
230    }
231   
232    fileSize = file_size(file);
233    boost::iostreams::mapped_file_source mappedFile;
234    if (fileSize == 0) {
235        fileBuffer = nullptr;
236    }
237    else {
238        try {
239            mappedFile.open(fileName);
240        } catch (std::exception &e) {
241            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
242            return;
243        }
244        fileBuffer = const_cast<char *>(mappedFile.data());
245    }
246    fn_ptr(fileBuffer, fileSize, fileIdx);
247
248    mappedFile.close();
249   
250}
251
252
253
254int main(int argc, char *argv[]) {
255    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
256    cl::ParseCommandLineOptions(argc, argv);
257    if (wcOptions.size() == 0) {
258        CountLines = true;
259        CountWords = true;
260        CountBytes = true;
261    }
262    else {
263        CountLines = false;
264        CountWords = false;
265        CountBytes = false;
266        CountChars = false;
267        for (unsigned i = 0; i < wcOptions.size(); i++) {
268            switch (wcOptions[i]) {
269                case WordOption: CountWords = true; break;
270                case LineOption: CountLines = true; break;
271                case CharOption: CountBytes = true; CountChars = false; break;
272                case ByteOption: CountChars = true; CountBytes = false; break;
273            }
274        }
275    }
276   
277   
278    wcFunctionType fn_ptr = wcCodeGen();
279
280    int fileCount = inputFiles.size();
281    lineCount.resize(fileCount);
282    wordCount.resize(fileCount);
283    charCount.resize(fileCount);
284    byteCount.resize(fileCount);
285   
286    for (unsigned i = 0; i < inputFiles.size(); ++i) {
287        wc(fn_ptr, i);
288    }
289   
290    size_t maxCount = 0;
291    if (CountLines) maxCount = TotalLines;
292    if (CountWords) maxCount = TotalWords;
293    if (CountChars) maxCount = TotalChars;
294    if (CountBytes) maxCount = TotalBytes;
295   
296    int fieldWidth = std::to_string(maxCount).size() + 1;
297    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
298
299    for (unsigned i = 0; i < inputFiles.size(); ++i) {
300        std::cout << std::setw(fieldWidth-1);
301        if (CountLines) {
302            std::cout << lineCount[i] << std::setw(fieldWidth);
303        }
304        if (CountWords) {
305            std::cout << wordCount[i] << std::setw(fieldWidth);
306        }
307        if (CountChars) {
308            std::cout << charCount[i] << std::setw(fieldWidth);
309        }
310        if (CountBytes) {
311            std::cout << byteCount[i];
312        }
313        std::cout << " " << inputFiles[i] << std::endl;
314    }
315    if (inputFiles.size() > 1) {
316        std::cout << std::setw(fieldWidth-1);
317        if (CountLines) {
318            std::cout << TotalLines << std::setw(fieldWidth);
319        }
320        if (CountWords) {
321            std::cout << TotalWords << std::setw(fieldWidth);
322        }
323        if (CountChars) {
324            std::cout << TotalChars << std::setw(fieldWidth);
325        }
326        if (CountBytes) {
327            std::cout << TotalBytes;
328        }
329        std::cout << " total" << std::endl;
330    }
331
332    return 0;
333}
334
335                       
Note: See TracBrowser for help on using the repository browser.