source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5249

Last change on this file since 5249 was 5249, checked in by cameron, 2 years ago

wc uses mmap kernel

File size: 10.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include "llvm/Linker/Linker.h"
21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
27#include <pablo/pablo_kernel.h>
28#include <IR_Gen/idisa_builder.h>
29#include <IR_Gen/idisa_target.h>
30#include <kernels/streamset.h>
31#include <kernels/interface.h>
32#include <kernels/kernel.h>
33#include <kernels/mmap_kernel.h>
34#include <kernels/s2p_kernel.h>
35#include <kernels/pipeline.h>
36
37#include <pablo/pablo_compiler.h>
38#include <pablo/pablo_toolchain.h>
39
40// mmap system
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43
44#include <fcntl.h>
45
46static cl::OptionCategory wcFlags("Command Flags", "wc options");
47
48static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
49
50enum CountOptions {
51    LineOption, WordOption, CharOption, ByteOption
52};
53
54static cl::list<CountOptions> wcOptions(
55  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
56             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
57             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
58             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
59             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
60                                                 
61
62
// Minimum width of each printed count column.
static int defaultFieldWidth{7};

// Which counts are to be produced; set by main() from the command-line flags.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the position of the file in inputFiles.
// Sized by main() and filled in by record_counts().
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running sums over all files processed so far; updated by record_counts().
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
80
81using namespace pablo;
82using namespace kernel;
83using namespace parabix;
84
85//  The callback routine that records counts in progress.
86//
87extern "C" {
88    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
89        lineCount[fileIdx] = lines;
90        wordCount[fileIdx] = words;
91        charCount[fileIdx] = chars;
92        byteCount[fileIdx] = bytes;
93        TotalLines += lines;
94        TotalWords += words;
95        TotalChars += chars;
96        TotalBytes += bytes;
97    }
98}
99
100//
101//
102
103void wc_gen(PabloKernel * kernel) {
104    //  input: 8 basis bit streams
105    //  output: 3 counters
106   
107    cc::CC_Compiler ccc(kernel);
108   
109    PabloBuilder & pb = ccc.getBuilder();
110
111    Var * lc = kernel->addOutput("lineCount", kernel->getSizeTy());
112    Var * wc = kernel->addOutput("wordCount", kernel->getSizeTy());
113    Var * cc = kernel->addOutput("charCount", kernel->getSizeTy());
114
115    if (CountLines) {
116        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
117        pb.createAssign(lc, pb.createCount(LF));
118    }
119    if (CountWords) {
120        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
121        PabloAST * wordChar = pb.createNot(WS);
122        // WS_follow_or_start = 1 past WS or at start of file
123        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
124        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
125        pb.createAssign(wc, pb.createCount(wordStart));
126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
133        pb.createAssign(cc, pb.createCount(u8Begin));
134    }
135}
136
137Function * pipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder) {
138    Type * mBitBlockType = iBuilder->getBitBlockType();
139    Constant * record_counts_routine;
140    Type * const size_ty = iBuilder->getSizeTy();
141    Type * const voidTy = iBuilder->getVoidTy();
142    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
143    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
144   
145    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
146    main->setCallingConv(CallingConv::C);
147    Function::arg_iterator args = main->arg_begin();
148   
149    Value * const inputStream = &*(args++);
150    inputStream->setName("input");
151    Value * const fileSize = &*(args++);
152    fileSize->setName("fileSize");
153    Value * const fileIdx = &*(args++);
154    fileIdx->setName("fileIdx");
155   
156    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
157
158    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
159   
160    MMapSourceKernel mmapK(iBuilder, iBuilder->getStride());
161    std::unique_ptr<Module> mmapM = mmapK.createKernelModule({}, {&ByteStream});
162    mmapK.setInitialArguments({fileSize});
163   
164    S2PKernel  s2pk(iBuilder);
165    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
166
167    PabloKernel wck(iBuilder, "wc");
168    wc_gen(&wck);
169    pablo_function_passes(&wck);
170   
171    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
172   
173    mmapK.addKernelDeclarations(mMod);
174    s2pk.addKernelDeclarations(mMod);
175    wck.addKernelDeclarations(mMod);
176   
177    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
178
179    ByteStream.setStreamSetBuffer(inputStream, fileSize);
180    BasisBits.allocateBuffer();
181   
182    generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &wck});
183   
184    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
185    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
186    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
187
188    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
189   
190    iBuilder->CreateRetVoid();
191   
192    Linker L(*mMod);
193    L.linkInModule(std::move(mmapM));
194    L.linkInModule(std::move(s2pM));
195    L.linkInModule(std::move(wcM));
196   
197    return main;
198}
199
200
201typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
202
203static ExecutionEngine * wcEngine = nullptr;
204
205wcFunctionType wcCodeGen(void) { 
206    Module * M = new Module("wc", getGlobalContext());
207    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
208
209    llvm::Function * main_IR = pipeline(M, idb);
210
211    wcEngine = JIT_to_ExecutionEngine(M);
212   
213    wcEngine->finalizeObject();
214
215    delete idb;
216    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
217}
218
219void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
220    std::string fileName = inputFiles[fileIdx];
221    size_t fileSize;
222    char * fileBuffer;
223   
224    const boost::filesystem::path file(fileName);
225    if (exists(file)) {
226        if (is_directory(file)) {
227            return;
228        }
229    } else {
230        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
231        return;
232    }
233   
234    fileSize = file_size(file);
235    boost::iostreams::mapped_file_source mappedFile;
236    if (fileSize == 0) {
237        fileBuffer = nullptr;
238    }
239    else {
240        try {
241            mappedFile.open(fileName);
242        } catch (std::exception &e) {
243            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
244            return;
245        }
246        fileBuffer = const_cast<char *>(mappedFile.data());
247    }
248    fn_ptr(fileBuffer, fileSize, fileIdx);
249
250    mappedFile.close();
251   
252}
253
254
255
256int main(int argc, char *argv[]) {
257    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
258    cl::ParseCommandLineOptions(argc, argv);
259    if (wcOptions.size() == 0) {
260        CountLines = true;
261        CountWords = true;
262        CountBytes = true;
263    }
264    else {
265        CountLines = false;
266        CountWords = false;
267        CountBytes = false;
268        CountChars = false;
269        for (unsigned i = 0; i < wcOptions.size(); i++) {
270            switch (wcOptions[i]) {
271                case WordOption: CountWords = true; break;
272                case LineOption: CountLines = true; break;
273                case CharOption: CountBytes = true; CountChars = false; break;
274                case ByteOption: CountChars = true; CountBytes = false; break;
275            }
276        }
277    }
278   
279   
280    wcFunctionType fn_ptr = wcCodeGen();
281
282    int fileCount = inputFiles.size();
283    lineCount.resize(fileCount);
284    wordCount.resize(fileCount);
285    charCount.resize(fileCount);
286    byteCount.resize(fileCount);
287   
288    for (unsigned i = 0; i < inputFiles.size(); ++i) {
289        wc(fn_ptr, i);
290    }
291   
292    size_t maxCount = 0;
293    if (CountLines) maxCount = TotalLines;
294    if (CountWords) maxCount = TotalWords;
295    if (CountChars) maxCount = TotalChars;
296    if (CountBytes) maxCount = TotalBytes;
297   
298    int fieldWidth = std::to_string(maxCount).size() + 1;
299    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
300
301    for (unsigned i = 0; i < inputFiles.size(); ++i) {
302        std::cout << std::setw(fieldWidth-1);
303        if (CountLines) {
304            std::cout << lineCount[i] << std::setw(fieldWidth);
305        }
306        if (CountWords) {
307            std::cout << wordCount[i] << std::setw(fieldWidth);
308        }
309        if (CountChars) {
310            std::cout << charCount[i] << std::setw(fieldWidth);
311        }
312        if (CountBytes) {
313            std::cout << byteCount[i];
314        }
315        std::cout << " " << inputFiles[i] << std::endl;
316    }
317    if (inputFiles.size() > 1) {
318        std::cout << std::setw(fieldWidth-1);
319        if (CountLines) {
320            std::cout << TotalLines << std::setw(fieldWidth);
321        }
322        if (CountWords) {
323            std::cout << TotalWords << std::setw(fieldWidth);
324        }
325        if (CountChars) {
326            std::cout << TotalChars << std::setw(fieldWidth);
327        }
328        if (CountBytes) {
329            std::cout << TotalBytes;
330        }
331        std::cout << " total" << std::endl;
332    }
333
334    return 0;
335}
Note: See TracBrowser for help on using the repository browser.