source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5221

Last change on this file since 5221 was 5221, checked in by cameron, 2 years ago

Eliminate filesize parameter for pipeline construction

File size: 10.3 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include "llvm/Linker/Linker.h"
21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
27#include <pablo/prototype.h>
28#include <pablo/pablo_kernel.h>
29#include <IDISA/idisa_builder.h>
30#include <IDISA/idisa_target.h>
31#include <kernels/streamset.h>
32#include <kernels/interface.h>
33#include <kernels/kernel.h>
34#include <kernels/s2p_kernel.h>
35#include <kernels/pipeline.h>
36
37#include <pablo/pablo_compiler.h>
38#include <pablo/pablo_toolchain.h>
39
40// mmap system
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43
44#include <fcntl.h>
45
46static cl::OptionCategory wcFlags("Command Flags", "wc options");
47
48static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
49
50enum CountOptions {
51    LineOption, WordOption, CharOption, ByteOption
52};
53
54static cl::list<CountOptions> wcOptions(
55  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
56             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
57             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
58             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
59             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
60                                                 
61
62
// Minimum width of each numeric column in the report.
static int defaultFieldWidth = 7;

// Which counts were requested; set by main() from the command line.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

// Line statistics: per-file results (indexed by input file) and grand total.
std::vector<uint64_t> lineCount;
uint64_t TotalLines = 0;

// Word statistics.
std::vector<uint64_t> wordCount;
uint64_t TotalWords = 0;

// Character statistics.
std::vector<uint64_t> charCount;
uint64_t TotalChars = 0;

// Byte statistics.
std::vector<uint64_t> byteCount;
uint64_t TotalBytes = 0;
80
81using namespace pablo;
82using namespace kernel;
83using namespace parabix;
84
85//  The callback routine that records counts in progress.
86//
87extern "C" {
88    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
89        lineCount[fileIdx] = lines;
90        wordCount[fileIdx] = words;
91        charCount[fileIdx] = chars;
92        byteCount[fileIdx] = bytes;
93        TotalLines += lines;
94        TotalWords += words;
95        TotalChars += chars;
96        TotalBytes += bytes;
97    }
98}
99
100//
101//
102
103void wc_gen(PabloKernel * kernel) {
104    //  input: 8 basis bit streams
105    //  output: 3 counters
106   
107    cc::CC_Compiler ccc(kernel);
108   
109    PabloBuilder & pb = ccc.getBuilder();
110
111    Var * lc = kernel->addOutput("lineCount", kernel->getSizeTy());
112    Var * wc = kernel->addOutput("wordCount", kernel->getSizeTy());
113    Var * cc = kernel->addOutput("charCount", kernel->getSizeTy());
114
115    if (CountLines) {
116        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
117        pb.createAssign(lc, pb.createCount(LF));
118    }
119    if (CountWords) {
120        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
121        PabloAST * wordChar = pb.createNot(WS);
122        // WS_follow_or_start = 1 past WS or at start of file
123        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
124        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
125        pb.createAssign(wc, pb.createCount(wordStart));
126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
133        pb.createAssign(cc, pb.createCount(u8Begin));
134    }
135}
136
137Function * wcPipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder) {
138    Type * mBitBlockType = iBuilder->getBitBlockType();
139   
140    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
141
142    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
143
144    s2pKernel  s2pk(iBuilder);
145    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
146   
147    PabloKernel wck(iBuilder, "wc");
148    wc_gen(&wck);
149    pablo_function_passes(&wck);
150   
151    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
152   
153    s2pk.addKernelDeclarations(mMod);
154    wck.addKernelDeclarations(mMod);
155
156    Constant * record_counts_routine;
157    Type * const size_ty = iBuilder->getSizeTy();
158    Type * const voidTy = Type::getVoidTy(mMod->getContext());
159    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
160    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
161   
162    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
163    main->setCallingConv(CallingConv::C);
164    Function::arg_iterator args = main->arg_begin();
165   
166    Value * const inputStream = &*(args++);
167    inputStream->setName("input");
168    Value * const fileSize = &*(args++);
169    fileSize->setName("fileSize");
170    Value * const fileIdx = &*(args++);
171    fileIdx->setName("fileIdx");
172   
173    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
174
175    ByteStream.setStreamSetBuffer(inputStream, fileSize);
176    BasisBits.allocateBuffer();
177   
178    generatePipelineLoop(iBuilder, {&s2pk, &wck});
179   
180    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
181    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
182    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
183
184    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
185   
186    iBuilder->CreateRetVoid();
187   
188    Linker L(*mMod);
189    L.linkInModule(std::move(s2pM));
190    L.linkInModule(std::move(wcM));
191   
192    return main;
193}
194
195
196typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
197
198static ExecutionEngine * wcEngine = nullptr;
199
200wcFunctionType wcCodeGen(void) { 
201    Module * M = new Module("wc", getGlobalContext());
202    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
203
204    llvm::Function * main_IR = wcPipeline(M, idb);
205
206    wcEngine = JIT_to_ExecutionEngine(M);
207   
208    wcEngine->finalizeObject();
209
210    delete idb;
211    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
212}
213
214void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
215    std::string fileName = inputFiles[fileIdx];
216    size_t fileSize;
217    char * fileBuffer;
218   
219    const boost::filesystem::path file(fileName);
220    if (exists(file)) {
221        if (is_directory(file)) {
222            return;
223        }
224    } else {
225        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
226        return;
227    }
228   
229    fileSize = file_size(file);
230    boost::iostreams::mapped_file_source mappedFile;
231    if (fileSize == 0) {
232        fileBuffer = nullptr;
233    }
234    else {
235        try {
236            mappedFile.open(fileName);
237        } catch (std::exception &e) {
238            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
239            return;
240        }
241        fileBuffer = const_cast<char *>(mappedFile.data());
242    }
243    fn_ptr(fileBuffer, fileSize, fileIdx);
244
245    mappedFile.close();
246   
247}
248
249
250
251int main(int argc, char *argv[]) {
252    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
253    cl::ParseCommandLineOptions(argc, argv);
254    if (wcOptions.size() == 0) {
255        CountLines = true;
256        CountWords = true;
257        CountBytes = true;
258    }
259    else {
260        CountLines = false;
261        CountWords = false;
262        CountBytes = false;
263        CountChars = false;
264        for (unsigned i = 0; i < wcOptions.size(); i++) {
265            switch (wcOptions[i]) {
266                case WordOption: CountWords = true; break;
267                case LineOption: CountLines = true; break;
268                case CharOption: CountBytes = true; CountChars = false; break;
269                case ByteOption: CountChars = true; CountBytes = false; break;
270            }
271        }
272    }
273   
274   
275    wcFunctionType fn_ptr = wcCodeGen();
276
277    int fileCount = inputFiles.size();
278    lineCount.resize(fileCount);
279    wordCount.resize(fileCount);
280    charCount.resize(fileCount);
281    byteCount.resize(fileCount);
282   
283    for (unsigned i = 0; i < inputFiles.size(); ++i) {
284        wc(fn_ptr, i);
285    }
286   
287    size_t maxCount = 0;
288    if (CountLines) maxCount = TotalLines;
289    if (CountWords) maxCount = TotalWords;
290    if (CountChars) maxCount = TotalChars;
291    if (CountBytes) maxCount = TotalBytes;
292   
293    int fieldWidth = std::to_string(maxCount).size() + 1;
294    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
295
296    for (unsigned i = 0; i < inputFiles.size(); ++i) {
297        std::cout << std::setw(fieldWidth-1);
298        if (CountLines) {
299            std::cout << lineCount[i] << std::setw(fieldWidth);
300        }
301        if (CountWords) {
302            std::cout << wordCount[i] << std::setw(fieldWidth);
303        }
304        if (CountChars) {
305            std::cout << charCount[i] << std::setw(fieldWidth);
306        }
307        if (CountBytes) {
308            std::cout << byteCount[i];
309        }
310        std::cout << " " << inputFiles[i] << std::endl;
311    }
312    if (inputFiles.size() > 1) {
313        std::cout << std::setw(fieldWidth-1);
314        if (CountLines) {
315            std::cout << TotalLines << std::setw(fieldWidth);
316        }
317        if (CountWords) {
318            std::cout << TotalWords << std::setw(fieldWidth);
319        }
320        if (CountChars) {
321            std::cout << TotalChars << std::setw(fieldWidth);
322        }
323        if (CountBytes) {
324            std::cout << TotalBytes;
325        }
326        std::cout << " total" << std::endl;
327    }
328
329    return 0;
330}
331
332                       
Note: See TracBrowser for help on using the repository browser.