source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5202

Last change on this file since 5202 was 5202, checked in by nmedfort, 2 years ago

Initial work on adding types to PabloAST and mutable Var objects.

File size: 10.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include "llvm/Linker/Linker.h"
21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
27#include <pablo/function.h>
28#include <pablo/pablo_kernel.h>
29#include <IDISA/idisa_builder.h>
30#include <IDISA/idisa_target.h>
31#include <kernels/streamset.h>
32#include <kernels/interface.h>
33#include <kernels/kernel.h>
34#include <kernels/s2p_kernel.h>
35#include <kernels/pipeline.h>
36
37#include <pablo/pablo_compiler.h>
38#include <pablo/pablo_toolchain.h>
39
40// mmap system
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43
44#include <fcntl.h>
45using namespace pablo;
46
47static cl::OptionCategory wcFlags("Command Flags", "wc options");
48
49static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
50
51enum CountOptions {
52    LineOption, WordOption, CharOption, ByteOption
53};
54
55static cl::list<CountOptions> wcOptions(
56  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
57             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
58             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
59             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
60             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
61                                                 
62
63
// Minimum width of each printed count column.
static int defaultFieldWidth = 7;

// Which counters are enabled for this run; set by main() from the command line.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

// Per-file results, indexed by the position of the file in the input list.
// main() sizes these before any file is processed; record_counts() fills them.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running sums over all processed files, updated by record_counts().
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;
81
82
83//  The callback routine that records counts in progress.
84//
85extern "C" {
86    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
87        lineCount[fileIdx] = lines;
88        wordCount[fileIdx] = words;
89        charCount[fileIdx] = chars;
90        byteCount[fileIdx] = bytes;
91        TotalLines += lines;
92        TotalWords += words;
93        TotalChars += chars;
94        TotalBytes += bytes;
95    }
96}
97
98//
99//
100
101PabloFunction * wc_gen() {
102    //  input: 8 basis bit streams
103    //  output: 3 counters
104   
105    PabloFunction * function = PabloFunction::Create("wc"); // , 8, 0
106    cc::CC_Compiler ccc(*function);
107   
108    PabloBuilder & pb = ccc.getBuilder();
109    // const std::vector<Parameter *> u8_bits = ccc.getBasisBits();
110
111    Var * lc = function->addResult("lineCount", getScalarTy());
112    Var * wc = function->addResult("wordCount", getScalarTy());
113    Var * cc = function->addResult("charCount", getScalarTy());
114
115    if (CountLines) {
116        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
117        pb.createAssign(lc, pb.createCount(LF));
118    }
119    if (CountWords) {
120        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
121        PabloAST * wordChar = pb.createNot(WS);
122        // WS_follow_or_start = 1 past WS or at start of file
123        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
124        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
125        pb.createAssign(wc, pb.createCount(wordStart));
126    }
127    if (CountChars) {
128        //
129        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
130        // not UTF-8, or is not valid?
131        //
132        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
133        pb.createAssign(cc, pb.createCount(u8Begin));
134    }
135    return function;
136}
137
138using namespace kernel;
139using namespace parabix;
140
141
142Function * wcPipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder, PabloFunction * function) {
143    Type * mBitBlockType = iBuilder->getBitBlockType();
144   
145    ExternalFileBuffer ByteStream(iBuilder, StreamSetType(iBuilder,1, 8));
146    SingleBlockBuffer BasisBits(iBuilder, StreamSetType(iBuilder,8, 1));
147    //CircularBuffer BasisBits(iBuilder, StreamSetType(iBuilder,8, 1), codegen::SegmentSize * codegen::BufferSegments);
148
149    s2pKernel  s2pk(iBuilder);
150    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
151   
152    pablo_function_passes(function);
153    PabloKernel wck(iBuilder, "wc", function);
154   
155    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
156   
157    s2pk.addKernelDeclarations(mMod);
158    wck.addKernelDeclarations(mMod);
159
160    Constant * record_counts_routine;
161    Type * const size_ty = iBuilder->getSizeTy();
162    Type * const voidTy = Type::getVoidTy(mMod->getContext());
163    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
164    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
165   
166    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
167    main->setCallingConv(CallingConv::C);
168    Function::arg_iterator args = main->arg_begin();
169   
170    Value * const inputStream = &*(args++);
171    inputStream->setName("input");
172    Value * const fileSize = &*(args++);
173    fileSize->setName("fileSize");
174    Value * const fileIdx = &*(args++);
175    fileIdx->setName("fileIdx");
176   
177    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
178
179    ByteStream.setStreamSetBuffer(inputStream, fileSize);
180    BasisBits.allocateBuffer();
181   
182    Value * s2pInstance = s2pk.createInstance({});
183    Value * wcInstance = wck.createInstance({});
184   
185    generatePipelineLoop(iBuilder, {&s2pk, &wck}, {s2pInstance, wcInstance}, fileSize);
186   
187    Value * lineCount = wck.createGetAccumulatorCall(wcInstance, "lineCount");
188    Value * wordCount = wck.createGetAccumulatorCall(wcInstance, "wordCount");
189    Value * charCount = wck.createGetAccumulatorCall(wcInstance, "charCount");
190
191    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
192   
193    iBuilder->CreateRetVoid();
194   
195    Linker L(*mMod);
196    L.linkInModule(std::move(s2pM));
197    L.linkInModule(std::move(wcM));
198   
199    return main;
200}
201
202
203typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
204
205static ExecutionEngine * wcEngine = nullptr;
206
207wcFunctionType wcCodeGen(void) { 
208    Module * M = new Module("wc", getGlobalContext());
209    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
210
211    PabloFunction * function = wc_gen();
212    llvm::Function * main_IR = wcPipeline(M, idb, function);
213
214    wcEngine = JIT_to_ExecutionEngine(M);
215   
216    wcEngine->finalizeObject();
217
218    delete idb;
219    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
220}
221
222void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
223    std::string fileName = inputFiles[fileIdx];
224    size_t fileSize;
225    char * fileBuffer;
226   
227    const boost::filesystem::path file(fileName);
228    if (exists(file)) {
229        if (is_directory(file)) {
230            return;
231        }
232    } else {
233        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
234        return;
235    }
236   
237    fileSize = file_size(file);
238    boost::iostreams::mapped_file_source mappedFile;
239    if (fileSize == 0) {
240        fileBuffer = nullptr;
241    }
242    else {
243        try {
244            mappedFile.open(fileName);
245        } catch (std::exception &e) {
246            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
247            return;
248        }
249        fileBuffer = const_cast<char *>(mappedFile.data());
250    }
251    fn_ptr(fileBuffer, fileSize, fileIdx);
252
253    mappedFile.close();
254   
255}
256
257
258
259int main(int argc, char *argv[]) {
260    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
261    cl::ParseCommandLineOptions(argc, argv);
262    if (wcOptions.size() == 0) {
263        CountLines = true;
264        CountWords = true;
265        CountBytes = true;
266    }
267    else {
268        CountLines = false;
269        CountWords = false;
270        CountBytes = false;
271        CountChars = false;
272        for (unsigned i = 0; i < wcOptions.size(); i++) {
273            switch (wcOptions[i]) {
274                case WordOption: CountWords = true; break;
275                case LineOption: CountLines = true; break;
276                case CharOption: CountBytes = true; CountChars = false; break;
277                case ByteOption: CountChars = true; CountBytes = false; break;
278            }
279        }
280    }
281   
282   
283    wcFunctionType fn_ptr = wcCodeGen();
284
285    int fileCount = inputFiles.size();
286    lineCount.resize(fileCount);
287    wordCount.resize(fileCount);
288    charCount.resize(fileCount);
289    byteCount.resize(fileCount);
290   
291    for (unsigned i = 0; i < inputFiles.size(); ++i) {
292        wc(fn_ptr, i);
293    }
294   
295    size_t maxCount = 0;
296    if (CountLines) maxCount = TotalLines;
297    if (CountWords) maxCount = TotalWords;
298    if (CountChars) maxCount = TotalChars;
299    if (CountBytes) maxCount = TotalBytes;
300   
301    int fieldWidth = std::to_string(maxCount).size() + 1;
302    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
303
304    for (unsigned i = 0; i < inputFiles.size(); ++i) {
305        std::cout << std::setw(fieldWidth-1);
306        if (CountLines) {
307            std::cout << lineCount[i] << std::setw(fieldWidth);
308        }
309        if (CountWords) {
310            std::cout << wordCount[i] << std::setw(fieldWidth);
311        }
312        if (CountChars) {
313            std::cout << charCount[i] << std::setw(fieldWidth);
314        }
315        if (CountBytes) {
316            std::cout << byteCount[i];
317        }
318        std::cout << " " << inputFiles[i] << std::endl;
319    }
320    if (inputFiles.size() > 1) {
321        std::cout << std::setw(fieldWidth-1);
322        if (CountLines) {
323            std::cout << TotalLines << std::setw(fieldWidth);
324        }
325        if (CountWords) {
326            std::cout << TotalWords << std::setw(fieldWidth);
327        }
328        if (CountChars) {
329            std::cout << TotalChars << std::setw(fieldWidth);
330        }
331        if (CountBytes) {
332            std::cout << TotalBytes;
333        }
334        std::cout << " total" << std::endl;
335    }
336
337    return 0;
338}
339
340                       
Note: See TracBrowser for help on using the repository browser.