source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5137

Last change on this file since 5137 was 5137, checked in by cameron, 3 years ago

Some clean ups of encoding info for ccc restructuring.

File size: 10.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include "llvm/Linker/Linker.h"
21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
27#include <pablo/function.h>
28#include <pablo/pablo_kernel.h>
29#include <IDISA/idisa_builder.h>
30#include <IDISA/idisa_target.h>
31#include <kernels/streamset.h>
32#include <kernels/interface.h>
33#include <kernels/kernel.h>
34#include <kernels/s2p_kernel.h>
35#include <kernels/pipeline.h>
36
37#include <pablo/pablo_compiler.h>
38#include <pablo/pablo_toolchain.h>
39
40// mmap system
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43
44#include <fcntl.h>
45static cl::OptionCategory wcFlags("Command Flags", "wc options");
46
47static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
48
// The kinds of count that can be requested on the command line.
enum CountOptions {
    LineOption,   // -l
    WordOption,   // -w
    CharOption,   // -m
    ByteOption    // -c
};
52
53static cl::list<CountOptions> wcOptions(
54  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
55             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
56             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
57             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
58             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
59                                                 
60
61
// Minimum width used for each count column in the printed report.
static int defaultFieldWidth{7};

// Which counts are enabled; main() sets these from the command line.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in inputFiles.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals across every file processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
79
80
81//  The callback routine that records counts in progress.
82//
83extern "C" {
84    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
85        lineCount[fileIdx] = lines;
86        wordCount[fileIdx] = words;
87        charCount[fileIdx] = chars;
88        byteCount[fileIdx] = bytes;
89        TotalLines += lines;
90        TotalWords += words;
91        TotalChars += chars;
92        TotalBytes += bytes;
93    }
94}
95
96//
97//
98
99pablo::PabloFunction * wc_gen() {
100    //  input: 8 basis bit streams
101    //  output: 3 counters
102   
103    pablo::PabloFunction * function = pablo::PabloFunction::Create("wc", 8, 0);
104    cc::CC_Compiler ccc(*function);
105   
106    pablo::PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
107    const std::vector<pablo::Var *> u8_bits = ccc.getBasisBits();
108
109    if (CountLines) {
110        pablo::PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
111        function->setResultCount(pBuilder.createCount("lineCount", LF));
112    }
113    if (CountWords) {
114        pablo::PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
115       
116        pablo::PabloAST * wordChar = pBuilder.createNot(WS);
117        // WS_follow_or_start = 1 past WS or at start of file
118        pablo::PabloAST * WS_follow_or_start = pBuilder.createNot(pBuilder.createAdvance(wordChar, 1));
119        //
120        pablo::PabloAST * wordStart = pBuilder.createInFile(pBuilder.createAnd(wordChar, WS_follow_or_start));
121        function->setResultCount(pBuilder.createCount("wordCount", wordStart));
122    }
123    if (CountChars) {
124        //
125        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
126        // not UTF-8, or is not valid?
127        //
128        pablo::PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
129        function->setResultCount(pBuilder.createCount("charCount", u8Begin));
130    }
131    return function;
132}
133
134using namespace kernel;
135using namespace parabix;
136
137
138Function * wcPipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder, pablo::PabloFunction * function) {
139    Type * mBitBlockType = iBuilder->getBitBlockType();
140   
141    ExternalUnboundedBuffer ByteStream(iBuilder, StreamSetType(1, i8));
142    SingleBlockBuffer BasisBits(iBuilder, StreamSetType(8, i1));
143   
144    s2pKernel  s2pk(iBuilder);
145    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
146   
147    pablo_function_passes(function);
148    pablo::PabloKernel  wck(iBuilder, "wc", function, {"lineCount", "wordCount", "charCount"});
149   
150    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
151   
152    s2pk.addKernelDeclarations(mMod);
153    wck.addKernelDeclarations(mMod);
154
155    Constant * record_counts_routine;
156    Type * const size_ty = iBuilder->getSizeTy();
157    Type * const voidTy = Type::getVoidTy(mMod->getContext());
158    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
159    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
160   
161    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
162    main->setCallingConv(CallingConv::C);
163    Function::arg_iterator args = main->arg_begin();
164   
165    Value * const inputStream = &*(args++);
166    inputStream->setName("input");
167    Value * const fileSize = &*(args++);
168    fileSize->setName("fileSize");
169    Value * const fileIdx = &*(args++);
170    fileIdx->setName("fileIdx");
171   
172    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
173
174    ByteStream.setStreamSetBuffer(inputStream);
175    BasisBits.allocateBuffer();
176   
177    Value * s2pInstance = s2pk.createInstance({});
178    Value * wcInstance = wck.createInstance({});
179   
180    generatePipelineLoop(iBuilder, {&s2pk, &wck}, {s2pInstance, wcInstance}, fileSize);
181   
182    Value * lineCount = wck.createGetAccumulatorCall(wcInstance, "lineCount");
183    Value * wordCount = wck.createGetAccumulatorCall(wcInstance, "wordCount");
184    Value * charCount = wck.createGetAccumulatorCall(wcInstance, "charCount");;
185
186    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
187   
188    iBuilder->CreateRetVoid();
189   
190    Linker L(*mMod);
191    L.linkInModule(std::move(s2pM));
192    L.linkInModule(std::move(wcM));
193   
194    return main;
195}
196
197
198typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
199
200static ExecutionEngine * wcEngine = nullptr;
201
202wcFunctionType wcCodeGen(void) {
203                           
204    Module * M = new Module("wc", getGlobalContext());
205    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
206
207    pablo::PabloFunction * function = wc_gen();
208    llvm::Function * main_IR = wcPipeline(M, idb, function);
209
210    wcEngine = JIT_to_ExecutionEngine(M);
211   
212    wcEngine->finalizeObject();
213
214    delete idb;
215    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
216}
217
218void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
219    std::string fileName = inputFiles[fileIdx];
220    size_t fileSize;
221    char * fileBuffer;
222   
223    const boost::filesystem::path file(fileName);
224    if (exists(file)) {
225        if (is_directory(file)) {
226            return;
227        }
228    } else {
229        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
230        return;
231    }
232   
233    fileSize = file_size(file);
234    boost::iostreams::mapped_file_source mappedFile;
235    if (fileSize == 0) {
236        fileBuffer = nullptr;
237    }
238    else {
239        try {
240            mappedFile.open(fileName);
241        } catch (std::exception &e) {
242            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
243            return;
244        }
245        fileBuffer = const_cast<char *>(mappedFile.data());
246    }
247    fn_ptr(fileBuffer, fileSize, fileIdx);
248
249    mappedFile.close();
250   
251}
252
253
254
255int main(int argc, char *argv[]) {
256    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
257    cl::ParseCommandLineOptions(argc, argv);
258    if (wcOptions.size() == 0) {
259        CountLines = true;
260        CountWords = true;
261        CountBytes = true;
262    }
263    else {
264        CountLines = false;
265        CountWords = false;
266        CountBytes = false;
267        CountChars = false;
268        for (unsigned i = 0; i < wcOptions.size(); i++) {
269            switch (wcOptions[i]) {
270                case WordOption: CountWords = true; break;
271                case LineOption: CountLines = true; break;
272                case CharOption: CountBytes = true; CountChars = false; break;
273                case ByteOption: CountChars = true; CountBytes = false; break;
274            }
275        }
276    }
277   
278   
279    wcFunctionType fn_ptr = wcCodeGen();
280
281    int fileCount = inputFiles.size();
282    lineCount.resize(fileCount);
283    wordCount.resize(fileCount);
284    charCount.resize(fileCount);
285    byteCount.resize(fileCount);
286   
287    for (unsigned i = 0; i < inputFiles.size(); ++i) {
288        wc(fn_ptr, i);
289    }
290   
291    delete wcEngine;
292   
293    size_t maxCount = 0;
294    if (CountLines) maxCount = TotalLines;
295    if (CountWords) maxCount = TotalWords;
296    if (CountChars) maxCount = TotalChars;
297    if (CountBytes) maxCount = TotalBytes;
298   
299    int fieldWidth = std::to_string(maxCount).size() + 1;
300    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
301
302    for (unsigned i = 0; i < inputFiles.size(); ++i) {
303        std::cout << std::setw(fieldWidth-1);
304        if (CountLines) {
305            std::cout << lineCount[i] << std::setw(fieldWidth);
306        }
307        if (CountWords) {
308            std::cout << wordCount[i] << std::setw(fieldWidth);
309        }
310        if (CountChars) {
311            std::cout << charCount[i] << std::setw(fieldWidth);
312        }
313        if (CountBytes) {
314            std::cout << byteCount[i];
315        }
316        std::cout << " " << inputFiles[i] << std::endl;
317    }
318    if (inputFiles.size() > 1) {
319        std::cout << std::setw(fieldWidth-1);
320        if (CountLines) {
321            std::cout << TotalLines << std::setw(fieldWidth);
322        }
323        if (CountWords) {
324            std::cout << TotalWords << std::setw(fieldWidth);
325        }
326        if (CountChars) {
327            std::cout << TotalChars << std::setw(fieldWidth);
328        }
329        if (CountBytes) {
330            std::cout << TotalBytes;
331        }
332        std::cout << " total" << std::endl;
333    }
334
335    return 0;
336}
337
338                       
Note: See TracBrowser for help on using the repository browser.