source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5142

Last change on this file since 5142 was 5142, checked in by cameron, 3 years ago

ExternalFileBuffer, SingleBlockBuffer fixes

File size: 10.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
13
14#include <toolchain.h>
15#include <pablo/pablo_toolchain.h>
16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
20#include "llvm/Linker/Linker.h"
21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
27#include <pablo/function.h>
28#include <pablo/pablo_kernel.h>
29#include <IDISA/idisa_builder.h>
30#include <IDISA/idisa_target.h>
31#include <kernels/streamset.h>
32#include <kernels/interface.h>
33#include <kernels/kernel.h>
34#include <kernels/s2p_kernel.h>
35#include <kernels/pipeline.h>
36
37#include <pablo/pablo_compiler.h>
38#include <pablo/pablo_toolchain.h>
39
40// mmap system
41#include <boost/filesystem.hpp>
42#include <boost/iostreams/device/mapped_file.hpp>
43
44#include <fcntl.h>
45static cl::OptionCategory wcFlags("Command Flags", "wc options");
46
47static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
48
49enum CountOptions {
50    LineOption, WordOption, CharOption, ByteOption
51};
52
53static cl::list<CountOptions> wcOptions(
54  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
55             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
56             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
57             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
58             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
59                                                 
60
61
// Minimum column width used when printing counts.
static int defaultFieldWidth{7};

// Which counts were requested; assigned in main() from the command line.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by file position; filled in by record_counts().
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals over all files reported through record_counts().
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
79
80
81//  The callback routine that records counts in progress.
82//
83extern "C" {
84    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
85        lineCount[fileIdx] = lines;
86        wordCount[fileIdx] = words;
87        charCount[fileIdx] = chars;
88        byteCount[fileIdx] = bytes;
89        TotalLines += lines;
90        TotalWords += words;
91        TotalChars += chars;
92        TotalBytes += bytes;
93    }
94}
95
96//
97//
98
99pablo::PabloFunction * wc_gen() {
100    //  input: 8 basis bit streams
101    //  output: 3 counters
102   
103    pablo::PabloFunction * function = pablo::PabloFunction::Create("wc", 8, 0);
104    cc::CC_Compiler ccc(*function);
105   
106    pablo::PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
107    const std::vector<pablo::Var *> u8_bits = ccc.getBasisBits();
108
109    if (CountLines) {
110        pablo::PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
111        function->setResultCount(pBuilder.createCount("lineCount", LF));
112    }
113    if (CountWords) {
114        pablo::PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
115       
116        pablo::PabloAST * wordChar = pBuilder.createNot(WS);
117        // WS_follow_or_start = 1 past WS or at start of file
118        pablo::PabloAST * WS_follow_or_start = pBuilder.createNot(pBuilder.createAdvance(wordChar, 1));
119        //
120        pablo::PabloAST * wordStart = pBuilder.createInFile(pBuilder.createAnd(wordChar, WS_follow_or_start));
121        function->setResultCount(pBuilder.createCount("wordCount", wordStart));
122    }
123    if (CountChars) {
124        //
125        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
126        // not UTF-8, or is not valid?
127        //
128        pablo::PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));
129        function->setResultCount(pBuilder.createCount("charCount", u8Begin));
130    }
131    return function;
132}
133
134using namespace kernel;
135using namespace parabix;
136
137
138Function * wcPipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder, pablo::PabloFunction * function) {
139    Type * mBitBlockType = iBuilder->getBitBlockType();
140   
141    ExternalFileBuffer ByteStream(iBuilder, StreamSetType(1, i8));
142    SingleBlockBuffer BasisBits(iBuilder, StreamSetType(8, i1));
143    //CircularBuffer BasisBits(iBuilder, StreamSetType(8, i1), codegen::SegmentSize * codegen::BufferSegments);
144
145    s2pKernel  s2pk(iBuilder);
146    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
147   
148    pablo_function_passes(function);
149    pablo::PabloKernel  wck(iBuilder, "wc", function, {"lineCount", "wordCount", "charCount"});
150   
151    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
152   
153    s2pk.addKernelDeclarations(mMod);
154    wck.addKernelDeclarations(mMod);
155
156    Constant * record_counts_routine;
157    Type * const size_ty = iBuilder->getSizeTy();
158    Type * const voidTy = Type::getVoidTy(mMod->getContext());
159    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
160    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
161   
162    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
163    main->setCallingConv(CallingConv::C);
164    Function::arg_iterator args = main->arg_begin();
165   
166    Value * const inputStream = &*(args++);
167    inputStream->setName("input");
168    Value * const fileSize = &*(args++);
169    fileSize->setName("fileSize");
170    Value * const fileIdx = &*(args++);
171    fileIdx->setName("fileIdx");
172   
173    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
174
175    ByteStream.setStreamSetBuffer(inputStream, fileSize);
176    BasisBits.allocateBuffer();
177   
178    Value * s2pInstance = s2pk.createInstance({});
179    Value * wcInstance = wck.createInstance({});
180   
181    generatePipelineLoop(iBuilder, {&s2pk, &wck}, {s2pInstance, wcInstance}, fileSize);
182   
183    Value * lineCount = wck.createGetAccumulatorCall(wcInstance, "lineCount");
184    Value * wordCount = wck.createGetAccumulatorCall(wcInstance, "wordCount");
185    Value * charCount = wck.createGetAccumulatorCall(wcInstance, "charCount");;
186
187    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
188   
189    iBuilder->CreateRetVoid();
190   
191    Linker L(*mMod);
192    L.linkInModule(std::move(s2pM));
193    L.linkInModule(std::move(wcM));
194   
195    return main;
196}
197
198
199typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
200
201static ExecutionEngine * wcEngine = nullptr;
202
203wcFunctionType wcCodeGen(void) {
204                           
205    Module * M = new Module("wc", getGlobalContext());
206    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
207
208    pablo::PabloFunction * function = wc_gen();
209    llvm::Function * main_IR = wcPipeline(M, idb, function);
210
211    wcEngine = JIT_to_ExecutionEngine(M);
212   
213    wcEngine->finalizeObject();
214
215    delete idb;
216    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
217}
218
219void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
220    std::string fileName = inputFiles[fileIdx];
221    size_t fileSize;
222    char * fileBuffer;
223   
224    const boost::filesystem::path file(fileName);
225    if (exists(file)) {
226        if (is_directory(file)) {
227            return;
228        }
229    } else {
230        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
231        return;
232    }
233   
234    fileSize = file_size(file);
235    boost::iostreams::mapped_file_source mappedFile;
236    if (fileSize == 0) {
237        fileBuffer = nullptr;
238    }
239    else {
240        try {
241            mappedFile.open(fileName);
242        } catch (std::exception &e) {
243            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
244            return;
245        }
246        fileBuffer = const_cast<char *>(mappedFile.data());
247    }
248    fn_ptr(fileBuffer, fileSize, fileIdx);
249
250    mappedFile.close();
251   
252}
253
254
255
256int main(int argc, char *argv[]) {
257    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
258    cl::ParseCommandLineOptions(argc, argv);
259    if (wcOptions.size() == 0) {
260        CountLines = true;
261        CountWords = true;
262        CountBytes = true;
263    }
264    else {
265        CountLines = false;
266        CountWords = false;
267        CountBytes = false;
268        CountChars = false;
269        for (unsigned i = 0; i < wcOptions.size(); i++) {
270            switch (wcOptions[i]) {
271                case WordOption: CountWords = true; break;
272                case LineOption: CountLines = true; break;
273                case CharOption: CountBytes = true; CountChars = false; break;
274                case ByteOption: CountChars = true; CountBytes = false; break;
275            }
276        }
277    }
278   
279   
280    wcFunctionType fn_ptr = wcCodeGen();
281
282    int fileCount = inputFiles.size();
283    lineCount.resize(fileCount);
284    wordCount.resize(fileCount);
285    charCount.resize(fileCount);
286    byteCount.resize(fileCount);
287   
288    for (unsigned i = 0; i < inputFiles.size(); ++i) {
289        wc(fn_ptr, i);
290    }
291   
292    delete wcEngine;
293   
294    size_t maxCount = 0;
295    if (CountLines) maxCount = TotalLines;
296    if (CountWords) maxCount = TotalWords;
297    if (CountChars) maxCount = TotalChars;
298    if (CountBytes) maxCount = TotalBytes;
299   
300    int fieldWidth = std::to_string(maxCount).size() + 1;
301    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
302
303    for (unsigned i = 0; i < inputFiles.size(); ++i) {
304        std::cout << std::setw(fieldWidth-1);
305        if (CountLines) {
306            std::cout << lineCount[i] << std::setw(fieldWidth);
307        }
308        if (CountWords) {
309            std::cout << wordCount[i] << std::setw(fieldWidth);
310        }
311        if (CountChars) {
312            std::cout << charCount[i] << std::setw(fieldWidth);
313        }
314        if (CountBytes) {
315            std::cout << byteCount[i];
316        }
317        std::cout << " " << inputFiles[i] << std::endl;
318    }
319    if (inputFiles.size() > 1) {
320        std::cout << std::setw(fieldWidth-1);
321        if (CountLines) {
322            std::cout << TotalLines << std::setw(fieldWidth);
323        }
324        if (CountWords) {
325            std::cout << TotalWords << std::setw(fieldWidth);
326        }
327        if (CountChars) {
328            std::cout << TotalChars << std::setw(fieldWidth);
329        }
330        if (CountBytes) {
331            std::cout << TotalBytes;
332        }
333        std::cout << " total" << std::endl;
334    }
335
336    return 0;
337}
338
339                       
Note: See TracBrowser for help on using the repository browser.