source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5300

Last change on this file since 5300 was 5300, checked in by cameron, 3 years ago

getScalarOutput for Pablo kernels; use in wc

File size: 10.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <boost/filesystem.hpp>
28#include <boost/iostreams/device/mapped_file.hpp>
29
30
31using namespace llvm;
32
33static cl::OptionCategory wcFlags("Command Flags", "wc options");
34
35static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
36
37enum CountOptions {
38    LineOption, WordOption, CharOption, ByteOption
39};
40
41static cl::list<CountOptions> wcOptions(
42  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
43             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
44             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
45             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
46             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
47                                                 
48
49
// Minimum width of each printed count column.
static int defaultFieldWidth{7};

// Which counts were requested; set in main() from the command line options.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in the input file list.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals over every file processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
67
68using namespace pablo;
69using namespace kernel;
70using namespace parabix;
71
72//  The callback routine that records counts in progress.
73//
74extern "C" {
75    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
76        lineCount[fileIdx] = lines;
77        wordCount[fileIdx] = words;
78        charCount[fileIdx] = chars;
79        byteCount[fileIdx] = bytes;
80        TotalLines += lines;
81        TotalWords += words;
82        TotalChars += chars;
83        TotalBytes += bytes;
84    }
85}
86
87//
88//
89
90void wc_gen(PabloKernel * kernel) {
91    //  input: 8 basis bit streams
92    const auto u8bitSet = kernel->getInputSet("u8bit");
93    //  output: 3 counters
94   
95    cc::CC_Compiler ccc(kernel, u8bitSet);
96   
97    PabloBuilder & pb = ccc.getBuilder();
98
99    Var * lc = kernel->getScalarOutput("lineCount");
100    Var * wc = kernel->getScalarOutput("wordCount");
101    Var * cc = kernel->getScalarOutput("charCount");
102
103    if (CountLines) {
104        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
105        pb.createAssign(lc, pb.createCount(LF));
106    }
107    if (CountWords) {
108        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
109        PabloAST * wordChar = pb.createNot(WS);
110        // WS_follow_or_start = 1 past WS or at start of file
111        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
112        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
113        pb.createAssign(wc, pb.createCount(wordStart));
114    }
115    if (CountChars) {
116        //
117        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
118        // not UTF-8, or is not valid?
119        //
120        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
121        pb.createAssign(cc, pb.createCount(u8Begin));
122    }
123}
124
125Function * pipeline(Module * m, IDISA::IDISA_Builder * iBuilder) {
126    Type * mBitBlockType = iBuilder->getBitBlockType();
127    Constant * record_counts_routine;
128    Type * const size_ty = iBuilder->getSizeTy();
129    Type * const voidTy = iBuilder->getVoidTy();
130    record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
131    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
132   
133    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
134    main->setCallingConv(CallingConv::C);
135    Function::arg_iterator args = main->arg_begin();
136   
137    Value * const inputStream = &*(args++);
138    inputStream->setName("input");
139    Value * const fileSize = &*(args++);
140    fileSize->setName("fileSize");
141    Value * const fileIdx = &*(args++);
142    fileIdx->setName("fileIdx");
143   
144    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
145
146    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
147   
148    MMapSourceKernel mmapK(iBuilder);
149    std::unique_ptr<Module> mmapM = mmapK.createKernelModule({}, {&ByteStream});
150    mmapK.setInitialArguments({fileSize});
151   
152    S2PKernel  s2pk(iBuilder);
153    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
154   
155    PabloKernel wck(iBuilder, "wc", {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}}, {}, 
156                      {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
157
158    wc_gen(&wck);
159    pablo_function_passes(&wck);
160   
161    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
162   
163    mmapK.addKernelDeclarations(m);
164    s2pk.addKernelDeclarations(m);
165    wck.addKernelDeclarations(m);
166   
167    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
168
169    ByteStream.setStreamSetBuffer(inputStream, fileSize);
170    BasisBits.allocateBuffer();
171   
172    generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &wck});
173   
174    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
175    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
176    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
177
178    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
179   
180    iBuilder->CreateRetVoid();
181   
182    Linker L(*m);
183    L.linkInModule(std::move(mmapM));
184    L.linkInModule(std::move(s2pM));
185    L.linkInModule(std::move(wcM));
186   
187    return main;
188}
189
190
191typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
192
193static ExecutionEngine * wcEngine = nullptr;
194
195wcFunctionType wcCodeGen(void) { 
196    Module * M = new Module("wc", getGlobalContext());
197    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
198
199    llvm::Function * main_IR = pipeline(M, idb);
200
201    wcEngine = JIT_to_ExecutionEngine(M);
202   
203    wcEngine->finalizeObject();
204
205    delete idb;
206    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
207}
208
209void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
210    std::string fileName = inputFiles[fileIdx];
211    size_t fileSize;
212    char * fileBuffer;
213   
214    const boost::filesystem::path file(fileName);
215    if (exists(file)) {
216        if (is_directory(file)) {
217            return;
218        }
219    } else {
220        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
221        return;
222    }
223   
224    fileSize = file_size(file);
225    boost::iostreams::mapped_file_source mappedFile;
226    if (fileSize == 0) {
227        fileBuffer = nullptr;
228    }
229    else {
230        try {
231            mappedFile.open(fileName);
232        } catch (std::exception &e) {
233            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
234            return;
235        }
236        fileBuffer = const_cast<char *>(mappedFile.data());
237    }
238    fn_ptr(fileBuffer, fileSize, fileIdx);
239
240    mappedFile.close();
241   
242}
243
244
245
246int main(int argc, char *argv[]) {
247    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
248    cl::ParseCommandLineOptions(argc, argv);
249    if (wcOptions.size() == 0) {
250        CountLines = true;
251        CountWords = true;
252        CountBytes = true;
253    }
254    else {
255        CountLines = false;
256        CountWords = false;
257        CountBytes = false;
258        CountChars = false;
259        for (unsigned i = 0; i < wcOptions.size(); i++) {
260            switch (wcOptions[i]) {
261                case WordOption: CountWords = true; break;
262                case LineOption: CountLines = true; break;
263                case CharOption: CountBytes = true; CountChars = false; break;
264                case ByteOption: CountChars = true; CountBytes = false; break;
265            }
266        }
267    }
268   
269   
270    wcFunctionType fn_ptr = wcCodeGen();
271
272    int fileCount = inputFiles.size();
273    lineCount.resize(fileCount);
274    wordCount.resize(fileCount);
275    charCount.resize(fileCount);
276    byteCount.resize(fileCount);
277   
278    for (unsigned i = 0; i < inputFiles.size(); ++i) {
279        wc(fn_ptr, i);
280    }
281   
282    size_t maxCount = 0;
283    if (CountLines) maxCount = TotalLines;
284    if (CountWords) maxCount = TotalWords;
285    if (CountChars) maxCount = TotalChars;
286    if (CountBytes) maxCount = TotalBytes;
287   
288    int fieldWidth = std::to_string(maxCount).size() + 1;
289    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
290
291    for (unsigned i = 0; i < inputFiles.size(); ++i) {
292        std::cout << std::setw(fieldWidth-1);
293        if (CountLines) {
294            std::cout << lineCount[i] << std::setw(fieldWidth);
295        }
296        if (CountWords) {
297            std::cout << wordCount[i] << std::setw(fieldWidth);
298        }
299        if (CountChars) {
300            std::cout << charCount[i] << std::setw(fieldWidth);
301        }
302        if (CountBytes) {
303            std::cout << byteCount[i];
304        }
305        std::cout << " " << inputFiles[i] << std::endl;
306    }
307    if (inputFiles.size() > 1) {
308        std::cout << std::setw(fieldWidth-1);
309        if (CountLines) {
310            std::cout << TotalLines << std::setw(fieldWidth);
311        }
312        if (CountWords) {
313            std::cout << TotalWords << std::setw(fieldWidth);
314        }
315        if (CountChars) {
316            std::cout << TotalChars << std::setw(fieldWidth);
317        }
318        if (CountBytes) {
319            std::cout << TotalBytes;
320        }
321        std::cout << " total" << std::endl;
322    }
323
324    return 0;
325}
Note: See TracBrowser for help on using the repository browser.