source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5319

Last change on this file since 5319 was 5319, checked in by cameron, 2 years ago

Bug fix for Pablo scalar outputs; wc

File size: 10.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <boost/filesystem.hpp>
28#include <boost/iostreams/device/mapped_file.hpp>
29
30
31using namespace llvm;
32
33static cl::OptionCategory wcFlags("Command Flags", "wc options");
34
35static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
36
37enum CountOptions {
38    LineOption, WordOption, CharOption, ByteOption
39};
40
41static cl::list<CountOptions> wcOptions(
42  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
43             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
44             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
45             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
46             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
47                                                 
48
49
//  Minimum width of each numeric column in the report.
static int defaultFieldWidth = 7;

//  Which counts are enabled.  These are set in main() from the command-line
//  options and are read by wc_gen() and by the report-printing code.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

//  Per-file results, indexed by the position of the file in inputFiles.
//  Sized in main(); filled in by record_counts().
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

//  Running totals over all files processed so far; updated by record_counts().
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;
67
68using namespace pablo;
69using namespace kernel;
70using namespace parabix;
71
72//  The callback routine that records counts in progress.
73//
74extern "C" {
75    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
76        lineCount[fileIdx] = lines;
77        wordCount[fileIdx] = words;
78        charCount[fileIdx] = chars;
79        byteCount[fileIdx] = bytes;
80        TotalLines += lines;
81        TotalWords += words;
82        TotalChars += chars;
83        TotalBytes += bytes;
84    }
85}
86
87//
88//
89
90void wc_gen(PabloKernel * kernel) {
91    //  input: 8 basis bit streams
92    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
93    //  output: 3 counters
94   
95    cc::CC_Compiler ccc(kernel, u8bitSet);
96   
97    PabloBuilder & pb = ccc.getBuilder();
98
99    Var * lc = kernel->getOutputScalarVar("lineCount");
100    Var * wc = kernel->getOutputScalarVar("wordCount");
101    Var * cc = kernel->getOutputScalarVar("charCount");
102
103    if (CountLines) {
104        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
105        pb.createAssign(lc, pb.createCount(LF));
106    }
107    if (CountWords) {
108        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
109        PabloAST * wordChar = pb.createNot(WS);
110        // WS_follow_or_start = 1 past WS or at start of file
111        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
112        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
113        pb.createAssign(wc, pb.createCount(wordStart));
114    }
115    if (CountChars) {
116        //
117        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
118        // not UTF-8, or is not valid?
119        //
120        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
121        pb.createAssign(cc, pb.createCount(u8Begin));
122    }
123}
124
125Function * pipeline(Module * m, IDISA::IDISA_Builder * iBuilder) {
126    Type * mBitBlockType = iBuilder->getBitBlockType();
127    Constant * record_counts_routine;
128    Type * const size_ty = iBuilder->getSizeTy();
129    Type * const voidTy = iBuilder->getVoidTy();
130    record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
131    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
132   
133    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
134    main->setCallingConv(CallingConv::C);
135    Function::arg_iterator args = main->arg_begin();
136   
137    Value * const inputStream = &*(args++);
138    inputStream->setName("input");
139    Value * const fileSize = &*(args++);
140    fileSize->setName("fileSize");
141    Value * const fileIdx = &*(args++);
142    fileIdx->setName("fileIdx");
143   
144    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
145
146    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
147   
148    MMapSourceKernel mmapK(iBuilder);
149    std::unique_ptr<Module> mmapM = mmapK.createKernelModule({}, {&ByteStream});
150    mmapK.setInitialArguments({fileSize});
151   
152    S2PKernel  s2pk(iBuilder);
153    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
154   
155    PabloKernel wck(iBuilder, "wc",
156        {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
157        {},
158        {},
159        {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
160
161    wc_gen(&wck);
162    pablo_function_passes(&wck);
163   
164    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
165   
166    mmapK.addKernelDeclarations(m);
167    s2pk.addKernelDeclarations(m);
168    wck.addKernelDeclarations(m);
169   
170    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
171
172    ByteStream.setStreamSetBuffer(inputStream, fileSize);
173    BasisBits.allocateBuffer();
174   
175    generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &wck});
176   
177    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
178    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
179    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
180
181    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
182   
183    iBuilder->CreateRetVoid();
184   
185    Linker L(*m);
186    L.linkInModule(std::move(mmapM));
187    L.linkInModule(std::move(s2pM));
188    L.linkInModule(std::move(wcM));
189   
190    return main;
191}
192
193
194typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
195
196static ExecutionEngine * wcEngine = nullptr;
197
198wcFunctionType wcCodeGen(void) { 
199    Module * M = new Module("wc", getGlobalContext());
200    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
201
202    llvm::Function * main_IR = pipeline(M, idb);
203
204    wcEngine = JIT_to_ExecutionEngine(M);
205   
206    wcEngine->finalizeObject();
207
208    delete idb;
209    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
210}
211
212void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
213    std::string fileName = inputFiles[fileIdx];
214    size_t fileSize;
215    char * fileBuffer;
216   
217    const boost::filesystem::path file(fileName);
218    if (exists(file)) {
219        if (is_directory(file)) {
220            return;
221        }
222    } else {
223        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
224        return;
225    }
226   
227    fileSize = file_size(file);
228    boost::iostreams::mapped_file_source mappedFile;
229    if (fileSize == 0) {
230        fileBuffer = nullptr;
231    }
232    else {
233        try {
234            mappedFile.open(fileName);
235        } catch (std::exception &e) {
236            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
237            return;
238        }
239        fileBuffer = const_cast<char *>(mappedFile.data());
240    }
241    fn_ptr(fileBuffer, fileSize, fileIdx);
242
243    mappedFile.close();
244   
245}
246
247
248
249int main(int argc, char *argv[]) {
250    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
251    cl::ParseCommandLineOptions(argc, argv);
252    if (wcOptions.size() == 0) {
253        CountLines = true;
254        CountWords = true;
255        CountBytes = true;
256    }
257    else {
258        CountLines = false;
259        CountWords = false;
260        CountBytes = false;
261        CountChars = false;
262        for (unsigned i = 0; i < wcOptions.size(); i++) {
263            switch (wcOptions[i]) {
264                case WordOption: CountWords = true; break;
265                case LineOption: CountLines = true; break;
266                case CharOption: CountBytes = true; CountChars = false; break;
267                case ByteOption: CountChars = true; CountBytes = false; break;
268            }
269        }
270    }
271   
272   
273    wcFunctionType fn_ptr = wcCodeGen();
274
275    int fileCount = inputFiles.size();
276    lineCount.resize(fileCount);
277    wordCount.resize(fileCount);
278    charCount.resize(fileCount);
279    byteCount.resize(fileCount);
280   
281    for (unsigned i = 0; i < inputFiles.size(); ++i) {
282        wc(fn_ptr, i);
283    }
284   
285    size_t maxCount = 0;
286    if (CountLines) maxCount = TotalLines;
287    if (CountWords) maxCount = TotalWords;
288    if (CountChars) maxCount = TotalChars;
289    if (CountBytes) maxCount = TotalBytes;
290   
291    int fieldWidth = std::to_string(maxCount).size() + 1;
292    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
293
294    for (unsigned i = 0; i < inputFiles.size(); ++i) {
295        std::cout << std::setw(fieldWidth-1);
296        if (CountLines) {
297            std::cout << lineCount[i] << std::setw(fieldWidth);
298        }
299        if (CountWords) {
300            std::cout << wordCount[i] << std::setw(fieldWidth);
301        }
302        if (CountChars) {
303            std::cout << charCount[i] << std::setw(fieldWidth);
304        }
305        if (CountBytes) {
306            std::cout << byteCount[i];
307        }
308        std::cout << " " << inputFiles[i] << std::endl;
309    }
310    if (inputFiles.size() > 1) {
311        std::cout << std::setw(fieldWidth-1);
312        if (CountLines) {
313            std::cout << TotalLines << std::setw(fieldWidth);
314        }
315        if (CountWords) {
316            std::cout << TotalWords << std::setw(fieldWidth);
317        }
318        if (CountChars) {
319            std::cout << TotalChars << std::setw(fieldWidth);
320        }
321        if (CountBytes) {
322            std::cout << TotalBytes;
323        }
324        std::cout << " total" << std::endl;
325    }
326
327    return 0;
328}
Note: See TracBrowser for help on using the repository browser.