source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5310

Last change on this file since 5310 was 5310, checked in by nmedfort, 2 years ago

Adjusted pablo compiler to use getInputStream and getOutputStream when accessing packed stream fields.

File size: 10.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <boost/filesystem.hpp>
28#include <boost/iostreams/device/mapped_file.hpp>
29
30
31using namespace llvm;
32
33static cl::OptionCategory wcFlags("Command Flags", "wc options");
34
35static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
36
37enum CountOptions {
38    LineOption, WordOption, CharOption, ByteOption
39};
40
41static cl::list<CountOptions> wcOptions(
42  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
43             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
44             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
45             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
46             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
47                                                 
48
49
// Smallest column width used when printing the report; main() widens it when
// the largest count needs more room.
static int defaultFieldWidth = 7;

// For each metric: whether it was requested, the per-file results (indexed by
// position in inputFiles and filled in by record_counts), and the running
// total over all files.

// Lines.
bool CountLines = false;
std::vector<uint64_t> lineCount;
uint64_t TotalLines = 0;

// Words.
bool CountWords = false;
std::vector<uint64_t> wordCount;
uint64_t TotalWords = 0;

// Characters.
bool CountChars = false;
std::vector<uint64_t> charCount;
uint64_t TotalChars = 0;

// Bytes.
bool CountBytes = false;
std::vector<uint64_t> byteCount;
uint64_t TotalBytes = 0;
67
68using namespace pablo;
69using namespace kernel;
70using namespace parabix;
71
72//  The callback routine that records counts in progress.
73//
74extern "C" {
75    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
76        lineCount[fileIdx] = lines;
77        wordCount[fileIdx] = words;
78        charCount[fileIdx] = chars;
79        byteCount[fileIdx] = bytes;
80        TotalLines += lines;
81        TotalWords += words;
82        TotalChars += chars;
83        TotalBytes += bytes;
84    }
85}
86
87//
88//
89
90void wc_gen(PabloKernel * kernel) {
91    //  input: 8 basis bit streams
92    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
93    //  output: 3 counters
94   
95    cc::CC_Compiler ccc(kernel, u8bitSet);
96   
97    PabloBuilder & pb = ccc.getBuilder();
98
99    Var * lc = kernel->getOutputScalarVar("lineCount");
100    Var * wc = kernel->getOutputScalarVar("wordCount");
101    Var * cc = kernel->getOutputScalarVar("charCount");
102
103    if (CountLines) {
104        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
105        pb.createAssign(lc, pb.createCount(LF));
106    }
107    if (CountWords) {
108        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
109        PabloAST * wordChar = pb.createNot(WS);
110        // WS_follow_or_start = 1 past WS or at start of file
111        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
112        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
113        pb.createAssign(wc, pb.createCount(wordStart));
114    }
115    if (CountChars) {
116        //
117        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
118        // not UTF-8, or is not valid?
119        //
120        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
121        pb.createAssign(cc, pb.createCount(u8Begin));
122    }
123}
124
125Function * pipeline(Module * m, IDISA::IDISA_Builder * iBuilder) {
126    Type * mBitBlockType = iBuilder->getBitBlockType();
127    Constant * record_counts_routine;
128    Type * const size_ty = iBuilder->getSizeTy();
129    Type * const voidTy = iBuilder->getVoidTy();
130    record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
131    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
132   
133    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
134    main->setCallingConv(CallingConv::C);
135    Function::arg_iterator args = main->arg_begin();
136   
137    Value * const inputStream = &*(args++);
138    inputStream->setName("input");
139    Value * const fileSize = &*(args++);
140    fileSize->setName("fileSize");
141    Value * const fileIdx = &*(args++);
142    fileIdx->setName("fileIdx");
143   
144    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
145
146    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
147   
148    MMapSourceKernel mmapK(iBuilder);
149    std::unique_ptr<Module> mmapM = mmapK.createKernelModule({}, {&ByteStream});
150    mmapK.setInitialArguments({fileSize});
151   
152    S2PKernel  s2pk(iBuilder);
153    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
154   
155    PabloKernel wck(iBuilder, "wc",
156        {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
157        {},
158        {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
159
160    wc_gen(&wck);
161    pablo_function_passes(&wck);
162   
163    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
164   
165    mmapK.addKernelDeclarations(m);
166    s2pk.addKernelDeclarations(m);
167    wck.addKernelDeclarations(m);
168   
169    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
170
171    ByteStream.setStreamSetBuffer(inputStream, fileSize);
172    BasisBits.allocateBuffer();
173   
174    generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &wck});
175   
176    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
177    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
178    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
179
180    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
181   
182    iBuilder->CreateRetVoid();
183   
184    Linker L(*m);
185    L.linkInModule(std::move(mmapM));
186    L.linkInModule(std::move(s2pM));
187    L.linkInModule(std::move(wcM));
188   
189    return main;
190}
191
192
193typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
194
195static ExecutionEngine * wcEngine = nullptr;
196
197wcFunctionType wcCodeGen(void) { 
198    Module * M = new Module("wc", getGlobalContext());
199    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
200
201    llvm::Function * main_IR = pipeline(M, idb);
202
203    wcEngine = JIT_to_ExecutionEngine(M);
204   
205    wcEngine->finalizeObject();
206
207    delete idb;
208    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
209}
210
211void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
212    std::string fileName = inputFiles[fileIdx];
213    size_t fileSize;
214    char * fileBuffer;
215   
216    const boost::filesystem::path file(fileName);
217    if (exists(file)) {
218        if (is_directory(file)) {
219            return;
220        }
221    } else {
222        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
223        return;
224    }
225   
226    fileSize = file_size(file);
227    boost::iostreams::mapped_file_source mappedFile;
228    if (fileSize == 0) {
229        fileBuffer = nullptr;
230    }
231    else {
232        try {
233            mappedFile.open(fileName);
234        } catch (std::exception &e) {
235            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
236            return;
237        }
238        fileBuffer = const_cast<char *>(mappedFile.data());
239    }
240    fn_ptr(fileBuffer, fileSize, fileIdx);
241
242    mappedFile.close();
243   
244}
245
246
247
248int main(int argc, char *argv[]) {
249    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
250    cl::ParseCommandLineOptions(argc, argv);
251    if (wcOptions.size() == 0) {
252        CountLines = true;
253        CountWords = true;
254        CountBytes = true;
255    }
256    else {
257        CountLines = false;
258        CountWords = false;
259        CountBytes = false;
260        CountChars = false;
261        for (unsigned i = 0; i < wcOptions.size(); i++) {
262            switch (wcOptions[i]) {
263                case WordOption: CountWords = true; break;
264                case LineOption: CountLines = true; break;
265                case CharOption: CountBytes = true; CountChars = false; break;
266                case ByteOption: CountChars = true; CountBytes = false; break;
267            }
268        }
269    }
270   
271   
272    wcFunctionType fn_ptr = wcCodeGen();
273
274    int fileCount = inputFiles.size();
275    lineCount.resize(fileCount);
276    wordCount.resize(fileCount);
277    charCount.resize(fileCount);
278    byteCount.resize(fileCount);
279   
280    for (unsigned i = 0; i < inputFiles.size(); ++i) {
281        wc(fn_ptr, i);
282    }
283   
284    size_t maxCount = 0;
285    if (CountLines) maxCount = TotalLines;
286    if (CountWords) maxCount = TotalWords;
287    if (CountChars) maxCount = TotalChars;
288    if (CountBytes) maxCount = TotalBytes;
289   
290    int fieldWidth = std::to_string(maxCount).size() + 1;
291    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
292
293    for (unsigned i = 0; i < inputFiles.size(); ++i) {
294        std::cout << std::setw(fieldWidth-1);
295        if (CountLines) {
296            std::cout << lineCount[i] << std::setw(fieldWidth);
297        }
298        if (CountWords) {
299            std::cout << wordCount[i] << std::setw(fieldWidth);
300        }
301        if (CountChars) {
302            std::cout << charCount[i] << std::setw(fieldWidth);
303        }
304        if (CountBytes) {
305            std::cout << byteCount[i];
306        }
307        std::cout << " " << inputFiles[i] << std::endl;
308    }
309    if (inputFiles.size() > 1) {
310        std::cout << std::setw(fieldWidth-1);
311        if (CountLines) {
312            std::cout << TotalLines << std::setw(fieldWidth);
313        }
314        if (CountWords) {
315            std::cout << TotalWords << std::setw(fieldWidth);
316        }
317        if (CountChars) {
318            std::cout << TotalChars << std::setw(fieldWidth);
319        }
320        if (CountBytes) {
321            std::cout << TotalBytes;
322        }
323        std::cout << " total" << std::endl;
324    }
325
326    return 0;
327}
Note: See TracBrowser for help on using the repository browser.