source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5409

Last change on this file since 5409 was 5409, checked in by cameron, 2 years ago

Parabix driver can take ownership and allocate buffers

File size: 10.3 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <kernels/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <boost/filesystem.hpp>
28#include <boost/iostreams/device/mapped_file.hpp>
29
30
31using namespace llvm;
32
33static cl::OptionCategory wcFlags("Command Flags", "wc options");
34
35static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
36
37enum CountOptions {
38    LineOption, WordOption, CharOption, ByteOption
39};
40
41static cl::list<CountOptions> wcOptions(
42  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
43             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
44             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
45             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
46             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
47                                                 
48
49
//  Minimum width of a count column in the printed report.
static int defaultFieldWidth = 7;

//  Which counts are reported; set by main() from the command line and read
//  by wc_gen() when the counting kernel is built.
bool CountLines = false;
bool CountWords = false;
bool CountChars = false;
bool CountBytes = false;

//  Running totals over all processed files, accumulated by record_counts().
uint64_t TotalLines = 0;
uint64_t TotalWords = 0;
uint64_t TotalChars = 0;
uint64_t TotalBytes = 0;

//  Per-file results, indexed by the file's position in inputFiles.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;
67
68using namespace pablo;
69using namespace kernel;
70using namespace parabix;
71
72//  The callback routine that records counts in progress.
73//
74extern "C" {
75    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
76        lineCount[fileIdx] = lines;
77        wordCount[fileIdx] = words;
78        charCount[fileIdx] = chars;
79        byteCount[fileIdx] = bytes;
80        TotalLines += lines;
81        TotalWords += words;
82        TotalChars += chars;
83        TotalBytes += bytes;
84    }
85}
86
87//
88//
89
90void wc_gen(PabloKernel * kernel) {
91    //  input: 8 basis bit streams
92    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
93    //  output: 3 counters
94   
95    cc::CC_Compiler ccc(kernel, u8bitSet);
96   
97    PabloBuilder & pb = ccc.getBuilder();
98
99    Var * lc = kernel->getOutputScalarVar("lineCount");
100    Var * wc = kernel->getOutputScalarVar("wordCount");
101    Var * cc = kernel->getOutputScalarVar("charCount");
102
103    if (CountLines) {
104        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
105        pb.createAssign(lc, pb.createCount(LF));
106    }
107    if (CountWords) {
108        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
109        PabloAST * wordChar = pb.createNot(WS);
110        // WS_follow_or_start = 1 past WS or at start of file
111        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
112        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
113        pb.createAssign(wc, pb.createCount(wordStart));
114    }
115    if (CountChars) {
116        //
117        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
118        // not UTF-8, or is not valid?
119        //
120        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
121        pb.createAssign(cc, pb.createCount(u8Begin));
122    }
123}
124
125
126
127
//  Signature through which the JIT-compiled "Main" function is called.
using wcFunctionType = void (*)(char * byte_data, size_t filesize, size_t fileIdx);
129
130void wcPipelineGen(ParabixDriver & pxDriver) {
131
132    IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
133    Module * m = iBuilder->getModule();
134   
135    Type * mBitBlockType = iBuilder->getBitBlockType();
136    Constant * record_counts_routine;
137    Type * const size_ty = iBuilder->getSizeTy();
138    Type * const voidTy = iBuilder->getVoidTy();
139    record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
140    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
141   
142    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
143    main->setCallingConv(CallingConv::C);
144    Function::arg_iterator args = main->arg_begin();
145   
146    Value * const inputStream = &*(args++);
147    inputStream->setName("input");
148    Value * const fileSize = &*(args++);
149    fileSize->setName("fileSize");
150    Value * const fileIdx = &*(args++);
151    fileIdx->setName("fileIdx");
152    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
153
154    StreamSetBuffer * ByteStream = pxDriver.addExternalBuffer(make_unique<ExternalFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)), inputStream);
155
156    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
157
158    MMapSourceKernel mmapK(iBuilder);
159    mmapK.setInitialArguments({fileSize});
160    pxDriver.addKernelCall(mmapK, {}, {ByteStream});
161
162    S2PKernel  s2pk(iBuilder);
163    pxDriver.addKernelCall(s2pk, {ByteStream}, {BasisBits});
164   
165    PabloKernel wck(iBuilder, "Parabix:wc",
166        {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
167        {},
168        {},
169        {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
170
171    wc_gen(&wck);
172    pablo_function_passes(&wck);
173    pxDriver.addKernelCall(wck, {BasisBits}, {});
174
175
176    pxDriver.generatePipelineIR();
177   
178    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
179    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
180    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
181
182    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
183   
184    iBuilder->CreateRetVoid();
185
186    pxDriver.linkAndFinalize();
187}
188
189
190wcFunctionType wcCodeGen(void) {
191    Module * M = new Module("wc", getGlobalContext());
192    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
193    ParabixDriver pxDriver(idb);
194   
195    wcPipelineGen(pxDriver);
196
197    wcFunctionType main = reinterpret_cast<wcFunctionType>(pxDriver.getPointerToMain());
198    delete idb;
199    return main;
200}
201
202void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
203    std::string fileName = inputFiles[fileIdx];
204    size_t fileSize;
205    char * fileBuffer;
206   
207    const boost::filesystem::path file(fileName);
208    if (exists(file)) {
209        if (is_directory(file)) {
210            return;
211        }
212    } else {
213        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
214        return;
215    }
216   
217    fileSize = file_size(file);
218    boost::iostreams::mapped_file_source mappedFile;
219    if (fileSize == 0) {
220        fileBuffer = nullptr;
221    }
222    else {
223        try {
224            mappedFile.open(fileName);
225        } catch (std::exception &e) {
226            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
227            return;
228        }
229        fileBuffer = const_cast<char *>(mappedFile.data());
230    }
231    fn_ptr(fileBuffer, fileSize, fileIdx);
232
233    mappedFile.close();
234   
235}
236
237
238
239int main(int argc, char *argv[]) {
240    AddParabixVersionPrinter();
241    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
242    cl::ParseCommandLineOptions(argc, argv);
243    if (wcOptions.size() == 0) {
244        CountLines = true;
245        CountWords = true;
246        CountBytes = true;
247    }
248    else {
249        CountLines = false;
250        CountWords = false;
251        CountBytes = false;
252        CountChars = false;
253        for (unsigned i = 0; i < wcOptions.size(); i++) {
254            switch (wcOptions[i]) {
255                case WordOption: CountWords = true; break;
256                case LineOption: CountLines = true; break;
257                case CharOption: CountBytes = true; CountChars = false; break;
258                case ByteOption: CountChars = true; CountBytes = false; break;
259            }
260        }
261    }
262   
263   
264    wcFunctionType fn_ptr = wcCodeGen();
265
266    int fileCount = inputFiles.size();
267    lineCount.resize(fileCount);
268    wordCount.resize(fileCount);
269    charCount.resize(fileCount);
270    byteCount.resize(fileCount);
271   
272    for (unsigned i = 0; i < inputFiles.size(); ++i) {
273        wc(fn_ptr, i);
274    }
275   
276    size_t maxCount = 0;
277    if (CountLines) maxCount = TotalLines;
278    if (CountWords) maxCount = TotalWords;
279    if (CountChars) maxCount = TotalChars;
280    if (CountBytes) maxCount = TotalBytes;
281   
282    int fieldWidth = std::to_string(maxCount).size() + 1;
283    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
284
285    for (unsigned i = 0; i < inputFiles.size(); ++i) {
286        std::cout << std::setw(fieldWidth-1);
287        if (CountLines) {
288            std::cout << lineCount[i] << std::setw(fieldWidth);
289        }
290        if (CountWords) {
291            std::cout << wordCount[i] << std::setw(fieldWidth);
292        }
293        if (CountChars) {
294            std::cout << charCount[i] << std::setw(fieldWidth);
295        }
296        if (CountBytes) {
297            std::cout << byteCount[i];
298        }
299        std::cout << " " << inputFiles[i] << std::endl;
300    }
301    if (inputFiles.size() > 1) {
302        std::cout << std::setw(fieldWidth-1);
303        if (CountLines) {
304            std::cout << TotalLines << std::setw(fieldWidth);
305        }
306        if (CountWords) {
307            std::cout << TotalWords << std::setw(fieldWidth);
308        }
309        if (CountChars) {
310            std::cout << TotalChars << std::setw(fieldWidth);
311        }
312        if (CountBytes) {
313            std::cout << TotalBytes;
314        }
315        std::cout << " total" << std::endl;
316    }
317
318    return 0;
319}
Note: See TracBrowser for help on using the repository browser.