source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5402

Last change on this file since 5402 was 5402, checked in by nmedfort, 2 years ago

Moved toolchain and object_cache to kernels directory. Continued work on providing input consumed information.

File size: 10.2 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <kernels/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <boost/filesystem.hpp>
28#include <boost/iostreams/device/mapped_file.hpp>
29
30
31using namespace llvm;
32
33static cl::OptionCategory wcFlags("Command Flags", "wc options");
34
35static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
36
37enum CountOptions {
38    LineOption, WordOption, CharOption, ByteOption
39};
40
41static cl::list<CountOptions> wcOptions(
42  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
43             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
44             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
45             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
46             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
47                                                 
48
49
// Minimum width of the first output column; main() widens it when a total
// needs more digits.
static int defaultFieldWidth{7};

// Which counts were requested; assigned in main() from the command line.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by position in inputFiles and filled in by
// record_counts().
std::vector<uint64_t> lineCount{};
std::vector<uint64_t> wordCount{};
std::vector<uint64_t> charCount{};
std::vector<uint64_t> byteCount{};

// Running sums over every file processed so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
67
68using namespace pablo;
69using namespace kernel;
70using namespace parabix;
71
72//  The callback routine that records counts in progress.
73//
74extern "C" {
75    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
76        lineCount[fileIdx] = lines;
77        wordCount[fileIdx] = words;
78        charCount[fileIdx] = chars;
79        byteCount[fileIdx] = bytes;
80        TotalLines += lines;
81        TotalWords += words;
82        TotalChars += chars;
83        TotalBytes += bytes;
84    }
85}
86
87//
88//
89
90void wc_gen(PabloKernel * kernel) {
91    //  input: 8 basis bit streams
92    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
93    //  output: 3 counters
94   
95    cc::CC_Compiler ccc(kernel, u8bitSet);
96   
97    PabloBuilder & pb = ccc.getBuilder();
98
99    Var * lc = kernel->getOutputScalarVar("lineCount");
100    Var * wc = kernel->getOutputScalarVar("wordCount");
101    Var * cc = kernel->getOutputScalarVar("charCount");
102
103    if (CountLines) {
104        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
105        pb.createAssign(lc, pb.createCount(LF));
106    }
107    if (CountWords) {
108        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
109        PabloAST * wordChar = pb.createNot(WS);
110        // WS_follow_or_start = 1 past WS or at start of file
111        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
112        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
113        pb.createAssign(wc, pb.createCount(wordStart));
114    }
115    if (CountChars) {
116        //
117        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
118        // not UTF-8, or is not valid?
119        //
120        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
121        pb.createAssign(cc, pb.createCount(u8Begin));
122    }
123}
124
125
126
127
// Signature of the JIT-compiled entry point: file data, file size, file index.
using wcFunctionType = void (*)(char * byte_data, size_t filesize, size_t fileIdx);
129
130void wcPipelineGen(ParabixDriver & pxDriver) {
131
132    IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
133    Module * m = iBuilder->getModule();
134   
135    Type * mBitBlockType = iBuilder->getBitBlockType();
136    Constant * record_counts_routine;
137    Type * const size_ty = iBuilder->getSizeTy();
138    Type * const voidTy = iBuilder->getVoidTy();
139    record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
140    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
141   
142    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
143    main->setCallingConv(CallingConv::C);
144    Function::arg_iterator args = main->arg_begin();
145   
146    Value * const inputStream = &*(args++);
147    inputStream->setName("input");
148    Value * const fileSize = &*(args++);
149    fileSize->setName("fileSize");
150    Value * const fileIdx = &*(args++);
151    fileIdx->setName("fileIdx");
152   
153    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
154
155    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
156    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
157
158    MMapSourceKernel mmapK(iBuilder);
159    mmapK.setInitialArguments({fileSize});
160    pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
161
162    S2PKernel  s2pk(iBuilder);
163    pxDriver.addKernelCall(s2pk, {&ByteStream}, {&BasisBits});
164   
165    PabloKernel wck(iBuilder, "Parabix:wc",
166        {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
167        {},
168        {},
169        {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
170
171    wc_gen(&wck);
172    pablo_function_passes(&wck);
173    pxDriver.addKernelCall(wck, {&BasisBits}, {});
174
175    ByteStream.setStreamSetBuffer(inputStream);
176    BasisBits.allocateBuffer();
177
178    pxDriver.generatePipelineIR();
179   
180    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
181    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
182    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
183
184    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
185   
186    iBuilder->CreateRetVoid();
187
188    pxDriver.linkAndFinalize();
189}
190
191
192wcFunctionType wcCodeGen(void) {
193    Module * M = new Module("wc", getGlobalContext());
194    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
195    ParabixDriver pxDriver(idb);
196   
197    wcPipelineGen(pxDriver);
198
199    wcFunctionType main = reinterpret_cast<wcFunctionType>(pxDriver.getPointerToMain());
200    delete idb;
201    return main;
202}
203
204void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
205    std::string fileName = inputFiles[fileIdx];
206    size_t fileSize;
207    char * fileBuffer;
208   
209    const boost::filesystem::path file(fileName);
210    if (exists(file)) {
211        if (is_directory(file)) {
212            return;
213        }
214    } else {
215        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
216        return;
217    }
218   
219    fileSize = file_size(file);
220    boost::iostreams::mapped_file_source mappedFile;
221    if (fileSize == 0) {
222        fileBuffer = nullptr;
223    }
224    else {
225        try {
226            mappedFile.open(fileName);
227        } catch (std::exception &e) {
228            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
229            return;
230        }
231        fileBuffer = const_cast<char *>(mappedFile.data());
232    }
233    fn_ptr(fileBuffer, fileSize, fileIdx);
234
235    mappedFile.close();
236   
237}
238
239
240
241int main(int argc, char *argv[]) {
242    AddParabixVersionPrinter();
243    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
244    cl::ParseCommandLineOptions(argc, argv);
245    if (wcOptions.size() == 0) {
246        CountLines = true;
247        CountWords = true;
248        CountBytes = true;
249    }
250    else {
251        CountLines = false;
252        CountWords = false;
253        CountBytes = false;
254        CountChars = false;
255        for (unsigned i = 0; i < wcOptions.size(); i++) {
256            switch (wcOptions[i]) {
257                case WordOption: CountWords = true; break;
258                case LineOption: CountLines = true; break;
259                case CharOption: CountBytes = true; CountChars = false; break;
260                case ByteOption: CountChars = true; CountBytes = false; break;
261            }
262        }
263    }
264   
265   
266    wcFunctionType fn_ptr = wcCodeGen();
267
268    int fileCount = inputFiles.size();
269    lineCount.resize(fileCount);
270    wordCount.resize(fileCount);
271    charCount.resize(fileCount);
272    byteCount.resize(fileCount);
273   
274    for (unsigned i = 0; i < inputFiles.size(); ++i) {
275        wc(fn_ptr, i);
276    }
277   
278    size_t maxCount = 0;
279    if (CountLines) maxCount = TotalLines;
280    if (CountWords) maxCount = TotalWords;
281    if (CountChars) maxCount = TotalChars;
282    if (CountBytes) maxCount = TotalBytes;
283   
284    int fieldWidth = std::to_string(maxCount).size() + 1;
285    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
286
287    for (unsigned i = 0; i < inputFiles.size(); ++i) {
288        std::cout << std::setw(fieldWidth-1);
289        if (CountLines) {
290            std::cout << lineCount[i] << std::setw(fieldWidth);
291        }
292        if (CountWords) {
293            std::cout << wordCount[i] << std::setw(fieldWidth);
294        }
295        if (CountChars) {
296            std::cout << charCount[i] << std::setw(fieldWidth);
297        }
298        if (CountBytes) {
299            std::cout << byteCount[i];
300        }
301        std::cout << " " << inputFiles[i] << std::endl;
302    }
303    if (inputFiles.size() > 1) {
304        std::cout << std::setw(fieldWidth-1);
305        if (CountLines) {
306            std::cout << TotalLines << std::setw(fieldWidth);
307        }
308        if (CountWords) {
309            std::cout << TotalWords << std::setw(fieldWidth);
310        }
311        if (CountChars) {
312            std::cout << TotalChars << std::setw(fieldWidth);
313        }
314        if (CountBytes) {
315            std::cout << TotalBytes;
316        }
317        std::cout << " total" << std::endl;
318    }
319
320    return 0;
321}
Note: See TracBrowser for help on using the repository browser.