source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5394

Last change on this file since 5394 was 5394, checked in by cameron, 2 years ago

Object cache system with signature checking complete

File size: 10.3 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <kernels/pipeline.h>
25#include <pablo/pablo_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <boost/filesystem.hpp>
28#include <boost/iostreams/device/mapped_file.hpp>
29
30
31using namespace llvm;
32
33static cl::OptionCategory wcFlags("Command Flags", "wc options");
34
35static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
36
37enum CountOptions {
38    LineOption, WordOption, CharOption, ByteOption
39};
40
41static cl::list<CountOptions> wcOptions(
42  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
43             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
44             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
45             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
46             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
47                                                 
48
49
50static int defaultFieldWidth = 7;  // default field width
51
52
53bool CountLines = false;
54bool CountWords = false;
55bool CountChars = false;
56bool CountBytes = false;
57
58std::vector<uint64_t> lineCount;
59std::vector<uint64_t> wordCount;
60std::vector<uint64_t> charCount;
61std::vector<uint64_t> byteCount;
62
63uint64_t TotalLines = 0;
64uint64_t TotalWords = 0;
65uint64_t TotalChars = 0;
66uint64_t TotalBytes = 0;
67
68using namespace pablo;
69using namespace kernel;
70using namespace parabix;
71
72//  The callback routine that records counts in progress.
73//
74extern "C" {
75    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
76        lineCount[fileIdx] = lines;
77        wordCount[fileIdx] = words;
78        charCount[fileIdx] = chars;
79        byteCount[fileIdx] = bytes;
80        TotalLines += lines;
81        TotalWords += words;
82        TotalChars += chars;
83        TotalBytes += bytes;
84    }
85}
86
87//
88//
89
90void wc_gen(PabloKernel * kernel) {
91    //  input: 8 basis bit streams
92    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
93    //  output: 3 counters
94   
95    cc::CC_Compiler ccc(kernel, u8bitSet);
96   
97    PabloBuilder & pb = ccc.getBuilder();
98
99    Var * lc = kernel->getOutputScalarVar("lineCount");
100    Var * wc = kernel->getOutputScalarVar("wordCount");
101    Var * cc = kernel->getOutputScalarVar("charCount");
102
103    if (CountLines) {
104        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
105        pb.createAssign(lc, pb.createCount(LF));
106    }
107    if (CountWords) {
108        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
109        PabloAST * wordChar = pb.createNot(WS);
110        // WS_follow_or_start = 1 past WS or at start of file
111        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
112        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
113        pb.createAssign(wc, pb.createCount(wordStart));
114    }
115    if (CountChars) {
116        //
117        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
118        // not UTF-8, or is not valid?
119        //
120        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
121        pb.createAssign(cc, pb.createCount(u8Begin));
122    }
123}
124
125
126
127
// Signature of the JIT-compiled entry point: (file data, file size in bytes, index of the file).
using wcFunctionType = void (*)(char * byte_data, size_t filesize, size_t fileIdx);
129
130void wcPipelineGen(ParabixDriver & pxDriver) {
131
132    IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
133    Module * m = iBuilder->getModule();
134   
135    Type * mBitBlockType = iBuilder->getBitBlockType();
136    Constant * record_counts_routine;
137    Type * const size_ty = iBuilder->getSizeTy();
138    Type * const voidTy = iBuilder->getVoidTy();
139    record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
140    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
141   
142    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
143    main->setCallingConv(CallingConv::C);
144    Function::arg_iterator args = main->arg_begin();
145   
146    Value * const inputStream = &*(args++);
147    inputStream->setName("input");
148    Value * const fileSize = &*(args++);
149    fileSize->setName("fileSize");
150    Value * const fileIdx = &*(args++);
151    fileIdx->setName("fileIdx");
152   
153    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
154
155    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
156    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
157
158    MMapSourceKernel mmapK(iBuilder);
159    mmapK.setInitialArguments({fileSize});
160    pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
161
162    S2PKernel  s2pk(iBuilder);
163    pxDriver.addKernelCall(s2pk, {&ByteStream}, {&BasisBits});
164   
165    PabloKernel wck(iBuilder, "Parabix:wc",
166        {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
167        {},
168        {},
169        {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
170
171    wc_gen(&wck);
172    pablo_function_passes(&wck);
173    pxDriver.addKernelCall(wck, {&BasisBits}, {});
174
175    ByteStream.setStreamSetBuffer(inputStream);
176    BasisBits.allocateBuffer();
177
178    pxDriver.generatePipelineIR();
179   
180    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
181    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
182    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
183
184    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
185   
186    iBuilder->CreateRetVoid();
187   
188    pxDriver.JITcompileMain();
189    pxDriver.linkAndFinalize();
190}
191
192
193wcFunctionType wcCodeGen(void) {
194    Module * M = new Module("wc", getGlobalContext());
195    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
196    ParabixDriver pxDriver(idb);
197   
198    wcPipelineGen(pxDriver);
199
200    wcFunctionType main = reinterpret_cast<wcFunctionType>(pxDriver.getPointerToMain());
201    delete idb;
202    return main;
203}
204
205void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
206    std::string fileName = inputFiles[fileIdx];
207    size_t fileSize;
208    char * fileBuffer;
209   
210    const boost::filesystem::path file(fileName);
211    if (exists(file)) {
212        if (is_directory(file)) {
213            return;
214        }
215    } else {
216        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
217        return;
218    }
219   
220    fileSize = file_size(file);
221    boost::iostreams::mapped_file_source mappedFile;
222    if (fileSize == 0) {
223        fileBuffer = nullptr;
224    }
225    else {
226        try {
227            mappedFile.open(fileName);
228        } catch (std::exception &e) {
229            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
230            return;
231        }
232        fileBuffer = const_cast<char *>(mappedFile.data());
233    }
234    fn_ptr(fileBuffer, fileSize, fileIdx);
235
236    mappedFile.close();
237   
238}
239
240
241
242int main(int argc, char *argv[]) {
243    AddParabixVersionPrinter();
244    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
245    cl::ParseCommandLineOptions(argc, argv);
246    if (wcOptions.size() == 0) {
247        CountLines = true;
248        CountWords = true;
249        CountBytes = true;
250    }
251    else {
252        CountLines = false;
253        CountWords = false;
254        CountBytes = false;
255        CountChars = false;
256        for (unsigned i = 0; i < wcOptions.size(); i++) {
257            switch (wcOptions[i]) {
258                case WordOption: CountWords = true; break;
259                case LineOption: CountLines = true; break;
260                case CharOption: CountBytes = true; CountChars = false; break;
261                case ByteOption: CountChars = true; CountBytes = false; break;
262            }
263        }
264    }
265   
266   
267    wcFunctionType fn_ptr = wcCodeGen();
268
269    int fileCount = inputFiles.size();
270    lineCount.resize(fileCount);
271    wordCount.resize(fileCount);
272    charCount.resize(fileCount);
273    byteCount.resize(fileCount);
274   
275    for (unsigned i = 0; i < inputFiles.size(); ++i) {
276        wc(fn_ptr, i);
277    }
278   
279    size_t maxCount = 0;
280    if (CountLines) maxCount = TotalLines;
281    if (CountWords) maxCount = TotalWords;
282    if (CountChars) maxCount = TotalChars;
283    if (CountBytes) maxCount = TotalBytes;
284   
285    int fieldWidth = std::to_string(maxCount).size() + 1;
286    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
287
288    for (unsigned i = 0; i < inputFiles.size(); ++i) {
289        std::cout << std::setw(fieldWidth-1);
290        if (CountLines) {
291            std::cout << lineCount[i] << std::setw(fieldWidth);
292        }
293        if (CountWords) {
294            std::cout << wordCount[i] << std::setw(fieldWidth);
295        }
296        if (CountChars) {
297            std::cout << charCount[i] << std::setw(fieldWidth);
298        }
299        if (CountBytes) {
300            std::cout << byteCount[i];
301        }
302        std::cout << " " << inputFiles[i] << std::endl;
303    }
304    if (inputFiles.size() > 1) {
305        std::cout << std::setw(fieldWidth-1);
306        if (CountLines) {
307            std::cout << TotalLines << std::setw(fieldWidth);
308        }
309        if (CountWords) {
310            std::cout << TotalWords << std::setw(fieldWidth);
311        }
312        if (CountChars) {
313            std::cout << TotalChars << std::setw(fieldWidth);
314        }
315        if (CountBytes) {
316            std::cout << TotalBytes;
317        }
318        std::cout << " total" << std::endl;
319    }
320
321    return 0;
322}
Note: See TracBrowser for help on using the repository browser.