source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5425

Last change on this file since 5425 was 5425, checked in by nmedfort, 2 years ago

Changes towards separate compilation

File size: 9.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <iostream>
8#include <iomanip>
9#include <sstream>
10#include <toolchain/toolchain.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Module.h>
13#include <llvm/ExecutionEngine/ExecutionEngine.h>
14#include "llvm/Linker/Linker.h"
15#include <llvm/Support/CommandLine.h>
16#include <llvm/Support/raw_ostream.h>
17#include <cc/cc_compiler.h>
18#include <pablo/pablo_kernel.h>
19#include <IR_Gen/idisa_builder.h>
20#include <IR_Gen/idisa_target.h>
21#include <kernels/streamset.h>
22#include <kernels/mmap_kernel.h>
23#include <kernels/s2p_kernel.h>
24#include <pablo/pablo_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <fcntl.h>
27
28using namespace llvm;
29
30static cl::OptionCategory wcFlags("Command Flags", "wc options");
31
32static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
33
34enum CountOptions {
35    LineOption, WordOption, CharOption, ByteOption
36};
37
38static cl::list<CountOptions> wcOptions(
39  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
40             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
41             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
42             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
43             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
44                                                 
45
46
// Minimum width of each numeric column in the printed report.
static int defaultFieldWidth{7};

// Which counts were requested; set in main() and read by wc_gen() and the
// report printer.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in inputFiles.
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running totals over all files recorded so far.
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
64
65using namespace pablo;
66using namespace kernel;
67using namespace parabix;
68
69//  The callback routine that records counts in progress.
70//
71extern "C" {
72    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
73        lineCount[fileIdx] = lines;
74        wordCount[fileIdx] = words;
75        charCount[fileIdx] = chars;
76        byteCount[fileIdx] = bytes;
77        TotalLines += lines;
78        TotalWords += words;
79        TotalChars += chars;
80        TotalBytes += bytes;
81    }
82}
83
84//
85//
86
87std::unique_ptr<PabloKernel> wc_gen(IDISA::IDISA_Builder * iBuilder) {
88   
89    auto kernel = std::unique_ptr<PabloKernel>(new PabloKernel(iBuilder, "Parabix:wc",
90                    {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
91                    {},
92                    {},
93                    {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}}));
94   
95    //  input: 8 basis bit streams
96    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
97    //  output: 3 counters
98   
99    cc::CC_Compiler ccc(kernel.get(), u8bitSet);
100   
101    PabloBuilder & pb = ccc.getBuilder();
102
103    Var * lc = kernel->getOutputScalarVar("lineCount");
104    Var * wc = kernel->getOutputScalarVar("wordCount");
105    Var * cc = kernel->getOutputScalarVar("charCount");
106
107    if (CountLines) {
108        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
109        pb.createAssign(lc, pb.createCount(LF));
110    }
111    if (CountWords) {
112        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
113        PabloAST * wordChar = pb.createNot(WS);
114        // WS_follow_or_start = 1 past WS or at start of file
115        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
116        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
117        pb.createAssign(wc, pb.createCount(wordStart));
118    }
119    if (CountChars) {
120        //
121        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
122        // not UTF-8, or is not valid?
123        //
124        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
125        pb.createAssign(cc, pb.createCount(u8Begin));
126    }
127    pablo_function_passes(kernel.get());
128    return kernel;
129}
130
131
132
133
// Signature of the JIT-compiled entry point: (file descriptor, file index).
using WordCountFunctionType = void (*)(uint32_t fd, size_t fileIdx);
135
136void wcPipelineGen(ParabixDriver & pxDriver) {
137
138    auto iBuilder = pxDriver.getIDISA_Builder();
139    Module * m = iBuilder->getModule();
140   
141    Type * const int32Ty = iBuilder->getInt32Ty();
142    Type * const sizeTy = iBuilder->getSizeTy();
143    Type * const voidTy = iBuilder->getVoidTy();
144
145    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
146    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
147
148    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
149    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
150    main->setCallingConv(CallingConv::C);
151    Function::arg_iterator args = main->arg_begin();   
152    Value * const fileDecriptor = &*(args++);
153    fileDecriptor->setName("fileDecriptor");
154    Value * const fileIdx = &*(args++);
155    fileIdx->setName("fileIdx");
156
157    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
158
159    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
160
161    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
162
163    KernelBuilder * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
164    mmapK->setInitialArguments({fileDecriptor});
165    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
166
167    KernelBuilder * s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
168    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
169   
170    KernelBuilder * wck = pxDriver.addKernelInstance(wc_gen(iBuilder));
171    pxDriver.makeKernelCall(wck, {BasisBits}, {});
172
173
174    pxDriver.generatePipelineIR();
175   
176    Value * const fileSize = mmapK->getAccumulator("fileSize");
177    Value * const lineCount = wck->getAccumulator("lineCount");
178    Value * const wordCount = wck->getAccumulator("wordCount");
179    Value * const charCount = wck->getAccumulator("charCount");
180
181    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
182   
183    iBuilder->CreateRetVoid();
184
185    pxDriver.linkAndFinalize();
186}
187
188
189WordCountFunctionType wcCodeGen() {
190    ParabixDriver pxDriver("wc");
191    wcPipelineGen(pxDriver);
192    return reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
193}
194
195void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
196    std::string fileName = inputFiles[fileIdx];
197    const int fd = open(fileName.c_str(), O_RDONLY);
198    if (LLVM_UNLIKELY(fd == -1)) {
199        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
200    } else {
201        fn_ptr(fd, fileIdx);
202        close(fd);
203    }
204}
205
206int main(int argc, char *argv[]) {
207    AddParabixVersionPrinter();
208    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
209    cl::ParseCommandLineOptions(argc, argv);
210    if (wcOptions.size() == 0) {
211        CountLines = true;
212        CountWords = true;
213        CountBytes = true;
214    } else {
215        CountLines = false;
216        CountWords = false;
217        CountBytes = false;
218        CountChars = false;
219        for (unsigned i = 0; i < wcOptions.size(); i++) {
220            switch (wcOptions[i]) {
221                case WordOption: CountWords = true; break;
222                case LineOption: CountLines = true; break;
223                case CharOption: CountBytes = true; CountChars = false; break;
224                case ByteOption: CountChars = true; CountBytes = false; break;
225            }
226        }
227    }
228   
229    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
230
231    const auto fileCount = inputFiles.size();
232    lineCount.resize(fileCount);
233    wordCount.resize(fileCount);
234    charCount.resize(fileCount);
235    byteCount.resize(fileCount);
236   
237    for (unsigned i = 0; i < fileCount; ++i) {
238        wc(wordCountFunctionPtr, i);
239    }
240   
241    size_t maxCount = 0;
242    if (CountLines) maxCount = TotalLines;
243    if (CountWords) maxCount = TotalWords;
244    if (CountChars) maxCount = TotalChars;
245    if (CountBytes) maxCount = TotalBytes;
246   
247    int fieldWidth = std::to_string(maxCount).size() + 1;
248    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
249
250    for (unsigned i = 0; i < inputFiles.size(); ++i) {
251        std::cout << std::setw(fieldWidth-1);
252        if (CountLines) {
253            std::cout << lineCount[i] << std::setw(fieldWidth);
254        }
255        if (CountWords) {
256            std::cout << wordCount[i] << std::setw(fieldWidth);
257        }
258        if (CountChars) {
259            std::cout << charCount[i] << std::setw(fieldWidth);
260        }
261        if (CountBytes) {
262            std::cout << byteCount[i];
263        }
264        std::cout << " " << inputFiles[i] << std::endl;
265    }
266    if (inputFiles.size() > 1) {
267        std::cout << std::setw(fieldWidth-1);
268        if (CountLines) {
269            std::cout << TotalLines << std::setw(fieldWidth);
270        }
271        if (CountWords) {
272            std::cout << TotalWords << std::setw(fieldWidth);
273        }
274        if (CountChars) {
275            std::cout << TotalChars << std::setw(fieldWidth);
276        }
277        if (CountBytes) {
278            std::cout << TotalBytes;
279        }
280        std::cout << " total" << std::endl;
281    }
282
283    return 0;
284}
Note: See TracBrowser for help on using the repository browser.