source: icGREP/icgrep-devel/icgrep/wc.cpp @ 5235

Last change on this file since 5235 was 5230, checked in by nmedfort, 3 years ago

Multi-threading support for PabloAST / PabloCompiler. Requires a unique LLVM Context / Module for each thread.

File size: 10.2 KB
RevLine 
[5019]1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <sstream>
12
[5033]13
14#include <toolchain.h>
[5036]15#include <pablo/pablo_toolchain.h>
[5019]16#include <llvm/IR/Function.h>
17#include <llvm/IR/Module.h>
18#include <llvm/ExecutionEngine/ExecutionEngine.h>
19#include <llvm/ExecutionEngine/MCJIT.h>
[5074]20#include "llvm/Linker/Linker.h"
[5019]21
22#include <llvm/Support/CommandLine.h>
23#include <llvm/Support/raw_ostream.h>
24
25#include <re/re_cc.h>
26#include <cc/cc_compiler.h>
[5063]27#include <pablo/pablo_kernel.h>
[5019]28#include <IDISA/idisa_builder.h>
29#include <IDISA/idisa_target.h>
[5100]30#include <kernels/streamset.h>
[5063]31#include <kernels/interface.h>
[5019]32#include <kernels/kernel.h>
33#include <kernels/s2p_kernel.h>
[5088]34#include <kernels/pipeline.h>
[5019]35
36#include <pablo/pablo_compiler.h>
37#include <pablo/pablo_toolchain.h>
38
39// mmap system
40#include <boost/filesystem.hpp>
41#include <boost/iostreams/device/mapped_file.hpp>
42
43#include <fcntl.h>
[5202]44
[5026]45static cl::OptionCategory wcFlags("Command Flags", "wc options");
[5019]46
[5026]47static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(wcFlags));
[5019]48
// Selectable count kinds; each enumerator is bound to a command-line flag in wcOptions.
enum CountOptions {
    LineOption,   // -l
    WordOption,   // -w
    CharOption,   // -m
    ByteOption    // -c
};
[5019]52
[5030]53static cl::list<CountOptions> wcOptions(
[5029]54  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
55             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
56             clEnumValN(CharOption, "m", "Report the number of characters in each input file (override -c)."),
57             clEnumValN(ByteOption, "c", "Report the number of bytes in each input file (override -m)."),
58             clEnumValEnd), cl::cat(wcFlags), cl::Grouping);
59                                                 
[5019]60
61
[5020]62static int defaultFieldWidth = 7;  // default field width
[5019]63
[5029]64
// Which counts to report; decided in main() from the parsed command-line options.
bool CountLines{false};
bool CountWords{false};
bool CountChars{false};
bool CountBytes{false};

// Per-file results, indexed by the file's position in inputFiles.
// Sized in main() and filled in by record_counts().
std::vector<uint64_t> lineCount;
std::vector<uint64_t> wordCount;
std::vector<uint64_t> charCount;
std::vector<uint64_t> byteCount;

// Running sums over all processed files, accumulated by record_counts().
uint64_t TotalLines{0};
uint64_t TotalWords{0};
uint64_t TotalChars{0};
uint64_t TotalBytes{0};
79
[5217]80using namespace pablo;
81using namespace kernel;
82using namespace parabix;
[5029]83
84//  The callback routine that records counts in progress.
[5019]85//
86extern "C" {
[5029]87    void record_counts(uint64_t lines, uint64_t words, uint64_t chars, uint64_t bytes, uint64_t fileIdx) {
[5020]88        lineCount[fileIdx] = lines;
89        wordCount[fileIdx] = words;
90        charCount[fileIdx] = chars;
91        byteCount[fileIdx] = bytes;
92        TotalLines += lines;
93        TotalWords += words;
94        TotalChars += chars;
95        TotalBytes += bytes;
[5019]96    }
97}
98
99//
100//
101
[5217]102void wc_gen(PabloKernel * kernel) {
[5019]103    //  input: 8 basis bit streams
[5063]104    //  output: 3 counters
[5019]105   
[5217]106    cc::CC_Compiler ccc(kernel);
[5019]107   
[5202]108    PabloBuilder & pb = ccc.getBuilder();
[5019]109
[5217]110    Var * lc = kernel->addOutput("lineCount", kernel->getSizeTy());
111    Var * wc = kernel->addOutput("wordCount", kernel->getSizeTy());
112    Var * cc = kernel->addOutput("charCount", kernel->getSizeTy());
[5202]113
[5019]114    if (CountLines) {
[5202]115        PabloAST * LF = ccc.compileCC(re::makeCC(0x0A));
116        pb.createAssign(lc, pb.createCount(LF));
[5019]117    }
118    if (CountWords) {
[5202]119        PabloAST * WS = ccc.compileCC(re::makeCC(re::makeCC(0x09, 0x0D), re::makeCC(0x20)));
120        PabloAST * wordChar = pb.createNot(WS);
[5019]121        // WS_follow_or_start = 1 past WS or at start of file
[5202]122        PabloAST * WS_follow_or_start = pb.createNot(pb.createAdvance(wordChar, 1));
123        PabloAST * wordStart = pb.createInFile(pb.createAnd(wordChar, WS_follow_or_start));
124        pb.createAssign(wc, pb.createCount(wordStart));
[5019]125    }
126    if (CountChars) {
127        //
128        // FIXME: This correctly counts characters assuming valid UTF-8 input.  But what if input is
129        // not UTF-8, or is not valid?
130        //
[5202]131        PabloAST * u8Begin = ccc.compileCC(re::makeCC(re::makeCC(0, 0x7F), re::makeCC(0xC2, 0xF4)));       
132        pb.createAssign(cc, pb.createCount(u8Begin));
[5019]133    }
134}
135
[5227]136Function * pipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder) {
[5069]137    Type * mBitBlockType = iBuilder->getBitBlockType();
[5088]138   
[5217]139    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
[5142]140
[5217]141    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
142
[5230]143    S2PKernel  s2pk(iBuilder);
[5133]144    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
[5102]145   
[5217]146    PabloKernel wck(iBuilder, "wc");
147    wc_gen(&wck);
148    pablo_function_passes(&wck);
[5102]149   
[5133]150    std::unique_ptr<Module> wcM = wck.createKernelModule({&BasisBits}, {});
[5074]151   
152    s2pk.addKernelDeclarations(mMod);
153    wck.addKernelDeclarations(mMod);
[5021]154
[5029]155    Constant * record_counts_routine;
[5109]156    Type * const size_ty = iBuilder->getSizeTy();
[5230]157    Type * const voidTy = iBuilder->getVoidTy();
[5109]158    record_counts_routine = mMod->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
[5065]159    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
[5019]160   
[5109]161    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
[5019]162    main->setCallingConv(CallingConv::C);
163    Function::arg_iterator args = main->arg_begin();
164   
165    Value * const inputStream = &*(args++);
166    inputStream->setName("input");
[5088]167    Value * const fileSize = &*(args++);
168    fileSize->setName("fileSize");
[5019]169    Value * const fileIdx = &*(args++);
170    fileIdx->setName("fileIdx");
171   
172    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
173
[5142]174    ByteStream.setStreamSetBuffer(inputStream, fileSize);
[5088]175    BasisBits.allocateBuffer();
[5019]176   
[5221]177    generatePipelineLoop(iBuilder, {&s2pk, &wck});
[5063]178   
[5220]179    Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
180    Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
181    Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
[5035]182
[5088]183    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
[5019]184   
185    iBuilder->CreateRetVoid();
[5074]186   
187    Linker L(*mMod);
188    L.linkInModule(std::move(s2pM));
189    L.linkInModule(std::move(wcM));
190   
[5019]191    return main;
192}
193
194
195typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
196
197static ExecutionEngine * wcEngine = nullptr;
198
[5202]199wcFunctionType wcCodeGen(void) { 
200    Module * M = new Module("wc", getGlobalContext());
[5033]201    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
[5019]202
[5227]203    llvm::Function * main_IR = pipeline(M, idb);
[5019]204
[5033]205    wcEngine = JIT_to_ExecutionEngine(M);
[5019]206   
207    wcEngine->finalizeObject();
208
209    delete idb;
210    return reinterpret_cast<wcFunctionType>(wcEngine->getPointerToFunction(main_IR));
211}
212
213void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
214    std::string fileName = inputFiles[fileIdx];
215    size_t fileSize;
216    char * fileBuffer;
217   
[5101]218    const boost::filesystem::path file(fileName);
[5019]219    if (exists(file)) {
220        if (is_directory(file)) {
221            return;
222        }
223    } else {
224        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
225        return;
226    }
227   
228    fileSize = file_size(file);
[5101]229    boost::iostreams::mapped_file_source mappedFile;
[5019]230    if (fileSize == 0) {
231        fileBuffer = nullptr;
232    }
233    else {
234        try {
[5021]235            mappedFile.open(fileName);
236        } catch (std::exception &e) {
[5019]237            std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
238            return;
239        }
[5021]240        fileBuffer = const_cast<char *>(mappedFile.data());
[5019]241    }
242    fn_ptr(fileBuffer, fileSize, fileIdx);
243
244    mappedFile.close();
245   
246}
247
248
249
250int main(int argc, char *argv[]) {
[5202]251    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&wcFlags, pablo_toolchain_flags(), codegen::codegen_flags()});
[5019]252    cl::ParseCommandLineOptions(argc, argv);
[5029]253    if (wcOptions.size() == 0) {
[5019]254        CountLines = true;
255        CountWords = true;
256        CountBytes = true;
257    }
[5029]258    else {
259        CountLines = false;
260        CountWords = false;
261        CountBytes = false;
262        CountChars = false;
263        for (unsigned i = 0; i < wcOptions.size(); i++) {
264            switch (wcOptions[i]) {
265                case WordOption: CountWords = true; break;
266                case LineOption: CountLines = true; break;
267                case CharOption: CountBytes = true; CountChars = false; break;
268                case ByteOption: CountChars = true; CountBytes = false; break;
269            }
270        }
271    }
[5019]272   
[5029]273   
[5019]274    wcFunctionType fn_ptr = wcCodeGen();
275
[5020]276    int fileCount = inputFiles.size();
277    lineCount.resize(fileCount);
278    wordCount.resize(fileCount);
279    charCount.resize(fileCount);
280    byteCount.resize(fileCount);
281   
282    for (unsigned i = 0; i < inputFiles.size(); ++i) {
[5019]283        wc(fn_ptr, i);
284    }
285   
[5021]286    size_t maxCount = 0;
[5020]287    if (CountLines) maxCount = TotalLines;
288    if (CountWords) maxCount = TotalWords;
289    if (CountChars) maxCount = TotalChars;
290    if (CountBytes) maxCount = TotalBytes;
291   
292    int fieldWidth = std::to_string(maxCount).size() + 1;
293    if (fieldWidth < defaultFieldWidth) fieldWidth = defaultFieldWidth;
294
295    for (unsigned i = 0; i < inputFiles.size(); ++i) {
[5029]296        std::cout << std::setw(fieldWidth-1);
[5020]297        if (CountLines) {
[5029]298            std::cout << lineCount[i] << std::setw(fieldWidth);
[5020]299        }
300        if (CountWords) {
[5029]301            std::cout << wordCount[i] << std::setw(fieldWidth);
[5020]302        }
303        if (CountChars) {
[5029]304            std::cout << charCount[i] << std::setw(fieldWidth);
[5020]305        }
306        if (CountBytes) {
[5029]307            std::cout << byteCount[i];
[5020]308        }
309        std::cout << " " << inputFiles[i] << std::endl;
310    }
[5019]311    if (inputFiles.size() > 1) {
[5029]312        std::cout << std::setw(fieldWidth-1);
[5019]313        if (CountLines) {
[5029]314            std::cout << TotalLines << std::setw(fieldWidth);
[5019]315        }
316        if (CountWords) {
[5029]317            std::cout << TotalWords << std::setw(fieldWidth);
[5019]318        }
319        if (CountChars) {
[5029]320            std::cout << TotalChars << std::setw(fieldWidth);
[5019]321        }
322        if (CountBytes) {
[5029]323            std::cout << TotalBytes;
[5019]324        }
325        std::cout << " total" << std::endl;
326    }
327
328    return 0;
329}
Note: See TracBrowser for help on using the repository browser.