source: icGREP/icgrep-devel/icgrep/u8u16.cpp @ 5191

Last change on this file since 5191 was 5191, checked in by cameron, 3 years ago

mmap and posix_memalign buffering options for u8u16

File size: 20.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <fstream>
10#include <sstream>
11#include <stdlib.h>
12
13#include <llvm/IR/Function.h>
14#include <llvm/IR/Module.h>
15#include <llvm/ExecutionEngine/ExecutionEngine.h>
16#include <llvm/ExecutionEngine/MCJIT.h>
17#include <llvm/IRReader/IRReader.h>
18#include <llvm/IR/Verifier.h>
19#include <llvm/Support/Debug.h>
20
21#include <llvm/Support/CommandLine.h>
22
23#include <toolchain.h>
24#include <re/re_cc.h>
25#include <cc/cc_compiler.h>
26#include <pablo/pablo_toolchain.h>
27#include <pablo/pablo_kernel.h>
28#include <pablo/function.h>
29#include <IDISA/idisa_builder.h>
30#include <IDISA/idisa_target.h>
31#include <kernels/pipeline.h>
32#include <kernels/interface.h>
33#include <kernels/kernel.h>
34#include <kernels/s2p_kernel.h>
35#include <kernels/p2s_kernel.h>
36#include <kernels/deletion.h>
37#include <kernels/stdout_kernel.h>
38#include <llvm/IR/TypeBuilder.h>
39
40
41// mmap system
42#include <boost/filesystem.hpp>
43#include <boost/iostreams/device/mapped_file.hpp>
44#include <boost/interprocess/anonymous_shared_memory.hpp>
45#include <boost/interprocess/mapped_region.hpp>
46#include <fcntl.h>
47static cl::OptionCategory u8u16Options("u8u16 Options",
48                                            "Transcoding control options.");
49
50static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(u8u16Options));
51
52static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(u8u16Options));
53static cl::opt<bool> mMapBuffering("mmap-buffering", cl::desc("Enable mmap buffering."), cl::cat(u8u16Options));
54static cl::opt<bool> memAlignBuffering("memalign-buffering", cl::desc("Enable posix_memalign buffering."), cl::cat(u8u16Options));
55
56//
57//
58//
59namespace pablo {
60
// Builds the Pablo function "u8u16" used for UTF-8 to UTF-16 transcoding.
//   Inputs:  8 basis bit streams (obtained via cc::CC_Compiler::getBasisBits()).
//   Results: 0-7  = u16_hi[0..7], 8-15 = u16_lo[0..7],
//            16   = "delbits" (delmask), 17 = "errors" (error_mask).
// The non-ASCII logic is nested in three if-blocks, each with its own builder:
// nonASCII (nAb) > pfx34 (p34b) > pfx4 (p4b).  Statements are emitted in call order,
// so the order of the builder calls below is significant.
61PabloFunction * u8u16_pablo() {
62    //  input: 8 basis bit streams
63    //  output: 16 u8-indexed streams, + delmask stream + error stream
64    PabloFunction * function = PabloFunction::Create("u8u16", 8, 18);
65    cc::CC_Compiler ccc(*function);
66   
67    PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
68    const std::vector<Var *> u8_bits = ccc.getBasisBits();
69    // Outputs
70    Assign * u16_hi[8];
71    Assign * u16_lo[8];
72    Assign * delmask;
73    Assign * error_mask;
74   
75    // The logic for processing non-ASCII bytes is to be embedded within an if-hierarchy.
76    PabloAST * nonASCII = ccc.compileCC(re::makeCC(0x80, 0xFF));
77   
78    // Builder for the if statement handling all non-ASCII logic
79    PabloBuilder nAb = PabloBuilder::Create(pBuilder);
80    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
81    // produce the UTF-16 code unit data ...,
    // (each bitNa1 below is basis bit N advanced by one position).
82    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
83    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
84    PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
85    PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
86    PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
87   
88    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
89    PabloAST * pfx34 = ccc.compileCC(re::makeCC(0xE0, 0xFF), nAb);
90    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
91    PabloBuilder p34b = PabloBuilder::Create(nAb);
92    // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
93    // to the final position of the 3-byte sequence.
    // (bitNa2 is bitNa1 advanced by one more position, i.e. two positions in total.)
94    PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
95    PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
96    PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
97    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
98    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
99    //
100    // Logic for 4-byte UTF-8 sequences
101    //
102    // Entry condition for 4-byte sequences: we have a prefix byte in the range 0xF0-0xFF.
103    PabloAST * pfx4 = ccc.compileCC(re::makeCC(0xF0, 0xFF), p34b);
104    // Builder for the if statement handling all logic for 4-byte sequences only.
105    PabloBuilder p4b = PabloBuilder::Create(p34b);
106    // Illegal 4-byte sequences
    // Flagged: F0 followed by a byte in 0x80-0x8F, F4 followed by a byte in 0x90-0xBF,
    // and any byte in 0xF5-0xFF.
107    PabloAST * F0 = ccc.compileCC(re::makeCC(0xF0), p4b);
108    PabloAST * F4 = ccc.compileCC(re::makeCC(0xF4), p4b);
109    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeCC(0x80, 0x8F), p4b));
110    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeCC(0x90, 0xBF), p4b));
111    PabloAST * F5_FF = ccc.compileCC(re::makeCC(0xF5, 0xFF), p4b);
112    Assign * FX_err = p4b.createAssign("FX_err", p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
113    //
114    // 4-byte prefixes have a scope that extends over the next 3 bytes.
115    Assign * u8scope42 = p4b.createAssign("u8scope42", p4b.createAdvance(pfx4, 1));
116    Assign * u8scope43 = p4b.createAssign("u8scope43", p4b.createAdvance(u8scope42, 1));
117    Assign * u8scope44 = p4b.createAssign("u8scope44", p4b.createAdvance(u8scope43, 1));
118    //
119   
120    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
121    //  we must calculate the value abcde - 1 to produce the bit values
122    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
    //  brw1 and brw2 are the borrows propagated from the lower bits of that subtraction.
123    Assign * s43_lo1 = p4b.createAssign("scope43_lo1", p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
124    Assign * s43_lo0 = p4b.createAssign("scope43_lo0", p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
125    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
126    Assign * s43_hi7 = p4b.createAssign("scope43_hi7", p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
127    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
128    Assign * s43_hi6 = p4b.createAssign("scope43_hi6", p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
129    //
    // Remaining low bits at the scope43 position: four from the previous byte
    // (bit4a1..bit7a1) and two from the current byte (basis bits 2 and 3).
130    Assign * s43_lo2 = p4b.createAssign("scope43_lo2", p4b.createAnd(u8scope43, bit4a1));
131    Assign * s43_lo3 = p4b.createAssign("scope43_lo3", p4b.createAnd(u8scope43, bit5a1));
132    Assign * s43_lo4 = p4b.createAssign("scope43_lo4", p4b.createAnd(u8scope43, bit6a1));
133    Assign * s43_lo5 = p4b.createAssign("scope43_lo5", p4b.createAnd(u8scope43, bit7a1));
134    Assign * s43_lo6 = p4b.createAssign("scope43_lo6", p4b.createAnd(u8scope43, u8_bits[2]));
135    Assign * s43_lo7 = p4b.createAssign("scope43_lo7", p4b.createAnd(u8scope43, u8_bits[3]));
136    //
137    //
    // Attach the 4-byte block to the 3/4-byte builder under condition pfx4; the listed
    // Assigns are the ones created in p4b that code after this point refers to.
138    p34b.createIf(pfx4,
139                  {FX_err, u8scope42, u8scope43, u8scope44, s43_hi6, s43_hi7,
140                   s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
141                   p4b);
142    //
143    // Combined logic for 3 and 4 byte sequences
144    //
145    PabloAST * pfx3 = ccc.compileCC(re::makeCC(0xE0, 0xEF), p34b);
146    Assign * u8scope32 = p34b.createAssign("u8scope32", p34b.createAdvance(pfx3, 1));
147    Assign * u8scope33 = p34b.createAssign("u8scope33", p34b.createAdvance(u8scope32, 1));
148
149    // Illegal 3-byte sequences
    // Flagged: E0 followed by a byte in 0x80-0x9F and ED followed by a byte in 0xA0-0xBF;
    // the 4-byte errors (FX_err) are merged in here.
150    PabloAST * E0 = ccc.compileCC(re::makeCC(0xE0), p34b);
151    PabloAST * ED = ccc.compileCC(re::makeCC(0xED), p34b);
152    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeCC(0x80, 0x9F), p34b));
153    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeCC(0xA0, 0xBF), p34b));
154    Assign * EX_FX_err = p34b.createAssign("EX_FX_err", p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
155    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
156    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
157   
    // Second-byte positions of 3- and 4-byte sequences; this is OR-ed into delmask below.
158    Assign * p34del = p34b.createAssign("p34del", p34b.createOr(u8scope32, u8scope42));
159
160
161    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
162    // UTF-8 sequences.
    // At surrogate positions hi0, hi1, hi3 and hi4 are set and hi2 is left clear.
163    u16_hi[0] = p34b.createAssign("u16_hi0", p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
164    u16_hi[1] = p34b.createAssign("u16_hi1", p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
165    u16_hi[2] = p34b.createAssign("u16_hi2", p34b.createAnd(u8scope33, bit6a2));
166    u16_hi[3] = p34b.createAssign("u16_hi3", p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
167    u16_hi[4] = p34b.createAssign("u16_hi4", p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
168   
169    //
    // Attach the 3/4-byte block to the non-ASCII builder under condition pfx34, again
    // listing the Assigns that later code refers to.
170    nAb.createIf(pfx34, 
171                 {u8scope33, EX_FX_err, p34del, 
172                  u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u8scope44, s43_hi6, s43_hi7,
173                  s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
174                 p34b);
175    //
176    // Combined logic for 2, 3 and 4 byte sequences
177    //
178    PabloAST * pfx2 = ccc.compileCC(re::makeCC(0xC0, 0xDF), nAb);
179    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
    // u8lastscope: final-byte positions of 2-, 3- and 4-byte sequences.
180    Assign * u8lastscope = nAb.createAssign("u8lastscope", nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
181    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
182
    // Errors: a C0 or C1 byte, a mismatch between expected scope positions (u8anyscope)
    // and actual bytes in 0x80-0xBF, or any of the 3/4-byte errors.
183    PabloAST * C0_C1_err = ccc.compileCC(re::makeCC(0xC0, 0xC1), nAb);
184    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeCC(0x80, 0xBF), nAb));
185    error_mask = nAb.createAssign("errormask", nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
    // delmask covers every byte in 0xC0-0xFF plus the p34del positions.
186    delmask = nAb.createAssign("delmask", nAb.createOr(p34del, ccc.compileCC(re::makeCC(0xC0, 0xFF), nAb)));
187   
188    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
189    // low byte are only nonzero for 2, 3 and 4 byte sequences.
190    u16_hi[5] = nAb.createAssign("u16_hi5", nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
191    u16_hi[6] = nAb.createAssign("u16_hi6", nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
192    u16_hi[7] = nAb.createAssign("u16_hi7", nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
193    u16_lo[0] = nAb.createAssign("u16_lo0", nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
194    Assign * p234_lo1 = nAb.createAssign("p234_lo1", nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
195
    // Attach the whole non-ASCII block to the top-level builder under condition nonASCII.
196    pBuilder.createIf(nonASCII, 
197                      {error_mask, delmask, u8lastscope,
198                       u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u16_hi[5], u16_hi[6], u16_hi[7],
199                       u16_lo[0], p234_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
200                      nAb);
201    //
202    //
    // Low bits 1-7 are computed at top level: lo1 takes basis bit 1 at ASCII positions,
    // lo2-lo7 take basis bits 2-7 at ASCII or last-byte positions, and each is OR-ed
    // with the corresponding value computed inside the if-hierarchy.
203    PabloAST * ASCII = ccc.compileCC(re::makeCC(0x0, 0x7F));
204    PabloAST * last_byte = pBuilder.createOr(ASCII, u8lastscope);
205    u16_lo[1] = pBuilder.createAssign("u16_lo1", pBuilder.createOr(pBuilder.createAnd(ASCII, u8_bits[1]), p234_lo1));
206    u16_lo[2] = pBuilder.createAssign("u16_lo2", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[2]), s43_lo2));
207    u16_lo[3] = pBuilder.createAssign("u16_lo3", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[3]), s43_lo3));
208    u16_lo[4] = pBuilder.createAssign("u16_lo4", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[4]), s43_lo4));
209    u16_lo[5] = pBuilder.createAssign("u16_lo5", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[5]), s43_lo5));
210    u16_lo[6] = pBuilder.createAssign("u16_lo6", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[6]), s43_lo6));
211    u16_lo[7] = pBuilder.createAssign("u16_lo7", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[7]), s43_lo7));
212   
    // Register the function results: hi bits at 0-7, lo bits at 8-15, then the two masks.
213    for (unsigned i = 0; i < 8; i++) {
214        function->setResult(i, pBuilder.createAssign("u16_hi" + std::to_string(i), u16_hi[i]));
215        function->setResult(i+8, pBuilder.createAssign("u16_lo" + std::to_string(i), u16_lo[i]));
216    }
217    function->setResult(16, pBuilder.createAssign("delbits", delmask));
218    function->setResult(17, pBuilder.createAssign("errors", error_mask));
219
220    return function;
221}
222}
223
224
225
226using namespace kernel;
227using namespace parabix;
228
229Function * u8u16Pipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder, pablo::PabloFunction * function) {
230    Type * mBitBlockType = iBuilder->getBitBlockType();
231
232    const unsigned segmentSize = codegen::SegmentSize;
233    const unsigned bufferSegments = codegen::BufferSegments;
234   
235    ExternalFileBuffer ByteStream(iBuilder, StreamSetType(1, i8));
236    //SingleBlockBuffer BasisBits(iBuilder, StreamSetType(8, i1));
237    CircularBuffer BasisBits(iBuilder, StreamSetType(8, i1), segmentSize * bufferSegments);
238
239    //SingleBlockBuffer U8u16Bits(iBuilder, StreamSetType(18, i1));
240    CircularBuffer U8u16Bits(iBuilder, StreamSetType(18, i1), segmentSize * bufferSegments);
241
242    //SingleBlockBuffer U16Bits(iBuilder, StreamSetType(16, i1));
243    CircularBuffer U16Bits(iBuilder, StreamSetType(16, i1), segmentSize * bufferSegments);
244   
245    //SingleBlockBuffer DeletionCounts(iBuilder, StreamSetType(1, i1));
246    CircularBuffer DeletionCounts(iBuilder, StreamSetType(1, i1), segmentSize * bufferSegments );
247   
248    // Different choices for the output buffer depending on chosen option.
249    ExternalFileBuffer U16external(iBuilder, StreamSetType(1, i16));
250    LinearCopybackBuffer U16out(iBuilder, StreamSetType(1, i16), segmentSize * bufferSegments + 2);
251
252    s2pKernel  s2pk(iBuilder);
253    s2pk.generateKernel({&ByteStream}, {&BasisBits});
254
255    pablo_function_passes(function);
256    pablo::PabloKernel  u8u16k(iBuilder, "u8u16", function, {});
257    u8u16k.generateKernel({&BasisBits}, {&U8u16Bits});
258   
259    deletionKernel delK(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
260    delK.generateKernel({&U8u16Bits}, {&U16Bits, &DeletionCounts});
261   
262    p2s_16Kernel_withCompressedOutput p2sk(iBuilder);
263   
264    stdOutKernel stdoutK(iBuilder, 16);
265   
266    if (mMapBuffering || memAlignBuffering) {
267        p2sk.generateKernel({&U16Bits, &DeletionCounts}, {&U16external});
268        stdoutK.generateKernel({&U16external}, {});
269    }
270    else {
271        p2sk.generateKernel({&U16Bits, &DeletionCounts}, {&U16out});
272        stdoutK.generateKernel({&U16out}, {});
273    }
274   
275    Type * const size_ty = iBuilder->getSizeTy();
276    Type * const voidTy = Type::getVoidTy(mMod->getContext());
277    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
278    Type * const outputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 16), 1), 0);
279    Type * const int32ty = iBuilder->getInt32Ty();
280    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
281    Type * const voidPtrTy = TypeBuilder<void *, false>::get(mMod->getContext());
282
283   
284    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
285    main->setCallingConv(CallingConv::C);
286    Function::arg_iterator args = main->arg_begin();
287   
288    Value * const inputStream = &*(args++);
289    inputStream->setName("inputStream");
290    Value * const outputStream = &*(args++);
291    outputStream->setName("outputStream");
292    Value * const fileSize = &*(args++);
293    fileSize->setName("fileSize");
294   
295    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
296       
297
298    ByteStream.setStreamSetBuffer(inputStream, fileSize);
299    BasisBits.allocateBuffer();
300    U8u16Bits.allocateBuffer();
301    U16Bits.allocateBuffer();
302    DeletionCounts.allocateBuffer();
303    if (mMapBuffering || memAlignBuffering) {
304        U16external.setEmptyBuffer(outputStream);
305    }
306    else {
307        U16out.allocateBuffer();
308    }
309    Value * s2pInstance = s2pk.createInstance({});
310    Value * u8u16Instance = u8u16k.createInstance({});
311    Value * delInstance = delK.createInstance({});
312    Value * p2sInstance = p2sk.createInstance({});
313    Value * stdoutInstance = stdoutK.createInstance({});
314   
315    Type * pthreadTy = size_ty;
316    FunctionType * funVoidPtrVoidTy = FunctionType::get(voidTy, int8PtrTy, false);
317   
318    Function * pthreadCreateFunc = cast<Function>(mMod->getOrInsertFunction("pthread_create",
319                                                                         int32ty,
320                                                                         pthreadTy->getPointerTo(),
321                                                                         voidPtrTy,
322                                                                         static_cast<Type *>(funVoidPtrVoidTy)->getPointerTo(),
323                                                                         voidPtrTy, nullptr));
324    pthreadCreateFunc->setCallingConv(llvm::CallingConv::C);
325    Function * pthreadJoinFunc = cast<Function>(mMod->getOrInsertFunction("pthread_join",
326                                                                       int32ty,
327                                                                       pthreadTy,
328                                                                       PointerType::get(int8PtrTy, 0), nullptr));
329    pthreadJoinFunc->setCallingConv(llvm::CallingConv::C);
330   
331    Function * pthreadExitFunc = cast<Function>(mMod->getOrInsertFunction("pthread_exit",
332                                                                       voidTy, 
333                                                                       voidPtrTy, nullptr));
334    pthreadExitFunc->addFnAttr(llvm::Attribute::NoReturn);
335    pthreadExitFunc->setCallingConv(llvm::CallingConv::C);
336
337    if (segmentPipelineParallel){
338        generateSegmentParallelPipeline(iBuilder, {&s2pk, &u8u16k, &delK, &p2sk, &stdoutK}, {s2pInstance, u8u16Instance, delInstance, p2sInstance, stdoutInstance}, fileSize);
339    }
340    else{
341        generatePipelineLoop(iBuilder, {&s2pk, &u8u16k, &delK, &p2sk, &stdoutK}, {s2pInstance, u8u16Instance, delInstance, p2sInstance, stdoutInstance}, fileSize);
342    }
343
344    iBuilder->CreateRetVoid();
345    return main;
346}
347
348
349
350
351
352typedef void (*u8u16FunctionType)(char * byte_data, char * output_data, size_t filesize);
353
354static ExecutionEngine * u8u16Engine = nullptr;
355
356u8u16FunctionType u8u16CodeGen(void) {
357    LLVMContext TheContext;                           
358    Module * M = new Module("u8u16", TheContext);
359    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
360
361    pablo::PabloFunction * function = pablo::u8u16_pablo();
362   
363    llvm::Function * main_IR = u8u16Pipeline(M, idb, function);
364   
365    verifyModule(*M, &dbgs());
366    //std::cerr << "ExecuteKernels(); done\n";
367    u8u16Engine = JIT_to_ExecutionEngine(M);
368   
369    u8u16Engine->finalizeObject();
370    //std::cerr << "finalizeObject(); done\n";
371
372    delete idb;
373    return reinterpret_cast<u8u16FunctionType>(u8u16Engine->getPointerToFunction(main_IR));
374}
375
376void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
377    std::string mFileName = fileName;
378    size_t mFileSize;
379    char * mFileBuffer;
380   
381    const boost::filesystem::path file(mFileName);
382    if (exists(file)) {
383        if (is_directory(file)) {
384            return;
385        }
386    } else {
387        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
388        return;
389    }
390   
391    mFileSize = file_size(file);
392    boost::iostreams::mapped_file_source mFile;
393    if (mFileSize == 0) {
394        mFileBuffer = nullptr;
395    }
396    else {
397        try {
398            mFile.open(mFileName);
399        } catch (std::exception &e) {
400            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
401            return;
402        }
403        mFileBuffer = const_cast<char *>(mFile.data());
404    }
405
406    if (mMapBuffering) {
407        boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2*mFileSize));
408        outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
409        outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
410        fn_ptr(mFileBuffer, static_cast<char*>(outputBuffer.get_address()), mFileSize);
411    }
412    else if (memAlignBuffering) {
413        char * outputBuffer;
414        posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2*mFileSize);
415        fn_ptr(mFileBuffer, outputBuffer, mFileSize);
416        free(reinterpret_cast<void *>(outputBuffer));
417    }
418    else {
419        /* No external output buffer */
420        fn_ptr(mFileBuffer, nullptr, mFileSize);
421    }
422    mFile.close();
423   
424}
425
426
427int main(int argc, char *argv[]) {
428    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
429    cl::ParseCommandLineOptions(argc, argv);
430
431    u8u16FunctionType fn_ptr = u8u16CodeGen();
432
433    for (unsigned i = 0; i != inputFiles.size(); ++i) {
434        u8u16(fn_ptr, inputFiles[i]);
435    }
436
437    return 0;
438}
439
440                       
Note: See TracBrowser for help on using the repository browser.