source: icGREP/icgrep-devel/icgrep/u8u16.cpp @ 6079

Last change on this file since 6079 was 6047, checked in by nmedfort, 17 months ago

Major refactoring of buffer types. Static buffers replace Circular and CircularCopyback. External buffers unify Source/External.

File size: 19.6 KB
Line 
/*
 *  Copyright (c) 2016 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 *  icgrep is a trademark of International Characters.
 */
6
#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
#include <cc/cc_compiler.h>                        // for CC_Compiler
#include <kernels/deletion.h>                      // for DeletionKernel
#include <kernels/swizzle.h>                      // for DeletionKernel
#include <kernels/source_kernel.h>
#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
#include <kernels/s2p_kernel.h>                    // for S2PKernel
#include <kernels/stdout_kernel.h>                 // for StdOutKernel_
#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for ExecutionEngine
#include <llvm/IR/Function.h>                      // for Function, Function...
#include <llvm/IR/Module.h>                        // for Module
#include <llvm/IR/Verifier.h>                      // for verifyModule
#include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
#include <llvm/Support/Debug.h>                    // for dbgs
#include <pablo/pablo_kernel.h>                    // for PabloKernel
#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
#include <kernels/kernel_builder.h>
#include <pablo/pe_zeroes.h>
#include <toolchain/toolchain.h>
#include <toolchain/cpudriver.h>
#include <kernels/streamset.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/IR/CallingConv.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Value.h>
#include <llvm/Support/Compiler.h>
#include <pablo/builder.hpp>
#include <boost/interprocess/anonymous_shared_memory.hpp>
#include <boost/interprocess/mapped_region.hpp>
#include <algorithm>                               // for std::max
#include <cassert>                                 // for assert
#include <cstdlib>                                 // for posix_memalign, free
#include <iostream>
#include <memory>                                  // for std::unique_ptr
#include <stdexcept>                               // for std::runtime_error
#include <string>                                  // for std::string, std::to_string
#include <fcntl.h>                                 // for open, O_RDONLY
#include <sys/stat.h>                              // for fstat
#include <unistd.h>                                // for close
38
39using namespace pablo;
40using namespace kernel;
41using namespace parabix;
42using namespace llvm;
43
44static cl::OptionCategory u8u16Options("u8u16 Options", "Transcoding control options.");
45static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(u8u16Options));
46static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"), cl::cat(u8u16Options));
47static cl::opt<bool> enableAVXdel("enable-AVX-deletion", cl::desc("Enable AVX2 deletion algorithms."), cl::cat(u8u16Options));
48static cl::opt<bool> mMapBuffering("mmap-buffering", cl::desc("Enable mmap buffering."), cl::cat(u8u16Options));
49static cl::opt<bool> memAlignBuffering("memalign-buffering", cl::desc("Enable posix_memalign buffering."), cl::cat(u8u16Options));
50
51class U8U16Kernel final: public pablo::PabloKernel {
52public:
53    U8U16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b);
54    bool isCachable() const override { return true; }
55    bool hasSignature() const override { return false; }
56    void generatePabloMethod() override;
57};
58
59U8U16Kernel::U8U16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b)
60: PabloKernel(b, "u8u16",
61{Binding{b->getStreamSetTy(8, 1), "u8bit"}},
62{Binding{b->getStreamSetTy(16, 1), "u16bit"}, Binding{b->getStreamSetTy(1, 1), "delMask"}}) {
63
64}
65
66void U8U16Kernel::generatePabloMethod() {
67    PabloBuilder main(getEntryScope());
68    Zeroes * zeroes = main.createZeroes();
69   
70    //  input: 8 basis bit streams
71    std::vector<PabloAST *> u8_bits = getInputStreamSet("u8bit");
72
73    //  output: 16 u8-indexed streams, + delmask stream + error stream
74    Var * u16_hi[8];
75    for (int i = 0; i < 8; ++i) {
76        u16_hi[i] = main.createVar("u16_hi" + std::to_string(i), zeroes);
77    }
78    Var * u16_lo[8];
79    for (int i = 0; i < 8; ++i) {
80        u16_lo[i] = main.createVar("u16_lo" + std::to_string(i), zeroes);
81    }
82   
83    Var * delmask = main.createVar("delmask", zeroes);
84    Var * error_mask = main.createVar("error_mask", zeroes);
85
86    cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits);
87
88    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
89    PabloAST * nonASCII = ccc.compileCC(re::makeByte(0x80, 0xFF));
90
91    // Builder for the if statement handling all non-ASCII logic
92    auto nAb = main.createScope();
93    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
94    // produce the UTF-16 code unit data ...,
95    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
96    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
97    PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
98    PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
99    PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
100
101    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
102    PabloAST * pfx34 = ccc.compileCC(re::makeByte(0xE0, 0xFF), nAb);
103    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
104    auto p34b = nAb.createScope();
105    // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
106    // to the final position of the 3-byte sequence.
107    PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
108    PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
109    PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
110    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
111    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
112
113    Var * const u8scope32 = nAb.createVar("u8scope32", zeroes);
114    Var * const u8scope33 = nAb.createVar("u8scope33", zeroes);
115    Var * const u8scope44 = nAb.createVar("u8scope44", zeroes);
116
117    //
118    // Logic for 4-byte UTF-8 sequences
119    //
120    // Entry condition  or 4 byte sequences: we have a prefix byte in the range 0xF0-0xFF.
121    PabloAST * pfx4 = ccc.compileCC(re::makeByte(0xF0, 0xFF), p34b);
122    // Builder for the if statement handling all logic for 4-byte sequences only.
123    auto p4b = p34b.createScope();
124    // Illegal 4-byte sequences
125    PabloAST * F0 = ccc.compileCC(re::makeByte(0xF0), p4b);
126    PabloAST * F4 = ccc.compileCC(re::makeByte(0xF4), p4b);
127    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeByte(0x80, 0x8F), p4b));
128    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeByte(0x90, 0xBF), p4b));
129    PabloAST * F5_FF = ccc.compileCC(re::makeByte(0xF5, 0xFF), p4b);
130
131    Var * FX_err = p34b.createVar("FX_err", zeroes);
132    p4b.createAssign(FX_err, p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
133    //
134    // 4-byte prefixes have a scope that extends over the next 3 bytes.
135
136    Var * u8scope42 = p34b.createVar("u8scope42", zeroes);
137    Var * u8scope43 = p34b.createVar("u8scope43", zeroes);
138
139    p4b.createAssign(u8scope42, p4b.createAdvance(pfx4, 1));
140    p4b.createAssign(u8scope43, p4b.createAdvance(u8scope42, 1));
141    p4b.createAssign(u8scope44, p4b.createAdvance(u8scope43, 1));
142    //
143
144    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
145    //  we must calculate the value abcde - 1 to produce the bit values
146    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
147    Var * s43_lo0 = nAb.createVar("scope43_lo0", zeroes);
148    Var * s43_lo1 = nAb.createVar("scope43_lo1", zeroes);
149    Var * s43_hi6 = nAb.createVar("scope43_hi6", zeroes);
150    Var * s43_hi7 = nAb.createVar("scope43_hi7", zeroes);
151
152    Var * s43_lo2 = main.createVar("scope43_lo2", zeroes);
153    Var * s43_lo3 = main.createVar("scope43_lo3", zeroes);
154    Var * s43_lo4 = main.createVar("scope43_lo4", zeroes);
155    Var * s43_lo5 = main.createVar("scope43_lo5", zeroes);
156    Var * s43_lo6 = main.createVar("scope43_lo6", zeroes);
157    Var * s43_lo7 = main.createVar("scope43_lo7", zeroes);
158
159    p4b.createAssign(s43_lo1, p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
160    p4b.createAssign(s43_lo0, p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
161    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
162    p4b.createAssign(s43_hi7, p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
163    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
164    p4b.createAssign(s43_hi6, p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
165    //
166    p4b.createAssign(s43_lo2, p4b.createAnd(u8scope43, bit4a1));
167    p4b.createAssign(s43_lo3, p4b.createAnd(u8scope43, bit5a1));
168    p4b.createAssign(s43_lo4, p4b.createAnd(u8scope43, bit6a1));
169    p4b.createAssign(s43_lo5, p4b.createAnd(u8scope43, bit7a1));
170    p4b.createAssign(s43_lo6, p4b.createAnd(u8scope43, u8_bits[2]));
171    p4b.createAssign(s43_lo7, p4b.createAnd(u8scope43, u8_bits[3]));
172    //
173    //
174    p34b.createIf(pfx4, p4b);
175    //
176    // Combined logic for 3 and 4 byte sequences
177    //
178    PabloAST * pfx3 = ccc.compileCC(re::makeByte(0xE0, 0xEF), p34b);
179
180    p34b.createAssign(u8scope32, p34b.createAdvance(pfx3, 1));
181    p34b.createAssign(u8scope33, p34b.createAdvance(u8scope32, 1));
182
183    // Illegal 3-byte sequences
184    PabloAST * E0 = ccc.compileCC(re::makeByte(0xE0), p34b);
185    PabloAST * ED = ccc.compileCC(re::makeByte(0xED), p34b);
186    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeByte(0x80, 0x9F), p34b));
187    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeByte(0xA0, 0xBF), p34b));
188    Var * EX_FX_err = nAb.createVar("EX_FX_err", zeroes);
189
190    p34b.createAssign(EX_FX_err, p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
191    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
192    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
193
194    Var * p34del = nAb.createVar("p34del", zeroes);
195    p34b.createAssign(p34del, p34b.createOr(u8scope32, u8scope42));
196
197
198    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
199    // UTF-8 sequences.
200    p34b.createAssign(u16_hi[0], p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
201    p34b.createAssign(u16_hi[1], p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
202    p34b.createAssign(u16_hi[2], p34b.createAnd(u8scope33, bit6a2));
203    p34b.createAssign(u16_hi[3], p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
204    p34b.createAssign(u16_hi[4], p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
205
206    //
207    nAb.createIf(pfx34, p34b);
208    //
209    // Combined logic for 2, 3 and 4 byte sequences
210    //
211
212    Var * u8lastscope = main.createVar("u8lastscope", zeroes);
213
214    PabloAST * pfx2 = ccc.compileCC(re::makeByte(0xC0, 0xDF), nAb);
215    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
216    nAb.createAssign(u8lastscope, nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
217    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
218
219    PabloAST * C0_C1_err = ccc.compileCC(re::makeByte(0xC0, 0xC1), nAb);
220    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeByte(0x80, 0xBF), nAb));
221    nAb.createAssign(error_mask, nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
222    nAb.createAssign(delmask, nAb.createOr(p34del, ccc.compileCC(re::makeByte(0xC0, 0xFF), nAb)));
223
224    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
225    // low byte are only nonzero for 2, 3 and 4 byte sequences.
226    nAb.createAssign(u16_hi[5], nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
227    nAb.createAssign(u16_hi[6], nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
228    nAb.createAssign(u16_hi[7], nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
229    nAb.createAssign(u16_lo[0], nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
230
231    Var * p234_lo1 = main.createVar("p234_lo1", zeroes);
232
233    nAb.createAssign(p234_lo1, nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
234
235    main.createIf(nonASCII, nAb);
236    //
237    //
238    PabloAST * ASCII = ccc.compileCC(re::makeByte(0x0, 0x7F));
239    PabloAST * last_byte = main.createOr(ASCII, u8lastscope);
240    main.createAssign(u16_lo[1], main.createOr(main.createAnd(ASCII, u8_bits[1]), p234_lo1));
241    main.createAssign(u16_lo[2], main.createOr(main.createAnd(last_byte, u8_bits[2]), s43_lo2));
242    main.createAssign(u16_lo[3], main.createOr(main.createAnd(last_byte, u8_bits[3]), s43_lo3));
243    main.createAssign(u16_lo[4], main.createOr(main.createAnd(last_byte, u8_bits[4]), s43_lo4));
244    main.createAssign(u16_lo[5], main.createOr(main.createAnd(last_byte, u8_bits[5]), s43_lo5));
245    main.createAssign(u16_lo[6], main.createOr(main.createAnd(last_byte, u8_bits[6]), s43_lo6));
246    main.createAssign(u16_lo[7], main.createOr(main.createAnd(last_byte, u8_bits[7]), s43_lo7));
247
248    Var * output = getOutputStreamVar("u16bit");
249    Var * delmask_out = getOutputStreamVar("delMask");
250    for (unsigned i = 0; i < 8; i++) {
251        main.createAssign(main.createExtract(output, i), u16_hi[i]);
252    }
253    for (unsigned i = 0; i < 8; i++) {
254        main.createAssign(main.createExtract(output, i + 8), u16_lo[i]);
255    }
256    main.createAssign(main.createExtract(delmask_out, main.getInteger(0)), main.createInFile(main.createNot(delmask)));
257}
258
259void generatePipeline(ParabixDriver & pxDriver) {
260
261    auto & iBuilder = pxDriver.getBuilder();
262    Module * mod = iBuilder->getModule();
263
264    const unsigned bufferSize = codegen::SegmentSize * codegen::ThreadNum;
265
266    assert (iBuilder);
267
268    Type * const voidTy = iBuilder->getVoidTy();
269    Type * const bitBlockType = iBuilder->getBitBlockType();
270    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
271
272    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, iBuilder->getInt32Ty(), outputType, nullptr));
273    main->setCallingConv(CallingConv::C);
274    Function::arg_iterator args = main->arg_begin();
275
276    Value * const fileDecriptor = &*(args++);
277    fileDecriptor->setName("fileDecriptor");
278    Value * const outputStream = &*(args++);
279    outputStream->setName("outputStream");
280
281    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
282
283    // File data from mmap
284    StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
285
286    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
287    mmapK->setInitialArguments({fileDecriptor});
288    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
289
290    // Transposed bits from s2p
291    StreamSetBuffer * BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), bufferSize);
292
293    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
294    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
295
296
297    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
298    StreamSetBuffer * u8bits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
299    StreamSetBuffer * DelMask = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
300
301    Kernel * u8u16k = pxDriver.addKernelInstance<U8U16Kernel>(iBuilder);
302    pxDriver.makeKernelCall(u8u16k, {BasisBits}, {u8bits, DelMask});
303
304    StreamSetBuffer * u16bits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
305
306    const auto avx2 = enableAVXdel && AVX2_available() && codegen::BlockSize==256;
307
308    // Different choices for the output buffer depending on chosen option.
309    StreamSetBuffer * u16bytes = nullptr;
310    if (mMapBuffering || memAlignBuffering) {
311        u16bytes = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), outputStream);
312    } else if (avx2) {
313        u16bytes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize);
314    } else {
315        u16bytes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize, 1);
316    }
317
318    if (avx2) {
319        // Allocate space for fully compressed swizzled UTF-16 bit streams
320        StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
321        StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
322        StreamSetBuffer * u16Swizzle2 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
323        StreamSetBuffer * u16Swizzle3 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
324        // Apply a deletion algorithm to discard all but the final position of the UTF-8
325        // sequences (bit streams) for each UTF-16 code unit. Also compresses and swizzles the result.
326        Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 16);
327        pxDriver.makeKernelCall(delK, {DelMask, u8bits}, {u16Swizzle0, u16Swizzle1, u16Swizzle2, u16Swizzle3});
328        // Produce unswizzled UTF-16 bit streams
329        Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 16, 1, 4);
330        pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1, u16Swizzle2, u16Swizzle3}, {u16bits});
331        Kernel * p2sk = pxDriver.addKernelInstance<P2S16Kernel>(iBuilder);
332        pxDriver.makeKernelCall(p2sk, {u16bits}, {u16bytes});
333    } else {
334        StreamSetBuffer * DeletionCounts = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
335        Kernel * delK = pxDriver.addKernelInstance<FieldCompressKernel>(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
336        pxDriver.makeKernelCall(delK, {u8bits, DelMask}, {u16bits, DeletionCounts});
337        Kernel * p2sk = pxDriver.addKernelInstance<P2S16KernelWithCompressedOutput>(iBuilder);
338        pxDriver.makeKernelCall(p2sk, {u16bits, DeletionCounts}, {u16bytes});
339    }
340
341    Kernel * outK = nullptr;
342    if (outputFile.empty()) {
343        outK = pxDriver.addKernelInstance<StdOutKernel>(iBuilder, 16);
344    } else {
345        outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 16);
346        Value * fName = iBuilder->CreatePointerCast(iBuilder->GetString(outputFile.c_str()), iBuilder->getInt8PtrTy());
347        outK->setInitialArguments({fName});
348    }
349    pxDriver.makeKernelCall(outK, {u16bytes}, {});
350
351    pxDriver.generatePipelineIR();
352
353    pxDriver.deallocateBuffers();
354
355    iBuilder->CreateRetVoid();
356
357    pxDriver.finalizeObject();
358}
359
// Signature of the JIT-compiled pipeline entry point: an input file descriptor and an
// optional caller-provided output buffer (null when no external buffer is used).
using u8u16FunctionType = void (*)(uint32_t fd, char * output_data);
361
/**
 * Return the size in bytes reported by fstat for the open descriptor `fd`.
 *
 * @param fd an open file descriptor.
 * @return st_size of the file, or 0 when fstat fails or reports a negative size.
 */
size_t file_size(const int fd) {
    struct stat st{};
    // Treat a failed query (or a nonsensical negative size) as an empty file so that a
    // negative off_t is never converted into a huge size_t.
    if (fstat(fd, &st) != 0 || st.st_size < 0) {
        return 0;
    }
    return static_cast<size_t>(st.st_size);
}
369
370void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
371    const int fd = open(fileName.c_str(), O_RDONLY);
372    if (LLVM_UNLIKELY(fd == -1)) {
373        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
374    } else {
375        const auto fileSize = file_size(fd);
376        if (mMapBuffering) {
377            boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2 * fileSize));
378            outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
379            outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
380            fn_ptr(fd, static_cast<char*>(outputBuffer.get_address()));
381        } else if (memAlignBuffering) {
382            char * outputBuffer;
383            const auto r = posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2 * fileSize);
384            if (LLVM_UNLIKELY(r != 0)) {
385                throw std::runtime_error("posix_memalign failed with return code " + std::to_string(r));
386            }
387            fn_ptr(fd, outputBuffer);
388            free(reinterpret_cast<void *>(outputBuffer));
389        } else { /* No external output buffer */
390            fn_ptr(fd, nullptr);
391        }
392        close(fd);
393    }
394}
395
396int main(int argc, char *argv[]) {
397    codegen::ParseCommandLineOptions(argc, argv, {&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
398    ParabixDriver pxDriver("u8u16");
399    generatePipeline(pxDriver);
400    auto u8u16Function = reinterpret_cast<u8u16FunctionType>(pxDriver.getMain());
401    u8u16(u8u16Function, inputFile);
402    return 0;
403}
404
405                       
406
Note: See TracBrowser for help on using the repository browser.