source: icGREP/icgrep-devel/icgrep/u8u16.cpp @ 6161

Last change on this file since 6161 was 6089, checked in by cameron, 16 months ago

Little-endian/big-endian bit number options, default to little-endian

File size: 19.8 KB
RevLine 
[5005]1/*
[5036]2 *  Copyright (c) 2016 International Characters.
[5005]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[5267]7#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
[6089]8#include <cc/alphabet.h>
[5267]9#include <cc/cc_compiler.h>                        // for CC_Compiler
10#include <kernels/deletion.h>                      // for DeletionKernel
[5355]11#include <kernels/swizzle.h>                      // for DeletionKernel
[5429]12#include <kernels/source_kernel.h>
[5267]13#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
14#include <kernels/s2p_kernel.h>                    // for S2PKernel
[5540]15#include <kernels/stdout_kernel.h>                 // for StdOutKernel_
[5267]16#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for ExecutionEngine
17#include <llvm/IR/Function.h>                      // for Function, Function...
18#include <llvm/IR/Module.h>                        // for Module
19#include <llvm/IR/Verifier.h>                      // for verifyModule
20#include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
21#include <llvm/Support/Debug.h>                    // for dbgs
22#include <pablo/pablo_kernel.h>                    // for PabloKernel
23#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
[5436]24#include <kernels/kernel_builder.h>
[5267]25#include <pablo/pe_zeroes.h>
[5425]26#include <toolchain/toolchain.h>
[5464]27#include <toolchain/cpudriver.h>
28#include <kernels/streamset.h>
29#include <llvm/ADT/StringRef.h>
30#include <llvm/IR/CallingConv.h>
31#include <llvm/IR/DerivedTypes.h>
32#include <llvm/IR/LLVMContext.h>
33#include <llvm/IR/Value.h>
34#include <llvm/Support/Compiler.h>
35#include <pablo/builder.hpp>
[5418]36#include <boost/interprocess/anonymous_shared_memory.hpp>
37#include <boost/interprocess/mapped_region.hpp>
[5005]38#include <iostream>
39
// Bring the Pablo, kernel, Parabix and LLVM namespaces into scope for the
// unqualified names (PabloKernel, Kernel, StreamSetBuffer, cl::opt, ...) used below.
[5267]40using namespace pablo;
41using namespace kernel;
42using namespace parabix;
43using namespace llvm;
[5007]44
// Command-line options; every option below is registered under this category.
[5267]45static cl::OptionCategory u8u16Options("u8u16 Options", "Transcoding control options.");
// Required positional argument: path of the file passed to u8u16() in main().
[5306]46static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(u8u16Options));
// Optional positional argument: when empty, generatePipeline() uses StdOutKernel
// instead of FileSink.
[5605]47static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"), cl::cat(u8u16Options));
// Requests the swizzled PEXT deletion path; generatePipeline() only honours it when
// AVX2_available() is true and codegen::BlockSize is 256.
[5355]48static cl::opt<bool> enableAVXdel("enable-AVX-deletion", cl::desc("Enable AVX2 deletion algorithms."), cl::cat(u8u16Options));
// Output buffering choices: either flag makes generatePipeline() use an ExternalBuffer
// over the caller-supplied output pointer, which u8u16() allocates accordingly.
// mMapBuffering is tested first in u8u16(), so it wins if both are given.
[5191]49static cl::opt<bool> mMapBuffering("mmap-buffering", cl::desc("Enable mmap buffering."), cl::cat(u8u16Options));
50static cl::opt<bool> memAlignBuffering("memalign-buffering", cl::desc("Enable posix_memalign buffering."), cl::cat(u8u16Options));
[5154]51
// Pablo kernel for the UTF-8 to UTF-16 transcoding logic.
// The constructor declares the stream bindings ("u8bit" in; "u16bit" and "delMask" out)
// and generatePabloMethod() emits the bitwise logic that connects them.
[5436]52class U8U16Kernel final: public pablo::PabloKernel {
53public:
    // b: kernel builder used to obtain the stream set types for the bindings.
54    U8U16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b);
    // Always reports the kernel as cachable.
    // NOTE(review): presumably this lets the driver reuse a previously compiled
    // object for this kernel -- confirm against the PabloKernel/Kernel base class.
55    bool isCachable() const override { return true; }
    // Always reports that the kernel has no signature.
    // NOTE(review): presumably the kernel name alone identifies it in the cache -- confirm.
[5464]56    bool hasSignature() const override { return false; }
    // Emits the Pablo statements for the transcoding logic (defined below).
[5436]57    void generatePabloMethod() override;
58};
[5005]59
// Constructs the kernel under the name "u8u16" with:
//   input  "u8bit"   : stream set type (8, 1)  -- read as 8 basis bit streams,
//   output "u16bit"  : stream set type (16, 1) -- the 16 UTF-16 bit streams,
//   output "delMask" : stream set type (1, 1)  -- a single mask stream.
// The body is empty; all logic lives in generatePabloMethod().
[5436]60U8U16Kernel::U8U16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b)
61: PabloKernel(b, "u8u16",
62{Binding{b->getStreamSetTy(8, 1), "u8bit"}},
[5757]63{Binding{b->getStreamSetTy(16, 1), "u16bit"}, Binding{b->getStreamSetTy(1, 1), "delMask"}}) {
[5414]64
[5436]65}
[5435]66
// Emits the Pablo program that derives the 16 UTF-16 bit streams ("u16bit") and the
// "delMask" stream from the 8 UTF-8 basis bit streams ("u8bit").
//
// The statements are organised as three nested if-scopes, each attached with createIf():
//   main
//    +-- nAb  : guarded by nonASCII (bytes 0x80-0xFF)
//         +-- p34b : guarded by pfx34 (prefix bytes 0xE0-0xFF)
//              +-- p4b : guarded by pfx4 (prefix bytes 0xF0-0xFF)
// Values that must be read outside the scope that assigns them are Vars created in an
// enclosing scope with an all-zero initial value.
//
// The character class compiler is configured with cc::BitNumbering::BigEndian; the
// existing comments number bits accordingly (e.g. bits 3-7 of a 2-byte prefix are its
// data bits).
[5436]67void U8U16Kernel::generatePabloMethod() {
[5842]68    PabloBuilder main(getEntryScope());
69    Zeroes * zeroes = main.createZeroes();
70   
[5005]71    //  input: 8 basis bit streams
[5843]72    std::vector<PabloAST *> u8_bits = getInputStreamSet("u8bit");
[5436]73
[5005]74    //  output: 16 u8-indexed streams, + delmask stream + error stream
    //  The high and low byte of each UTF-16 code unit are held as 8 Vars each, all
    //  initialised to zeroes and assigned in the scopes below.
[5202]75    Var * u16_hi[8];
76    for (int i = 0; i < 8; ++i) {
77        u16_hi[i] = main.createVar("u16_hi" + std::to_string(i), zeroes);
78    }
79    Var * u16_lo[8];
80    for (int i = 0; i < 8; ++i) {
81        u16_lo[i] = main.createVar("u16_lo" + std::to_string(i), zeroes);
82    }
[5842]83   
[5202]84    Var * delmask = main.createVar("delmask", zeroes);
    // NOTE(review): error_mask is assigned in the non-ASCII scope below but is not
    // written to any output stream in this method.
85    Var * error_mask = main.createVar("error_mask", zeroes);
86
[6089]87    cc::Parabix_CC_Compiler ccc(getEntryScope(), u8_bits, cc::BitNumbering::BigEndian);
[5842]88
[5202]89    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
[5797]90    PabloAST * nonASCII = ccc.compileCC(re::makeByte(0x80, 0xFF));
[5436]91
[5005]92    // Builder for the if statement handling all non-ASCII logic
[5836]93    auto nAb = main.createScope();
[5005]94    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
[5436]95    // produce the UTF-16 code unit data ...,
[5005]96    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
97    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
98    PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
99    PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
100    PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
[5436]101
[5005]102    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
[5797]103    PabloAST * pfx34 = ccc.compileCC(re::makeByte(0xE0, 0xFF), nAb);
[5005]104    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
[5836]105    auto p34b = nAb.createScope();
[5005]106    // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
107    // to the final position of the 3-byte sequence.
108    PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
109    PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
110    PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
111    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
112    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
[5202]113
    // Scope markers created in the non-ASCII scope: u8scope33 and u8scope44 are read
    // there (for u8lastscope) after the 3/4-byte if-block has been attached.
114    Var * const u8scope32 = nAb.createVar("u8scope32", zeroes);
115    Var * const u8scope33 = nAb.createVar("u8scope33", zeroes);
116    Var * const u8scope44 = nAb.createVar("u8scope44", zeroes);
117
[5005]118    //
119    // Logic for 4-byte UTF-8 sequences
120    //
121    // Entry condition for 4 byte sequences: we have a prefix byte in the range 0xF0-0xFF.
[5797]122    PabloAST * pfx4 = ccc.compileCC(re::makeByte(0xF0, 0xFF), p34b);
[5005]123    // Builder for the if statement handling all logic for 4-byte sequences only.
[5836]124    auto p4b = p34b.createScope();
[5005]125    // Illegal 4-byte sequences
    // F0 followed by 0x80-0x8F, F4 followed by 0x90-0xBF, and any prefix 0xF5-0xFF
    // are flagged as errors.
[5797]126    PabloAST * F0 = ccc.compileCC(re::makeByte(0xF0), p4b);
127    PabloAST * F4 = ccc.compileCC(re::makeByte(0xF4), p4b);
128    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeByte(0x80, 0x8F), p4b));
129    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeByte(0x90, 0xBF), p4b));
130    PabloAST * F5_FF = ccc.compileCC(re::makeByte(0xF5, 0xFF), p4b);
[5202]131
132    Var * FX_err = p34b.createVar("FX_err", zeroes);
133    p4b.createAssign(FX_err, p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
[5005]134    //
135    // 4-byte prefixes have a scope that extends over the next 3 bytes.
[5202]136
137    Var * u8scope42 = p34b.createVar("u8scope42", zeroes);
138    Var * u8scope43 = p34b.createVar("u8scope43", zeroes);
139
140    p4b.createAssign(u8scope42, p4b.createAdvance(pfx4, 1));
141    p4b.createAssign(u8scope43, p4b.createAdvance(u8scope42, 1));
142    p4b.createAssign(u8scope44, p4b.createAdvance(u8scope43, 1));
[5005]143    //
[5436]144
[5005]145    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
146    //  we must calculate the value abcde - 1 to produce the bit values
147    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
[5202]148    Var * s43_lo0 = nAb.createVar("scope43_lo0", zeroes);
149    Var * s43_lo1 = nAb.createVar("scope43_lo1", zeroes);
150    Var * s43_hi6 = nAb.createVar("scope43_hi6", zeroes);
151    Var * s43_hi7 = nAb.createVar("scope43_hi7", zeroes);
152
    // lo2..lo7 are created in the outermost scope because they are consumed by the
    // final u16_lo assignments made through `main` at the end of this method.
153    Var * s43_lo2 = main.createVar("scope43_lo2", zeroes);
154    Var * s43_lo3 = main.createVar("scope43_lo3", zeroes);
155    Var * s43_lo4 = main.createVar("scope43_lo4", zeroes);
156    Var * s43_lo5 = main.createVar("scope43_lo5", zeroes);
157    Var * s43_lo6 = main.createVar("scope43_lo6", zeroes);
158    Var * s43_lo7 = main.createVar("scope43_lo7", zeroes);
159
    // Bitwise subtract-one with borrow propagation (brw1, brw2), masked to scope43.
160    p4b.createAssign(s43_lo1, p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
161    p4b.createAssign(s43_lo0, p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
[5005]162    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
[5202]163    p4b.createAssign(s43_hi7, p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
[5005]164    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
[5202]165    p4b.createAssign(s43_hi6, p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
[5005]166    //
[5202]167    p4b.createAssign(s43_lo2, p4b.createAnd(u8scope43, bit4a1));
168    p4b.createAssign(s43_lo3, p4b.createAnd(u8scope43, bit5a1));
169    p4b.createAssign(s43_lo4, p4b.createAnd(u8scope43, bit6a1));
170    p4b.createAssign(s43_lo5, p4b.createAnd(u8scope43, bit7a1));
171    p4b.createAssign(s43_lo6, p4b.createAnd(u8scope43, u8_bits[2]));
172    p4b.createAssign(s43_lo7, p4b.createAnd(u8scope43, u8_bits[3]));
[5005]173    //
174    //
    // Attach the 4-byte scope to the 3/4-byte scope, guarded by pfx4.
[5202]175    p34b.createIf(pfx4, p4b);
[5005]176    //
177    // Combined logic for 3 and 4 byte sequences
178    //
[5797]179    PabloAST * pfx3 = ccc.compileCC(re::makeByte(0xE0, 0xEF), p34b);
[5005]180
[5202]181    p34b.createAssign(u8scope32, p34b.createAdvance(pfx3, 1));
182    p34b.createAssign(u8scope33, p34b.createAdvance(u8scope32, 1));
183
[5005]184    // Illegal 3-byte sequences
    // E0 followed by 0x80-0x9F and ED followed by 0xA0-0xBF are flagged as errors.
[5797]185    PabloAST * E0 = ccc.compileCC(re::makeByte(0xE0), p34b);
186    PabloAST * ED = ccc.compileCC(re::makeByte(0xED), p34b);
187    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeByte(0x80, 0x9F), p34b));
188    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeByte(0xA0, 0xBF), p34b));
[5202]189    Var * EX_FX_err = nAb.createVar("EX_FX_err", zeroes);
190
191    p34b.createAssign(EX_FX_err, p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
[5005]192    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
193    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
[5436]194
    // p34del marks the second byte of 3- and 4-byte sequences; it is OR-ed into
    // delmask in the non-ASCII scope below.
[5202]195    Var * p34del = nAb.createVar("p34del", zeroes);
196    p34b.createAssign(p34del, p34b.createOr(u8scope32, u8scope42));
[5005]197
198
199    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
200    // UTF-8 sequences.
    // Note that u16_hi[2] is the only one of the five that does not include `surrogate`.
[5202]201    p34b.createAssign(u16_hi[0], p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
202    p34b.createAssign(u16_hi[1], p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
203    p34b.createAssign(u16_hi[2], p34b.createAnd(u8scope33, bit6a2));
204    p34b.createAssign(u16_hi[3], p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
205    p34b.createAssign(u16_hi[4], p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
[5436]206
[5005]207    //
    // Attach the 3/4-byte scope to the non-ASCII scope, guarded by pfx34.
[5202]208    nAb.createIf(pfx34, p34b);
[5005]209    //
210    // Combined logic for 2, 3 and 4 byte sequences
211    //
[5202]212
    // u8lastscope marks the final byte position of every 2-, 3- and 4-byte sequence.
213    Var * u8lastscope = main.createVar("u8lastscope", zeroes);
214
[5797]215    PabloAST * pfx2 = ccc.compileCC(re::makeByte(0xC0, 0xDF), nAb);
[5005]216    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
[5202]217    nAb.createAssign(u8lastscope, nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
[5005]218    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
219
    // An error is any position where "a suffix byte is expected" and "a suffix byte
    // (0x80-0xBF) is present" disagree, plus the C0/C1 prefixes and the E/F errors above.
[5797]220    PabloAST * C0_C1_err = ccc.compileCC(re::makeByte(0xC0, 0xC1), nAb);
221    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeByte(0x80, 0xBF), nAb));
[5202]222    nAb.createAssign(error_mask, nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
    // Positions to delete: every prefix byte (0xC0-0xFF) and the p34del positions.
[5797]223    nAb.createAssign(delmask, nAb.createOr(p34del, ccc.compileCC(re::makeByte(0xC0, 0xFF), nAb)));
[5436]224
[5005]225    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
226    // low byte are only nonzero for 2, 3 and 4 byte sequences.
[5202]227    nAb.createAssign(u16_hi[5], nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
228    nAb.createAssign(u16_hi[6], nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
229    nAb.createAssign(u16_hi[7], nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
230    nAb.createAssign(u16_lo[0], nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
[5005]231
[5202]232    Var * p234_lo1 = main.createVar("p234_lo1", zeroes);
233
234    nAb.createAssign(p234_lo1, nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
235
    // Attach the non-ASCII scope to the entry scope, guarded by nonASCII.
236    main.createIf(nonASCII, nAb);
[5005]237    //
238    //
    // Remaining low-byte bits: taken directly from the input at ASCII bytes and at the
    // last byte of multi-byte sequences, OR-ed with the scope43 values computed above.
[5797]239    PabloAST * ASCII = ccc.compileCC(re::makeByte(0x0, 0x7F));
[5202]240    PabloAST * last_byte = main.createOr(ASCII, u8lastscope);
241    main.createAssign(u16_lo[1], main.createOr(main.createAnd(ASCII, u8_bits[1]), p234_lo1));
242    main.createAssign(u16_lo[2], main.createOr(main.createAnd(last_byte, u8_bits[2]), s43_lo2));
243    main.createAssign(u16_lo[3], main.createOr(main.createAnd(last_byte, u8_bits[3]), s43_lo3));
244    main.createAssign(u16_lo[4], main.createOr(main.createAnd(last_byte, u8_bits[4]), s43_lo4));
245    main.createAssign(u16_lo[5], main.createOr(main.createAnd(last_byte, u8_bits[5]), s43_lo5));
246    main.createAssign(u16_lo[6], main.createOr(main.createAnd(last_byte, u8_bits[6]), s43_lo6));
247    main.createAssign(u16_lo[7], main.createOr(main.createAnd(last_byte, u8_bits[7]), s43_lo7));
[5436]248
    // Copy the Vars to the output stream sets: "u16bit" streams 0-7 receive the high
    // byte bits and streams 8-15 the low byte bits.
249    Var * output = getOutputStreamVar("u16bit");
250    Var * delmask_out = getOutputStreamVar("delMask");
[5005]251    for (unsigned i = 0; i < 8; i++) {
[5202]252        main.createAssign(main.createExtract(output, i), u16_hi[i]);
[5005]253    }
[5202]254    for (unsigned i = 0; i < 8; i++) {
255        main.createAssign(main.createExtract(output, i + 8), u16_lo[i]);
256    }
    // The "delMask" output is the complement of the internal delmask, passed through
    // createInFile(). NOTE(review): i.e. it presumably marks the positions to keep,
    // limited to positions inside the file -- confirm against the deletion kernels.
[6006]257    main.createAssign(main.createExtract(delmask_out, main.getInteger(0)), main.createInFile(main.createNot(delmask)));
[5005]258}
259
[5985]260void generatePipeline(ParabixDriver & pxDriver) {
[5005]261
[5435]262    auto & iBuilder = pxDriver.getBuilder();
[5395]263    Module * mod = iBuilder->getModule();
[5362]264
[5985]265    const unsigned bufferSize = codegen::SegmentSize * codegen::ThreadNum;
266
[5217]267    assert (iBuilder);
[5154]268
[5254]269    Type * const voidTy = iBuilder->getVoidTy();
270    Type * const bitBlockType = iBuilder->getBitBlockType();
271    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
[5418]272
273    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, iBuilder->getInt32Ty(), outputType, nullptr));
[5254]274    main->setCallingConv(CallingConv::C);
275    Function::arg_iterator args = main->arg_begin();
[5418]276
277    Value * const fileDecriptor = &*(args++);
278    fileDecriptor->setName("fileDecriptor");
[5254]279    Value * const outputStream = &*(args++);
280    outputStream->setName("outputStream");
281
[5409]282    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
[5985]283
[5355]284    // File data from mmap
[6047]285    StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
[5985]286
[5856]287    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
[5418]288    mmapK->setInitialArguments({fileDecriptor});
[5414]289    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
[5985]290
[5355]291    // Transposed bits from s2p
[6047]292    StreamSetBuffer * BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), bufferSize);
[5985]293
[6089]294    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
[5414]295    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
[5985]296
297
[5355]298    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
[6047]299    StreamSetBuffer * u8bits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
300    StreamSetBuffer * DelMask = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
[5985]301
[5755]302    Kernel * u8u16k = pxDriver.addKernelInstance<U8U16Kernel>(iBuilder);
[5985]303    pxDriver.makeKernelCall(u8u16k, {BasisBits}, {u8bits, DelMask});
[5540]304
[6047]305    StreamSetBuffer * u16bits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
[5985]306
307    const auto avx2 = enableAVXdel && AVX2_available() && codegen::BlockSize==256;
308
[5409]309    // Different choices for the output buffer depending on chosen option.
[5985]310    StreamSetBuffer * u16bytes = nullptr;
[5355]311    if (mMapBuffering || memAlignBuffering) {
[5985]312        u16bytes = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), outputStream);
313    } else if (avx2) {
[6047]314        u16bytes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize);
[5355]315    } else {
[6047]316        u16bytes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize, 1);
[5355]317    }
[5597]318
[5985]319    if (avx2) {
320        // Allocate space for fully compressed swizzled UTF-16 bit streams
[6047]321        StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
322        StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
323        StreamSetBuffer * u16Swizzle2 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
324        StreamSetBuffer * u16Swizzle3 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
[5985]325        // Apply a deletion algorithm to discard all but the final position of the UTF-8
326        // sequences (bit streams) for each UTF-16 code unit. Also compresses and swizzles the result.
327        Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 16);
328        pxDriver.makeKernelCall(delK, {DelMask, u8bits}, {u16Swizzle0, u16Swizzle1, u16Swizzle2, u16Swizzle3});
329        // Produce unswizzled UTF-16 bit streams
330        Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 16, 1, 4);
331        pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1, u16Swizzle2, u16Swizzle3}, {u16bits});
[6089]332        Kernel * p2sk = pxDriver.addKernelInstance<P2S16Kernel>(iBuilder, cc::BitNumbering::BigEndian);
[5985]333        pxDriver.makeKernelCall(p2sk, {u16bits}, {u16bytes});
334    } else {
[6047]335        StreamSetBuffer * DeletionCounts = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
[6006]336        Kernel * delK = pxDriver.addKernelInstance<FieldCompressKernel>(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
[5985]337        pxDriver.makeKernelCall(delK, {u8bits, DelMask}, {u16bits, DeletionCounts});
[6089]338        Kernel * p2sk = pxDriver.addKernelInstance<P2S16KernelWithCompressedOutput>(iBuilder, cc::BitNumbering::BigEndian);
[5985]339        pxDriver.makeKernelCall(p2sk, {u16bits, DeletionCounts}, {u16bytes});
340    }
[5355]341
[5605]342    Kernel * outK = nullptr;
[5985]343    if (outputFile.empty()) {
[5755]344        outK = pxDriver.addKernelInstance<StdOutKernel>(iBuilder, 16);
[5985]345    } else {
[5755]346        outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 16);
[5605]347        Value * fName = iBuilder->CreatePointerCast(iBuilder->GetString(outputFile.c_str()), iBuilder->getInt8PtrTy());
348        outK->setInitialArguments({fName});
349    }
[5985]350    pxDriver.makeKernelCall(outK, {u16bytes}, {});
351
[5395]352    pxDriver.generatePipelineIR();
[5985]353
[5597]354    pxDriver.deallocateBuffers();
355
[5071]356    iBuilder->CreateRetVoid();
[5401]357
[5474]358    pxDriver.finalizeObject();
[5071]359}
360
// Signature of the JIT-compiled "Main" entry point: (input file descriptor, output buffer).
// The output buffer may be null when no external output buffering is in use.
using u8u16FunctionType = void (*)(uint32_t fd, char * output_data);
[5005]362
// Returns the size in bytes that fstat() reports for the open descriptor `fd`,
// or 0 when fstat() fails (for example, because `fd` is not a valid descriptor).
size_t file_size(const int fd) {
    struct stat info;
    const bool statSucceeded = (fstat(fd, &info) == 0);
    return statSucceeded ? info.st_size : 0;
}
370
[5007]371void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
[5418]372    const int fd = open(fileName.c_str(), O_RDONLY);
373    if (LLVM_UNLIKELY(fd == -1)) {
374        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
[5005]375    } else {
[5418]376        const auto fileSize = file_size(fd);
377        if (mMapBuffering) {
378            boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2 * fileSize));
379            outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
380            outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
381            fn_ptr(fd, static_cast<char*>(outputBuffer.get_address()));
382        } else if (memAlignBuffering) {
383            char * outputBuffer;
384            const auto r = posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2 * fileSize);
385            if (LLVM_UNLIKELY(r != 0)) {
386                throw std::runtime_error("posix_memalign failed with return code " + std::to_string(r));
387            }
388            fn_ptr(fd, outputBuffer);
389            free(reinterpret_cast<void *>(outputBuffer));
390        } else { /* No external output buffer */
391            fn_ptr(fd, nullptr);
[5234]392        }
[5418]393        close(fd);
[5191]394    }
[5005]395}
396
397int main(int argc, char *argv[]) {
[5486]398    codegen::ParseCommandLineOptions(argc, argv, {&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
[5474]399    ParabixDriver pxDriver("u8u16");
[5985]400    generatePipeline(pxDriver);
[5474]401    auto u8u16Function = reinterpret_cast<u8u16FunctionType>(pxDriver.getMain());
402    u8u16(u8u16Function, inputFile);
[5005]403    return 0;
404}
405
406                       
[5540]407
Note: See TracBrowser for help on using the repository browser.