source: icGREP/icgrep-devel/icgrep/u8u16.cpp @ 5387

Last change on this file since 5387 was 5377, checked in by nmedfort, 2 years ago

Support for stdin. Needs more testing.

File size: 25.7 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <IR_Gen/idisa_builder.h>                  // for IDISA_Builder
8#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
9#include <cc/cc_compiler.h>                        // for CC_Compiler
10#include <kernels/deletion.h>                      // for DeletionKernel
11#include <kernels/swizzle.h>                      // for DeletionKernel
12#include <kernels/mmap_kernel.h>                   // for MMapSourceKernel
13#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
14#include <kernels/s2p_kernel.h>                    // for S2PKernel
15#include <kernels/stdout_kernel.h>                 // for StdOutKernel
16#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for ExecutionEngine
17#include <llvm/IR/Function.h>                      // for Function, Function...
18#include <llvm/IR/Module.h>                        // for Module
19#include <llvm/IR/Verifier.h>                      // for verifyModule
20#include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
21#include <llvm/Support/Debug.h>                    // for dbgs
22#include <pablo/pablo_kernel.h>                    // for PabloKernel
23#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
24#include <pablo/pe_zeroes.h>
25#include <toolchain.h>                             // for JIT_to_ExecutionEn...
26#include <boost/iostreams/device/mapped_file.hpp>  // for mapped_file_source
27#include <boost/filesystem.hpp>
28#include <boost/interprocess/anonymous_shared_memory.hpp>
29#include "kernels/streamset.h"                     // for CircularBuffer
30#include <kernels/pipeline.h>
31#include "llvm/ADT/StringRef.h"                    // for StringRef
32#include "llvm/IR/CallingConv.h"                   // for ::C
33#include "llvm/IR/DerivedTypes.h"                  // for ArrayType, Pointer...
34#include "llvm/IR/LLVMContext.h"                   // for LLVMContext
35#include "llvm/IR/Value.h"                         // for Value
36#include "llvm/Support/Compiler.h"                 // for LLVM_UNLIKELY
37#include <pablo/builder.hpp>                       // for PabloBuilder
38#include <iostream>
39
40using namespace pablo;
41using namespace kernel;
42using namespace parabix;
43using namespace llvm;
44
45static cl::OptionCategory u8u16Options("u8u16 Options", "Transcoding control options.");
46static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(u8u16Options));
47static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"),  cl::Required, cl::cat(u8u16Options));
48static cl::opt<bool> enableAVXdel("enable-AVX-deletion", cl::desc("Enable AVX2 deletion algorithms."), cl::cat(u8u16Options));
49static cl::opt<bool> mMapBuffering("mmap-buffering", cl::desc("Enable mmap buffering."), cl::cat(u8u16Options));
50static cl::opt<bool> memAlignBuffering("memalign-buffering", cl::desc("Enable posix_memalign buffering."), cl::cat(u8u16Options));
51
52
53void u8u16_pablo(PabloKernel * kernel) {
54    //  input: 8 basis bit streams
55   
56    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
57   
58    //  output: 16 u8-indexed streams, + delmask stream + error stream
59   
60    cc::CC_Compiler ccc(kernel, u8bitSet);
61   
62    PabloBuilder & main = ccc.getBuilder();
63    const auto u8_bits = ccc.getBasisBits();
64   
65    Zeroes * zeroes = main.createZeroes();
66
67    // Outputs
68    Var * u16_hi[8];
69    for (int i = 0; i < 8; ++i) {
70        u16_hi[i] = main.createVar("u16_hi" + std::to_string(i), zeroes);
71    }
72    Var * u16_lo[8];
73    for (int i = 0; i < 8; ++i) {
74        u16_lo[i] = main.createVar("u16_lo" + std::to_string(i), zeroes);
75    }
76    Var * delmask = main.createVar("delmask", zeroes);
77    Var * error_mask = main.createVar("error_mask", zeroes);
78
79    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
80    PabloAST * nonASCII = ccc.compileCC(re::makeCC(0x80, 0xFF));
81   
82    // Builder for the if statement handling all non-ASCII logic
83    PabloBuilder nAb = PabloBuilder::Create(main);
84    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
85    // produce the UTF-16 code unit data ...,
86    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
87    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
88    PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
89    PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
90    PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
91   
92    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
93    PabloAST * pfx34 = ccc.compileCC(re::makeCC(0xE0, 0xFF), nAb);
94    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
95    PabloBuilder p34b = PabloBuilder::Create(nAb);
96    // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
97    // to the final position of the 3-byte sequence.
98    PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
99    PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
100    PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
101    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
102    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
103
104
105    Var * const u8scope32 = nAb.createVar("u8scope32", zeroes);
106    Var * const u8scope33 = nAb.createVar("u8scope33", zeroes);
107    Var * const u8scope44 = nAb.createVar("u8scope44", zeroes);
108
109    //
110    // Logic for 4-byte UTF-8 sequences
111    //
112    // Entry condition  or 4 byte sequences: we have a prefix byte in the range 0xF0-0xFF.
113    PabloAST * pfx4 = ccc.compileCC(re::makeCC(0xF0, 0xFF), p34b);
114    // Builder for the if statement handling all logic for 4-byte sequences only.
115    PabloBuilder p4b = PabloBuilder::Create(p34b);
116    // Illegal 4-byte sequences
117    PabloAST * F0 = ccc.compileCC(re::makeCC(0xF0), p4b);
118    PabloAST * F4 = ccc.compileCC(re::makeCC(0xF4), p4b);
119    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeCC(0x80, 0x8F), p4b));
120    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeCC(0x90, 0xBF), p4b));
121    PabloAST * F5_FF = ccc.compileCC(re::makeCC(0xF5, 0xFF), p4b);
122
123    Var * FX_err = p34b.createVar("FX_err", zeroes);
124    p4b.createAssign(FX_err, p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
125    //
126    // 4-byte prefixes have a scope that extends over the next 3 bytes.
127
128    Var * u8scope42 = p34b.createVar("u8scope42", zeroes);
129    Var * u8scope43 = p34b.createVar("u8scope43", zeroes);
130
131    p4b.createAssign(u8scope42, p4b.createAdvance(pfx4, 1));
132    p4b.createAssign(u8scope43, p4b.createAdvance(u8scope42, 1));
133    p4b.createAssign(u8scope44, p4b.createAdvance(u8scope43, 1));
134    //
135   
136    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
137    //  we must calculate the value abcde - 1 to produce the bit values
138    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
139    Var * s43_lo0 = nAb.createVar("scope43_lo0", zeroes);
140    Var * s43_lo1 = nAb.createVar("scope43_lo1", zeroes);
141    Var * s43_hi6 = nAb.createVar("scope43_hi6", zeroes);
142    Var * s43_hi7 = nAb.createVar("scope43_hi7", zeroes);
143
144    Var * s43_lo2 = main.createVar("scope43_lo2", zeroes);
145    Var * s43_lo3 = main.createVar("scope43_lo3", zeroes);
146    Var * s43_lo4 = main.createVar("scope43_lo4", zeroes);
147    Var * s43_lo5 = main.createVar("scope43_lo5", zeroes);
148    Var * s43_lo6 = main.createVar("scope43_lo6", zeroes);
149    Var * s43_lo7 = main.createVar("scope43_lo7", zeroes);
150
151    p4b.createAssign(s43_lo1, p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
152    p4b.createAssign(s43_lo0, p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
153    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
154    p4b.createAssign(s43_hi7, p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
155    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
156    p4b.createAssign(s43_hi6, p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
157    //
158    p4b.createAssign(s43_lo2, p4b.createAnd(u8scope43, bit4a1));
159    p4b.createAssign(s43_lo3, p4b.createAnd(u8scope43, bit5a1));
160    p4b.createAssign(s43_lo4, p4b.createAnd(u8scope43, bit6a1));
161    p4b.createAssign(s43_lo5, p4b.createAnd(u8scope43, bit7a1));
162    p4b.createAssign(s43_lo6, p4b.createAnd(u8scope43, u8_bits[2]));
163    p4b.createAssign(s43_lo7, p4b.createAnd(u8scope43, u8_bits[3]));
164    //
165    //
166    p34b.createIf(pfx4, p4b);
167    //
168    // Combined logic for 3 and 4 byte sequences
169    //
170    PabloAST * pfx3 = ccc.compileCC(re::makeCC(0xE0, 0xEF), p34b);
171
172    p34b.createAssign(u8scope32, p34b.createAdvance(pfx3, 1));
173    p34b.createAssign(u8scope33, p34b.createAdvance(u8scope32, 1));
174
175    // Illegal 3-byte sequences
176    PabloAST * E0 = ccc.compileCC(re::makeCC(0xE0), p34b);
177    PabloAST * ED = ccc.compileCC(re::makeCC(0xED), p34b);
178    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeCC(0x80, 0x9F), p34b));
179    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeCC(0xA0, 0xBF), p34b));
180    Var * EX_FX_err = nAb.createVar("EX_FX_err", zeroes);
181
182    p34b.createAssign(EX_FX_err, p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
183    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
184    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
185   
186    Var * p34del = nAb.createVar("p34del", zeroes);
187    p34b.createAssign(p34del, p34b.createOr(u8scope32, u8scope42));
188
189
190    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
191    // UTF-8 sequences.
192    p34b.createAssign(u16_hi[0], p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
193    p34b.createAssign(u16_hi[1], p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
194    p34b.createAssign(u16_hi[2], p34b.createAnd(u8scope33, bit6a2));
195    p34b.createAssign(u16_hi[3], p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
196    p34b.createAssign(u16_hi[4], p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
197   
198    //
199    nAb.createIf(pfx34, p34b);
200    //
201    // Combined logic for 2, 3 and 4 byte sequences
202    //
203
204    Var * u8lastscope = main.createVar("u8lastscope", zeroes);
205
206    PabloAST * pfx2 = ccc.compileCC(re::makeCC(0xC0, 0xDF), nAb);
207    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
208    nAb.createAssign(u8lastscope, nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
209    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
210
211    PabloAST * C0_C1_err = ccc.compileCC(re::makeCC(0xC0, 0xC1), nAb);
212    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeCC(0x80, 0xBF), nAb));
213    nAb.createAssign(error_mask, nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
214    nAb.createAssign(delmask, nAb.createOr(p34del, ccc.compileCC(re::makeCC(0xC0, 0xFF), nAb)));
215   
216    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
217    // low byte are only nonzero for 2, 3 and 4 byte sequences.
218    nAb.createAssign(u16_hi[5], nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
219    nAb.createAssign(u16_hi[6], nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
220    nAb.createAssign(u16_hi[7], nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
221    nAb.createAssign(u16_lo[0], nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
222
223    Var * p234_lo1 = main.createVar("p234_lo1", zeroes);
224
225    nAb.createAssign(p234_lo1, nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
226
227    main.createIf(nonASCII, nAb);
228    //
229    //
230    PabloAST * ASCII = ccc.compileCC(re::makeCC(0x0, 0x7F));
231    PabloAST * last_byte = main.createOr(ASCII, u8lastscope);
232    main.createAssign(u16_lo[1], main.createOr(main.createAnd(ASCII, u8_bits[1]), p234_lo1));
233    main.createAssign(u16_lo[2], main.createOr(main.createAnd(last_byte, u8_bits[2]), s43_lo2));
234    main.createAssign(u16_lo[3], main.createOr(main.createAnd(last_byte, u8_bits[3]), s43_lo3));
235    main.createAssign(u16_lo[4], main.createOr(main.createAnd(last_byte, u8_bits[4]), s43_lo4));
236    main.createAssign(u16_lo[5], main.createOr(main.createAnd(last_byte, u8_bits[5]), s43_lo5));
237    main.createAssign(u16_lo[6], main.createOr(main.createAnd(last_byte, u8_bits[6]), s43_lo6));
238    main.createAssign(u16_lo[7], main.createOr(main.createAnd(last_byte, u8_bits[7]), s43_lo7));
239   
240    Var * output = kernel->getOutputStreamVar("u16bit");
241    Var * delmask_out = kernel->getOutputStreamVar("delMask");
242    Var * error_mask_out = kernel->getOutputStreamVar("errMask");
243   
244    for (unsigned i = 0; i < 8; i++) {
245        main.createAssign(main.createExtract(output, i), u16_hi[i]);
246    }
247    for (unsigned i = 0; i < 8; i++) {
248        main.createAssign(main.createExtract(output, i + 8), u16_lo[i]);
249    }
250    main.createAssign(main.createExtract(delmask_out, main.getInteger(0)), delmask);
251    main.createAssign(main.createExtract(error_mask_out,  main.getInteger(0)), error_mask);
252
253    pablo_function_passes(kernel);
254}
255
256Function * u8u16PipelineAVX2(Module * mod, IDISA::IDISA_Builder * iBuilder) {
257
258    const unsigned segmentSize = codegen::SegmentSize;
259    const unsigned bufferSegments = codegen::ThreadNum+1;
260
261    assert (iBuilder);
262
263    Type * const size_ty = iBuilder->getSizeTy();
264    Type * const voidTy = iBuilder->getVoidTy();
265    Type * const bitBlockType = iBuilder->getBitBlockType();
266    Type * const inputType = ArrayType::get(ArrayType::get(bitBlockType, 8), 1)->getPointerTo();
267    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
268   
269    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
270    main->setCallingConv(CallingConv::C);
271    Function::arg_iterator args = main->arg_begin();
272   
273    Value * const inputStream = &*(args++);
274    inputStream->setName("inputStream");
275    Value * const outputStream = &*(args++);
276    outputStream->setName("outputStream");
277    Value * const fileSize = &*(args++);
278    fileSize->setName("fileSize");
279
280    // File data from mmap
281    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
282
283    MMapSourceKernel mmapK(iBuilder, segmentSize); 
284    mmapK.generateKernel({}, {&ByteStream});
285    mmapK.setInitialArguments({fileSize});
286   
287    // Transposed bits from s2p
288    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
289
290    S2PKernel s2pk(iBuilder);
291    s2pk.generateKernel({&ByteStream}, {&BasisBits});
292   
293    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
294    CircularBuffer U8u16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
295    CircularBuffer DelMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
296    CircularBuffer ErrorMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
297
298    PabloKernel u8u16k(iBuilder, "u8u16",
299                       {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
300                       {Binding{iBuilder->getStreamSetTy(16, 1), "u16bit"},
301                           Binding{iBuilder->getStreamSetTy(1, 1), "delMask"},
302                           Binding{iBuilder->getStreamSetTy(1, 1), "errMask"}}, {});
303   
304    u8u16_pablo(&u8u16k);
305    u8u16k.generateKernel({&BasisBits}, {&U8u16Bits, &DelMask, &ErrorMask});
306
307    // Apply a deletion algorithm to discard all but the final position of the UTF-8
308    // sequences for each UTF-16 code unit. Swizzle the results.
309    CircularBuffer SwizzleFields0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
310    CircularBuffer SwizzleFields1(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
311    CircularBuffer SwizzleFields2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
312    CircularBuffer SwizzleFields3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
313    CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
314
315    DeleteByPEXTkernel delK(iBuilder, 64, 16, true);
316    delK.generateKernel({&U8u16Bits, &DelMask}, {&SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3, &DeletionCounts});
317;
318    //  Produce fully compressed swizzled UTF-16 bit streams
319    SwizzledCopybackBuffer u16Swizzle0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
320    SwizzledCopybackBuffer u16Swizzle1(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
321    SwizzledCopybackBuffer u16Swizzle2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
322    SwizzledCopybackBuffer u16Swizzle3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
323
324    SwizzledBitstreamCompressByCount compressK(iBuilder, 16);
325    compressK.generateKernel({&DeletionCounts, &SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3},
326                             {&u16Swizzle0, &u16Swizzle1, &u16Swizzle2, &u16Swizzle3});
327 
328    // Produce unswizzled UTF-16 bit streams
329    CircularBuffer u16bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
330    SwizzleGenerator unSwizzleK(iBuilder, 16, 1, 4);
331    unSwizzleK.setName("unswizzle");
332    unSwizzleK.generateKernel({&u16Swizzle0, &u16Swizzle1, &u16Swizzle2, &u16Swizzle3}, {&u16bits});
333   
334    // Different choices for the output buffer depending on chosen option.
335    ExternalFileBuffer U16external(iBuilder, iBuilder->getStreamSetTy(1, 16));
336    CircularBuffer U16out(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments);
337
338    P2S16Kernel p2sk(iBuilder);
339
340    //P2S16KernelWithCompressedOutput p2sk(iBuilder);
341
342    FileSink outK(iBuilder, 16);
343    if (mMapBuffering || memAlignBuffering) {
344        p2sk.generateKernel({&u16bits}, {&U16external});
345        outK.generateKernel({&U16external}, {});
346    } else {
347        p2sk.generateKernel({&u16bits}, {&U16out});
348        outK.generateKernel({&U16out}, {});
349    }
350   
351    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
352
353    ByteStream.setStreamSetBuffer(inputStream);
354    BasisBits.allocateBuffer();
355    U8u16Bits.allocateBuffer();
356    DelMask.allocateBuffer();
357    ErrorMask.allocateBuffer();
358    DeletionCounts.allocateBuffer();
359    SwizzleFields0.allocateBuffer();
360    SwizzleFields1.allocateBuffer();
361    SwizzleFields2.allocateBuffer();
362    SwizzleFields3.allocateBuffer();
363    u16Swizzle0.allocateBuffer();
364    u16Swizzle1.allocateBuffer();
365    u16Swizzle2.allocateBuffer();
366    u16Swizzle3.allocateBuffer();
367    u16bits.allocateBuffer();
368
369    if (mMapBuffering || memAlignBuffering) {
370        U16external.setStreamSetBuffer(outputStream);
371    } else {
372        U16out.allocateBuffer();
373    }
374    Value * fName = iBuilder->CreatePointerCast(iBuilder->CreateGlobalString(outputFile.c_str()), iBuilder->getInt8PtrTy());
375    outK.setInitialArguments({fName});
376
377    generatePipeline(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &compressK, &unSwizzleK, &p2sk, &outK});
378
379    iBuilder->CreateRetVoid();
380    return main;
381}
382
383
384Function * u8u16Pipeline(Module * mod, IDISA::IDISA_Builder * iBuilder) {
385   
386    const unsigned segmentSize = codegen::SegmentSize;
387    const unsigned bufferSegments = codegen::ThreadNum+1;
388   
389    assert (iBuilder);
390   
391    Type * const size_ty = iBuilder->getSizeTy();
392    Type * const voidTy = iBuilder->getVoidTy();
393    Type * const bitBlockType = iBuilder->getBitBlockType();
394    Type * const inputType = ArrayType::get(ArrayType::get(bitBlockType, 8), 1)->getPointerTo();
395    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
396   
397    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
398    main->setCallingConv(CallingConv::C);
399    Function::arg_iterator args = main->arg_begin();
400   
401    Value * const inputStream = &*(args++);
402    inputStream->setName("inputStream");
403    Value * const outputStream = &*(args++);
404    outputStream->setName("outputStream");
405    Value * const fileSize = &*(args++);
406    fileSize->setName("fileSize");
407   
408    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
409   
410    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
411   
412    CircularBuffer U8u16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
413    CircularBuffer DelMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
414    CircularBuffer ErrorMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
415   
416    CircularBuffer U16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
417   
418    CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
419   
420    // Different choices for the output buffer depending on chosen option.
421    ExternalFileBuffer U16external(iBuilder, iBuilder->getStreamSetTy(1, 16));
422    CircularCopybackBuffer U16out(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments, 1 /*overflow block*/);
423   
424    MMapSourceKernel mmapK(iBuilder, segmentSize); 
425    mmapK.generateKernel({}, {&ByteStream});
426    mmapK.setInitialArguments({fileSize});
427   
428    S2PKernel s2pk(iBuilder);
429   
430    s2pk.generateKernel({&ByteStream}, {&BasisBits});
431   
432    PabloKernel u8u16k(iBuilder, "u8u16",
433                       {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
434                       {Binding{iBuilder->getStreamSetTy(16, 1), "u16bit"},
435                           Binding{iBuilder->getStreamSetTy(1, 1), "delMask"},
436                           Binding{iBuilder->getStreamSetTy(1, 1), "errMask"}}, {});
437   
438    u8u16_pablo(&u8u16k);
439   
440    u8u16k.generateKernel({&BasisBits}, {&U8u16Bits, &DelMask, &ErrorMask});
441   
442    DeletionKernel delK(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
443    delK.generateKernel({&U8u16Bits, &DelMask}, {&U16Bits, &DeletionCounts});
444   
445    P2S16KernelWithCompressedOutput p2sk(iBuilder);
446   
447    FileSink outK(iBuilder, 16);
448    if (mMapBuffering || memAlignBuffering) {
449        p2sk.generateKernel({&U16Bits, &DeletionCounts}, {&U16external});
450        outK.generateKernel({&U16external}, {});
451    } else {
452        p2sk.generateKernel({&U16Bits, &DeletionCounts}, {&U16out});
453        outK.generateKernel({&U16out}, {});
454    }
455    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
456   
457    ByteStream.setStreamSetBuffer(inputStream);
458    BasisBits.allocateBuffer();
459    U8u16Bits.allocateBuffer();
460    DelMask.allocateBuffer();
461    ErrorMask.allocateBuffer();
462    U16Bits.allocateBuffer();
463    DeletionCounts.allocateBuffer();
464    if (mMapBuffering || memAlignBuffering) {
465        U16external.setStreamSetBuffer(outputStream);
466    } else {
467        U16out.allocateBuffer();
468    }
469    Value * fName = iBuilder->CreatePointerCast(iBuilder->CreateGlobalString(outputFile.c_str()), iBuilder->getInt8PtrTy());
470    outK.setInitialArguments({fName});
471   
472    generatePipeline(iBuilder, {&mmapK, &s2pk, &u8u16k, &delK, &p2sk, &outK});
473   
474    iBuilder->CreateRetVoid();
475    return main;
476}
477
478
479
480typedef void (*u8u16FunctionType)(char * byte_data, char * output_data, size_t filesize);
481
482static ExecutionEngine * u8u16Engine = nullptr;
483
484u8u16FunctionType u8u16CodeGen(void) {
485    LLVMContext TheContext;                           
486    Module * M = new Module("u8u16", TheContext);
487    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
488
489    llvm::Function * main_IR = (enableAVXdel && AVX2_available() && codegen::BlockSize==256) ? u8u16PipelineAVX2(M, idb) : u8u16Pipeline(M, idb);
490
491    verifyModule(*M, &dbgs());
492    u8u16Engine = JIT_to_ExecutionEngine(M);   
493    u8u16Engine->finalizeObject();
494
495    delete idb;
496    return reinterpret_cast<u8u16FunctionType>(u8u16Engine->getPointerToFunction(main_IR));
497}
498
499void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
500    std::string mFileName = fileName;
501    size_t fileSize;
502    char * fileBuffer;
503   
504    const boost::filesystem::path file(mFileName);
505    if (exists(file)) {
506        if (is_directory(file)) {
507            return;
508        }
509    } else {
510        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
511        return;
512    }
513   
514    fileSize = file_size(file);
515    boost::iostreams::mapped_file_source mFile;
516    if (fileSize == 0) {
517        fileBuffer = nullptr;
518    }
519    else {
520        try {
521            mFile.open(mFileName);
522        } catch (std::exception &e) {
523            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
524            return;
525        }
526        fileBuffer = const_cast<char *>(mFile.data());
527    }
528
529    if (mMapBuffering) {
530        boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2*fileSize));
531        outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
532        outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
533        fn_ptr(fileBuffer, static_cast<char*>(outputBuffer.get_address()), fileSize);
534    }
535    else if (memAlignBuffering) {
536        char * outputBuffer;
537        const auto r = posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2*fileSize);
538        if (LLVM_UNLIKELY(r != 0)) {
539            throw std::runtime_error("posix_memalign failed with return code " + std::to_string(r));
540        }
541        fn_ptr(fileBuffer, outputBuffer, fileSize);
542        free(reinterpret_cast<void *>(outputBuffer));
543    }
544    else {
545        /* No external output buffer */
546        fn_ptr(fileBuffer, nullptr, fileSize);
547    }
548    mFile.close();
549   
550}
551
552
553int main(int argc, char *argv[]) {
554    AddParabixVersionPrinter();
555    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
556    cl::ParseCommandLineOptions(argc, argv);
557
558    u8u16FunctionType fn_ptr = u8u16CodeGen();
559
560    u8u16(fn_ptr, inputFile);
561
562    return 0;
563}
564
565                       
Note: See TracBrowser for help on using the repository browser.