source: icGREP/icgrep-devel/icgrep/u8u16.cpp @ 5401

Last change on this file since 5401 was 5401, checked in by nmedfort, 2 years ago

Updated all projects to use ParabixDriver. Deprecated the original pipeline generation methods. Enabled LLVM optimizations and IR/ASM printing for kernel modules. Enabled the object cache by default. Began work on moving consumed-position information back to the producing kernels.

File size: 26.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <IR_Gen/idisa_builder.h>                  // for IDISA_Builder
8#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
9#include <cc/cc_compiler.h>                        // for CC_Compiler
10#include <kernels/deletion.h>                      // for DeletionKernel
11#include <kernels/swizzle.h>                      // for DeletionKernel
12#include <kernels/mmap_kernel.h>                   // for MMapSourceKernel
13#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
14#include <kernels/s2p_kernel.h>                    // for S2PKernel
15#include <kernels/stdout_kernel.h>                 // for StdOutKernel
16#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for ExecutionEngine
17#include <llvm/IR/Function.h>                      // for Function, Function...
18#include <llvm/IR/Module.h>                        // for Module
19#include <llvm/IR/Verifier.h>                      // for verifyModule
20#include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
21#include <llvm/Support/Debug.h>                    // for dbgs
22#include <pablo/pablo_kernel.h>                    // for PabloKernel
23#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
24#include <pablo/pe_zeroes.h>
25#include <toolchain.h>                             // for JIT_to_ExecutionEn...
26#include <boost/iostreams/device/mapped_file.hpp>  // for mapped_file_source
27#include <boost/filesystem.hpp>
28#include <boost/interprocess/anonymous_shared_memory.hpp>
29#include "kernels/streamset.h"                     // for CircularBuffer
30#include <kernels/pipeline.h>
31#include "llvm/ADT/StringRef.h"                    // for StringRef
32#include "llvm/IR/CallingConv.h"                   // for ::C
33#include "llvm/IR/DerivedTypes.h"                  // for ArrayType, Pointer...
34#include "llvm/IR/LLVMContext.h"                   // for LLVMContext
35#include "llvm/IR/Value.h"                         // for Value
36#include "llvm/Support/Compiler.h"                 // for LLVM_UNLIKELY
37#include <pablo/builder.hpp>                       // for PabloBuilder
38#include <iostream>
39
40using namespace pablo;
41using namespace kernel;
42using namespace parabix;
43using namespace llvm;
44
45static cl::OptionCategory u8u16Options("u8u16 Options", "Transcoding control options.");
46static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(u8u16Options));
47static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"),  cl::Required, cl::cat(u8u16Options));
48static cl::opt<bool> enableAVXdel("enable-AVX-deletion", cl::desc("Enable AVX2 deletion algorithms."), cl::cat(u8u16Options));
49static cl::opt<bool> mMapBuffering("mmap-buffering", cl::desc("Enable mmap buffering."), cl::cat(u8u16Options));
50static cl::opt<bool> memAlignBuffering("memalign-buffering", cl::desc("Enable posix_memalign buffering."), cl::cat(u8u16Options));
51
52
53void u8u16_pablo(PabloKernel * kernel) {
54    //  input: 8 basis bit streams
55   
56    const auto u8bitSet = kernel->getInputStreamVar("u8bit");
57   
58    //  output: 16 u8-indexed streams, + delmask stream + error stream
59   
60    cc::CC_Compiler ccc(kernel, u8bitSet);
61   
62    PabloBuilder & main = ccc.getBuilder();
63    const auto u8_bits = ccc.getBasisBits();
64   
65    Zeroes * zeroes = main.createZeroes();
66
67    // Outputs
68    Var * u16_hi[8];
69    for (int i = 0; i < 8; ++i) {
70        u16_hi[i] = main.createVar("u16_hi" + std::to_string(i), zeroes);
71    }
72    Var * u16_lo[8];
73    for (int i = 0; i < 8; ++i) {
74        u16_lo[i] = main.createVar("u16_lo" + std::to_string(i), zeroes);
75    }
76    Var * delmask = main.createVar("delmask", zeroes);
77    Var * error_mask = main.createVar("error_mask", zeroes);
78
79    // The logic for processing non-ASCII bytes will be embedded within an if-hierarchy.
80    PabloAST * nonASCII = ccc.compileCC(re::makeCC(0x80, 0xFF));
81   
82    // Builder for the if statement handling all non-ASCII logic
83    PabloBuilder nAb = PabloBuilder::Create(main);
84    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
85    // produce the UTF-16 code unit data ...,
86    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
87    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
88    PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
89    PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
90    PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
91   
92    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
93    PabloAST * pfx34 = ccc.compileCC(re::makeCC(0xE0, 0xFF), nAb);
94    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
95    PabloBuilder p34b = PabloBuilder::Create(nAb);
96    // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
97    // to the final position of the 3-byte sequence.
98    PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
99    PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
100    PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
101    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
102    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
103
104
105    Var * const u8scope32 = nAb.createVar("u8scope32", zeroes);
106    Var * const u8scope33 = nAb.createVar("u8scope33", zeroes);
107    Var * const u8scope44 = nAb.createVar("u8scope44", zeroes);
108
109    //
110    // Logic for 4-byte UTF-8 sequences
111    //
112    // Entry condition  or 4 byte sequences: we have a prefix byte in the range 0xF0-0xFF.
113    PabloAST * pfx4 = ccc.compileCC(re::makeCC(0xF0, 0xFF), p34b);
114    // Builder for the if statement handling all logic for 4-byte sequences only.
115    PabloBuilder p4b = PabloBuilder::Create(p34b);
116    // Illegal 4-byte sequences
117    PabloAST * F0 = ccc.compileCC(re::makeCC(0xF0), p4b);
118    PabloAST * F4 = ccc.compileCC(re::makeCC(0xF4), p4b);
119    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeCC(0x80, 0x8F), p4b));
120    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeCC(0x90, 0xBF), p4b));
121    PabloAST * F5_FF = ccc.compileCC(re::makeCC(0xF5, 0xFF), p4b);
122
123    Var * FX_err = p34b.createVar("FX_err", zeroes);
124    p4b.createAssign(FX_err, p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
125    //
126    // 4-byte prefixes have a scope that extends over the next 3 bytes.
127
128    Var * u8scope42 = p34b.createVar("u8scope42", zeroes);
129    Var * u8scope43 = p34b.createVar("u8scope43", zeroes);
130
131    p4b.createAssign(u8scope42, p4b.createAdvance(pfx4, 1));
132    p4b.createAssign(u8scope43, p4b.createAdvance(u8scope42, 1));
133    p4b.createAssign(u8scope44, p4b.createAdvance(u8scope43, 1));
134    //
135   
136    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
137    //  we must calculate the value abcde - 1 to produce the bit values
138    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
139    Var * s43_lo0 = nAb.createVar("scope43_lo0", zeroes);
140    Var * s43_lo1 = nAb.createVar("scope43_lo1", zeroes);
141    Var * s43_hi6 = nAb.createVar("scope43_hi6", zeroes);
142    Var * s43_hi7 = nAb.createVar("scope43_hi7", zeroes);
143
144    Var * s43_lo2 = main.createVar("scope43_lo2", zeroes);
145    Var * s43_lo3 = main.createVar("scope43_lo3", zeroes);
146    Var * s43_lo4 = main.createVar("scope43_lo4", zeroes);
147    Var * s43_lo5 = main.createVar("scope43_lo5", zeroes);
148    Var * s43_lo6 = main.createVar("scope43_lo6", zeroes);
149    Var * s43_lo7 = main.createVar("scope43_lo7", zeroes);
150
151    p4b.createAssign(s43_lo1, p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
152    p4b.createAssign(s43_lo0, p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
153    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
154    p4b.createAssign(s43_hi7, p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
155    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
156    p4b.createAssign(s43_hi6, p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
157    //
158    p4b.createAssign(s43_lo2, p4b.createAnd(u8scope43, bit4a1));
159    p4b.createAssign(s43_lo3, p4b.createAnd(u8scope43, bit5a1));
160    p4b.createAssign(s43_lo4, p4b.createAnd(u8scope43, bit6a1));
161    p4b.createAssign(s43_lo5, p4b.createAnd(u8scope43, bit7a1));
162    p4b.createAssign(s43_lo6, p4b.createAnd(u8scope43, u8_bits[2]));
163    p4b.createAssign(s43_lo7, p4b.createAnd(u8scope43, u8_bits[3]));
164    //
165    //
166    p34b.createIf(pfx4, p4b);
167    //
168    // Combined logic for 3 and 4 byte sequences
169    //
170    PabloAST * pfx3 = ccc.compileCC(re::makeCC(0xE0, 0xEF), p34b);
171
172    p34b.createAssign(u8scope32, p34b.createAdvance(pfx3, 1));
173    p34b.createAssign(u8scope33, p34b.createAdvance(u8scope32, 1));
174
175    // Illegal 3-byte sequences
176    PabloAST * E0 = ccc.compileCC(re::makeCC(0xE0), p34b);
177    PabloAST * ED = ccc.compileCC(re::makeCC(0xED), p34b);
178    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeCC(0x80, 0x9F), p34b));
179    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeCC(0xA0, 0xBF), p34b));
180    Var * EX_FX_err = nAb.createVar("EX_FX_err", zeroes);
181
182    p34b.createAssign(EX_FX_err, p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
183    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
184    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
185   
186    Var * p34del = nAb.createVar("p34del", zeroes);
187    p34b.createAssign(p34del, p34b.createOr(u8scope32, u8scope42));
188
189
190    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
191    // UTF-8 sequences.
192    p34b.createAssign(u16_hi[0], p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
193    p34b.createAssign(u16_hi[1], p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
194    p34b.createAssign(u16_hi[2], p34b.createAnd(u8scope33, bit6a2));
195    p34b.createAssign(u16_hi[3], p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
196    p34b.createAssign(u16_hi[4], p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
197   
198    //
199    nAb.createIf(pfx34, p34b);
200    //
201    // Combined logic for 2, 3 and 4 byte sequences
202    //
203
204    Var * u8lastscope = main.createVar("u8lastscope", zeroes);
205
206    PabloAST * pfx2 = ccc.compileCC(re::makeCC(0xC0, 0xDF), nAb);
207    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
208    nAb.createAssign(u8lastscope, nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
209    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
210
211    PabloAST * C0_C1_err = ccc.compileCC(re::makeCC(0xC0, 0xC1), nAb);
212    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeCC(0x80, 0xBF), nAb));
213    nAb.createAssign(error_mask, nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
214    nAb.createAssign(delmask, nAb.createOr(p34del, ccc.compileCC(re::makeCC(0xC0, 0xFF), nAb)));
215   
216    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
217    // low byte are only nonzero for 2, 3 and 4 byte sequences.
218    nAb.createAssign(u16_hi[5], nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
219    nAb.createAssign(u16_hi[6], nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
220    nAb.createAssign(u16_hi[7], nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
221    nAb.createAssign(u16_lo[0], nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
222
223    Var * p234_lo1 = main.createVar("p234_lo1", zeroes);
224
225    nAb.createAssign(p234_lo1, nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
226
227    main.createIf(nonASCII, nAb);
228    //
229    //
230    PabloAST * ASCII = ccc.compileCC(re::makeCC(0x0, 0x7F));
231    PabloAST * last_byte = main.createOr(ASCII, u8lastscope);
232    main.createAssign(u16_lo[1], main.createOr(main.createAnd(ASCII, u8_bits[1]), p234_lo1));
233    main.createAssign(u16_lo[2], main.createOr(main.createAnd(last_byte, u8_bits[2]), s43_lo2));
234    main.createAssign(u16_lo[3], main.createOr(main.createAnd(last_byte, u8_bits[3]), s43_lo3));
235    main.createAssign(u16_lo[4], main.createOr(main.createAnd(last_byte, u8_bits[4]), s43_lo4));
236    main.createAssign(u16_lo[5], main.createOr(main.createAnd(last_byte, u8_bits[5]), s43_lo5));
237    main.createAssign(u16_lo[6], main.createOr(main.createAnd(last_byte, u8_bits[6]), s43_lo6));
238    main.createAssign(u16_lo[7], main.createOr(main.createAnd(last_byte, u8_bits[7]), s43_lo7));
239   
240    Var * output = kernel->getOutputStreamVar("u16bit");
241    Var * delmask_out = kernel->getOutputStreamVar("delMask");
242    Var * error_mask_out = kernel->getOutputStreamVar("errMask");
243   
244    for (unsigned i = 0; i < 8; i++) {
245        main.createAssign(main.createExtract(output, i), u16_hi[i]);
246    }
247    for (unsigned i = 0; i < 8; i++) {
248        main.createAssign(main.createExtract(output, i + 8), u16_lo[i]);
249    }
250    main.createAssign(main.createExtract(delmask_out, main.getInteger(0)), delmask);
251    main.createAssign(main.createExtract(error_mask_out,  main.getInteger(0)), error_mask);
252
253    pablo_function_passes(kernel);
254}
255
256void u8u16PipelineAVX2Gen(ParabixDriver & pxDriver) {
257
258    IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
259    Module * mod = iBuilder->getModule();
260    const unsigned segmentSize = codegen::SegmentSize;
261    const unsigned bufferSegments = codegen::ThreadNum+1;
262
263    assert (iBuilder);
264
265    Type * const size_ty = iBuilder->getSizeTy();
266    Type * const voidTy = iBuilder->getVoidTy();
267    Type * const bitBlockType = iBuilder->getBitBlockType();
268    Type * const inputType = ArrayType::get(ArrayType::get(bitBlockType, 8), 1)->getPointerTo();
269    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
270   
271    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
272    main->setCallingConv(CallingConv::C);
273    Function::arg_iterator args = main->arg_begin();
274   
275    Value * const inputStream = &*(args++);
276    inputStream->setName("inputStream");
277    Value * const outputStream = &*(args++);
278    outputStream->setName("outputStream");
279    Value * const fileSize = &*(args++);
280    fileSize->setName("fileSize");
281
282    // File data from mmap
283    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
284
285    MMapSourceKernel mmapK(iBuilder, segmentSize); 
286    mmapK.setInitialArguments({fileSize});
287    pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
288
289    // Transposed bits from s2p
290    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
291
292    S2PKernel s2pk(iBuilder);
293    pxDriver.addKernelCall(s2pk, {&ByteStream}, {&BasisBits});
294   
295    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
296    CircularBuffer U8u16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
297    CircularBuffer DelMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
298    CircularBuffer ErrorMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
299
300    PabloKernel u8u16k(iBuilder, "u8u16",
301                       {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
302                       {Binding{iBuilder->getStreamSetTy(16, 1), "u16bit"},
303                           Binding{iBuilder->getStreamSetTy(1, 1), "delMask"},
304                           Binding{iBuilder->getStreamSetTy(1, 1), "errMask"}}, {});
305   
306    u8u16_pablo(&u8u16k);
307    pxDriver.addKernelCall(u8u16k, {&BasisBits}, {&U8u16Bits, &DelMask, &ErrorMask});
308
309    // Apply a deletion algorithm to discard all but the final position of the UTF-8
310    // sequences for each UTF-16 code unit. Swizzle the results.
311    CircularBuffer SwizzleFields0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
312    CircularBuffer SwizzleFields1(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
313    CircularBuffer SwizzleFields2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
314    CircularBuffer SwizzleFields3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * bufferSegments);
315    CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
316
317    DeleteByPEXTkernel delK(iBuilder, 64, 16, true);
318    pxDriver.addKernelCall(delK, {&U8u16Bits, &DelMask}, {&SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3, &DeletionCounts});
319;
320    //  Produce fully compressed swizzled UTF-16 bit streams
321    SwizzledCopybackBuffer u16Swizzle0(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
322    SwizzledCopybackBuffer u16Swizzle1(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
323    SwizzledCopybackBuffer u16Swizzle2(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
324    SwizzledCopybackBuffer u16Swizzle3(iBuilder, iBuilder->getStreamSetTy(4), segmentSize * (bufferSegments+2), 1);
325
326    SwizzledBitstreamCompressByCount compressK(iBuilder, 16);
327    pxDriver.addKernelCall(compressK, {&DeletionCounts, &SwizzleFields0, &SwizzleFields1, &SwizzleFields2, &SwizzleFields3},
328                             {&u16Swizzle0, &u16Swizzle1, &u16Swizzle2, &u16Swizzle3});
329 
330    // Produce unswizzled UTF-16 bit streams
331    CircularBuffer u16bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
332    SwizzleGenerator unSwizzleK(iBuilder, 16, 1, 4);
333    unSwizzleK.setName("unswizzle");
334    pxDriver.addKernelCall(unSwizzleK, {&u16Swizzle0, &u16Swizzle1, &u16Swizzle2, &u16Swizzle3}, {&u16bits});
335   
336    // Different choices for the output buffer depending on chosen option.
337    ExternalFileBuffer U16external(iBuilder, iBuilder->getStreamSetTy(1, 16));
338    CircularBuffer U16out(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments);
339
340    P2S16Kernel p2sk(iBuilder);
341
342    //P2S16KernelWithCompressedOutput p2sk(iBuilder);
343
344    FileSink outK(iBuilder, 16);
345    if (mMapBuffering || memAlignBuffering) {
346        pxDriver.addKernelCall(p2sk, {&u16bits}, {&U16external});
347        pxDriver.addKernelCall(outK, {&U16external}, {});
348    } else {
349        pxDriver.addKernelCall(p2sk, {&u16bits}, {&U16out});
350        pxDriver.addKernelCall(outK, {&U16out}, {});
351    }
352   
353    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
354
355    ByteStream.setStreamSetBuffer(inputStream);
356    BasisBits.allocateBuffer();
357    U8u16Bits.allocateBuffer();
358    DelMask.allocateBuffer();
359    ErrorMask.allocateBuffer();
360    DeletionCounts.allocateBuffer();
361    SwizzleFields0.allocateBuffer();
362    SwizzleFields1.allocateBuffer();
363    SwizzleFields2.allocateBuffer();
364    SwizzleFields3.allocateBuffer();
365    u16Swizzle0.allocateBuffer();
366    u16Swizzle1.allocateBuffer();
367    u16Swizzle2.allocateBuffer();
368    u16Swizzle3.allocateBuffer();
369    u16bits.allocateBuffer();
370
371    if (mMapBuffering || memAlignBuffering) {
372        U16external.setStreamSetBuffer(outputStream);
373    } else {
374        U16out.allocateBuffer();
375    }
376    Value * fName = iBuilder->CreatePointerCast(iBuilder->CreateGlobalString(outputFile.c_str()), iBuilder->getInt8PtrTy());
377    outK.setInitialArguments({fName});
378
379    pxDriver.generatePipelineIR();
380
381    iBuilder->CreateRetVoid();
382
383    pxDriver.linkAndFinalize();
384}
385
386void u8u16PipelineGen(ParabixDriver & pxDriver) {
387   
388    IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
389    Module * mod = iBuilder->getModule();
390   
391    const unsigned segmentSize = codegen::SegmentSize;
392    const unsigned bufferSegments = codegen::ThreadNum+1;
393   
394    assert (iBuilder);
395   
396    Type * const size_ty = iBuilder->getSizeTy();
397    Type * const voidTy = iBuilder->getVoidTy();
398    Type * const bitBlockType = iBuilder->getBitBlockType();
399    Type * const inputType = ArrayType::get(ArrayType::get(bitBlockType, 8), 1)->getPointerTo();
400    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
401   
402    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
403    main->setCallingConv(CallingConv::C);
404    Function::arg_iterator args = main->arg_begin();
405   
406    Value * const inputStream = &*(args++);
407    inputStream->setName("inputStream");
408    Value * const outputStream = &*(args++);
409    outputStream->setName("outputStream");
410    Value * const fileSize = &*(args++);
411    fileSize->setName("fileSize");
412   
413    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
414   
415    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
416   
417    CircularBuffer U8u16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
418    CircularBuffer DelMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
419    CircularBuffer ErrorMask(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
420   
421    CircularBuffer U16Bits(iBuilder, iBuilder->getStreamSetTy(16), segmentSize * bufferSegments);
422   
423    CircularBuffer DeletionCounts(iBuilder, iBuilder->getStreamSetTy(), segmentSize * bufferSegments);
424   
425    // Different choices for the output buffer depending on chosen option.
426    ExternalFileBuffer U16external(iBuilder, iBuilder->getStreamSetTy(1, 16));
427    CircularCopybackBuffer U16out(iBuilder, iBuilder->getStreamSetTy(1, 16), segmentSize * bufferSegments, 1 /*overflow block*/);
428   
429    MMapSourceKernel mmapK(iBuilder, segmentSize); 
430    mmapK.setInitialArguments({fileSize});
431    pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
432   
433    S2PKernel s2pk(iBuilder);
434    pxDriver.addKernelCall(s2pk, {&ByteStream}, {&BasisBits});
435   
436    PabloKernel u8u16k(iBuilder, "u8u16",
437                       {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
438                       {Binding{iBuilder->getStreamSetTy(16, 1), "u16bit"},
439                           Binding{iBuilder->getStreamSetTy(1, 1), "delMask"},
440                           Binding{iBuilder->getStreamSetTy(1, 1), "errMask"}}, {});
441   
442    u8u16_pablo(&u8u16k);
443    pxDriver.addKernelCall(u8u16k, {&BasisBits}, {&U8u16Bits, &DelMask, &ErrorMask});
444   
445    DeletionKernel delK(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
446    pxDriver.addKernelCall(delK, {&U8u16Bits, &DelMask}, {&U16Bits, &DeletionCounts});
447   
448    P2S16KernelWithCompressedOutput p2sk(iBuilder);
449   
450    FileSink outK(iBuilder, 16);
451    if (mMapBuffering || memAlignBuffering) {
452        pxDriver.addKernelCall(p2sk, {&U16Bits, &DeletionCounts}, {&U16external});
453        pxDriver.addKernelCall(outK, {&U16external}, {});
454    } else {
455        pxDriver.addKernelCall(p2sk, {&U16Bits, &DeletionCounts}, {&U16out});
456        pxDriver.addKernelCall(outK, {&U16out}, {});
457    }
458    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
459   
460    ByteStream.setStreamSetBuffer(inputStream);
461    BasisBits.allocateBuffer();
462    U8u16Bits.allocateBuffer();
463    DelMask.allocateBuffer();
464    ErrorMask.allocateBuffer();
465    U16Bits.allocateBuffer();
466    DeletionCounts.allocateBuffer();
467    if (mMapBuffering || memAlignBuffering) {
468        U16external.setStreamSetBuffer(outputStream);
469    } else {
470        U16out.allocateBuffer();
471    }
472    Value * fName = iBuilder->CreatePointerCast(iBuilder->CreateGlobalString(outputFile.c_str()), iBuilder->getInt8PtrTy());
473    outK.setInitialArguments({fName});
474   
475    pxDriver.generatePipelineIR();
476
477   
478    iBuilder->CreateRetVoid();
479
480    pxDriver.linkAndFinalize();
481}
482
483
484
// Signature of the JIT-compiled "Main" entry point: input bytes, optional
// external output buffer, and the input size in bytes.
using u8u16FunctionType = void (*)(char * byte_data, char * output_data, size_t filesize);
486
487u8u16FunctionType u8u16CodeGen(void) {
488    LLVMContext TheContext;                           
489    Module * M = new Module("u8u16", TheContext);
490    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
491    ParabixDriver pxDriver(idb);
492   
493    if (enableAVXdel && AVX2_available() && codegen::BlockSize==256) {
494        //u8u16PipelineAVX2(M, idb)
495        u8u16PipelineAVX2Gen(pxDriver);
496    }
497    else{
498        //u8u16Pipeline(M, idb);
499        u8u16PipelineGen(pxDriver);
500    }
501    u8u16FunctionType main = reinterpret_cast<u8u16FunctionType>(pxDriver.getPointerToMain());
502
503    delete idb;
504    return main;
505}
506
507void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
508    std::string mFileName = fileName;
509    size_t fileSize;
510    char * fileBuffer;
511   
512    const boost::filesystem::path file(mFileName);
513    if (exists(file)) {
514        if (is_directory(file)) {
515            return;
516        }
517    } else {
518        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
519        return;
520    }
521   
522    fileSize = file_size(file);
523    boost::iostreams::mapped_file_source mFile;
524    if (fileSize == 0) {
525        fileBuffer = nullptr;
526    }
527    else {
528        try {
529            mFile.open(mFileName);
530        } catch (std::exception &e) {
531            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
532            return;
533        }
534        fileBuffer = const_cast<char *>(mFile.data());
535    }
536
537    if (mMapBuffering) {
538        boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2*fileSize));
539        outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
540        outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
541        fn_ptr(fileBuffer, static_cast<char*>(outputBuffer.get_address()), fileSize);
542    }
543    else if (memAlignBuffering) {
544        char * outputBuffer;
545        const auto r = posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2*fileSize);
546        if (LLVM_UNLIKELY(r != 0)) {
547            throw std::runtime_error("posix_memalign failed with return code " + std::to_string(r));
548        }
549        fn_ptr(fileBuffer, outputBuffer, fileSize);
550        free(reinterpret_cast<void *>(outputBuffer));
551    }
552    else {
553        /* No external output buffer */
554        fn_ptr(fileBuffer, nullptr, fileSize);
555    }
556    mFile.close();
557   
558}
559
560
561int main(int argc, char *argv[]) {
562    AddParabixVersionPrinter();
563    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
564    cl::ParseCommandLineOptions(argc, argv);
565
566    u8u16FunctionType fn_ptr = u8u16CodeGen();
567
568    u8u16(fn_ptr, inputFile);
569
570    return 0;
571}
572
573                       
Note: See TracBrowser for help on using the repository browser.