source: icGREP/icgrep-devel/icgrep/u32u8.cpp @ 6173

Last change on this file since 6173 was 6097, checked in by cameron, 15 months ago

Small fix

File size: 20.7 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
8#include <cc/cc_compiler.h>                        // for CC_Compiler
9#include <kernels/deletion.h>                      // for DeletionKernel
10#include <kernels/source_kernel.h>
11#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
12#include <kernels/s2p_kernel.h>                    // for S2PKernel
13#include <kernels/stdout_kernel.h>                 // for StdOutKernel_
14#include <kernels/pdep_kernel.h>
15#include <llvm/IR/Function.h>                      // for Function, Function...
16#include <llvm/IR/Module.h>                        // for Module
17#include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
18#include <llvm/Support/Debug.h>                    // for dbgs
19#include <pablo/pablo_kernel.h>                    // for PabloKernel
20#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
21#include <kernels/kernel_builder.h>
22#include <pablo/pe_zeroes.h>
23#include <toolchain/toolchain.h>
24#include <toolchain/cpudriver.h>
25#include <kernels/streamset.h>
26#include <kernels/hex_convert.h>
27#include <llvm/ADT/StringRef.h>
28#include <llvm/IR/CallingConv.h>
29#include <llvm/IR/DerivedTypes.h>
30#include <llvm/IR/LLVMContext.h>
31#include <llvm/IR/Value.h>
32#include <llvm/Support/Compiler.h>
33#include <llvm/Support/raw_ostream.h>
34#include <pablo/builder.hpp>
35#include <fcntl.h>
36
37using namespace pablo;
38using namespace kernel;
39using namespace parabix;
40using namespace llvm;
41
42static cl::OptionCategory u32u8Options("u32u8 Options", "Transcoding control options.");
43static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(u32u8Options));
44
45//
46// UTF-8 encoding requires one to four bytes per Unicode character.
47// To generate UTF-8 encoded output from sets of basis bit streams
48// representing Unicode characters (that is, codepoint-indexed streams
49// having one bit position per codepoint), deposit masks are needed
50// to identify the positions at which bits for each character are
51// to be deposited.   A UTF-8 deposit mask will have one to four bit
52// positions per character depending on the character being encoded, that is,
53// depending on the number of bytes needed to encode the character.   Within
54// each group of one to four positions for a single character, a deposit mask
55// must have exactly one 1 bit set.  Different deposit masks are used for
56// depositing bits, depending on the destination byte position within the
57// ultimate 4 byte sequence.
58//
59// The following deposit masks (shown in little-endian representation) are
60// used for depositing bits.
61//
62//  UTF-8 sequence length:          1     2     3       4
63//  Unicode bit position:
64//  Unicode codepoint bits 0-5      1    10   100    1000    u8final
65//  Bits 6-11                       1    01   010    0100    u8mask6_11
66//  Bits 12-17                      1    01   001    0010    u8mask12_17
67//  Bits 18-20                      1    01   001    0001    u8initial
68//
69//  To compute UTF-8 deposit masks, we begin by constructing an extraction
70//  mask having 4 bit positions per character, but with the number of
71//  1 bits to be kept dependent on the sequence length.  When this extraction
72//  mask is applied to the repeating constant 4-bit mask 1000, u8final above
73//  is produced. 
74//
75//  UTF-8 sequence length:             1     2     3       4
76//  extraction mask                 1000  1100  1110    1111
77//  constant mask                   1000  1000  1000    1000
78//  final position mask             1     10    100     1000
79//  From this mask, other masks may subsequently be computed by
80//  bitwise logic and shifting.
81//
82//  The UTF8fieldDepositMask kernel produces this deposit mask
83//  within 64-bit fields.
84
85class UTF8fieldDepositMask final : public BlockOrientedKernel {
86public:
87    UTF8fieldDepositMask(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned depositFieldWidth = 64);
88private:
89    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
90    void generateFinalBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Value * const remainingBytes) override;
91    const unsigned mDepositFieldWidth;
92};
93
94UTF8fieldDepositMask::UTF8fieldDepositMask(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned depositFieldWidth)
95: BlockOrientedKernel("u8depositMask",
96            {Binding{b->getStreamSetTy(21, 1), "basis"}},
97#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
98            {Binding{b->getStreamSetTy(1, 1), "fieldDepositMask", FixedRate(4)},
99                      Binding{b->getStreamSetTy(1, 1), "extractionMask", FixedRate(4)}},
100#else
101            {Binding{b->getStreamSetTy(1, 1), "fieldDepositMask", FixedRate(4)},
102                Binding{b->getStreamSetTy(1, 1), "codeUnitCounts", FixedRate(4), RoundUpTo(b->getBitBlockWidth())}},
103#endif
104                {}, {}, {Binding{b->getBitBlockType(), "EOFmask"}}), mDepositFieldWidth(depositFieldWidth) {
105}
106
107
108void UTF8fieldDepositMask::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
109    Value * fileExtentMask = b->CreateNot(b->getScalarField("EOFmask"));
110    // If any of bits 16 through 20 are 1, a four-byte UTF-8 sequence is required.
111    Value * u8len4 = b->loadInputStreamBlock("basis", b->getSize(16), b->getSize(0));
112    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(17), b->getSize(0)));
113    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(18), b->getSize(0)));
114    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(19), b->getSize(0)));
115    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(20), b->getSize(0)), "u8len4");
116    u8len4 = b->CreateAnd(u8len4, fileExtentMask);
117    Value * u8len34 = u8len4;
118    // Otherwise, if any of bits 11 through 15 are 1, a three-byte UTF-8 sequence is required.
119    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(11), b->getSize(0)));
120    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(12), b->getSize(0)));
121    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(13), b->getSize(0)));
122    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(14), b->getSize(0)));
123    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(15), b->getSize(0)));
124    u8len34 = b->CreateAnd(u8len34, fileExtentMask);
125    Value * nonASCII = u8len34;
126    // Otherwise, if any of bits 7 through 10 are 1, a two-byte UTF-8 sequence is required.
127    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(7), b->getSize(0)));
128    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(8), b->getSize(0)));
129    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(9), b->getSize(0)));
130    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(10), b->getSize(0)), "nonASCII");
131    nonASCII = b->CreateAnd(nonASCII, fileExtentMask);
132    //
133    //  UTF-8 sequence length:    1     2     3       4
134    //  extraction mask        1000  1100  1110    1111
135    //  interleave u8len3|u8len4, allOnes() for bits 1, 3:  x..., ..x.
136    //  interleave prefix4, u8len2|u8len3|u8len4 for bits 0, 2:  .x.., ...x
137   
138    Value * maskA_lo = b->esimd_mergel(1, u8len34, fileExtentMask);
139    Value * maskA_hi = b->esimd_mergeh(1, u8len34, fileExtentMask);
140    Value * maskB_lo = b->esimd_mergel(1, u8len4, nonASCII);
141    Value * maskB_hi = b->esimd_mergeh(1, u8len4, nonASCII);
142    Value * extraction_mask[4];
143    extraction_mask[0] = b->esimd_mergel(1, maskB_lo, maskA_lo);
144    extraction_mask[1] = b->esimd_mergeh(1, maskB_lo, maskA_lo);
145    extraction_mask[2] = b->esimd_mergel(1, maskB_hi, maskA_hi);
146    extraction_mask[3] = b->esimd_mergeh(1, maskB_hi, maskA_hi);
147    const unsigned bw = b->getBitBlockWidth();
148    Constant * mask1000 = Constant::getIntegerValue(b->getIntNTy(bw), APInt::getSplat(bw, APInt::getHighBitsSet(4, 1)));
149    for (unsigned j = 0; j < 4; ++j) {
150        Value * deposit_mask = b->simd_pext(mDepositFieldWidth, mask1000, extraction_mask[j]);
151        b->storeOutputStreamBlock("fieldDepositMask", b->getSize(0), b->getSize(j), deposit_mask);
152#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
153        b->storeOutputStreamBlock("extractionMask", b->getSize(0), b->getSize(j), extraction_mask[j]);
154#else
155        Value * unit_counts = b->simd_popcount(mDepositFieldWidth, extraction_mask[j]);
156        b->storeOutputStreamBlock("codeUnitCounts", b->getSize(0), b->getSize(j), unit_counts);
157#endif
158    }
159}
160void UTF8fieldDepositMask::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, Value * const remainingBytes) {
161    // Standard Pablo convention for final block processing: set a bit marking
162    // the position just past EOF, as well as a mask marking all positions past EOF.
163    b->setScalarField("EOFmask", b->bitblock_mask_from(remainingBytes));
164    CreateDoBlockMethodCall(b);
165}
166
167
168//
169// Given a u8-indexed bit stream marking the final code unit position
170// of each UTF-8 sequence, this kernel computes the deposit masks
171// u8initial, u8mask12_17, and u8mask6_11.
172//
173class UTF8_DepositMasks : public pablo::PabloKernel {
174public:
175    UTF8_DepositMasks(const std::unique_ptr<kernel::KernelBuilder> & kb);
176    bool isCachable() const override { return true; }
177    bool hasSignature() const override { return false; }
178protected:
179    void generatePabloMethod() override;
180};
181
182UTF8_DepositMasks::UTF8_DepositMasks (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
183: PabloKernel(iBuilder, "UTF8_DepositMasks",
184              {Binding{iBuilder->getStreamSetTy(1), "u8final", FixedRate(1), LookAhead(2)}},
185              {Binding{iBuilder->getStreamSetTy(1), "u8initial"},
186               Binding{iBuilder->getStreamSetTy(1), "u8mask12_17"},
187               Binding{iBuilder->getStreamSetTy(1), "u8mask6_11"}}) {}
188
189void UTF8_DepositMasks::generatePabloMethod() {
190    PabloBuilder pb(getEntryScope());
191    PabloAST * u8final = pb.createExtract(getInputStreamVar("u8final"), pb.getInteger(0));
192    PabloAST * nonFinal = pb.createNot(u8final, "nonFinal");
193    PabloAST * initial = pb.createInFile(pb.createNot(pb.createAdvance(nonFinal, 1)), "u8initial");
194    PabloAST * ASCII = pb.createAnd(u8final, initial);
195    PabloAST * lookAheadFinal = pb.createLookahead(u8final, 1, "lookaheadFinal");
196    // Eliminate lookahead positions that are the final position of the prior unit.
197    PabloAST * secondLast = pb.createAnd(lookAheadFinal, nonFinal);
198    PabloAST * u8mask6_11 = pb.createInFile(pb.createOr(secondLast, ASCII, "u8mask6_11"));
199    PabloAST * prefix2 = pb.createAnd(secondLast, initial);
200    PabloAST * lookAhead2 = pb.createLookahead(u8final, 2, "lookahead2");
201    PabloAST * thirdLast = pb.createAnd(pb.createAnd(lookAhead2, nonFinal), pb.createNot(secondLast));
202    PabloAST * u8mask12_17 = pb.createInFile(pb.createOr(thirdLast, pb.createOr(prefix2, ASCII), "u8mask12_17"));
203    pb.createAssign(pb.createExtract(getOutputStreamVar("u8initial"), pb.getInteger(0)), initial);
204    pb.createAssign(pb.createExtract(getOutputStreamVar("u8mask6_11"), pb.getInteger(0)), u8mask6_11);
205    pb.createAssign(pb.createExtract(getOutputStreamVar("u8mask12_17"), pb.getInteger(0)), u8mask12_17);
206}
207
208// This kernel assembles the UTF-8 basis bit data, given four sets of deposited
209// bits bits 18-20, 11-17, 6-11 and 0-5, as weil as the marker streams u8initial,
210// u8final, u8prefix3 and u8prefix4.
211//
212class UTF8assembly : public pablo::PabloKernel {
213public:
214    UTF8assembly(const std::unique_ptr<kernel::KernelBuilder> & kb);
215    bool isCachable() const override { return true; }
216    bool hasSignature() const override { return false; }
217protected:
218    void generatePabloMethod() override;
219};
220
221UTF8assembly::UTF8assembly (const std::unique_ptr<kernel::KernelBuilder> & b)
222: PabloKernel(b, "UTF8assembly",
223              {Binding{b->getStreamSetTy(3), "dep18_20"},
224                Binding{b->getStreamSetTy(6), "dep12_17"},
225                Binding{b->getStreamSetTy(6), "dep6_11"},
226                Binding{b->getStreamSetTy(6), "dep0_5"},
227                Binding{b->getStreamSetTy(1), "u8initial"},
228                Binding{b->getStreamSetTy(1), "u8final"},
229                Binding{b->getStreamSetTy(1), "u8mask6_11"},
230                Binding{b->getStreamSetTy(1), "u8mask12_17"}},
231              {Binding{b->getStreamSetTy(8), "u8basis"}}) {}
232
233void UTF8assembly::generatePabloMethod() {
234    PabloBuilder pb(getEntryScope());
235    std::vector<PabloAST *> dep18_20 = getInputStreamSet("dep18_20");
236    std::vector<PabloAST *> dep12_17 = getInputStreamSet("dep12_17");
237    std::vector<PabloAST *> dep6_11 = getInputStreamSet("dep6_11");
238    std::vector<PabloAST *> dep0_5 = getInputStreamSet("dep0_5");
239    PabloAST * u8initial = pb.createExtract(getInputStreamVar("u8initial"), pb.getInteger(0));
240    PabloAST * u8final = pb.createExtract(getInputStreamVar("u8final"), pb.getInteger(0));
241    PabloAST * u8mask6_11 = pb.createExtract(getInputStreamVar("u8mask6_11"), pb.getInteger(0));
242    PabloAST * u8mask12_17 = pb.createExtract(getInputStreamVar("u8mask12_17"), pb.getInteger(0));
243    PabloAST * ASCII = pb.createAnd(u8initial, u8final);
244    PabloAST * nonASCII = pb.createNot(ASCII, "nonASCII");
245    PabloAST * u8basis[8];
246    //
247    // Deposit bit 6 is either used for bit 6 of an ASCII code unit, or
248    // bit 0 for nonASCII units.   Extract the ASCII case separately.
249    PabloAST * ASCIIbit6 = pb.createAnd(dep6_11[0], ASCII);
250    dep6_11[0] = pb.createAnd(dep6_11[0], nonASCII);
251    for (unsigned i = 0; i < 6; i++) {
252        u8basis[i] = pb.createOr(dep0_5[i], dep6_11[i]);
253        u8basis[i] = pb.createOr(u8basis[i], dep12_17[i], "basis" + std::to_string(i));
254        if (i < 3) u8basis[i] = pb.createOr(u8basis[i], dep18_20[i]);
255    }
256    // The high bit of UTF-8 prefix and suffix bytes (any nonASCII byte) is always 1.
257    u8basis[7] = nonASCII;
258    // The second highest bit of UTF-8 units is 1 for any prefix, or ASCII byte with
259    // a 1 in bit 6 of the Unicode representation.
260    u8basis[6] = pb.createOr(pb.createAnd(u8initial, nonASCII), ASCIIbit6, "basis6");
261    //
262    // For any prefix of a 3-byte or 4-byte sequence the third highest bit is set to 1.
263    u8basis[5] = pb.createOr(u8basis[5], pb.createAnd(u8initial, pb.createNot(u8mask6_11)), "basis5");
264    // For any prefix of a 4-byte sequence the fourth highest bit is set to 1.
265    u8basis[4] = pb.createOr(u8basis[4], pb.createAnd(u8initial, pb.createNot(u8mask12_17)), "basis4");
266    for (unsigned i = 0; i < 8; i++) {
267        pb.createAssign(pb.createExtract(getOutputStreamVar("u8basis"), pb.getInteger(i)), u8basis[i]);
268    }
269}
270
271void u32u8_gen (ParabixDriver & pxDriver) {
272    auto & idb = pxDriver.getBuilder();
273    Module * mod = idb->getModule();
274   
275    unsigned const FieldWidth = sizeof(size_t) * 8;
276
277    const unsigned u32buffersize = codegen::SegmentSize * codegen::ThreadNum;
278    const unsigned u8buffersize = 4 * (u32buffersize + 1);
279    const unsigned u8buffersize2 = u8buffersize + 1;
280    const unsigned u8buffersize3 = u8buffersize2 + 4;
281
282    Type * const voidTy = idb->getVoidTy();
283   
284    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, idb->getInt32Ty(), nullptr));
285    main->setCallingConv(CallingConv::C);
286    Function::arg_iterator args = main->arg_begin();
287   
288    Value * const fileDecriptor = &*(args++);
289    fileDecriptor->setName("fileDecriptor");
290   
291    idb->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
292   
293    // Source data
294    StreamSetBuffer * codeUnitStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 32));
295   
296    Kernel * sourceK = pxDriver.addKernelInstance<FDSourceKernel>(idb, 32);
297    sourceK->setInitialArguments({idb->getInt8(0), fileDecriptor});
298    pxDriver.makeKernelCall(sourceK, {}, {codeUnitStream});
299   
300    // Source buffers for transposed UTF-32 basis bits.
301    StreamSetBuffer * u32basis = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(21), u32buffersize+1);
302   
303    kernel::Kernel * s2p21K = pxDriver.addKernelInstance<S2P_21Kernel>(idb);
304    pxDriver.makeKernelCall(s2p21K, {codeUnitStream}, {u32basis});
305
306
307        // Buffers for calculated deposit masks.
308    StreamSetBuffer * u8fieldMask = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
309    StreamSetBuffer * u8final = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
310    StreamSetBuffer * u8initial = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
311    StreamSetBuffer * u8mask12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
312    StreamSetBuffer * u8mask6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize2);
313
314    // Intermediate buffers for deposited bits
315    StreamSetBuffer * deposit18_20 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(3), u8buffersize3);
316    StreamSetBuffer * deposit12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize3);
317    StreamSetBuffer * deposit6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize3);
318    StreamSetBuffer * deposit0_5 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize3);
319
320    // Final buffers for computed UTF-8 basis bits and byte stream.
321    StreamSetBuffer * u8basis = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), u8buffersize3);
322    StreamSetBuffer * u8bytes = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), u8buffersize3);
323
324    // Calculate the u8final deposit mask.
325#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
326    StreamSetBuffer * extractionMask = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
327    kernel::Kernel * fieldDepositMaskK = pxDriver.addKernelInstance<UTF8fieldDepositMask>(idb, FieldWidth);
328    pxDriver.makeKernelCall(fieldDepositMaskK, {u32basis}, {u8fieldMask, extractionMask});
329    kernel::Kernel * streamK = pxDriver.addKernelInstance<StreamCompressKernel>(idb, FieldWidth, 1);
330    pxDriver.makeKernelCall(streamK, {u8fieldMask, extractionMask}, {u8final});
331#else
332    StreamSetBuffer * u8unitCounts = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
333    kernel::Kernel * fieldDepositMaskK = pxDriver.addKernelInstance<UTF8fieldDepositMask>(idb, FieldWidth);
334    pxDriver.makeKernelCall(fieldDepositMaskK, {u32basis}, {u8fieldMask, u8unitCounts});
335    kernel::Kernel * streamK = pxDriver.addKernelInstance<StreamCompressKernel>(idb, FieldWidth, 1);
336    pxDriver.makeKernelCall(streamK, {u8fieldMask, u8unitCounts}, {u8final});
337#endif
338//    kernel::Kernel * hexConvert =  pxDriver.addKernelInstance<BinaryToHex>(idb);
339   
340    kernel::Kernel * maskK = pxDriver.addKernelInstance<UTF8_DepositMasks>(idb);
341    pxDriver.makeKernelCall(maskK, {u8final}, {u8initial, u8mask12_17, u8mask6_11});
342
343//    pxDriver.makeKernelCall(hexConvert, {u8mask6_11}, {u8bytes});
344
345
346    StreamDepositCompiler deposit18_20compiler(pxDriver, 21, 18, 3, u32buffersize);
347    deposit18_20compiler.makeCall(u8initial, u32basis, deposit18_20);
348   
349    StreamDepositCompiler deposit12_17compiler(pxDriver, 21, 12, 6, u32buffersize);
350    deposit12_17compiler.makeCall(u8mask12_17, u32basis, deposit12_17);
351   
352    StreamDepositCompiler deposit6_11compiler(pxDriver, 21, 6, 6, u32buffersize);
353    deposit6_11compiler.makeCall(u8mask6_11, u32basis, deposit6_11);
354   
355    StreamDepositCompiler deposit0_5compiler(pxDriver, 21, 0, 6, u32buffersize);
356    deposit0_5compiler.makeCall(u8final, u32basis, deposit0_5);
357   
358    kernel::Kernel * u8assemblyK = pxDriver.addKernelInstance<UTF8assembly>(idb);
359    pxDriver.makeKernelCall(u8assemblyK, {deposit18_20, deposit12_17, deposit6_11, deposit0_5,
360                                          u8initial, u8final, u8mask6_11, u8mask12_17},
361                                         {u8basis});
362
363    kernel::Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(idb);
364    pxDriver.makeKernelCall(p2sK, {u8basis}, {u8bytes});
365
366    kernel::Kernel * outK = pxDriver.addKernelInstance<StdOutKernel>(idb, 8);
367    pxDriver.makeKernelCall(outK, {u8bytes}, {});
368
369    pxDriver.generatePipelineIR();
370
371    pxDriver.deallocateBuffers();
372
373    idb->CreateRetVoid();
374
375    pxDriver.finalizeObject();
376}
377
378typedef void (*u32u8FunctionType)(uint32_t fd);
379
380int main(int argc, char *argv[]) {
381    codegen::ParseCommandLineOptions(argc, argv, {&u32u8Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
382    ParabixDriver pxDriver("u32u8");
383    u32u8_gen(pxDriver);
384    auto u32u8Function = reinterpret_cast<u32u8FunctionType>(pxDriver.getMain());
385    const int fd = open(inputFile.c_str(), O_RDONLY);
386    if (LLVM_UNLIKELY(fd == -1)) {
387        errs() << "Error: cannot open " << inputFile << " for processing. Skipped.\n";
388    } else {
389        u32u8Function(fd);
390        close(fd);
391    }
392    return 0;
393}
Note: See TracBrowser for help on using the repository browser.