source: icGREP/icgrep-devel/icgrep/u32u8.cpp @ 6071

Last change on this file since 6071 was 6071, checked in by cameron, 11 months ago

u32u8.cpp initial check-in

File size: 18.6 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
8#include <cc/cc_compiler.h>                        // for CC_Compiler
9#include <kernels/deletion.h>                      // for DeletionKernel
10#include <kernels/source_kernel.h>
11#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
12#include <kernels/s2p_kernel.h>                    // for S2PKernel
13#include <kernels/stdout_kernel.h>                 // for StdOutKernel_
14#include <kernels/pdep_kernel.h>
15#include <llvm/IR/Function.h>                      // for Function, Function...
16#include <llvm/IR/Module.h>                        // for Module
17#include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
18#include <llvm/Support/Debug.h>                    // for dbgs
19#include <pablo/pablo_kernel.h>                    // for PabloKernel
20#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
21#include <kernels/kernel_builder.h>
22#include <pablo/pe_zeroes.h>
23#include <toolchain/toolchain.h>
24#include <toolchain/cpudriver.h>
25#include <kernels/streamset.h>
26#include <llvm/ADT/StringRef.h>
27#include <llvm/IR/CallingConv.h>
28#include <llvm/IR/DerivedTypes.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Value.h>
31#include <llvm/Support/Compiler.h>
32#include <llvm/Support/raw_ostream.h>
33#include <pablo/builder.hpp>
34#include <fcntl.h>
35
36using namespace pablo;
37using namespace kernel;
38using namespace parabix;
39using namespace llvm;
40
41static cl::OptionCategory u32u8Options("u32u8 Options", "Transcoding control options.");
42static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(u32u8Options));
43
44//
45// UTF-8 encoding requires one to four bytes per Unicode character.
46// To generate UTF-8 encoded output from sets of basis bit streams
47// representing Unicode characters (that is, codepoint-indexed streams
48// having one bit position per codepoint), deposit masks are needed
49// to identify the positions at which bits for each character are
50// to be deposited.   A UTF-8 deposit mask will have one to four bit
51// positions per character depending on the character being encoded, that is,
52// depending on the number of bytes needed to encode the character.   Within
53// each group of one to four positions for a single character, a deposit mask
54// must have exactly one 1 bit set.  Different deposit masks are used for
55// depositing bits, depending on the destination byte position within the
56// ultimate 4 byte sequence.
57//
58// The following deposit masks (shown in little-endian representation) are
59// used for depositing bits.
60//
61//  UTF-8 sequence length:          1     2     3       4
62//  Unicode bit position:
63//  Unicode codepoint bits 0-5      1    10   100    1000    u8final
64//  Bits 6-11                       1    01   010    0100    u8mask6_11
65//  Bits 12-17                      1    01   001    0010    u8mask12_17
66//  Bits 18-20                      1    01   001    0001    u8initial
67//
68//  To compute UTF-8 deposit masks, we begin by constructing an extraction
69//  mask having 4 bit positions per character, but with the number of
70//  1 bits to be kept dependent on the sequence length.  When this extraction
71//  mask is applied to the repeating constant 4-bit mask 1000, u8final above
72//  is produced. 
73//
74//  UTF-8 sequence length:             1     2     3       4
75//  extraction mask                 1000  1100  1110    1111
76//  constant mask                   1000  1000  1000    1000
77//  final position mask             1     10    100     1000
78//  From this mask, other masks may subsequently be computed by
79//  bitwise logic and shifting.
80//
81//  The UTF8fieldDepositMask kernel produces this deposit mask
82//  within 64-bit fields.
83
84class UTF8fieldDepositMask final : public BlockOrientedKernel {
85public:
86    UTF8fieldDepositMask(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned depositFieldWidth = 64);
87private:
88    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
89    const unsigned mDepositFieldWidth;
90};
91
92UTF8fieldDepositMask::UTF8fieldDepositMask(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned depositFieldWidth)
93: BlockOrientedKernel("u8depositMask",
94            {Binding{b->getStreamSetTy(1, 21), "basis"}},
95            {Binding{b->getStreamSetTy(1, 1), "fieldDepositMask", FixedRate(4)}, 
96                Binding{b->getStreamSetTy(1, 1), "codeUnitCounts", FixedRate(4), RoundUpTo(b->getBitBlockWidth())}},
97            {}, {}, {}), mDepositFieldWidth(depositFieldWidth) {
98}
99
100
101void UTF8fieldDepositMask::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
102    // If any of bits 16 through 20 are 1, a four-byte UTF-8 sequence is required.
103    Value * u8len4 = b->loadInputStreamBlock("basis", b->getSize(16), b->getSize(0));
104    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(17), b->getSize(0)));
105    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(18), b->getSize(0)));
106    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(19), b->getSize(0)));
107    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(20), b->getSize(0)), "u8len4");
108    Value * u8len34 = u8len4;
109    // Otherwise, if any of bits 11 through 15 are 1, a three-byte UTF-8 sequence is required.
110    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(11), b->getSize(0)));
111    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(12), b->getSize(0)));
112    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(13), b->getSize(0)));
113    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(14), b->getSize(0)));
114    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(15), b->getSize(0)));
115    Value * nonASCII = u8len34;
116    // Otherwise, if any of bits 7 through 10 are 1, a two-byte UTF-8 sequence is required.
117    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(7), b->getSize(0)));
118    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(8), b->getSize(0)));
119    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(9), b->getSize(0)));
120    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(10), b->getSize(0)), "nonASCII");
121    //
122    //  UTF-8 sequence length:    1     2     3       4
123    //  extraction mask        1000  1100  1110    1111
124    //  interleave u8len3|u8len4, allOnes() for bits 1, 3:  x..., ..x.
125    //  interleave prefix4, u8len2|u8len3|u8len4 for bits 0, 2:  .x.., ...x
126    Value * maskA_lo = b->esimd_mergel(1, u8len34, b->allOnes());
127    Value * maskA_hi = b->esimd_mergeh(1, u8len34, b->allOnes());
128    Value * maskB_lo = b->esimd_mergel(1, u8len4, nonASCII);
129    Value * maskB_hi = b->esimd_mergeh(1, u8len4, nonASCII);
130    Value * extraction_mask[4];
131    extraction_mask[0] = b->esimd_mergel(1, maskB_lo, maskA_lo);
132    extraction_mask[1] = b->esimd_mergeh(1, maskB_lo, maskA_lo);
133    extraction_mask[2] = b->esimd_mergel(1, maskB_hi, maskA_hi);
134    extraction_mask[3] = b->esimd_mergeh(1, maskB_hi, maskA_hi);
135    const unsigned bw = b->getBitBlockWidth();
136    Constant * mask1000 = Constant::getIntegerValue(b->getIntNTy(bw), APInt::getSplat(bw, APInt::getHighBitsSet(4, 1)));
137    for (unsigned j = 0; j < 4; ++j) {
138        Value * deposit_mask = b->simd_pext(mDepositFieldWidth, mask1000, extraction_mask[j]);
139        Value * unit_counts = b->simd_popcount(mDepositFieldWidth, extraction_mask[j]);
140        b->storeOutputStreamBlock("fieldDepositMask", b->getSize(0), b->getSize(j), deposit_mask);
141        b->storeOutputStreamBlock("codeUnitCounts", b->getSize(0), b->getSize(j), unit_counts);
142    }
143}
144
145//
146// Given a u8-indexed bit stream marking the final code unit position
147// of each UTF-8 sequence, this kernel computes the deposit masks
148// u8initial, u8mask12_17, and u8mask6_11.
149//
150class UTF8_DepositMasks : public pablo::PabloKernel {
151public:
152    UTF8_DepositMasks(const std::unique_ptr<kernel::KernelBuilder> & kb);
153    bool isCachable() const override { return true; }
154    bool hasSignature() const override { return false; }
155protected:
156    void generatePabloMethod() override;
157};
158
159UTF8_DepositMasks::UTF8_DepositMasks (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
160: PabloKernel(iBuilder, "UTF8_DepositMasks",
161              {Binding{iBuilder->getStreamSetTy(1), "u8final", FixedRate(1), LookAhead(2)}},
162              {Binding{iBuilder->getStreamSetTy(1), "u8initial"},
163               Binding{iBuilder->getStreamSetTy(1), "u8mask12_17"},
164               Binding{iBuilder->getStreamSetTy(1), "u8mask6_11"}}) {}
165
166void UTF8_DepositMasks::generatePabloMethod() {
167    PabloBuilder pb(getEntryScope());
168    PabloAST * u8final = pb.createExtract(getInputStreamVar("u8final"), pb.getInteger(0));
169    PabloAST * nonFinal = pb.createNot(u8final, "nonFinal");
170    PabloAST * initial = pb.createInFile(pb.createNot(pb.createAdvance(nonFinal, 1)), "u8initial");
171    PabloAST * ASCII = pb.createAnd(u8final, initial);
172    PabloAST * lookAheadFinal = pb.createLookahead(u8final, 1, "lookaheadFinal");
173    // Eliminate lookahead positions that are the final position of the prior unit.
174    PabloAST * secondLast = pb.createAnd(lookAheadFinal, nonFinal);
175    PabloAST * u8mask6_11 = pb.createOr(secondLast, ASCII, "u8mask6_11");
176    PabloAST * prefix2 = pb.createAnd(secondLast, initial);
177    PabloAST * lookAhead2 = pb.createLookahead(u8final, 2, "lookahead2");
178    PabloAST * thirdLast = pb.createAnd(pb.createAnd(lookAhead2, nonFinal), pb.createNot(secondLast));
179    PabloAST * u8mask12_17 = pb.createOr(thirdLast, pb.createOr(prefix2, ASCII), "u8mask12_17");
180    pb.createAssign(pb.createExtract(getOutputStreamVar("u8initial"), pb.getInteger(0)), initial);
181    pb.createAssign(pb.createExtract(getOutputStreamVar("u8mask6_11"), pb.getInteger(0)), u8mask6_11);
182    pb.createAssign(pb.createExtract(getOutputStreamVar("u8mask12_17"), pb.getInteger(0)), u8mask12_17);
183}
184
185// This kernel assembles the UTF-8 basis bit data, given four sets of deposited
186// bits 18-20, 12-17, 6-11 and 0-5, as well as the marker streams u8initial,
187// u8final, u8mask6_11 and u8mask12_17.
188//
189class UTF8assembly : public pablo::PabloKernel {
190public:
191    UTF8assembly(const std::unique_ptr<kernel::KernelBuilder> & kb);
192    bool isCachable() const override { return true; }
193    bool hasSignature() const override { return false; }
194protected:
195    void generatePabloMethod() override;
196};
197
198UTF8assembly::UTF8assembly (const std::unique_ptr<kernel::KernelBuilder> & b)
199: PabloKernel(b, "UTF8assembly",
200              {Binding{b->getStreamSetTy(3), "dep18_20"},
201                Binding{b->getStreamSetTy(6), "dep12_17"},
202                Binding{b->getStreamSetTy(6), "dep6_11"},
203                Binding{b->getStreamSetTy(6), "dep0_5"},
204                Binding{b->getStreamSetTy(1), "u8initial"},
205                Binding{b->getStreamSetTy(1), "u8final"},
206                Binding{b->getStreamSetTy(1), "u8mask6_11"},
207                Binding{b->getStreamSetTy(1), "u8mask12_17"}},
208              {Binding{b->getStreamSetTy(8), "u8basis"}}) {}
209
210void UTF8assembly::generatePabloMethod() {
211    PabloBuilder pb(getEntryScope());
212    std::vector<PabloAST *> dep18_20 = getInputStreamSet("dep18_20");
213    std::vector<PabloAST *> dep12_17 = getInputStreamSet("dep12_17");
214    std::vector<PabloAST *> dep6_11 = getInputStreamSet("dep6_11");
215    std::vector<PabloAST *> dep0_5 = getInputStreamSet("dep0_5");
216    PabloAST * u8initial = pb.createExtract(getInputStreamVar("u8initial"), pb.getInteger(0));
217    PabloAST * u8final = pb.createExtract(getInputStreamVar("u8final"), pb.getInteger(0));
218    PabloAST * u8mask6_11 = pb.createExtract(getInputStreamVar("u8mask6_11"), pb.getInteger(0));
219    PabloAST * u8mask12_17 = pb.createExtract(getInputStreamVar("u8mask12_17"), pb.getInteger(0));
220    PabloAST * ASCII = pb.createAnd(u8initial, u8final);
221    PabloAST * nonASCII = pb.createNot(ASCII, "nonASCII");
222    PabloAST * u8basis[8];
223    //
224    // Deposit bit 6 is either used for bit 6 of an ASCII code unit, or
225    // bit 0 for nonASCII units.   Extract the ASCII case separately.
226    PabloAST * ASCIIbit6 = pb.createAnd(dep6_11[0], ASCII);
227    dep6_11[0] = pb.createAnd(dep6_11[0], nonASCII);
228    for (unsigned i = 0; i < 6; i++) {
229        u8basis[i] = pb.createOr(dep0_5[i], dep6_11[i]);
230        u8basis[i] = pb.createOr(u8basis[i], dep12_17[i], "basis" + std::to_string(i));
231        if (i < 3) u8basis[i] = pb.createOr(u8basis[i], dep18_20[i]);
232    }
233    // The high bit of UTF-8 prefix and suffix bytes (any nonASCII byte) is always 1.
234    u8basis[7] = nonASCII;
235    // The second highest bit of UTF-8 units is 1 for any prefix, or ASCII byte with
236    // a 1 in bit 6 of the Unicode representation.
237    u8basis[6] = pb.createOr(pb.createAnd(u8initial, nonASCII), ASCIIbit6, "basis6");
238    //
239    // For any prefix of a 3-byte or 4-byte sequence the third highest bit is set to 1.
240    u8basis[5] = pb.createOr(u8basis[5], pb.createAnd(u8initial, pb.createNot(u8mask6_11)), "basis5");
241    // For any prefix of a 4-byte sequence the fourth highest bit is set to 1.
242    u8basis[4] = pb.createOr(u8basis[4], pb.createAnd(u8initial, pb.createNot(u8mask12_17)), "basis4");
243    for (unsigned i = 0; i < 8; i++) {
244        pb.createAssign(pb.createExtract(getOutputStreamVar("u8basis"), pb.getInteger(i)), u8basis[i]);
245    }
246}
247
248void u32u8_gen (ParabixDriver & pxDriver) {
249    auto & idb = pxDriver.getBuilder();
250    Module * mod = idb->getModule();
251
252    const unsigned u32buffersize = codegen::SegmentSize * codegen::ThreadNum;
253        const unsigned u8buffersize = 4 * u32buffersize;
254
255    Type * const voidTy = idb->getVoidTy();
256   
257    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, idb->getInt32Ty(), nullptr));
258    main->setCallingConv(CallingConv::C);
259    Function::arg_iterator args = main->arg_begin();
260   
261    Value * const fileDecriptor = &*(args++);
262    fileDecriptor->setName("fileDecriptor");
263   
264    idb->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
265   
266    // File data from mmap
267    StreamSetBuffer * codeUnitStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 32));
268   
269    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(idb, 32);
270    mmapK->setInitialArguments({fileDecriptor});
271    pxDriver.makeKernelCall(mmapK, {}, {codeUnitStream});
272   
273    // Source buffers for transposed UTF-32 basis bits.
274    StreamSetBuffer * u32basis = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(21), u32buffersize);
275   
276    kernel::Kernel * s2p21K = pxDriver.addKernelInstance<S2P_21Kernel>(idb);
277    pxDriver.makeKernelCall(s2p21K, {codeUnitStream}, {u32basis});
278
279    StreamSetBuffer * u8unitCounts = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
280
281        // Buffers for calculated deposit masks.
282    StreamSetBuffer * u8fieldMask = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
283    StreamSetBuffer * u8final = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
284    StreamSetBuffer * u8initial = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
285    StreamSetBuffer * u8mask12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
286    StreamSetBuffer * u8mask6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), u8buffersize);
287
288    // Intermediate buffers for deposited bits
289    StreamSetBuffer * deposit18_20 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(3), u8buffersize);
290    StreamSetBuffer * deposit12_17 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize);
291    StreamSetBuffer * deposit6_11 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize);
292    StreamSetBuffer * deposit0_5 = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(6), u8buffersize);
293
294    // Final buffers for computed UTF-8 basis bits and byte stream.
295    StreamSetBuffer * u8basis = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), u8buffersize);
296    StreamSetBuffer * u8bytes = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), u8buffersize);
297
298    // Calculate the u8final deposit mask.
299    kernel::Kernel * fieldDepositMaskK = pxDriver.addKernelInstance<UTF8fieldDepositMask>(idb);
300    pxDriver.makeKernelCall(fieldDepositMaskK, {u32basis}, {u8fieldMask, u8unitCounts});
301    kernel::Kernel * streamK = pxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, 1);
302    pxDriver.makeKernelCall(streamK, {u8fieldMask, u8unitCounts}, {u8final});
303
304    kernel::Kernel * maskK = pxDriver.addKernelInstance<UTF8_DepositMasks>(idb);
305    pxDriver.makeKernelCall(maskK, {u8final}, {u8initial, u8mask12_17, u8mask6_11});
306   
307    StreamDepositCompiler deposit18_20compiler(pxDriver, 21, 18, 3, u32buffersize);
308    deposit18_20compiler.makeCall(u8initial, u32basis, deposit18_20);
309   
310    StreamDepositCompiler deposit12_17compiler(pxDriver, 21, 12, 6, u32buffersize);
311    deposit12_17compiler.makeCall(u8mask12_17, u32basis, deposit12_17);
312   
313    StreamDepositCompiler deposit6_11compiler(pxDriver, 21, 6, 6, u32buffersize);
314    deposit6_11compiler.makeCall(u8mask6_11, u32basis, deposit6_11);
315   
316    StreamDepositCompiler deposit0_5compiler(pxDriver, 21, 0, 6, u32buffersize);
317    deposit0_5compiler.makeCall(u8final, u32basis, deposit0_5);
318   
319    kernel::Kernel * u8assemblyK = pxDriver.addKernelInstance<UTF8assembly>(idb);
320    pxDriver.makeKernelCall(u8assemblyK, {deposit18_20, deposit12_17, deposit6_11, deposit0_5,
321                                          u8initial, u8final, u8mask6_11, u8mask12_17},
322                                         {u8basis});
323
324    kernel::Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(idb);
325    pxDriver.makeKernelCall(p2sK, {u8basis}, {u8bytes});
326
327    kernel::Kernel * outK = pxDriver.addKernelInstance<StdOutKernel>(idb, 8);
328    pxDriver.makeKernelCall(outK, {u8bytes}, {});
329
330    pxDriver.generatePipelineIR();
331
332    pxDriver.deallocateBuffers();
333
334    idb->CreateRetVoid();
335
336    pxDriver.finalizeObject();
337}
338
339typedef void (*u32u8FunctionType)(uint32_t fd);
340
341int main(int argc, char *argv[]) {
342    codegen::ParseCommandLineOptions(argc, argv, {&u32u8Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
343    ParabixDriver pxDriver("u32u8");
344    u32u8_gen(pxDriver);
345    auto u32u8Function = reinterpret_cast<u32u8FunctionType>(pxDriver.getMain());
346    const int fd = open(inputFile.c_str(), O_RDONLY);
347    if (LLVM_UNLIKELY(fd == -1)) {
348        errs() << "Error: cannot open " << inputFile << " for processing. Skipped.\n";
349    } else {
350        u32u8Function(fd);
351        close(fd);
352    }
353    return 0;
354}
Note: See TracBrowser for help on using the repository browser.