source: icGREP/icgrep-devel/icgrep/u32u8.cpp @ 6189

Last change on this file since 6189 was 6189, checked in by nmedfort, 7 months ago

Bug fixes for 32-bit

File size: 18.6 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <IR_Gen/idisa_target.h>                   // for GetIDISA_Builder
8#include <cc/cc_compiler.h>                        // for CC_Compiler
9#include <kernels/deletion.h>                      // for DeletionKernel
10#include <kernels/source_kernel.h>
11#include <kernels/p2s_kernel.h>                    // for P2S16KernelWithCom...
12#include <kernels/s2p_kernel.h>                    // for S2PKernel
13#include <kernels/stdout_kernel.h>                 // for StdOutKernel_
14#include <kernels/pdep_kernel.h>
15#include <llvm/IR/Function.h>                      // for Function, Function...
16#include <llvm/IR/Module.h>                        // for Module
17#include <llvm/Support/CommandLine.h>              // for ParseCommandLineOp...
18#include <llvm/Support/Debug.h>                    // for dbgs
19#include <pablo/pablo_kernel.h>                    // for PabloKernel
20#include <pablo/pablo_toolchain.h>                 // for pablo_function_passes
21#include <kernels/kernel_builder.h>
22#include <pablo/pe_zeroes.h>
23#include <toolchain/toolchain.h>
24#include <toolchain/cpudriver.h>
25#include <kernels/streamset.h>
26#include <kernels/hex_convert.h>
27#include <llvm/ADT/StringRef.h>
28#include <llvm/IR/CallingConv.h>
29#include <llvm/IR/DerivedTypes.h>
30#include <llvm/IR/LLVMContext.h>
31#include <llvm/IR/Value.h>
32#include <llvm/Support/Compiler.h>
33#include <llvm/Support/raw_ostream.h>
34#include <pablo/builder.hpp>
35#include <fcntl.h>
36#include <kernels/pipeline_builder.h>
37
38using namespace pablo;
39using namespace kernel;
40using namespace llvm;
41using namespace codegen;
42
43static cl::OptionCategory u32u8Options("u32u8 Options", "Transcoding control options.");
44static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(u32u8Options));
45
46//
47// UTF-8 encoding requires one to four bytes per Unicode character.
48// To generate UTF-8 encoded output from sets of basis bit streams
49// representing Unicode characters (that is, codepoint-indexed streams
50// having one bit position per codepoint), deposit masks are needed
51// to identify the positions at which bits for each character are
52// to be deposited.   A UTF-8 deposit mask will have one to four bit
53// positions per character depending on the character being encoded, that is,
54// depending on the number of bytes needed to encode the character.   Within
55// each group of one to four positions for a single character, a deposit mask
56// must have exactly one 1 bit set.  Different deposit masks are used for
57// depositing bits, depending on the destination byte position within the
58// ultimate 4 byte sequence.
59//
60// The following deposit masks (shown in little-endian representation) are
61// used for depositing bits.
62//
63//  UTF-8 sequence length:          1     2     3       4
64//  Unicode bit position:
65//  Unicode codepoint bits 0-5      1    10   100    1000    u8final
66//  Bits 6-11                       1    01   010    0100    u8mask6_11
67//  Bits 12-17                      1    01   001    0010    u8mask12_17
68//  Bits 18-20                      1    01   001    0001    u8initial
69//
70//  To compute UTF-8 deposit masks, we begin by constructing an extraction
71//  mask having 4 bit positions per character, but with the number of
72//  1 bits to be kept dependent on the sequence length.  When this extraction
73//  mask is applied to the repeating constant 4-bit mask 1000, u8final above
74//  is produced. 
75//
76//  UTF-8 sequence length:             1     2     3       4
77//  extraction mask                 1000  1100  1110    1111
78//  constant mask                   1000  1000  1000    1000
79//  final position mask             1     10    100     1000
80//  From this mask, other masks may subsequently be computed by
81//  bitwise logic and shifting.
82//
83//  The UTF8fieldDepositMask kernel produces this deposit mask
84//  within 64-bit fields.
85
86class UTF8fieldDepositMask final : public BlockOrientedKernel {
87public:
88    UTF8fieldDepositMask(const std::unique_ptr<KernelBuilder> & b, StreamSet * u32basis, StreamSet * u8fieldMask, StreamSet * u8unitCounts, unsigned depositFieldWidth = sizeof(size_t) * 8);
89private:
90    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) override;
91    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const remainingBytes) override;
92    const unsigned mDepositFieldWidth;
93};
94
95UTF8fieldDepositMask::UTF8fieldDepositMask(const std::unique_ptr<KernelBuilder> & b, StreamSet * u32basis, StreamSet * u8fieldMask, StreamSet * u8unitCounts, unsigned depositFieldWidth)
96: BlockOrientedKernel("u8depositMask",
97{Binding{"basis", u32basis}},
98{Binding{"fieldDepositMask", u8fieldMask, FixedRate(4)},
99#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
100Binding{"extractionMask", u8unitCounts, FixedRate(4)}},
101#else
102Binding{"codeUnitCounts", u8unitCounts, FixedRate(4), RoundUpTo(b->getBitBlockWidth())}},
103#endif
104{}, {}, {Binding{b->getBitBlockType(), "EOFmask"}})
105, mDepositFieldWidth(depositFieldWidth) {
106
107}
108
109
110void UTF8fieldDepositMask::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
111    Value * fileExtentMask = b->CreateNot(b->getScalarField("EOFmask"));
112    // If any of bits 16 through 20 are 1, a four-byte UTF-8 sequence is required.
113    Value * u8len4 = b->loadInputStreamBlock("basis", b->getSize(16), b->getSize(0));
114    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(17), b->getSize(0)));
115    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(18), b->getSize(0)));
116    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(19), b->getSize(0)));
117    u8len4 = b->CreateOr(u8len4, b->loadInputStreamBlock("basis", b->getSize(20), b->getSize(0)), "u8len4");
118    u8len4 = b->CreateAnd(u8len4, fileExtentMask);
119    Value * u8len34 = u8len4;
120    // Otherwise, if any of bits 11 through 15 are 1, a three-byte UTF-8 sequence is required.
121    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(11), b->getSize(0)));
122    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(12), b->getSize(0)));
123    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(13), b->getSize(0)));
124    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(14), b->getSize(0)));
125    u8len34 = b->CreateOr(u8len34, b->loadInputStreamBlock("basis", b->getSize(15), b->getSize(0)));
126    u8len34 = b->CreateAnd(u8len34, fileExtentMask);
127    Value * nonASCII = u8len34;
128    // Otherwise, if any of bits 7 through 10 are 1, a two-byte UTF-8 sequence is required.
129    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(7), b->getSize(0)));
130    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(8), b->getSize(0)));
131    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(9), b->getSize(0)));
132    nonASCII = b->CreateOr(nonASCII, b->loadInputStreamBlock("basis", b->getSize(10), b->getSize(0)), "nonASCII");
133    nonASCII = b->CreateAnd(nonASCII, fileExtentMask);
134    //
135    //  UTF-8 sequence length:    1     2     3       4
136    //  extraction mask        1000  1100  1110    1111
137    //  interleave u8len3|u8len4, allOnes() for bits 1, 3:  x..., ..x.
138    //  interleave prefix4, u8len2|u8len3|u8len4 for bits 0, 2:  .x.., ...x
139   
140    Value * maskA_lo = b->esimd_mergel(1, u8len34, fileExtentMask);
141    Value * maskA_hi = b->esimd_mergeh(1, u8len34, fileExtentMask);
142    Value * maskB_lo = b->esimd_mergel(1, u8len4, nonASCII);
143    Value * maskB_hi = b->esimd_mergeh(1, u8len4, nonASCII);
144    Value * extraction_mask[4];
145    extraction_mask[0] = b->esimd_mergel(1, maskB_lo, maskA_lo);
146    extraction_mask[1] = b->esimd_mergeh(1, maskB_lo, maskA_lo);
147    extraction_mask[2] = b->esimd_mergel(1, maskB_hi, maskA_hi);
148    extraction_mask[3] = b->esimd_mergeh(1, maskB_hi, maskA_hi);
149    const unsigned bw = b->getBitBlockWidth();
150    Constant * mask1000 = Constant::getIntegerValue(b->getIntNTy(bw), APInt::getSplat(bw, APInt::getHighBitsSet(4, 1)));
151    for (unsigned j = 0; j < 4; ++j) {
152        Value * deposit_mask = b->simd_pext(mDepositFieldWidth, mask1000, extraction_mask[j]);
153        b->storeOutputStreamBlock("fieldDepositMask", b->getSize(0), b->getSize(j), deposit_mask);
154#ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
155        b->storeOutputStreamBlock("extractionMask", b->getSize(0), b->getSize(j), extraction_mask[j]);
156#else
157        Value * unit_counts = b->simd_popcount(mDepositFieldWidth, extraction_mask[j]);
158        b->storeOutputStreamBlock("codeUnitCounts", b->getSize(0), b->getSize(j), unit_counts);
159#endif
160    }
161}
162void UTF8fieldDepositMask::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, Value * const remainingBytes) {
163    // Standard Pablo convention for final block processing: set a bit marking
164    // the position just past EOF, as well as a mask marking all positions past EOF.
165    b->setScalarField("EOFmask", b->bitblock_mask_from(remainingBytes));
166    CreateDoBlockMethodCall(b);
167}
168
169
170//
171// Given a u8-indexed bit stream marking the final code unit position
172// of each UTF-8 sequence, this kernel computes the deposit masks
173// u8initial, u8mask12_17, and u8mask6_11.
174//
175class UTF8_DepositMasks : public pablo::PabloKernel {
176public:
177    UTF8_DepositMasks(const std::unique_ptr<KernelBuilder> & kb, StreamSet * u8final, StreamSet * u8initial, StreamSet * u8mask12_17, StreamSet * u8mask6_11);
178    bool isCachable() const override { return true; }
179    bool hasSignature() const override { return false; }
180protected:
181    void generatePabloMethod() override;
182};
183
184UTF8_DepositMasks::UTF8_DepositMasks (const std::unique_ptr<KernelBuilder> & iBuilder, StreamSet * u8final, StreamSet * u8initial, StreamSet * u8mask12_17, StreamSet * u8mask6_11)
185: PabloKernel(iBuilder, "UTF8_DepositMasks",
186              {Binding{"u8final", u8final, FixedRate(1), LookAhead(2)}},
187              {Binding{"u8initial", u8initial},
188               Binding{"u8mask12_17", u8mask12_17},
189               Binding{"u8mask6_11", u8mask6_11}}) {}
190
191void UTF8_DepositMasks::generatePabloMethod() {
192    PabloBuilder pb(getEntryScope());
193    PabloAST * u8final = pb.createExtract(getInputStreamVar("u8final"), pb.getInteger(0));
194    PabloAST * nonFinal = pb.createNot(u8final, "nonFinal");
195    PabloAST * initial = pb.createInFile(pb.createNot(pb.createAdvance(nonFinal, 1)), "u8initial");
196    PabloAST * ASCII = pb.createAnd(u8final, initial);
197    PabloAST * lookAheadFinal = pb.createLookahead(u8final, 1, "lookaheadFinal");
198    // Eliminate lookahead positions that are the final position of the prior unit.
199    PabloAST * secondLast = pb.createAnd(lookAheadFinal, nonFinal);
200    PabloAST * u8mask6_11 = pb.createInFile(pb.createOr(secondLast, ASCII, "u8mask6_11"));
201    PabloAST * prefix2 = pb.createAnd(secondLast, initial);
202    PabloAST * lookAhead2 = pb.createLookahead(u8final, 2, "lookahead2");
203    PabloAST * thirdLast = pb.createAnd(pb.createAnd(lookAhead2, nonFinal), pb.createNot(secondLast));
204    PabloAST * u8mask12_17 = pb.createInFile(pb.createOr(thirdLast, pb.createOr(prefix2, ASCII), "u8mask12_17"));
205    pb.createAssign(pb.createExtract(getOutputStreamVar("u8initial"), pb.getInteger(0)), initial);
206    pb.createAssign(pb.createExtract(getOutputStreamVar("u8mask6_11"), pb.getInteger(0)), u8mask6_11);
207    pb.createAssign(pb.createExtract(getOutputStreamVar("u8mask12_17"), pb.getInteger(0)), u8mask12_17);
208}
209
210// This kernel assembles the UTF-8 basis bit data, given four sets of deposited
211// bits bits 18-20, 11-17, 6-11 and 0-5, as weil as the marker streams u8initial,
212// u8final, u8prefix3 and u8prefix4.
213//
214class UTF8assembly : public pablo::PabloKernel {
215public:
216    UTF8assembly(const std::unique_ptr<KernelBuilder> & kb,
217                 StreamSet * deposit18_20, StreamSet * deposit12_17, StreamSet * deposit6_11, StreamSet * deposit0_5,
218                 StreamSet * u8initial, StreamSet * u8final, StreamSet * u8mask6_11, StreamSet * u8mask12_17,
219                 StreamSet * u8basis);
220    bool isCachable() const override { return true; }
221    bool hasSignature() const override { return false; }
222protected:
223    void generatePabloMethod() override;
224};
225
226UTF8assembly::UTF8assembly (const std::unique_ptr<KernelBuilder> & b,
227                            StreamSet * deposit18_20, StreamSet * deposit12_17, StreamSet * deposit6_11, StreamSet * deposit0_5,
228                            StreamSet * u8initial, StreamSet * u8final, StreamSet * u8mask6_11, StreamSet * u8mask12_17,
229                            StreamSet * u8basis)
230: PabloKernel(b, "UTF8assembly",
231{Binding{"dep18_20", deposit18_20},
232 Binding{"dep12_17", deposit12_17},
233 Binding{"dep6_11", deposit6_11},
234 Binding{"dep0_5", deposit0_5},
235 Binding{"u8initial", u8initial},
236 Binding{"u8final", u8final},
237 Binding{"u8mask6_11", u8mask6_11},
238 Binding{"u8mask12_17", u8mask12_17}},
239{Binding{"u8basis", u8basis}}) {
240
241}
242
243void UTF8assembly::generatePabloMethod() {
244    PabloBuilder pb(getEntryScope());
245    std::vector<PabloAST *> dep18_20 = getInputStreamSet("dep18_20");
246    std::vector<PabloAST *> dep12_17 = getInputStreamSet("dep12_17");
247    std::vector<PabloAST *> dep6_11 = getInputStreamSet("dep6_11");
248    std::vector<PabloAST *> dep0_5 = getInputStreamSet("dep0_5");
249    PabloAST * u8initial = pb.createExtract(getInputStreamVar("u8initial"), pb.getInteger(0));
250    PabloAST * u8final = pb.createExtract(getInputStreamVar("u8final"), pb.getInteger(0));
251    PabloAST * u8mask6_11 = pb.createExtract(getInputStreamVar("u8mask6_11"), pb.getInteger(0));
252    PabloAST * u8mask12_17 = pb.createExtract(getInputStreamVar("u8mask12_17"), pb.getInteger(0));
253    PabloAST * ASCII = pb.createAnd(u8initial, u8final);
254    PabloAST * nonASCII = pb.createNot(ASCII, "nonASCII");
255    PabloAST * u8basis[8];
256    //
257    // Deposit bit 6 is either used for bit 6 of an ASCII code unit, or
258    // bit 0 for nonASCII units.   Extract the ASCII case separately.
259    PabloAST * ASCIIbit6 = pb.createAnd(dep6_11[0], ASCII);
260    dep6_11[0] = pb.createAnd(dep6_11[0], nonASCII);
261    for (unsigned i = 0; i < 6; i++) {
262        u8basis[i] = pb.createOr(dep0_5[i], dep6_11[i]);
263        u8basis[i] = pb.createOr(u8basis[i], dep12_17[i], "basis" + std::to_string(i));
264        if (i < 3) u8basis[i] = pb.createOr(u8basis[i], dep18_20[i]);
265    }
266    // The high bit of UTF-8 prefix and suffix bytes (any nonASCII byte) is always 1.
267    u8basis[7] = nonASCII;
268    // The second highest bit of UTF-8 units is 1 for any prefix, or ASCII byte with
269    // a 1 in bit 6 of the Unicode representation.
270    u8basis[6] = pb.createOr(pb.createAnd(u8initial, nonASCII), ASCIIbit6, "basis6");
271    //
272    // For any prefix of a 3-byte or 4-byte sequence the third highest bit is set to 1.
273    u8basis[5] = pb.createOr(u8basis[5], pb.createAnd(u8initial, pb.createNot(u8mask6_11)), "basis5");
274    // For any prefix of a 4-byte sequence the fourth highest bit is set to 1.
275    u8basis[4] = pb.createOr(u8basis[4], pb.createAnd(u8initial, pb.createNot(u8mask12_17)), "basis4");
276    for (unsigned i = 0; i < 8; i++) {
277        pb.createAssign(pb.createExtract(getOutputStreamVar("u8basis"), pb.getInteger(i)), u8basis[i]);
278    }
279}
280
281void deposit(const std::unique_ptr<PipelineBuilder> & P, const unsigned base, const unsigned count, StreamSet * mask, StreamSet * inputs, StreamSet * outputs) {
282    StreamSet * const expanded = P->CreateStreamSet(count);
283    P->CreateKernelCall<StreamExpandKernel>(inputs, base, mask, expanded);
284    if (AVX2_available()) {
285        P->CreateKernelCall<PDEPFieldDepositKernel>(mask, expanded, outputs);
286    } else {
287        P->CreateKernelCall<FieldDepositKernel>(mask, expanded, outputs);
288    }
289}
290
// Signature of the compiled pipeline entry point: takes the input file descriptor.
using u32u8FunctionType = void (*)(uint32_t fd);
292
293u32u8FunctionType u32u8_gen (CPUDriver & pxDriver) {
294
295    auto & iBuilder = pxDriver.getBuilder();
296    Type * const int32Ty = iBuilder->getInt32Ty();
297    auto P = pxDriver.makePipeline({Binding{int32Ty, "fd"}});
298
299    Scalar * const fileDescriptor = P->getInputScalar("fd");
300
301    // Source data
302    StreamSet * const codeUnitStream = P->CreateStreamSet(1, 32);
303    P->CreateKernelCall<MMapSourceKernel>(fileDescriptor, codeUnitStream);
304
305    // Source buffers for transposed UTF-32 basis bits.
306    StreamSet * const u32basis = P->CreateStreamSet(21);
307    P->CreateKernelCall<S2P_21Kernel>(codeUnitStream, u32basis);
308
309    // Buffers for calculated deposit masks.
310    StreamSet * const u8fieldMask = P->CreateStreamSet();
311    StreamSet * const u8final = P->CreateStreamSet();
312    StreamSet * const u8initial = P->CreateStreamSet();
313    StreamSet * const u8mask12_17 = P->CreateStreamSet();
314    StreamSet * const u8mask6_11 = P->CreateStreamSet();
315
316    // Intermediate buffers for deposited bits
317    StreamSet * const deposit18_20 = P->CreateStreamSet(3);
318    StreamSet * const deposit12_17 = P->CreateStreamSet(6);
319    StreamSet * const deposit6_11 = P->CreateStreamSet(6);
320    StreamSet * const deposit0_5 = P->CreateStreamSet(6);
321
322    // Final buffers for computed UTF-8 basis bits and byte stream.
323    StreamSet * const u8basis = P->CreateStreamSet(8);
324    StreamSet * const u8bytes = P->CreateStreamSet(1, 8);
325
326    // Calculate the u8final deposit mask.
327    #ifdef STREAM_COMPRESS_USING_EXTRACTION_MASK
328    StreamSet * const extractionMask = P->CreateStreamSet();
329    P->CreateKernelCall<UTF8fieldDepositMask>(u32basis, u8fieldMask, extractionMask);
330    P->CreateKernelCall<StreamCompressKernel>(u8fieldMask, extractionMask, u8final);
331    #else
332    StreamSet * const u8unitCounts = P->CreateStreamSet();
333    P->CreateKernelCall<UTF8fieldDepositMask>(u32basis, u8fieldMask, u8unitCounts);
334    P->CreateKernelCall<StreamCompressKernel>(u8fieldMask, u8unitCounts, u8final);
335    #endif
336
337    P->CreateKernelCall<UTF8_DepositMasks>(u8final, u8initial, u8mask12_17, u8mask6_11);
338
339    deposit(P, 18, 3, u8initial, u32basis, deposit18_20);
340    deposit(P, 12, 6, u8mask12_17, u32basis, deposit12_17);
341    deposit(P, 6, 6, u8mask6_11, u32basis, deposit6_11);
342    deposit(P, 0, 6, u8final, u32basis, deposit0_5);
343
344    P->CreateKernelCall<UTF8assembly>(deposit18_20, deposit12_17, deposit6_11, deposit0_5,
345                                      u8initial, u8final, u8mask6_11, u8mask12_17,
346                                      u8basis);
347
348    P->CreateKernelCall<P2SKernel>(u8basis, u8bytes);
349
350    P->CreateKernelCall<StdOutKernel>(u8bytes);
351
352    return reinterpret_cast<u32u8FunctionType>(P->compile());
353}
354
355int main(int argc, char *argv[]) {
356    codegen::ParseCommandLineOptions(argc, argv, {&u32u8Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
357    CPUDriver pxDriver("u32u8");
358    auto u32u8Function = u32u8_gen(pxDriver);
359    const int fd = open(inputFile.c_str(), O_RDONLY);
360    if (LLVM_UNLIKELY(fd == -1)) {
361        errs() << "Error: cannot open " << inputFile << " for processing. Skipped.\n";
362    } else {
363        u32u8Function(fd);
364        close(fd);
365    }
366    return 0;
367}
Note: See TracBrowser for help on using the repository browser.