source: icGREP/icgrep-devel/icgrep/u8u16.cpp @ 5071

Last change on this file since 5071 was 5071, checked in by cameron, 3 years ago

Update u8u16 to use new kernel infrastructure

File size: 17.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <fstream>
10#include <sstream>
11
12#include <llvm/IR/Function.h>
13#include <llvm/IR/Module.h>
14#include <llvm/ExecutionEngine/ExecutionEngine.h>
15#include <llvm/ExecutionEngine/MCJIT.h>
16#include <llvm/IRReader/IRReader.h>
17#include <llvm/IR/Verifier.h>
18#include <llvm/Support/Debug.h>
19
20#include <llvm/Support/CommandLine.h>
21
22#include <toolchain.h>
23#include <re/re_cc.h>
24#include <cc/cc_compiler.h>
25#include <pablo/pablo_toolchain.h>
26#include <pablo/pablo_kernel.h>
27#include <pablo/function.h>
28#include <IDISA/idisa_builder.h>
29#include <IDISA/idisa_target.h>
30#include <kernels/interface.h>
31#include <kernels/kernel.h>
32#include <kernels/s2p_kernel.h>
33#include <kernels/p2s_kernel.h>
34#include <kernels/deletion.h>
35
36#include <utf_encoding.h>
37
38// mmap system
39#include <boost/filesystem.hpp>
40#include <boost/iostreams/device/mapped_file.hpp>
41using namespace boost::iostreams;
42using namespace boost::filesystem;
43
44#include <fcntl.h>
45static cl::OptionCategory u8u16Options("u8u16 Options",
46                                            "Transcoding control options.");
47
48static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(u8u16Options));
49
50//
51//
52//
53namespace pablo {
54
55PabloFunction * u8u16_pablo(const Encoding encoding) {
56    //  input: 8 basis bit streams
57    //  output: 16 u8-indexed streams, + delmask stream + error stream
58    PabloFunction * function = PabloFunction::Create("u8u16", 8, 18);
59    cc::CC_Compiler ccc(*function, encoding);
60   
61    PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
62    const std::vector<Var *> u8_bits = ccc.getBasisBits();
63    // Outputs
64    Assign * u16_hi[8];
65    Assign * u16_lo[8];
66    Assign * delmask;
67    Assign * error_mask;
68   
69    // The logic for processing non-ASCII bytes is to be embedded within an if-hierarchy.
70    PabloAST * nonASCII = ccc.compileCC(re::makeCC(0x80, 0xFF));
71   
72    // Builder for the if statement handling all non-ASCII logic
73    PabloBuilder nAb = PabloBuilder::Create(pBuilder);
74    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
75    // produce the UTF-16 code unit data ...,
76    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
77    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
78    PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
79    PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
80    PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
81   
82    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
83    PabloAST * pfx34 = ccc.compileCC(re::makeCC(0xE0, 0xFF), nAb);
84    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
85    PabloBuilder p34b = PabloBuilder::Create(nAb);
86    // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
87    // to the final position of the 3-byte sequence.
88    PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
89    PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
90    PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
91    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
92    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
93    //
94    // Logic for 4-byte UTF-8 sequences
95    //
96    // Entry condition  or 4 byte sequences: we have a prefix byte in the range 0xF0-0xFF.
97    PabloAST * pfx4 = ccc.compileCC(re::makeCC(0xF0, 0xFF), p34b);
98    // Builder for the if statement handling all logic for 4-byte sequences only.
99    PabloBuilder p4b = PabloBuilder::Create(p34b);
100    // Illegal 4-byte sequences
101    PabloAST * F0 = ccc.compileCC(re::makeCC(0xF0), p4b);
102    PabloAST * F4 = ccc.compileCC(re::makeCC(0xF4), p4b);
103    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeCC(0x80, 0x8F), p4b));
104    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeCC(0x90, 0xBF), p4b));
105    PabloAST * F5_FF = ccc.compileCC(re::makeCC(0xF5, 0xFF), p4b);
106    Assign * FX_err = p4b.createAssign("FX_err", p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
107    //
108    // 4-byte prefixes have a scope that extends over the next 3 bytes.
109    Assign * u8scope42 = p4b.createAssign("u8scope42", p4b.createAdvance(pfx4, 1));
110    Assign * u8scope43 = p4b.createAssign("u8scope43", p4b.createAdvance(u8scope42, 1));
111    Assign * u8scope44 = p4b.createAssign("u8scope44", p4b.createAdvance(u8scope43, 1));
112    //
113   
114    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
115    //  we must calculate the value abcde - 1 to produce the bit values
116    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
117    Assign * s43_lo1 = p4b.createAssign("scope43_lo1", p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
118    Assign * s43_lo0 = p4b.createAssign("scope43_lo0", p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
119    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
120    Assign * s43_hi7 = p4b.createAssign("scope43_hi7", p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
121    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
122    Assign * s43_hi6 = p4b.createAssign("scope43_hi6", p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
123    //
124    Assign * s43_lo2 = p4b.createAssign("scope43_lo2", p4b.createAnd(u8scope43, bit4a1));
125    Assign * s43_lo3 = p4b.createAssign("scope43_lo3", p4b.createAnd(u8scope43, bit5a1));
126    Assign * s43_lo4 = p4b.createAssign("scope43_lo4", p4b.createAnd(u8scope43, bit6a1));
127    Assign * s43_lo5 = p4b.createAssign("scope43_lo5", p4b.createAnd(u8scope43, bit7a1));
128    Assign * s43_lo6 = p4b.createAssign("scope43_lo6", p4b.createAnd(u8scope43, u8_bits[2]));
129    Assign * s43_lo7 = p4b.createAssign("scope43_lo7", p4b.createAnd(u8scope43, u8_bits[3]));
130    //
131    //
132    p34b.createIf(pfx4,
133                  {FX_err, u8scope42, u8scope43, u8scope44, s43_hi6, s43_hi7,
134                   s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
135                   p4b);
136    //
137    // Combined logic for 3 and 4 byte sequences
138    //
139    PabloAST * pfx3 = ccc.compileCC(re::makeCC(0xE0, 0xEF), p34b);
140    Assign * u8scope32 = p34b.createAssign("u8scope32", p34b.createAdvance(pfx3, 1));
141    Assign * u8scope33 = p34b.createAssign("u8scope33", p34b.createAdvance(u8scope32, 1));
142
143    // Illegal 3-byte sequences
144    PabloAST * E0 = ccc.compileCC(re::makeCC(0xE0), p34b);
145    PabloAST * ED = ccc.compileCC(re::makeCC(0xED), p34b);
146    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeCC(0x80, 0x9F), p34b));
147    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeCC(0xA0, 0xBF), p34b));
148    Assign * EX_FX_err = p34b.createAssign("EX_FX_err", p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
149    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
150    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
151   
152    Assign * p34del = p34b.createAssign("p34del", p34b.createOr(u8scope32, u8scope42));
153
154
155    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
156    // UTF-8 sequences.
157    u16_hi[0] = p34b.createAssign("u16_hi0", p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
158    u16_hi[1] = p34b.createAssign("u16_hi1", p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
159    u16_hi[2] = p34b.createAssign("u16_hi2", p34b.createAnd(u8scope33, bit6a2));
160    u16_hi[3] = p34b.createAssign("u16_hi3", p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
161    u16_hi[4] = p34b.createAssign("u16_hi4", p34b.createOr(p34b.createAnd(u8scope33, bit2a1), surrogate));
162   
163    //
164    nAb.createIf(pfx34, 
165                 {u8scope33, EX_FX_err, p34del, 
166                  u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u8scope44, s43_hi6, s43_hi7,
167                  s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
168                 p34b);
169    //
170    // Combined logic for 2, 3 and 4 byte sequences
171    //
172    PabloAST * pfx2 = ccc.compileCC(re::makeCC(0xC0, 0xDF), nAb);
173    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
174    Assign * u8lastscope = nAb.createAssign("u8lastscope", nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
175    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
176
177    PabloAST * C0_C1_err = ccc.compileCC(re::makeCC(0xC0, 0xC1), nAb);
178    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeCC(0x80, 0xBF), nAb));
179    error_mask = nAb.createAssign("errormask", nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
180    delmask = nAb.createAssign("delmask", nAb.createOr(p34del, ccc.compileCC(re::makeCC(0xC0, 0xFF), nAb)));
181   
182    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
183    // low byte are only nonzero for 2, 3 and 4 byte sequences.
184    u16_hi[5] = nAb.createAssign("u16_hi5", nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
185    u16_hi[6] = nAb.createAssign("u16_hi6", nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
186    u16_hi[7] = nAb.createAssign("u16_hi7", nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
187    u16_lo[0] = nAb.createAssign("u16_lo0", nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
188    Assign * p234_lo1 = nAb.createAssign("p234_lo1", nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
189
190    pBuilder.createIf(nonASCII, 
191                      {error_mask, delmask, u8lastscope,
192                       u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u16_hi[5], u16_hi[6], u16_hi[7],
193                       u16_lo[0], p234_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
194                      nAb);
195    //
196    //
197    PabloAST * ASCII = ccc.compileCC(re::makeCC(0x0, 0x7F));
198    PabloAST * last_byte = pBuilder.createOr(ASCII, u8lastscope);
199    u16_lo[1] = pBuilder.createAssign("u16_lo1", pBuilder.createOr(pBuilder.createAnd(ASCII, u8_bits[1]), p234_lo1));
200    u16_lo[2] = pBuilder.createAssign("u16_lo2", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[2]), s43_lo2));
201    u16_lo[3] = pBuilder.createAssign("u16_lo3", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[3]), s43_lo3));
202    u16_lo[4] = pBuilder.createAssign("u16_lo4", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[4]), s43_lo4));
203    u16_lo[5] = pBuilder.createAssign("u16_lo5", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[5]), s43_lo5));
204    u16_lo[6] = pBuilder.createAssign("u16_lo6", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[6]), s43_lo6));
205    u16_lo[7] = pBuilder.createAssign("u16_lo7", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[7]), s43_lo7));
206   
207    for (unsigned i = 0; i < 8; i++) {
208        function->setResult(i, pBuilder.createAssign("u16_hi" + std::to_string(i), u16_hi[i]));
209        function->setResult(i+8, pBuilder.createAssign("u16_lo" + std::to_string(i), u16_lo[i]));
210    }
211    function->setResult(16, pBuilder.createAssign("delbits", delmask));
212    function->setResult(17, pBuilder.createAssign("errors", error_mask));
213
214    return function;
215}
216}
217
218
219
220using namespace kernel;
221
222
223Function * u8u16Pipeline(Module * mMod, IDISA::IDISA_Builder * iBuilder, pablo::PabloFunction * function) {
224    Type * mBitBlockType = iBuilder->getBitBlockType();
225    unsigned mBlockSize = iBuilder->getBitBlockWidth();
226    s2pKernel  s2pk(iBuilder);
227    s2pk.generateKernel();
228   
229    pablo_function_passes(function);
230    pablo::PabloKernel  u8u16k(iBuilder, "u8u16", function, {});
231    u8u16k.prepareKernel();
232    u8u16k.generateKernel();
233   
234    deletionKernel delK(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
235    delK.generateKernel();
236   
237    p2s_16Kernel_withCompressedOutputKernel p2sk(iBuilder);   
238    p2sk.generateKernel();
239   
240    Type * const int64ty = iBuilder->getInt64Ty();
241    Type * const voidTy = Type::getVoidTy(mMod->getContext());
242    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
243   
244    Function * const main = cast<Function>(mMod->getOrInsertFunction("Main", Type::getVoidTy(mMod->getContext()), inputType, int64ty, nullptr));
245    main->setCallingConv(CallingConv::C);
246    Function::arg_iterator args = main->arg_begin();
247   
248    Value * const inputStream = &*(args++);
249    inputStream->setName("input");
250    Value * const bufferSize = &*(args++);
251    bufferSize->setName("bufferSize");
252   
253    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
254   
255    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
256   
257    BasicBlock * fullCondBlock = BasicBlock::Create(mMod->getContext(), "fullCond", main, 0);
258    BasicBlock * fullBodyBlock = BasicBlock::Create(mMod->getContext(), "fullBody", main, 0);
259    BasicBlock * finalBlock = BasicBlock::Create(mMod->getContext(), "final", main, 0);
260   
261    StreamSetBuffer ByteStream(iBuilder, StreamSetType(1, 8), 0);
262    StreamSetBuffer BasisBits(iBuilder, StreamSetType(8, 1), 1);
263    StreamSetBuffer U8u16Bits(iBuilder, StreamSetType(18, 1), 1);
264    StreamSetBuffer U16Bits(iBuilder, StreamSetType(16, 1), 1);
265    StreamSetBuffer DeletionCounts(iBuilder, StreamSetType(1, 1), 1);
266    StreamSetBuffer U16out(iBuilder, StreamSetType(1, 16), 1);
267
268    ByteStream.setStreamSetBuffer(inputStream);
269    Value * basisBits = BasisBits.allocateBuffer();
270    Value * u8u16Bits = U8u16Bits.allocateBuffer();
271    Value * u16Bits = U16Bits.allocateBuffer();
272    Value * delCounts = DeletionCounts.allocateBuffer();
273    Value * u16out = U16out.allocateBuffer();
274   
275    Value * s2pInstance = s2pk.createInstance({});
276    Value * u8u16Instance = u8u16k.createInstance({});
277    Value * delInstance = delK.createInstance({});
278    Value * p2sInstance = p2sk.createInstance({});
279   
280    Value * initialBufferSize = bufferSize;
281    BasicBlock * initialBlock = entryBlock;
282    Value * initialBlockNo = iBuilder->getInt64(0);
283   
284    iBuilder->CreateBr(fullCondBlock);
285   
286   
287    iBuilder->SetInsertPoint(fullCondBlock);
288    PHINode * remainingBytes = iBuilder->CreatePHI(int64ty, 2, "remainingBytes");
289    remainingBytes->addIncoming(initialBufferSize, initialBlock);
290    PHINode * blockNo = iBuilder->CreatePHI(int64ty, 2, "blockNo");
291    blockNo->addIncoming(initialBlockNo, initialBlock);
292   
293    Constant * const step = ConstantInt::get(int64ty, mBlockSize);
294    Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
295    iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock);
296   
297    iBuilder->SetInsertPoint(fullBodyBlock);
298   
299    s2pk.createDoBlockCall(s2pInstance, {ByteStream.getBlockPointer(blockNo), basisBits});
300    u8u16k.createDoBlockCall(u8u16Instance, {basisBits, u8u16Bits});
301    delK.createDoBlockCall(delInstance, {u8u16Bits, u16Bits, delCounts});
302    p2sk.createDoBlockCall(p2sInstance, {u16Bits, delCounts, u16out});
303   
304    Value * diff = iBuilder->CreateSub(remainingBytes, step);
305   
306    remainingBytes->addIncoming(diff, fullBodyBlock);
307    blockNo->addIncoming(iBuilder->CreateAdd(blockNo, iBuilder->getInt64(1)), fullBodyBlock);
308    iBuilder->CreateBr(fullCondBlock);
309   
310    iBuilder->SetInsertPoint(finalBlock);
311    s2pk.createFinalBlockCall(s2pInstance, remainingBytes, {ByteStream.getBlockPointer(blockNo), basisBits});
312    u8u16k.createFinalBlockCall(u8u16Instance, remainingBytes, {basisBits, u8u16Bits});
313    delK.createFinalBlockCall(delInstance, remainingBytes, {u8u16Bits, u16Bits, delCounts});
314    p2sk.createFinalBlockCall(p2sInstance, remainingBytes, {u16Bits, delCounts, u16out});
315   
316   
317    iBuilder->CreateRetVoid();
318    return main;
319}
320
321
322
323
324
325typedef void (*u8u16FunctionType)(char * byte_data, size_t filesize);
326
327static ExecutionEngine * u8u16Engine = nullptr;
328
329u8u16FunctionType u8u16CodeGen(void) {
330                           
331    Module * M = new Module("u8u16", getGlobalContext());
332    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
333
334    Encoding encoding(Encoding::Type::UTF_8, 8);
335    pablo::PabloFunction * function = pablo::u8u16_pablo(encoding);
336   
337    llvm::Function * main_IR = u8u16Pipeline(M, idb, function);
338   
339    verifyModule(*M, &dbgs());
340    //std::cerr << "ExecuteKernels(); done\n";
341    u8u16Engine = JIT_to_ExecutionEngine(M);
342   
343    u8u16Engine->finalizeObject();
344    //std::cerr << "finalizeObject(); done\n";
345
346    delete idb;
347    return reinterpret_cast<u8u16FunctionType>(u8u16Engine->getPointerToFunction(main_IR));
348}
349
350void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
351    std::string mFileName = fileName;
352    size_t mFileSize;
353    char * mFileBuffer;
354   
355    const path file(mFileName);
356    if (exists(file)) {
357        if (is_directory(file)) {
358            return;
359        }
360    } else {
361        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
362        return;
363    }
364   
365    mFileSize = file_size(file);
366    mapped_file_source mFile;
367    if (mFileSize == 0) {
368        mFileBuffer = nullptr;
369    }
370    else {
371        try {
372            mFile.open(mFileName);
373        } catch (std::exception &e) {
374            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
375            return;
376        }
377        mFileBuffer = const_cast<char *>(mFile.data());
378    }
379    //std::cerr << "mFileSize =" << mFileSize << "\n";
380    //std::cerr << "fn_ptr =" << std::hex << reinterpret_cast<intptr_t>(fn_ptr) << "\n";
381
382    fn_ptr(mFileBuffer, mFileSize);
383
384    mFile.close();
385   
386}
387
388
389int main(int argc, char *argv[]) {
390    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&u8u16Options, pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
391    cl::ParseCommandLineOptions(argc, argv);
392
393    u8u16FunctionType fn_ptr = u8u16CodeGen();
394
395    for (unsigned i = 0; i != inputFiles.size(); ++i) {
396        u8u16(fn_ptr, inputFiles[i]);
397    }
398
399    delete u8u16Engine;
400
401    return 0;
402}
403
404                       
Note: See TracBrowser for help on using the repository browser.