source: icGREP/icgrep-devel/icgrep/u8u16.cpp @ 5005

Last change on this file since 5005 was 5005, checked in by cameron, 3 years ago

u8u16 application and kernels - partial

File size: 16.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
11
12#include <llvm/IR/Function.h>
13#include <llvm/IR/Module.h>
14#include <llvm/ExecutionEngine/ExecutionEngine.h>
15#include <llvm/ExecutionEngine/MCJIT.h>
16#include <llvm/IRReader/IRReader.h>
17#include <llvm/Support/CommandLine.h>
18#include <llvm/CodeGen/CommandFlags.h>
19#include <llvm/Support/SourceMgr.h>
20#include <llvm/Support/TargetSelect.h>
21#include <llvm/Support/Host.h>
22#include <llvm/Support/raw_ostream.h>
23
24#include <re/re_cc.h>
25#include <cc/cc_compiler.h>
26#include <pablo/function.h>
27#include <IDISA/idisa_builder.h>
28#include <IDISA/idisa_target.h>
29#include <kernels/u8u16_pipeline.h>
30
31// Dynamic processor detection
32#define ISPC_LLVM_VERSION ISPC_LLVM_3_6
33#include <util/ispc.cpp>
34
35#include <utf_encoding.h>
36
37// mmap system
38#include <boost/filesystem.hpp>
39#include <boost/iostreams/device/mapped_file.hpp>
40using namespace boost::iostreams;
41using namespace boost::filesystem;
42
43#include <fcntl.h>
44
45static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore);
46
47
48static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
49
50static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
51                              cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
52
53
54//
55//  Functions taken from toolchain.cpp and modified for casefold
56//  JIT_to_ExecutionEngine : remove object cache
57//  icgrep_Linking:   unneeded?
58//  all others: definitely unneeded
59//
60
61ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
62
63    InitializeNativeTarget();
64    InitializeNativeTargetAsmPrinter();
65    InitializeNativeTargetAsmParser();
66
67    PassRegistry * Registry = PassRegistry::getPassRegistry();
68    initializeCore(*Registry);
69    initializeCodeGen(*Registry);
70    initializeLowerIntrinsicsPass(*Registry);
71
72    std::string errMessage;
73    EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
74    builder.setErrorStr(&errMessage);
75    builder.setMCPU(sys::getHostCPUName());
76    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
77    switch (OptLevel) {
78        case '0': optLevel = CodeGenOpt::None; break;
79        case '1': optLevel = CodeGenOpt::Less; break;
80        case '2': optLevel = CodeGenOpt::Default; break;
81        case '3': optLevel = CodeGenOpt::Aggressive; break;
82        default: errs() << OptLevel << " is an invalid optimization level.\n";
83    }
84    builder.setOptLevel(optLevel);
85
86    if ((strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
87            std::vector<std::string> attrs;
88            attrs.push_back("avx2");
89            builder.setMAttrs(attrs);
90    }
91
92    // builder.selectTarget();
93
94    //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
95    ExecutionEngine * engine = builder.create();
96    if (engine == nullptr) {
97        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
98    }
99    return engine;
100}
101
102
103//
104//
105//
106namespace pablo {
107
108PabloFunction * u8u16_pablo(const Encoding encoding) {
109    //  input: 8 basis bit streams
110    //  output: 16 u8-indexed streams, + delmask stream + error stream
111    PabloFunction * function = PabloFunction::Create("u8u16", 8, 18);
112    cc::CC_Compiler ccc(*function, encoding);
113   
114    PabloBuilder pBuilder(ccc.getBuilder().getPabloBlock(), ccc.getBuilder());
115    const std::vector<Var *> u8_bits = ccc.getBasisBits();
116    // Outputs
117    Assign * u16_hi[8];
118    Assign * u16_lo[8];
119    Assign * delmask;
120    Assign * error_mask;
121   
122    // The logic for processing non-ASCII bytes is to be embedded within an if-hierarchy.
123    PabloAST * nonASCII = ccc.compileCC(re::makeCC(0x80, 0xFF));
124   
125    // Builder for the if statement handling all non-ASCII logic
126    PabloBuilder nAb = PabloBuilder::Create(pBuilder);
127    // Bits 3 through 7 of a 2-byte prefix are data bits, needed to
128    // produce the UTF-16 code unit data ...,
129    PabloAST * bit3a1 = nAb.createAdvance(u8_bits[3], 1);
130    PabloAST * bit4a1 = nAb.createAdvance(u8_bits[4], 1);
131    PabloAST * bit5a1 = nAb.createAdvance(u8_bits[5], 1);
132    PabloAST * bit6a1 = nAb.createAdvance(u8_bits[6], 1);
133    PabloAST * bit7a1 = nAb.createAdvance(u8_bits[7], 1);
134   
135    // Entry condition for 3 or 4 byte sequences: we have a prefix byte in the range 0xE0-0xFF.
136    PabloAST * pfx34 = ccc.compileCC(re::makeCC(0xE0, 0xFF), nAb);
137    // Builder for the if statement handling all logic for 3- and 4-byte sequences.
138    PabloBuilder p34b = PabloBuilder::Create(nAb);
139    // Bits 4 through 7 of a 3-byte prefix are data bits.  They must be moved
140    // to the final position of the 3-byte sequence.
141    PabloAST * bit2a1 = p34b.createAdvance(u8_bits[2], 1);
142    PabloAST * bit4a2 = p34b.createAdvance(bit4a1, 1);
143    PabloAST * bit5a2 = p34b.createAdvance(bit5a1, 1);
144    PabloAST * bit6a2 = p34b.createAdvance(bit6a1, 1);
145    PabloAST * bit7a2 = p34b.createAdvance(bit7a1, 1);
146    //
147    // Logic for 4-byte UTF-8 sequences
148    //
149    // Entry condition  or 4 byte sequences: we have a prefix byte in the range 0xF0-0xFF.
150    PabloAST * pfx4 = ccc.compileCC(re::makeCC(0xF0, 0xFF), p34b);
151    // Builder for the if statement handling all logic for 4-byte sequences only.
152    PabloBuilder p4b = PabloBuilder::Create(p34b);
153    // Illegal 4-byte sequences
154    PabloAST * F0 = ccc.compileCC(re::makeCC(0xF0), p4b);
155    PabloAST * F4 = ccc.compileCC(re::makeCC(0xF4), p4b);
156    PabloAST * F0_err = p4b.createAnd(p4b.createAdvance(F0, 1), ccc.compileCC(re::makeCC(0x80, 0x8F), p4b));
157    PabloAST * F4_err = p4b.createAnd(p4b.createAdvance(F4, 1), ccc.compileCC(re::makeCC(0x90, 0xBF), p4b));
158    PabloAST * F5_FF = ccc.compileCC(re::makeCC(0xF5, 0xFF), p4b);
159    Assign * FX_err = p4b.createAssign("FX_err", p4b.createOr(F5_FF, p4b.createOr(F0_err, F4_err)));
160    //
161    // 4-byte prefixes have a scope that extends over the next 3 bytes.
162    Assign * u8scope42 = p4b.createAssign("u8scope42", p4b.createAdvance(pfx4, 1));
163    Assign * u8scope43 = p4b.createAssign("u8scope43", p4b.createAdvance(u8scope42, 1));
164    Assign * u8scope44 = p4b.createAssign("u8scope44", p4b.createAdvance(u8scope43, 1));
165    //
166   
167    //  From the 4-byte sequence 11110abc 10defghi 10jklmno 10pqrstu,
168    //  we must calculate the value abcde - 1 to produce the bit values
169    //  for u16_hi6, hi7, lo0, lo1 at the scope43 position.
170    Assign * s43_lo1 = p4b.createAssign("scope43_lo1", p4b.createAnd(u8scope43, p4b.createNot(bit3a1)));           // e - 1
171    Assign * s43_lo0 = p4b.createAssign("scope43_lo0", p4b.createAnd(u8scope43, p4b.createXor(bit2a1, s43_lo1)));  // d - borrow
172    PabloAST * brw1 = p4b.createAnd(s43_lo1, p4b.createNot(bit2a1));
173    Assign * s43_hi7 = p4b.createAssign("scope43_hi7", p4b.createAnd(u8scope43, p4b.createXor(bit7a2, brw1)));     // c - borrow
174    PabloAST * brw2 = p4b.createAnd(brw1, p4b.createNot(bit7a2));
175    Assign * s43_hi6 = p4b.createAssign("scope43_hi6", p4b.createAnd(u8scope43, p4b.createXor(bit6a2, brw2)));     // b - borrow
176    //
177    Assign * s43_lo2 = p4b.createAssign("scope43_lo2", p4b.createAnd(u8scope43, bit4a1));
178    Assign * s43_lo3 = p4b.createAssign("scope43_lo3", p4b.createAnd(u8scope43, bit5a1));
179    Assign * s43_lo4 = p4b.createAssign("scope43_lo4", p4b.createAnd(u8scope43, bit6a1));
180    Assign * s43_lo5 = p4b.createAssign("scope43_lo5", p4b.createAnd(u8scope43, bit7a1));
181    Assign * s43_lo6 = p4b.createAssign("scope43_lo6", p4b.createAnd(u8scope43, u8_bits[2]));
182    Assign * s43_lo7 = p4b.createAssign("scope43_lo7", p4b.createAnd(u8scope43, u8_bits[3]));
183    //
184    //
185    p34b.createIf(pfx4,
186                  {FX_err, u8scope42, u8scope43, u8scope44, s43_hi6, s43_hi7,
187                   s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
188                   p4b);
189    //
190    // Combined logic for 3 and 4 byte sequences
191    //
192    PabloAST * pfx3 = ccc.compileCC(re::makeCC(0xE0, 0xEF), p34b);
193    Assign * u8scope32 = p34b.createAssign("u8scope32", p34b.createAdvance(pfx3, 1));
194    Assign * u8scope33 = p34b.createAssign("u8scope33", p34b.createAdvance(u8scope32, 1));
195
196    // Illegal 3-byte sequences
197    PabloAST * E0 = ccc.compileCC(re::makeCC(0xE0), p34b);
198    PabloAST * ED = ccc.compileCC(re::makeCC(0xED), p34b);
199    PabloAST * E0_err = p34b.createAnd(p34b.createAdvance(E0, 1), ccc.compileCC(re::makeCC(0x80, 0x9F), p34b));
200    PabloAST * ED_err = p34b.createAnd(p34b.createAdvance(ED, 1), ccc.compileCC(re::makeCC(0xA0, 0xBF), p34b));
201    Assign * EX_FX_err = p34b.createAssign("EX_FX_err", p34b.createOr(p34b.createOr(E0_err, ED_err), FX_err));
202    // Two surrogate UTF-16 units are computed at the 3rd and 4th positions of 4-byte sequences.
203    PabloAST * surrogate = p34b.createOr(u8scope43, u8scope44);
204   
205    Assign * p34del = p34b.createAssign("p34del", p34b.createOr(u8scope32, u8scope42));
206
207
208    // The high 5 bits of the UTF-16 code unit are only nonzero for 3 and 4-byte
209    // UTF-8 sequences.
210    u16_hi[0] = p34b.createAssign("u16_hi0", p34b.createOr(p34b.createAnd(u8scope33, bit4a2), surrogate));
211    u16_hi[1] = p34b.createAssign("u16_hi1", p34b.createOr(p34b.createAnd(u8scope33, bit5a2), surrogate));
212    u16_hi[2] = p34b.createAssign("u16_hi2", p34b.createAnd(u8scope33, bit6a2));
213    u16_hi[3] = p34b.createAssign("u16_hi3", p34b.createOr(p34b.createAnd(u8scope33, bit7a2), surrogate));
214    u16_hi[4] = p34b.createAssign("u16_hi4", p34b.createOr(p34b.createAnd(u8scope33, bit4a1), surrogate));
215   
216    //
217    nAb.createIf(pfx34, 
218                 {u8scope33, EX_FX_err, p34del, 
219                  u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u8scope44, s43_hi6, s43_hi7,
220                  s43_lo0, s43_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
221                 p34b);
222    //
223    // Combined logic for 2, 3 and 4 byte sequences
224    //
225    PabloAST * pfx2 = ccc.compileCC(re::makeCC(0xC0, 0xDF), nAb);
226    PabloAST * u8scope22 = nAb.createAdvance(pfx2, 1);
227    Assign * u8lastscope = nAb.createAssign("u8lastscope", nAb.createOr(u8scope22, nAb.createOr(u8scope33, u8scope44)));
228    PabloAST * u8anyscope = nAb.createOr(u8lastscope, p34del);
229
230    PabloAST * C0_C1_err = ccc.compileCC(re::makeCC(0xC0, 0xC1), nAb);
231    PabloAST * scope_suffix_mismatch = nAb.createXor(u8anyscope, ccc.compileCC(re::makeCC(0x80, 0xBF), nAb));
232    error_mask = nAb.createAssign("errormask", nAb.createOr(scope_suffix_mismatch, nAb.createOr(C0_C1_err, EX_FX_err)));
233    delmask = nAb.createAssign("delmask", nAb.createOr(p34del, ccc.compileCC(re::makeCC(0xC0, 0xFF), nAb)));
234   
235    // The low 3 bits of the high byte of the UTF-16 code unit as well as the high bit of the
236    // low byte are only nonzero for 2, 3 and 4 byte sequences.
237    u16_hi[5] = nAb.createAssign("u16_hi5", nAb.createOr(nAb.createAnd(u8lastscope, bit3a1), u8scope44));
238    u16_hi[6] = nAb.createAssign("u16_hi6", nAb.createOr(nAb.createAnd(u8lastscope, bit4a1), s43_hi6));
239    u16_hi[7] = nAb.createAssign("u16_hi7", nAb.createOr(nAb.createAnd(u8lastscope, bit5a1), s43_hi7));
240    u16_lo[0] = nAb.createAssign("u16_lo0", nAb.createOr(nAb.createAnd(u8lastscope, bit6a1), s43_lo0));
241    Assign * p234_lo1 = nAb.createAssign("p234_lo1", nAb.createOr(nAb.createAnd(u8lastscope, bit7a1), s43_lo1));
242
243    pBuilder.createIf(nonASCII, 
244                      {error_mask, delmask, u8lastscope,
245                       u16_hi[0], u16_hi[1], u16_hi[2], u16_hi[3], u16_hi[4], u16_hi[5], u16_hi[6], u16_hi[7],
246                       u16_lo[0], p234_lo1, s43_lo2, s43_lo3, s43_lo4, s43_lo5, s43_lo6, s43_lo7},
247                      nAb);
248    //
249    //
250    PabloAST * ASCII = ccc.compileCC(re::makeCC(0x0, 0x7F));
251    PabloAST * last_byte = pBuilder.createOr(ASCII, u8lastscope);
252    u16_lo[1] = pBuilder.createAssign("u16_lo1", pBuilder.createOr(pBuilder.createAnd(ASCII, u8_bits[1]), p234_lo1));
253    u16_lo[2] = pBuilder.createAssign("u16_lo2", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[2]), s43_lo2));
254    u16_lo[3] = pBuilder.createAssign("u16_lo3", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[3]), s43_lo3));
255    u16_lo[4] = pBuilder.createAssign("u16_lo4", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[4]), s43_lo4));
256    u16_lo[5] = pBuilder.createAssign("u16_lo5", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[5]), s43_lo5));
257    u16_lo[6] = pBuilder.createAssign("u16_lo6", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[6]), s43_lo6));
258    u16_lo[7] = pBuilder.createAssign("u16_lo7", pBuilder.createOr(pBuilder.createAnd(last_byte, u8_bits[7]), s43_lo7));
259   
260    for (unsigned i = 0; i < 8; i++) {
261        function->setResult(i, pBuilder.createAssign("u16_hi" + std::to_string(i), u16_hi[i]));
262        function->setResult(i+8, pBuilder.createAssign("u16_lo" + std::to_string(i), u16_lo[i]));
263    }
264    function->setResult(16, pBuilder.createAssign("delbits", delmask));
265    function->setResult(17, pBuilder.createAssign("errors", error_mask));
266
267    return function;
268}
269}
270
271
272typedef void (*u8u16FunctionType)(char * byte_data, size_t filesize);
273
274u8u16FunctionType u8u16CodeGen(void) {
275                           
276    Module * M = new Module("u8u16", getGlobalContext());
277   
278    IDISA::IDISA_Builder * idb = GetIDISA_Builder(M);
279
280    kernel::PipelineBuilder pipelineBuilder(M, idb);
281
282    Encoding encoding(Encoding::Type::UTF_8, 8);
283   
284    pablo::PabloFunction * function = pablo::u8u16_pablo(encoding);
285   
286
287    pipelineBuilder.CreateKernels(function);
288
289    pipelineBuilder.ExecuteKernels();
290
291    //std::cerr << "ExecuteKernels(); done\n";
292    llvm::Function * main_IR = M->getFunction("Main");
293    ExecutionEngine * mEngine = JIT_to_ExecutionEngine(M);
294   
295    mEngine->finalizeObject();
296    //std::cerr << "finalizeObject(); done\n";
297
298    delete idb;
299
300    return reinterpret_cast<u8u16FunctionType>(mEngine->getPointerToFunction(main_IR));
301}
302
303void doCaseFold(u8u16FunctionType fn_ptr, const std::string & fileName) {
304    std::string mFileName = fileName;
305    size_t mFileSize;
306    char * mFileBuffer;
307   
308    const path file(mFileName);
309    if (exists(file)) {
310        if (is_directory(file)) {
311            return;
312        }
313    } else {
314        std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
315        return;
316    }
317   
318    mFileSize = file_size(file);
319    mapped_file mFile;
320    if (mFileSize == 0) {
321        mFileBuffer = nullptr;
322    }
323    else {
324        try {
325            mFile.open(mFileName, mapped_file::priv, mFileSize, 0);
326        } catch (std::ios_base::failure e) {
327            std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
328            return;
329        }
330        mFileBuffer = mFile.data();
331    }
332    //std::cerr << "mFileSize =" << mFileSize << "\n";
333    //std::cerr << "fn_ptr =" << std::hex << reinterpret_cast<intptr_t>(fn_ptr) << "\n";
334
335    fn_ptr(mFileBuffer, mFileSize);
336
337    mFile.close();
338   
339}
340
341
342int main(int argc, char *argv[]) {
343    StringMap<cl::Option*> Map;
344    cl::getRegisteredOptions(Map);
345    Map["time-passes"]->setHiddenFlag(cl::Hidden);
346    Map["disable-spill-fusing"]->setHiddenFlag(cl::Hidden);
347    Map["enable-misched"]->setHiddenFlag(cl::Hidden);
348    Map["enable-tbaa"]->setHiddenFlag(cl::Hidden);
349    Map["exhaustive-register-search"]->setHiddenFlag(cl::Hidden);
350    Map["join-liveintervals"]->setHiddenFlag(cl::Hidden);
351    Map["limit-float-precision"]->setHiddenFlag(cl::Hidden);
352    Map["mc-x86-disable-arith-relaxation"]->setHiddenFlag(cl::Hidden);
353    Map["limit-float-precision"]->setHiddenFlag(cl::Hidden);
354    Map["print-after-all"]->setHiddenFlag(cl::Hidden);
355    Map["print-before-all"]->setHiddenFlag(cl::Hidden);
356    Map["print-machineinstrs"]->setHiddenFlag(cl::Hidden);
357    Map["regalloc"]->setHiddenFlag(cl::Hidden);
358    Map["rng-seed"]->setHiddenFlag(cl::Hidden);
359    Map["stackmap-version"]->setHiddenFlag(cl::Hidden);
360    Map["x86-asm-syntax"]->setHiddenFlag(cl::Hidden);
361    Map["verify-debug-info"]->setHiddenFlag(cl::Hidden);
362    Map["verify-dom-info"]->setHiddenFlag(cl::Hidden);
363    Map["verify-loop-info"]->setHiddenFlag(cl::Hidden);
364    Map["verify-regalloc"]->setHiddenFlag(cl::Hidden);
365    Map["verify-scev"]->setHiddenFlag(cl::Hidden);
366    Map["x86-recip-refinement-steps"]->setHiddenFlag(cl::Hidden);
367    Map["rewrite-map-file"]->setHiddenFlag(cl::Hidden);
368
369    cl::ParseCommandLineOptions(argc, argv);
370
371    u8u16FunctionType fn_ptr = u8u16CodeGen();
372
373    for (unsigned i = 0; i != inputFiles.size(); ++i) {
374        std::cerr << inputFiles[i] << " beginning\n";
375        doCaseFold(fn_ptr, inputFiles[i]);
376    }
377
378    return 0;
379}
380
381                       
Note: See TracBrowser for help on using the repository browser.