source: icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp @ 5625

Last change on this file since 5625 was 5625, checked in by xuedongx, 19 months ago

add ExpandStream kernel and small fix.

File size: 13.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "alignedprint.h"

#include <utility>

#include <kernels/kernel_builder.h>
8
9using namespace llvm;
10
11namespace kernel {
12
13inline void ap_p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
14    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
15    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
16    s1 = iBuilder->esimd_mergeh(8, t1, t0);
17    s0 = iBuilder->esimd_mergel(8, t1, t0);
18}
19
20inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
21    Value * bit00004444[2];
22    Value * bit22226666[2];
23    Value * bit11115555[2];
24    Value * bit33337777[2];
25    ap_p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
26    ap_p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
27    ap_p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
28    ap_p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
29    Value * bit00224466[4];
30    Value * bit11335577[4];
31    for (unsigned j = 0; j<2; j++) {
32        ap_p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
33        ap_p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
34    }
35    for (unsigned j = 0; j<4; j++) {
36        ap_p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
37    }
38}
39
40void PrintableBits::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
41    // Load current block
42    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
43
44    Value * bits[8];
45
46    /*
47    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
48    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
49   
50    For example, consider input bitstream 101. Our desired output is:
51    00110001 00101110 00110001
52
53    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
54
55    0   0   0 -> First bit position of every byte is all zeros
56    0   0   0 -> Same for second bit
57    1   1   1 -> Third bit is all ones
58    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
59    0   1   0 -> opposite
60    0   1   0 -> opposite
61    0   1   0 -> opposite
62    1   0   1 -> same as 4th bit position.
63   
64    Armed with the above we can do the bit->byte conversion all at once
65    rather than byte at a time! That's what we do below.
66    */
67
68    bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
69    bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
70    bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
71    bits[3] = bitStrmVal;
72    Value * negBitStrmVal = iBuilder->simd_not(bitStrmVal);
73    bits[4] = negBitStrmVal;
74    bits[5] = negBitStrmVal;
75    bits[6] = negBitStrmVal;
76    bits[7] = bitStrmVal;
77   
78    // Reassemble the paralell bit streams into a byte stream
79    Value * printableBytes[8];
80    p2s(iBuilder, bits, printableBytes);
81   
82    for (unsigned j = 0; j < 8; ++j) {
83        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
84    }
85}
86
87void SelectStream::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
88    if (mStreamIndex >= mSizeInputStreamSet)
89        llvm::report_fatal_error("Stream index out of bounds.\n");
90   
91    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
92
93    iBuilder->storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
94}
95
96void ExpandStream::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
97    if (mSizeOutputStreamSet <= mSizeInputStreamSet)
98        llvm::report_fatal_error("Stream Expanding fails.\n");
99
100    for (unsigned i = 0; i < mSizeOutputStreamSet; i++) {
101        if (i < mSizeInputStreamSet) {
102            Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(i));
103            iBuilder->storeOutputStreamBlock("outputbitStreams", iBuilder->getInt32(i), bitStrmVal);
104        } else {
105            iBuilder->storeOutputStreamBlock("outputbitStreams", iBuilder->getInt32(i), iBuilder->bitCast(Constant::getNullValue(iBuilder->getBitBlockType())));
106        }
107    }
108   
109}
110
111void PrintStreamSet::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
112
113    /*
114    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
115    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
116
117    For example, consider input bitstream 101. Our desired output is:
118    00110001 00101110 00110001
119
120    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
121
122    0   0   0 -> First bit position of every byte is all zeros
123    0   0   0 -> Same for second bit
124    1   1   1 -> Third bit is all ones
125    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
126    0   1   0 -> opposite
127    0   1   0 -> opposite
128    0   1   0 -> opposite
129    1   0   1 -> same as 4th bit position.
130
131    Armed with the above we can do the bit->byte conversion all at once
132    rather than byte at a time! That's what we do below.
133    */
134
135    for (const std::string & name : mNames) {
136
137        BasicBlock * entry = iBuilder->GetInsertBlock();
138
139        Value * count = iBuilder->getInputStreamSetCount(name);
140        ConstantInt * const streamLength = iBuilder->getSize(iBuilder->getBitBlockWidth() + mNameWidth + 1);
141        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
142
143        Value * outputName = iBuilder->GetString(name.c_str());
144        ConstantInt * const length = iBuilder->getInt32(name.length());
145        iBuilder->CreateMemCpy(output, outputName, length, 1);
146        iBuilder->CreateMemSet(iBuilder->CreateGEP(output, iBuilder->getInt32(name.length())), iBuilder->getInt8(' '), iBuilder->getInt32(mNameWidth - name.length()), 1);
147        iBuilder->CreateStore(iBuilder->getInt8(10), iBuilder->CreateGEP(output, iBuilder->getInt32(iBuilder->getBitBlockWidth() + mNameWidth)));
148
149        if (isa<ConstantInt>(count) && cast<ConstantInt>(count)->isOne()) {
150
151            // Load current block
152            Value * const input = iBuilder->loadInputStreamBlock(name, iBuilder->getInt32(0));
153
154            Value * bits[8];
155            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
156            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
157            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
158            bits[3] = input;
159            Value * const negated = iBuilder->simd_not(input);
160            bits[4] = negated;
161            bits[5] = negated;
162            bits[6] = negated;
163            bits[7] = input;
164
165            // Reassemble the paralell bit streams into a byte stream
166            Value * printableBytes[8];
167            p2s(iBuilder, bits, printableBytes);
168            for (unsigned k = 0; k < 8; ++k) {
169                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
170                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
171                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
172                }
173            }
174
175            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
176
177        } else {
178
179            iBuilder->CreateStore(iBuilder->getInt8('['), iBuilder->CreateGEP(output, length));
180
181            BasicBlock * cond = iBuilder->CreateBasicBlock("cond");
182
183            BasicBlock * getIntLength = iBuilder->CreateBasicBlock("getIntLength");
184
185            BasicBlock * writeInt = iBuilder->CreateBasicBlock("writeInt");
186            BasicBlock * writeVector = iBuilder->CreateBasicBlock("writeVector");
187
188            BasicBlock * exit = iBuilder->CreateBasicBlock("exit");
189
190            ConstantInt * TEN = iBuilder->getSize(10);
191            ConstantInt * ONE = iBuilder->getSize(1);
192
193            iBuilder->CreateBr(cond);
194            iBuilder->SetInsertPoint(cond);
195            PHINode * i = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "i");
196            i->addIncoming(iBuilder->getSize(0), entry);
197
198            iBuilder->CreateCondBr(iBuilder->CreateICmpNE(i, count), getIntLength, exit);
199            // -------------------------------------------------------------------------
200            iBuilder->SetInsertPoint(getIntLength);
201
202            PHINode * l = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "l");
203            l->addIncoming(iBuilder->getSize(name.length() + 1), cond);
204            PHINode * temp = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "temp");
205            temp->addIncoming(i, cond);
206
207            l->addIncoming(iBuilder->CreateAdd(l, ONE), getIntLength);
208
209            temp->addIncoming(iBuilder->CreateUDiv(temp, TEN), getIntLength);
210
211            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(temp, TEN), getIntLength, writeInt);
212            // -------------------------------------------------------------------------
213            iBuilder->SetInsertPoint(writeInt);
214            PHINode * value = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
215            value->addIncoming(i, getIntLength);
216
217            PHINode * j = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "j");
218            j->addIncoming(l, getIntLength);
219            Value * ch = iBuilder->CreateURem(value, TEN);
220            ch = iBuilder->CreateTrunc(ch, iBuilder->getInt8Ty());
221            ch = iBuilder->CreateAdd(ch, iBuilder->getInt8('0'));
222
223            value->addIncoming(iBuilder->CreateUDiv(value, TEN), writeInt);
224            iBuilder->CreateStore(ch, iBuilder->CreateGEP(output, j));
225            j->addIncoming(iBuilder->CreateSub(j, ONE), writeInt);
226
227            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(value, TEN), writeInt, writeVector);
228            // -------------------------------------------------------------------------
229            iBuilder->SetInsertPoint(writeVector);
230
231            iBuilder->CreateStore(iBuilder->getInt8(']'), iBuilder->CreateGEP(output, iBuilder->CreateAdd(l, iBuilder->getSize(1))));
232
233            // Load current block
234            Value * const input = iBuilder->loadInputStreamBlock(name, i);
235
236            Value * bits[8];
237            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
238            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
239            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
240            bits[3] = input;
241            Value * const negated = iBuilder->simd_not(input);
242            bits[4] = negated;
243            bits[5] = negated;
244            bits[6] = negated;
245            bits[7] = input;
246
247            // Reassemble the paralell bit streams into a byte stream
248            Value * printableBytes[8];
249            p2s(iBuilder, bits, printableBytes);
250            for (unsigned k = 0; k < 8; ++k) {
251                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
252                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
253                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
254                }
255            }
256
257            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
258
259            i->addIncoming(iBuilder->CreateAdd(i, ONE), iBuilder->GetInsertBlock());
260            iBuilder->CreateBr(cond);
261
262            // -------------------------------------------------------------------------
263            iBuilder->SetInsertPoint(exit);
264
265        }
266    }
267
268
269
270}
271
272PrintableBits::PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder)
273: BlockOrientedKernel("PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
274    setNoTerminateAttribute(true);
275}
276
277SelectStream::SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex)
278: BlockOrientedKernel("SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
279    setNoTerminateAttribute(true);
280
281}
282
283ExpandStream::ExpandStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned sizeOutputStreamSet)
284: BlockOrientedKernel("ExpandStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(sizeOutputStreamSet), "outputbitStreams"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mSizeOutputStreamSet(sizeOutputStreamSet) {
285    setNoTerminateAttribute(true);
286
287}
288
289PrintStreamSet::PrintStreamSet(const std::unique_ptr<kernel::KernelBuilder> & builder, std::vector<std::string> && names, const unsigned minWidth)
290: BlockOrientedKernel("PrintableStreamSet", {}, {}, {}, {}, {})
291, mNames(names)
292, mNameWidth(0) {
293    auto width = minWidth;
294    for (const std::string & name : mNames) {
295        mStreamSetInputs.emplace_back(builder->getStreamSetTy(0), name);
296        width = std::max<unsigned>(name.length() + 5, width);
297    }
298    mNameWidth = width;
299}
300
301}
Note: See TracBrowser for help on using the repository browser.