source: icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

Last change on this file was 6261, checked in by nmedfort, 7 months ago

Work on OptimizationBranch; revisited pipeline termination

File size: 13.6 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "alignedprint.h"
#include <utility>
#include <kernels/kernel_builder.h>
8
9using namespace llvm;
10
11namespace kernel {
12
13inline void ap_p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
14    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
15    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
16    s1 = iBuilder->esimd_mergeh(8, t1, t0);
17    s0 = iBuilder->esimd_mergel(8, t1, t0);
18}
19
20inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
21    Value * bit00004444[2];
22    Value * bit22226666[2];
23    Value * bit11115555[2];
24    Value * bit33337777[2];
25    ap_p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
26    ap_p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
27    ap_p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
28    ap_p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
29    Value * bit00224466[4];
30    Value * bit11335577[4];
31    for (unsigned j = 0; j<2; j++) {
32        ap_p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
33        ap_p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
34    }
35    for (unsigned j = 0; j<4; j++) {
36        ap_p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
37    }
38}
39
40void PrintableBits::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
41    // Load current block
42    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
43
44    Value * bits[8];
45
46    /*
47    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
48    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
49
50    For example, consider input bitstream 101. Our desired output is:
51    00110001 00101110 00110001
52
53    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
54
55    0   0   0 -> First bit position of every byte is all zeros
56    0   0   0 -> Same for second bit
57    1   1   1 -> Third bit is all ones
58    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
59    0   1   0 -> opposite
60    0   1   0 -> opposite
61    0   1   0 -> opposite
62    1   0   1 -> same as 4th bit position.
63
64    Armed with the above we can do the bit->byte conversion all at once
65    rather than byte at a time! That's what we do below.
66    */
67
68    bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
69    bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
70    bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
71    bits[3] = bitStrmVal;
72    Value * negBitStrmVal = iBuilder->simd_not(bitStrmVal);
73    bits[4] = negBitStrmVal;
74    bits[5] = negBitStrmVal;
75    bits[6] = negBitStrmVal;
76    bits[7] = bitStrmVal;
77
78    // Reassemble the paralell bit streams into a byte stream
79    Value * printableBytes[8];
80    p2s(iBuilder, bits, printableBytes);
81
82    for (unsigned j = 0; j < 8; ++j) {
83        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
84    }
85}
86
87void SelectStream::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
88    if (mStreamIndex >= mSizeInputStreamSet)
89        llvm::report_fatal_error("Stream index out of bounds.\n");
90
91    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
92
93    iBuilder->storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
94}
95
96void ExpandOrSelectStreams::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
97
98    for (unsigned i = 0; i < mSizeOutputStreamSet; i++) {
99        if (i < mSizeInputStreamSet) {
100            Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(i));
101            iBuilder->storeOutputStreamBlock("outputbitStreams", iBuilder->getInt32(i), bitStrmVal);
102        } else {
103            iBuilder->storeOutputStreamBlock("outputbitStreams", iBuilder->getInt32(i), iBuilder->bitCast(Constant::getNullValue(iBuilder->getBitBlockType())));
104        }
105    }
106
107}
108
109void PrintStreamSet::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
110
111    /*
112    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
113    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
114
115    For example, consider input bitstream 101. Our desired output is:
116    00110001 00101110 00110001
117
118    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
119
120    0   0   0 -> First bit position of every byte is all zeros
121    0   0   0 -> Same for second bit
122    1   1   1 -> Third bit is all ones
123    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
124    0   1   0 -> opposite
125    0   1   0 -> opposite
126    0   1   0 -> opposite
127    1   0   1 -> same as 4th bit position.
128
129    Armed with the above we can do the bit->byte conversion all at once
130    rather than byte at a time! That's what we do below.
131    */
132
133    for (const std::string & name : mNames) {
134
135        BasicBlock * entry = iBuilder->GetInsertBlock();
136
137        Value * count = iBuilder->getInputStreamSetCount(name);
138        ConstantInt * const streamLength = iBuilder->getSize(iBuilder->getBitBlockWidth() + mNameWidth + 1);
139        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
140
141        Value * outputName = iBuilder->GetString(name.c_str());
142        ConstantInt * const length = iBuilder->getInt32(name.length());
143        iBuilder->CreateMemCpy(output, outputName, length, 1);
144        iBuilder->CreateMemSet(iBuilder->CreateGEP(output, iBuilder->getInt32(name.length())), iBuilder->getInt8(' '), iBuilder->getInt32(mNameWidth - name.length()), 1);
145        iBuilder->CreateStore(iBuilder->getInt8(10), iBuilder->CreateGEP(output, iBuilder->getInt32(iBuilder->getBitBlockWidth() + mNameWidth)));
146
147        if (isa<ConstantInt>(count) && cast<ConstantInt>(count)->isOne()) {
148
149            // Load current block
150            Value * const input = iBuilder->loadInputStreamBlock(name, iBuilder->getInt32(0));
151
152            Value * bits[8];
153            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
154            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
155            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
156            bits[3] = input;
157            Value * const negated = iBuilder->simd_not(input);
158            bits[4] = negated;
159            bits[5] = negated;
160            bits[6] = negated;
161            bits[7] = input;
162
163            // Reassemble the paralell bit streams into a byte stream
164            Value * printableBytes[8];
165            p2s(iBuilder, bits, printableBytes);
166            for (unsigned k = 0; k < 8; ++k) {
167                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
168                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
169                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
170                }
171            }
172
173            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
174
175        } else {
176
177            iBuilder->CreateStore(iBuilder->getInt8('['), iBuilder->CreateGEP(output, length));
178
179            BasicBlock * cond = iBuilder->CreateBasicBlock("cond");
180
181            BasicBlock * getIntLength = iBuilder->CreateBasicBlock("getIntLength");
182
183            BasicBlock * writeInt = iBuilder->CreateBasicBlock("writeInt");
184            BasicBlock * writeVector = iBuilder->CreateBasicBlock("writeVector");
185
186            BasicBlock * exit = iBuilder->CreateBasicBlock("exit");
187
188            ConstantInt * TEN = iBuilder->getSize(10);
189            ConstantInt * ONE = iBuilder->getSize(1);
190
191            iBuilder->CreateBr(cond);
192            iBuilder->SetInsertPoint(cond);
193            PHINode * i = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "i");
194            i->addIncoming(iBuilder->getSize(0), entry);
195
196            iBuilder->CreateCondBr(iBuilder->CreateICmpNE(i, count), getIntLength, exit);
197            // -------------------------------------------------------------------------
198            iBuilder->SetInsertPoint(getIntLength);
199
200            PHINode * l = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "l");
201            l->addIncoming(iBuilder->getSize(name.length() + 1), cond);
202            PHINode * temp = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "temp");
203            temp->addIncoming(i, cond);
204
205            l->addIncoming(iBuilder->CreateAdd(l, ONE), getIntLength);
206
207            temp->addIncoming(iBuilder->CreateUDiv(temp, TEN), getIntLength);
208
209            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(temp, TEN), getIntLength, writeInt);
210            // -------------------------------------------------------------------------
211            iBuilder->SetInsertPoint(writeInt);
212            PHINode * value = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
213            value->addIncoming(i, getIntLength);
214
215            PHINode * j = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "j");
216            j->addIncoming(l, getIntLength);
217            Value * ch = iBuilder->CreateURem(value, TEN);
218            ch = iBuilder->CreateTrunc(ch, iBuilder->getInt8Ty());
219            ch = iBuilder->CreateAdd(ch, iBuilder->getInt8('0'));
220
221            value->addIncoming(iBuilder->CreateUDiv(value, TEN), writeInt);
222            iBuilder->CreateStore(ch, iBuilder->CreateGEP(output, j));
223            j->addIncoming(iBuilder->CreateSub(j, ONE), writeInt);
224
225            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(value, TEN), writeInt, writeVector);
226            // -------------------------------------------------------------------------
227            iBuilder->SetInsertPoint(writeVector);
228
229            iBuilder->CreateStore(iBuilder->getInt8(']'), iBuilder->CreateGEP(output, iBuilder->CreateAdd(l, iBuilder->getSize(1))));
230
231            // Load current block
232            Value * const input = iBuilder->loadInputStreamBlock(name, i);
233
234            Value * bits[8];
235            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
236            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
237            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
238            bits[3] = input;
239            Value * const negated = iBuilder->simd_not(input);
240            bits[4] = negated;
241            bits[5] = negated;
242            bits[6] = negated;
243            bits[7] = input;
244
245            // Reassemble the paralell bit streams into a byte stream
246            Value * printableBytes[8];
247            p2s(iBuilder, bits, printableBytes);
248            for (unsigned k = 0; k < 8; ++k) {
249                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
250                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
251                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
252                }
253            }
254
255            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
256
257            i->addIncoming(iBuilder->CreateAdd(i, ONE), iBuilder->GetInsertBlock());
258            iBuilder->CreateBr(cond);
259
260            // -------------------------------------------------------------------------
261            iBuilder->SetInsertPoint(exit);
262
263        }
264    }
265
266
267
268}
269
270PrintableBits::PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder)
271: BlockOrientedKernel(b, "PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
272
273}
274
275SelectStream::SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex)
276: BlockOrientedKernel(b, "SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
277
278}
279
280ExpandOrSelectStreams::ExpandOrSelectStreams(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned sizeOutputStreamSet)
281: BlockOrientedKernel(b, "ExpandOrSelectStreams", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(sizeOutputStreamSet), "outputbitStreams"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mSizeOutputStreamSet(sizeOutputStreamSet) {
282
283}
284
285PrintStreamSet::PrintStreamSet(const std::unique_ptr<kernel::KernelBuilder> & builder, std::vector<std::string> && names, const unsigned minWidth)
286: BlockOrientedKernel(b, "PrintableStreamSet", {}, {}, {}, {}, {})
287, mNames(names)
288, mNameWidth(0) {
289    auto width = minWidth;
290    for (const std::string & name : mNames) {
291        mInputStreamSets.emplace_back(builder->getStreamSetTy(0), name);
292        width = std::max<unsigned>(name.length() + 5, width);
293    }
294    mNameWidth = width;
295}
296
297}
Note: See TracBrowser for help on using the repository browser.