source: icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp @ 5440

Last change on this file since 5440 was 5440, checked in by nmedfort, 23 months ago

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

File size: 12.7 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "alignedprint.h"
#include <utility>
#include <kernels/kernel_builder.h>
8
9using namespace llvm;
10
11namespace kernel {
12
13inline void ap_p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
14    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
15    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
16    s1 = iBuilder->esimd_mergeh(8, t1, t0);
17    s0 = iBuilder->esimd_mergel(8, t1, t0);
18}
19
20inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
21    Value * bit00004444[2];
22    Value * bit22226666[2];
23    Value * bit11115555[2];
24    Value * bit33337777[2];
25    ap_p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
26    ap_p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
27    ap_p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
28    ap_p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
29    Value * bit00224466[4];
30    Value * bit11335577[4];
31    for (unsigned j = 0; j<2; j++) {
32        ap_p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
33        ap_p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
34    }
35    for (unsigned j = 0; j<4; j++) {
36        ap_p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
37    }
38}
39
40void PrintableBits::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
41    // Load current block
42    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
43
44    Value * bits[8];
45
46    /*
47    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
48    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
49   
50    For example, consider input bitstream 101. Our desired output is:
51    00110001 00101110 00110001
52
53    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
54
55    0   0   0 -> First bit position of every byte is all zeros
56    0   0   0 -> Same for second bit
57    1   1   1 -> Third bit is all ones
58    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
59    0   1   0 -> opposite
60    0   1   0 -> opposite
61    0   1   0 -> opposite
62    1   0   1 -> same as 4th bit position.
63   
64    Armed with the above we can do the bit->byte conversion all at once
65    rather than byte at a time! That's what we do below.
66    */
67
68    bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
69    bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
70    bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
71    bits[3] = bitStrmVal;
72    Value * negBitStrmVal = iBuilder->simd_not(bitStrmVal);
73    bits[4] = negBitStrmVal;
74    bits[5] = negBitStrmVal;
75    bits[6] = negBitStrmVal;
76    bits[7] = bitStrmVal;
77   
78    // Reassemble the paralell bit streams into a byte stream
79    Value * printableBytes[8];
80    p2s(iBuilder, bits, printableBytes);
81   
82    for (unsigned j = 0; j < 8; ++j) {
83        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
84    }
85}
86
87void SelectStream::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
88    if (mStreamIndex >= mSizeInputStreamSet)
89        llvm::report_fatal_error("Stream index out of bounds.\n");
90   
91    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
92
93    iBuilder->storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
94}
95
96void PrintStreamSet::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
97
98    /*
99    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
100    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
101
102    For example, consider input bitstream 101. Our desired output is:
103    00110001 00101110 00110001
104
105    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
106
107    0   0   0 -> First bit position of every byte is all zeros
108    0   0   0 -> Same for second bit
109    1   1   1 -> Third bit is all ones
110    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
111    0   1   0 -> opposite
112    0   1   0 -> opposite
113    0   1   0 -> opposite
114    1   0   1 -> same as 4th bit position.
115
116    Armed with the above we can do the bit->byte conversion all at once
117    rather than byte at a time! That's what we do below.
118    */
119
120    for (const std::string & name : mNames) {
121
122        BasicBlock * entry = iBuilder->GetInsertBlock();
123
124        Value * count = iBuilder->getInputStreamSetCount(name);
125        ConstantInt * const streamLength = iBuilder->getSize(iBuilder->getBitBlockWidth() + mNameWidth + 1);
126        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
127
128        Value * outputName = iBuilder->GetString(name.c_str());
129        ConstantInt * const length = iBuilder->getInt32(name.length());
130        iBuilder->CreateMemCpy(output, outputName, length, 1);
131        iBuilder->CreateMemSet(iBuilder->CreateGEP(output, iBuilder->getInt32(name.length())), iBuilder->getInt8(' '), iBuilder->getInt32(mNameWidth - name.length()), 1);
132        iBuilder->CreateStore(iBuilder->getInt8(10), iBuilder->CreateGEP(output, iBuilder->getInt32(iBuilder->getBitBlockWidth() + mNameWidth)));
133
134        if (isa<ConstantInt>(count) && cast<ConstantInt>(count)->isOne()) {
135
136            // Load current block
137            Value * const input = iBuilder->loadInputStreamBlock(name, iBuilder->getInt32(0));
138
139            Value * bits[8];
140            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
141            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
142            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
143            bits[3] = input;
144            Value * const negated = iBuilder->simd_not(input);
145            bits[4] = negated;
146            bits[5] = negated;
147            bits[6] = negated;
148            bits[7] = input;
149
150            // Reassemble the paralell bit streams into a byte stream
151            Value * printableBytes[8];
152            p2s(iBuilder, bits, printableBytes);
153            for (unsigned k = 0; k < 8; ++k) {
154                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
155                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
156                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
157                }
158            }
159
160            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
161
162        } else {
163
164            iBuilder->CreateStore(iBuilder->getInt8('['), iBuilder->CreateGEP(output, length));
165
166            BasicBlock * cond = iBuilder->CreateBasicBlock("cond");
167
168            BasicBlock * getIntLength = iBuilder->CreateBasicBlock("getIntLength");
169
170            BasicBlock * writeInt = iBuilder->CreateBasicBlock("writeInt");
171            BasicBlock * writeVector = iBuilder->CreateBasicBlock("writeVector");
172
173            BasicBlock * exit = iBuilder->CreateBasicBlock("exit");
174
175            ConstantInt * TEN = iBuilder->getSize(10);
176            ConstantInt * ONE = iBuilder->getSize(1);
177
178            iBuilder->CreateBr(cond);
179            iBuilder->SetInsertPoint(cond);
180            PHINode * i = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "i");
181            i->addIncoming(iBuilder->getSize(0), entry);
182
183            iBuilder->CreateCondBr(iBuilder->CreateICmpNE(i, count), getIntLength, exit);
184            // -------------------------------------------------------------------------
185            iBuilder->SetInsertPoint(getIntLength);
186
187            PHINode * l = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "l");
188            l->addIncoming(iBuilder->getSize(name.length() + 1), cond);
189            PHINode * temp = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "temp");
190            temp->addIncoming(i, cond);
191
192            l->addIncoming(iBuilder->CreateAdd(l, ONE), getIntLength);
193
194            temp->addIncoming(iBuilder->CreateUDiv(temp, TEN), getIntLength);
195
196            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(temp, TEN), getIntLength, writeInt);
197            // -------------------------------------------------------------------------
198            iBuilder->SetInsertPoint(writeInt);
199            PHINode * value = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
200            value->addIncoming(i, getIntLength);
201
202            PHINode * j = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "j");
203            j->addIncoming(l, getIntLength);
204            Value * ch = iBuilder->CreateURem(value, TEN);
205            ch = iBuilder->CreateTrunc(ch, iBuilder->getInt8Ty());
206            ch = iBuilder->CreateAdd(ch, iBuilder->getInt8('0'));
207
208            value->addIncoming(iBuilder->CreateUDiv(value, TEN), writeInt);
209            iBuilder->CreateStore(ch, iBuilder->CreateGEP(output, j));
210            j->addIncoming(iBuilder->CreateSub(j, ONE), writeInt);
211
212            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(value, TEN), writeInt, writeVector);
213            // -------------------------------------------------------------------------
214            iBuilder->SetInsertPoint(writeVector);
215
216            iBuilder->CreateStore(iBuilder->getInt8(']'), iBuilder->CreateGEP(output, iBuilder->CreateAdd(l, iBuilder->getSize(1))));
217
218            // Load current block
219            Value * const input = iBuilder->loadInputStreamBlock(name, i);
220
221            Value * bits[8];
222            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
223            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
224            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
225            bits[3] = input;
226            Value * const negated = iBuilder->simd_not(input);
227            bits[4] = negated;
228            bits[5] = negated;
229            bits[6] = negated;
230            bits[7] = input;
231
232            // Reassemble the paralell bit streams into a byte stream
233            Value * printableBytes[8];
234            p2s(iBuilder, bits, printableBytes);
235            for (unsigned k = 0; k < 8; ++k) {
236                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
237                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
238                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
239                }
240            }
241
242            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
243
244            i->addIncoming(iBuilder->CreateAdd(i, ONE), iBuilder->GetInsertBlock());
245            iBuilder->CreateBr(cond);
246
247            // -------------------------------------------------------------------------
248            iBuilder->SetInsertPoint(exit);
249
250        }
251    }
252
253
254
255}
256
257PrintableBits::PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder)
258: BlockOrientedKernel("PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
259    setNoTerminateAttribute(true);
260}
261
262SelectStream::SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex)
263: BlockOrientedKernel("SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
264    setNoTerminateAttribute(true);
265
266}
267
268PrintStreamSet::PrintStreamSet(const std::unique_ptr<kernel::KernelBuilder> & builder, std::vector<std::string> && names, const unsigned minWidth)
269: BlockOrientedKernel("PrintableStreamSet", {}, {}, {}, {}, {})
270, mNames(names)
271, mNameWidth(0) {
272    auto width = minWidth;
273    for (const std::string & name : mNames) {
274        mStreamSetInputs.emplace_back(builder->getStreamSetTy(0), name);
275        width = std::max<unsigned>(name.length() + 5, width);
276    }
277    mNameWidth = width;
278}
279
280}
Note: See TracBrowser for help on using the repository browser.