source: icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp @ 5508

Last change on this file since 5508 was 5440, checked in by nmedfort, 2 years ago

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

File size: 12.7 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "alignedprint.h"
#include <kernels/kernel_builder.h>
#include <utility>
8
9using namespace llvm;
10
11namespace kernel {
12
13inline void ap_p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
14    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
15    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
16    s1 = iBuilder->esimd_mergeh(8, t1, t0);
17    s0 = iBuilder->esimd_mergel(8, t1, t0);
18}
19
20inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
21    Value * bit00004444[2];
22    Value * bit22226666[2];
23    Value * bit11115555[2];
24    Value * bit33337777[2];
25    ap_p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
26    ap_p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
27    ap_p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
28    ap_p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
29    Value * bit00224466[4];
30    Value * bit11335577[4];
31    for (unsigned j = 0; j<2; j++) {
32        ap_p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
33        ap_p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
34    }
35    for (unsigned j = 0; j<4; j++) {
36        ap_p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
37    }
38}
39
40void PrintableBits::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
41    // Load current block
42    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
43
44    Value * bits[8];
45
46    /*
47    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
48    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
49   
50    For example, consider input bitstream 101. Our desired output is:
51    00110001 00101110 00110001
52
53    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
54
55    0   0   0 -> First bit position of every byte is all zeros
56    0   0   0 -> Same for second bit
57    1   1   1 -> Third bit is all ones
58    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
59    0   1   0 -> opposite
60    0   1   0 -> opposite
61    0   1   0 -> opposite
62    1   0   1 -> same as 4th bit position.
63   
64    Armed with the above we can do the bit->byte conversion all at once
65    rather than byte at a time! That's what we do below.
66    */
67
68    bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
69    bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
70    bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
71    bits[3] = bitStrmVal;
72    Value * negBitStrmVal = iBuilder->simd_not(bitStrmVal);
73    bits[4] = negBitStrmVal;
74    bits[5] = negBitStrmVal;
75    bits[6] = negBitStrmVal;
76    bits[7] = bitStrmVal;
77   
78    // Reassemble the paralell bit streams into a byte stream
79    Value * printableBytes[8];
80    p2s(iBuilder, bits, printableBytes);
81   
82    for (unsigned j = 0; j < 8; ++j) {
83        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
84    }
85}
86
87void SelectStream::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
88    if (mStreamIndex >= mSizeInputStreamSet)
89        llvm::report_fatal_error("Stream index out of bounds.\n");
90   
91    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
92
93    iBuilder->storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
94}
95
96void PrintStreamSet::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
97
98    /*
99    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
100    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
101
102    For example, consider input bitstream 101. Our desired output is:
103    00110001 00101110 00110001
104
105    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
106
107    0   0   0 -> First bit position of every byte is all zeros
108    0   0   0 -> Same for second bit
109    1   1   1 -> Third bit is all ones
110    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
111    0   1   0 -> opposite
112    0   1   0 -> opposite
113    0   1   0 -> opposite
114    1   0   1 -> same as 4th bit position.
115
116    Armed with the above we can do the bit->byte conversion all at once
117    rather than byte at a time! That's what we do below.
118    */
119
120    for (const std::string & name : mNames) {
121
122        BasicBlock * entry = iBuilder->GetInsertBlock();
123
124        Value * count = iBuilder->getInputStreamSetCount(name);
125        ConstantInt * const streamLength = iBuilder->getSize(iBuilder->getBitBlockWidth() + mNameWidth + 1);
126        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
127
128        Value * outputName = iBuilder->GetString(name.c_str());
129        ConstantInt * const length = iBuilder->getInt32(name.length());
130        iBuilder->CreateMemCpy(output, outputName, length, 1);
131        iBuilder->CreateMemSet(iBuilder->CreateGEP(output, iBuilder->getInt32(name.length())), iBuilder->getInt8(' '), iBuilder->getInt32(mNameWidth - name.length()), 1);
132        iBuilder->CreateStore(iBuilder->getInt8(10), iBuilder->CreateGEP(output, iBuilder->getInt32(iBuilder->getBitBlockWidth() + mNameWidth)));
133
134        if (isa<ConstantInt>(count) && cast<ConstantInt>(count)->isOne()) {
135
136            // Load current block
137            Value * const input = iBuilder->loadInputStreamBlock(name, iBuilder->getInt32(0));
138
139            Value * bits[8];
140            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
141            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
142            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
143            bits[3] = input;
144            Value * const negated = iBuilder->simd_not(input);
145            bits[4] = negated;
146            bits[5] = negated;
147            bits[6] = negated;
148            bits[7] = input;
149
150            // Reassemble the paralell bit streams into a byte stream
151            Value * printableBytes[8];
152            p2s(iBuilder, bits, printableBytes);
153            for (unsigned k = 0; k < 8; ++k) {
154                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
155                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
156                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
157                }
158            }
159
160            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
161
162        } else {
163
164            iBuilder->CreateStore(iBuilder->getInt8('['), iBuilder->CreateGEP(output, length));
165
166            BasicBlock * cond = iBuilder->CreateBasicBlock("cond");
167
168            BasicBlock * getIntLength = iBuilder->CreateBasicBlock("getIntLength");
169
170            BasicBlock * writeInt = iBuilder->CreateBasicBlock("writeInt");
171            BasicBlock * writeVector = iBuilder->CreateBasicBlock("writeVector");
172
173            BasicBlock * exit = iBuilder->CreateBasicBlock("exit");
174
175            ConstantInt * TEN = iBuilder->getSize(10);
176            ConstantInt * ONE = iBuilder->getSize(1);
177
178            iBuilder->CreateBr(cond);
179            iBuilder->SetInsertPoint(cond);
180            PHINode * i = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "i");
181            i->addIncoming(iBuilder->getSize(0), entry);
182
183            iBuilder->CreateCondBr(iBuilder->CreateICmpNE(i, count), getIntLength, exit);
184            // -------------------------------------------------------------------------
185            iBuilder->SetInsertPoint(getIntLength);
186
187            PHINode * l = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "l");
188            l->addIncoming(iBuilder->getSize(name.length() + 1), cond);
189            PHINode * temp = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "temp");
190            temp->addIncoming(i, cond);
191
192            l->addIncoming(iBuilder->CreateAdd(l, ONE), getIntLength);
193
194            temp->addIncoming(iBuilder->CreateUDiv(temp, TEN), getIntLength);
195
196            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(temp, TEN), getIntLength, writeInt);
197            // -------------------------------------------------------------------------
198            iBuilder->SetInsertPoint(writeInt);
199            PHINode * value = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
200            value->addIncoming(i, getIntLength);
201
202            PHINode * j = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "j");
203            j->addIncoming(l, getIntLength);
204            Value * ch = iBuilder->CreateURem(value, TEN);
205            ch = iBuilder->CreateTrunc(ch, iBuilder->getInt8Ty());
206            ch = iBuilder->CreateAdd(ch, iBuilder->getInt8('0'));
207
208            value->addIncoming(iBuilder->CreateUDiv(value, TEN), writeInt);
209            iBuilder->CreateStore(ch, iBuilder->CreateGEP(output, j));
210            j->addIncoming(iBuilder->CreateSub(j, ONE), writeInt);
211
212            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(value, TEN), writeInt, writeVector);
213            // -------------------------------------------------------------------------
214            iBuilder->SetInsertPoint(writeVector);
215
216            iBuilder->CreateStore(iBuilder->getInt8(']'), iBuilder->CreateGEP(output, iBuilder->CreateAdd(l, iBuilder->getSize(1))));
217
218            // Load current block
219            Value * const input = iBuilder->loadInputStreamBlock(name, i);
220
221            Value * bits[8];
222            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
223            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
224            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
225            bits[3] = input;
226            Value * const negated = iBuilder->simd_not(input);
227            bits[4] = negated;
228            bits[5] = negated;
229            bits[6] = negated;
230            bits[7] = input;
231
232            // Reassemble the paralell bit streams into a byte stream
233            Value * printableBytes[8];
234            p2s(iBuilder, bits, printableBytes);
235            for (unsigned k = 0; k < 8; ++k) {
236                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
237                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
238                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
239                }
240            }
241
242            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
243
244            i->addIncoming(iBuilder->CreateAdd(i, ONE), iBuilder->GetInsertBlock());
245            iBuilder->CreateBr(cond);
246
247            // -------------------------------------------------------------------------
248            iBuilder->SetInsertPoint(exit);
249
250        }
251    }
252
253
254
255}
256
257PrintableBits::PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder)
258: BlockOrientedKernel("PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
259    setNoTerminateAttribute(true);
260}
261
262SelectStream::SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex)
263: BlockOrientedKernel("SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
264    setNoTerminateAttribute(true);
265
266}
267
268PrintStreamSet::PrintStreamSet(const std::unique_ptr<kernel::KernelBuilder> & builder, std::vector<std::string> && names, const unsigned minWidth)
269: BlockOrientedKernel("PrintableStreamSet", {}, {}, {}, {}, {})
270, mNames(names)
271, mNameWidth(0) {
272    auto width = minWidth;
273    for (const std::string & name : mNames) {
274        mStreamSetInputs.emplace_back(builder->getStreamSetTy(0), name);
275        width = std::max<unsigned>(name.length() + 5, width);
276    }
277    mNameWidth = width;
278}
279
280}
Note: See TracBrowser for help on using the repository browser.