source: icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp @ 5390

Last change on this file since 5390 was 5362, checked in by cameron, 2 years ago

Adam's changes to add swizzling option to DeleteByPEXT kernel

File size: 12.4 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "alignedprint.h"
#include <IR_Gen/idisa_builder.h>  // for IDISA_Builder
#include <llvm/IR/Module.h>
#include <utility>  // for std::move
9
10using namespace llvm;
11
12namespace kernel {
13
14    void ap_p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
15    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
16    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
17    s1 = iBuilder->esimd_mergeh(8, t1, t0);
18    s0 = iBuilder->esimd_mergel(8, t1, t0);
19}
20
21void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
22    Value * bit00004444[2];
23    Value * bit22226666[2];
24    Value * bit11115555[2];
25    Value * bit33337777[2];
26    ap_p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
27    ap_p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
28    ap_p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
29    ap_p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
30    Value * bit00224466[4];
31    Value * bit11335577[4];
32    for (unsigned j = 0; j<2; j++) {
33        ap_p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
34        ap_p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
35    }
36    for (unsigned j = 0; j<4; j++) {
37        ap_p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
38    }
39}
40
41void PrintableBits::generateDoBlockMethod() {
42    // Load current block
43    Value * bitStrmVal = loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
44
45    Value * bits[8];
46
47    /*
48    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
49    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
50   
51    For example, consider input bitstream 101. Our desired output is:
52    00110001 00101110 00110001
53
54    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
55
56    0   0   0 -> First bit position of every byte is all zeros
57    0   0   0 -> Same for second bit
58    1   1   1 -> Third bit is all ones
59    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
60    0   1   0 -> opposite
61    0   1   0 -> opposite
62    0   1   0 -> opposite
63    1   0   1 -> same as 4th bit position.
64   
65    Armed with the above we can do the bit->byte conversion all at once
66    rather than byte at a time! That's what we do below.
67    */
68
69    bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
70    bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
71    bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
72    bits[3] = bitStrmVal;
73    Value * negBitStrmVal = iBuilder->simd_not(bitStrmVal);
74    bits[4] = negBitStrmVal;
75    bits[5] = negBitStrmVal;
76    bits[6] = negBitStrmVal;
77    bits[7] = bitStrmVal;
78   
79    // Reassemble the paralell bit streams into a byte stream
80    Value * printableBytes[8];
81    p2s(iBuilder, bits, printableBytes);
82   
83    for (unsigned j = 0; j < 8; ++j) {
84        storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
85    }
86}
87
88void SelectStream::generateDoBlockMethod() {
89    if (mStreamIndex >= mSizeInputStreamSet)
90        llvm::report_fatal_error("Stream index out of bounds.\n");
91   
92    Value * bitStrmVal = loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
93
94    storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
95}
96
97void PrintStreamSet::generateDoBlockMethod() {
98
99    /*
100    00110001 is the Unicode codepoint for '1' and 00101110 is the codepoint for '.'.
101    We want to output a byte stream that is aligned with the input bitstream such that it contains 00110001 in each 1 position and 00101110 in each 0 position.
102
103    For example, consider input bitstream 101. Our desired output is:
104    00110001 00101110 00110001
105
106    We can do the bitstream to bytestream conversion in parallel by viewing the output stream in terms of parallel bit streams.
107
108    0   0   0 -> First bit position of every byte is all zeros
109    0   0   0 -> Same for second bit
110    1   1   1 -> Third bit is all ones
111    1   0   1 -> 4th bit is 1 for a '1' byte and '0' for a zero byte. Matches input bit stream
112    0   1   0 -> opposite
113    0   1   0 -> opposite
114    0   1   0 -> opposite
115    1   0   1 -> same as 4th bit position.
116
117    Armed with the above we can do the bit->byte conversion all at once
118    rather than byte at a time! That's what we do below.
119    */
120
121    for (const std::string & name : mNames) {
122
123        BasicBlock * entry = iBuilder->GetInsertBlock();
124
125        Value * count = getInputStreamSetCount(name);
126        ConstantInt * const streamLength = iBuilder->getSize(iBuilder->getBitBlockWidth() + mNameWidth + 1);
127        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
128
129        Value * outputName = iBuilder->CreateGlobalStringPtr(name.c_str());
130        ConstantInt * const length = iBuilder->getInt32(name.length());
131        iBuilder->CreateMemCpy(output, outputName, length, 1);
132        iBuilder->CreateMemSet(iBuilder->CreateGEP(output, iBuilder->getInt32(name.length())), iBuilder->getInt8(' '), iBuilder->getInt32(mNameWidth - name.length()), 1);
133        iBuilder->CreateStore(iBuilder->getInt8(10), iBuilder->CreateGEP(output, iBuilder->getInt32(iBuilder->getBitBlockWidth() + mNameWidth)));
134
135        if (isa<ConstantInt>(count) && cast<ConstantInt>(count)->isOne()) {
136
137            // Load current block
138            Value * const input = loadInputStreamBlock(name, iBuilder->getInt32(0));
139
140            Value * bits[8];
141            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
142            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
143            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
144            bits[3] = input;
145            Value * const negated = iBuilder->simd_not(input);
146            bits[4] = negated;
147            bits[5] = negated;
148            bits[6] = negated;
149            bits[7] = input;
150
151            // Reassemble the paralell bit streams into a byte stream
152            Value * printableBytes[8];
153            p2s(iBuilder, bits, printableBytes);
154            for (unsigned k = 0; k < 8; ++k) {
155                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
156                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
157                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
158                }
159            }
160
161            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
162
163        } else {
164
165            iBuilder->CreateStore(iBuilder->getInt8('['), iBuilder->CreateGEP(output, length));
166
167            BasicBlock * cond = CreateBasicBlock("cond");
168
169            BasicBlock * getIntLength = CreateBasicBlock("getIntLength");
170
171            BasicBlock * writeInt = CreateBasicBlock("writeInt");
172            BasicBlock * writeVector = CreateBasicBlock("writeVector");
173
174            BasicBlock * exit = CreateBasicBlock("exit");
175
176            ConstantInt * TEN = iBuilder->getSize(10);
177            ConstantInt * ONE = iBuilder->getSize(1);
178
179            iBuilder->CreateBr(cond);
180            iBuilder->SetInsertPoint(cond);
181            PHINode * i = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "i");
182            i->addIncoming(iBuilder->getSize(0), entry);
183
184            iBuilder->CreateCondBr(iBuilder->CreateICmpNE(i, count), getIntLength, exit);
185            // -------------------------------------------------------------------------
186            iBuilder->SetInsertPoint(getIntLength);
187
188            PHINode * l = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "l");
189            l->addIncoming(iBuilder->getSize(name.length() + 1), cond);
190            PHINode * temp = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "temp");
191            temp->addIncoming(i, cond);
192
193            l->addIncoming(iBuilder->CreateAdd(l, ONE), getIntLength);
194
195            temp->addIncoming(iBuilder->CreateUDiv(temp, TEN), getIntLength);
196
197            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(temp, TEN), getIntLength, writeInt);
198            // -------------------------------------------------------------------------
199            iBuilder->SetInsertPoint(writeInt);
200            PHINode * value = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
201            value->addIncoming(i, getIntLength);
202
203            PHINode * j = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "j");
204            j->addIncoming(l, getIntLength);
205            Value * ch = iBuilder->CreateURem(value, TEN);
206            ch = iBuilder->CreateTrunc(ch, iBuilder->getInt8Ty());
207            ch = iBuilder->CreateAdd(ch, iBuilder->getInt8('0'));
208
209            value->addIncoming(iBuilder->CreateUDiv(value, TEN), writeInt);
210            iBuilder->CreateStore(ch, iBuilder->CreateGEP(output, j));
211            j->addIncoming(iBuilder->CreateSub(j, ONE), writeInt);
212
213            iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(value, TEN), writeInt, writeVector);
214            // -------------------------------------------------------------------------
215            iBuilder->SetInsertPoint(writeVector);
216
217            iBuilder->CreateStore(iBuilder->getInt8(']'), iBuilder->CreateGEP(output, iBuilder->CreateAdd(l, iBuilder->getSize(1))));
218
219            // Load current block
220            Value * const input = loadInputStreamBlock(name, i);
221
222            Value * bits[8];
223            bits[0] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
224            bits[1] = ConstantInt::getNullValue(iBuilder->getBitBlockType());
225            bits[2] = ConstantInt::getAllOnesValue(iBuilder->getBitBlockType());
226            bits[3] = input;
227            Value * const negated = iBuilder->simd_not(input);
228            bits[4] = negated;
229            bits[5] = negated;
230            bits[6] = negated;
231            bits[7] = input;
232
233            // Reassemble the paralell bit streams into a byte stream
234            Value * printableBytes[8];
235            p2s(iBuilder, bits, printableBytes);
236            for (unsigned k = 0; k < 8; ++k) {
237                const auto offset = mNameWidth + (k * (iBuilder->getBitBlockWidth() / 8));
238                for (unsigned t = 0; t < (iBuilder->getBitBlockWidth() / 8); ++t) {
239                    iBuilder->CreateStore(iBuilder->CreateExtractElement(printableBytes[k], iBuilder->getInt32(t)), iBuilder->CreateGEP(output, iBuilder->getInt32(offset + t)));
240                }
241            }
242
243            iBuilder->CreateWriteCall(iBuilder->getInt32(1), output, streamLength);
244
245            i->addIncoming(iBuilder->CreateAdd(i, ONE), iBuilder->GetInsertBlock());
246            iBuilder->CreateBr(cond);
247
248            // -------------------------------------------------------------------------
249            iBuilder->SetInsertPoint(exit);
250
251        }
252    }
253
254
255
256}
257
258PrintableBits::PrintableBits(IDISA::IDISA_Builder * builder)
259: BlockOrientedKernel(builder, "PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
260    setNoTerminateAttribute(true);
261}
262
263SelectStream::SelectStream(IDISA::IDISA_Builder * builder, unsigned sizeInputStreamSet, unsigned streamIndex)
264: BlockOrientedKernel(builder, "SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
265    setNoTerminateAttribute(true);
266
267}
268
269PrintStreamSet::PrintStreamSet(IDISA::IDISA_Builder * builder, std::vector<std::string> && names, const unsigned minWidth)
270: BlockOrientedKernel(builder, "PrintableStreamSet", {}, {}, {}, {}, {})
271, mNames(names)
272, mNameWidth(0) {
273    auto width = minWidth;
274    for (const std::string & name : mNames) {
275        mStreamSetInputs.emplace_back(builder->getStreamSetTy(0), name);
276        width = std::max<unsigned>(name.length() + 5, width);
277    }
278    mNameWidth = width;
279}
280
281}
Note: See TracBrowser for help on using the repository browser.