source: icGREP/icgrep-devel/icgrep/kernels/deletion.cpp @ 5009

Last change on this file since 5009 was 5009, checked in by cameron, 2 years ago

u8u16 transcoder demo program now working

File size: 3.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include <kernels/kernel.h>
7#include <kernels/deletion.h>
8#include <IDISA/idisa_builder.h>
9#include <llvm/IR/Value.h>
10
11namespace kernel {
12
13std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * del_mask) {
14    Value * m = iBuilder->simd_not(del_mask);
15    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
16    std::vector<Value *> move_masks;
17    for (unsigned shift = 1; shift < fw; shift *= 2) {
18        Value * mp = mk;
19        for (unsigned lookright = 1; lookright < fw; lookright *= 2) {
20            mp = iBuilder->simd_xor(mp, iBuilder->simd_slli(fw, mp, lookright));
21        }
22        Value * mv = iBuilder->simd_and(mp, m);
23        m = iBuilder->simd_or(iBuilder->simd_xor(m, mv), iBuilder->simd_srli(fw, mv, shift));
24        mk = iBuilder->simd_and(mk, iBuilder->simd_not(mp));
25        move_masks.push_back(mv);
26    }
27    return move_masks;
28}
29
30Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * del_mask, std::vector<Value *> mv, Value * strm) {
31    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
32    for (unsigned i = 0; i < mv.size(); i++) {
33        unsigned shift = 1 << i;
34        Value * t = iBuilder->simd_and(s, mv[i]);
35        s = iBuilder->simd_or(iBuilder->simd_xor(s, t), iBuilder->simd_srli(fw, t, shift));
36    }
37    return s;
38}
39
40Value * partial_sum_popcount(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * mask) {
41    Value * per_field = iBuilder->simd_popcount(fw, mask);
42    for (unsigned move = 1; move < iBuilder->getBitBlockWidth()/fw; move *= 2) {
43        per_field = iBuilder->simd_add(fw, per_field, iBuilder->mvmd_slli(fw, per_field, move));
44    }
45    return per_field;
46}
47
48// Apply deletion to a set of stream_count input streams to produce a set of output streams.
49// Kernel inputs: stream_count data streams plus one del_mask stream
50// Outputs: the deleted streams, plus a partial sum popcount
51void generateDeletionKernel(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned stream_count, KernelBuilder * kBuilder) {
52   
53    for(unsigned i = 0; i < stream_count; ++i) {
54        kBuilder->addInputStream(1);
55        kBuilder->addOutputStream(1);
56    }
57    kBuilder->addInputStream(1, "del_mask");
58    kBuilder->addOutputStream(1);  // partial_sum popcount
59    kBuilder->prepareFunction();
60   
61    Value * del_mask = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(stream_count));
62   
63    std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, fw, del_mask);
64    for (unsigned j = 0; j < stream_count; ++j) {
65        Value * input = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
66        Value * output = apply_parallel_prefix_deletion(iBuilder, fw, del_mask, move_masks, input);
67        iBuilder->CreateBlockAlignedStore(output, kBuilder->getOutputStream(j));
68    }
69    Value * counts = partial_sum_popcount(iBuilder, fw, iBuilder->simd_not(del_mask));
70   
71    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), kBuilder->getOutputStream(stream_count));
72
73    kBuilder->finalize();
74}
75}
76
Note: See TracBrowser for help on using the repository browser.