source: icGREP/icgrep-devel/icgrep/IDISA/idisa_sse_builder.cpp @ 4903

Last change on this file since 4903 was 4901, checked in by cameron, 3 years ago

SSE specialized IDISA functions

File size: 1.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "idisa_sse_builder.h"
8#include <llvm/IR/IRBuilder.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Intrinsics.h>
11#include <llvm/IR/Function.h>
12
13namespace IDISA {
14
15
16Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
17    if (fw == 16) {
18        Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
19        return CreateCall(packuswb_func, std::vector<Value *>({simd_srli(16, a, 8), simd_srli(16, b, 8)}));
20    }
21    unsigned field_count = 2 * mBitBlockWidth/fw;
22    Value * aVec = fwCast(fw/2, a);
23    Value * bVec = fwCast(fw/2, b);
24    std::vector<Constant*> Idxs;
25    for (unsigned i = 0; i < field_count; i++) {
26        Idxs.push_back(getInt32(2*i));
27    }
28    return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
29}
30
31Value * IDISA_SSE2_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
32    if (fw == 16) {
33        Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
34        Value * mask = simd_lomask(16);
35        return CreateCall(packuswb_func, std::vector<Value *>({fwCast(16, simd_and(a, mask)), fwCast(16, simd_and(b, mask))}));
36    }
37    unsigned field_count = 2 * mBitBlockWidth/fw;
38    Value * aVec = fwCast(fw/2, a);
39    Value * bVec = fwCast(fw/2, b);
40    std::vector<Constant*> Idxs;
41    for (unsigned i = 0; i < field_count; i++) {
42        Idxs.push_back(getInt32(2*i+1));
43    }
44    return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
45}
46
47}
Note: See TracBrowser for help on using the repository browser.