source: icGREP/icgrep-devel/icgrep/IDISA/idisa_avx_builder.cpp @ 4954

Last change on this file since 4954 was 4954, checked in by cameron, 3 years ago

Support for -BlockSize=512 on AVX2

File size: 2.3 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "idisa_avx_builder.h"
8#include <llvm/IR/IRBuilder.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Intrinsics.h>
11#include <llvm/IR/Function.h>
12
13namespace IDISA {
14
15Value * IDISA_AVX_Builder::hsimd_signmask(unsigned fw, Value * a) {
16    if (mBitBlockWidth == 256) {
17        if (fw == 64) {
18            Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
19            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
20            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
21            Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
22            return mask;
23        }
24        else if (fw == 32) {
25            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
26            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
27            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
28            Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
29            return mask;
30        }
31    }
32    else if (mBitBlockWidth == 512) {
33        if (fw == 64) {
34            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
35            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
36            std::vector<Constant*> Idxs;
37            for (unsigned i = 0; i < 8; i++) {
38                Idxs.push_back(getInt32(2*i+1));
39            }
40            Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
41            Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
42            Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
43            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
44            Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({pack_as_ps}));
45            return mask;
46        }
47    }
48    Value * mask = CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
49    return CreateBitCast(mask, getIntNTy(mBitBlockWidth/fw));
50}
51   
52}
Note: See TracBrowser for help on using the repository browser.