| 97 | The LLVM {{{shufflevector}}} operation allows a result vector to be populated |
| 98 | by directly selecting elements from a concatenated pair of input vectors. |
| 99 | A constant vector of {{{i32}}} selectors lets each result element be selected |
| 100 | from any position within either of the two input vectors: for n-element inputs, selectors 0 to n-1 refer to the first vector and n to 2n-1 to the second. For example, |
| 101 | working with 8-bit input vectors (for simplicity of the example), the |
| 102 | {{{hsimd<2>::pack{h,l}(e1, e2)}}} operations may be translated directly into |
| 103 | {{{shufflevector}}} operations. |
| 104 | {{{ |
| 105 | define <8 x i1> @hsimd_packh_2(<8 x i1> %x, <8 x i1> %y) { ; odd-indexed elements of %x, then of %y |
| 106 | %result = shufflevector <8 x i1> %x, <8 x i1> %y, |
| 107 | <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> |
| 108 | ret <8 x i1> %result |
| 109 | } |
| 110 | define <8 x i1> @hsimd_packl_2(<8 x i1> %x, <8 x i1> %y) { ; even-indexed elements of %x, then of %y |
| 111 | %result = shufflevector <8 x i1> %x, <8 x i1> %y, |
| 112 | <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> |
| 113 | ret <8 x i1> %result |
| 114 | } |
| 115 | }}} |
| 116 | |
| 117 | Similarly, it is straightforward to define the additional {{{hsimd<{4,8}>::pack{h,l}(e1, e2)}}} |
| 118 | operations on 8-bit registers as follows. |
| 119 | {{{ |
| 120 | define <8 x i1> @hsimd_packh_4(<8 x i1> %x, <8 x i1> %y) { ; higher-indexed pair of each 4-element group of %x, then of %y |
| 121 | %result = shufflevector <8 x i1> %x, <8 x i1> %y, |
| 122 | <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 10, i32 11, i32 14, i32 15> |
| 123 | ret <8 x i1> %result |
| 124 | } |
| 125 | define <8 x i1> @hsimd_packl_4(<8 x i1> %x, <8 x i1> %y) { ; lower-indexed pair of each 4-element group of %x, then of %y |
| 126 | %result = shufflevector <8 x i1> %x, <8 x i1> %y, |
| 127 | <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13> |
| 128 | ret <8 x i1> %result |
| 129 | } |
| 130 | }}} |
| 131 | {{{ |
| 132 | define <8 x i1> @hsimd_packh_8(<8 x i1> %x, <8 x i1> %y) { ; elements 4-7 of %x, then elements 4-7 of %y |
| 133 | %result = shufflevector <8 x i1> %x, <8 x i1> %y, |
| 134 | <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> |
| 135 | ret <8 x i1> %result |
| 136 | } |
| 137 | define <8 x i1> @hsimd_packl_8(<8 x i1> %x, <8 x i1> %y) { ; elements 0-3 of %x, then elements 0-3 of %y |
| 138 | %result = shufflevector <8 x i1> %x, <8 x i1> %y, |
| 139 | <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> |
| 140 | ret <8 x i1> %result |
| 141 | } |
| 142 | }}} |
| 143 | |
| 144 | |
| 145 | |
| 146 | |
| 147 | > %step = zext i32 %y to i128 |
| 148 | > %newX = bitcast <2 x i64> %x to i128 |
| 149 | > %newX1 = shl i128 %newX, %step |
| 150 | > %result = bitcast i128 %newX1 to <2 x i64> |
| 151 | > ret <2 x i64> %result |
| 152 | > } |
| 153 | > |
| 154 | |
| 155 | The LLVM shufflevector operation |
| 156 | |
| 157 | |
| 158 | |
| 159 | However, it is possible to model these packing operations |
| 160 | using bit shuffle operations. |