source: parabix-LLVM/diff-r209855 @ 5692

Last change on this file since 5692 was 4851, checked in by cameron, 4 years ago

Update diff

File size: 86.7 KB
Line 
1diff -r parabix-llvm/include/llvm/CodeGen/ISDOpcodes.h r209855/include/llvm/CodeGen/ISDOpcodes.h
2220,222d219
3<     // llvm.uadd.with.overflow.carryin intrinsics.
4<     UADDE,
5<
6460c457
7<
8---
9>     
10diff -r parabix-llvm/include/llvm/CodeGen/MachineValueType.h r209855/include/llvm/CodeGen/MachineValueType.h
1138,49c38,43
12<       i1              =  1,
13<       i2              =  2,
14<       i4              =  3,
15<       i8              =  4,
16<       i16             =  5,
17<       i32             =  6,
18<       i64             =  7,
19<       i128            =  8,
20<       i256            =  9,
21<       i512            =  10,
22<       i1024           =  11,
23<
24---
25>       i1             =   1,   // This is a 1 bit integer value
26>       i8             =   2,   // This is an 8 bit integer value
27>       i16            =   3,   // This is a 16 bit integer value
28>       i32            =   4,   // This is a 32 bit integer value
29>       i64            =   5,   // This is a 64 bit integer value
30>       i128           =   6,   // This is a 128 bit integer value
3152c46
32<       LAST_INTEGER_VALUETYPE  = i1024,
33---
34>       LAST_INTEGER_VALUETYPE  = i128,
3554,59c48,53
36<       f16            =   12,   // This is a 16 bit floating point value
37<       f32            =   13,   // This is a 32 bit floating point value
38<       f64            =   14,   // This is a 64 bit floating point value
39<       f80            =  15,   // This is a 80 bit floating point value
40<       f128           =  16,   // This is a 128 bit floating point value
41<       ppcf128        =  17,   // This is a PPC 128-bit floating point value
42---
43>       f16            =   7,   // This is a 16 bit floating point value
44>       f32            =   8,   // This is a 32 bit floating point value
45>       f64            =   9,   // This is a 64 bit floating point value
46>       f80            =  10,   // This is a 80 bit floating point value
47>       f128           =  11,   // This is a 128 bit floating point value
48>       ppcf128        =  12,   // This is a PPC 128-bit floating point value
4964,169c58,119
50<       v1i1            =  18,
51<       v2i1            =  19,
52<       v4i1            =  20,
53<       v8i1            =  21,
54<       v16i1           =  22,
55<       v32i1           =  23,
56<       v64i1           =  24,
57<       v128i1          =  25,
58<       v256i1          =  26,
59<       v512i1          =  27,
60<       v1024i1         =  28,
61<       v1i2            =  29,
62<       v2i2            =  30,
63<       v4i2            =  31,
64<       v8i2            =  32,
65<       v16i2           =  33,
66<       v32i2           =  34,
67<       v64i2           =  35,
68<       v128i2          =  36,
69<       v256i2          =  37,
70<       v512i2          =  38,
71<       v1i4            =  39,
72<       v2i4            =  40,
73<       v4i4            =  41,
74<       v8i4            =  42,
75<       v16i4           =  43,
76<       v32i4           =  44,
77<       v64i4           =  45,
78<       v128i4          =  46,
79<       v256i4          =  47,
80<       v1i8            =  48,
81<       v2i8            =  49,
82<       v4i8            =  50,
83<       v8i8            =  51,
84<       v16i8           =  52,
85<       v32i8           =  53,
86<       v64i8           =  54,
87<       v128i8          =  55,
88<       v1i16           =  56,
89<       v2i16           =  57,
90<       v4i16           =  58,
91<       v8i16           =  59,
92<       v16i16          =  60,
93<       v32i16          =  61,
94<       v64i16          =  62,
95<       v1i32           =  63,
96<       v2i32           =  64,
97<       v4i32           =  65,
98<       v8i32           =  66,
99<       v16i32          =  67,
100<       v32i32          =  68,
101<       v1i64           =  69,
102<       v2i64           =  70,
103<       v4i64           =  71,
104<       v8i64           =  72,
105<       v16i64          =  73,
106<       v1i128          =  74,
107<       v2i128          =  75,
108<       v4i128          =  76,
109<       v8i128          =  77,
110<       v1i256          =  78,
111<       v2i256          =  79,
112<       v4i256          =  80,
113<       v1i512          =  81,
114<       v2i512          =  82,
115<       v1i1024         =  83,
116<
117<
118<       FIRST_INTEGER_VECTOR_VALUETYPE = v1i1,
119<       LAST_INTEGER_VECTOR_VALUETYPE = v1i1024,
120<
121<       v1f16           =  84,
122<       v2f16           =  85,
123<       v4f16           =  86,
124<       v8f16           =  87,
125<       v16f16          =  88,
126<       v32f16          =  89,
127<       v64f16          =  90,
128<       v1f32           =  91,
129<       v2f32           =  92,
130<       v4f32           =  93,
131<       v8f32           =  94,
132<       v16f32          =  95,
133<       v32f32          =  96,
134<       v1f64           =  97,
135<       v2f64           =  98,
136<       v4f64           =  99,
137<       v8f64           =  100,
138<       v16f64          =  101,
139<
140<
141<       FIRST_FP_VECTOR_VALUETYPE = v1f16,
142<       LAST_FP_VECTOR_VALUETYPE = v16f64,
143<
144<       FIRST_VECTOR_VALUETYPE = v1i1,
145<       LAST_VECTOR_VALUETYPE  = v16f64,
146<
147<       x86mmx         =  102,   // This is an X86 MMX value
148<
149<       Glue           =  103,   // This glues nodes together during pre-RA sched
150<
151<       isVoid         =  104,   // This has no value
152<
153<       Untyped        =  105,   // This value takes a register, but has
154<             // unspecified type.  The register class
155<             // will be determined by the opcode.
156---
157>       v2i1           =  13,   //  2 x i1
158>       v4i1           =  14,   //  4 x i1
159>       v8i1           =  15,   //  8 x i1
160>       v16i1          =  16,   // 16 x i1
161>       v32i1          =  17,   // 32 x i1
162>       v64i1          =  18,   // 64 x i1
163>
164>       v1i8           =  19,   //  1 x i8
165>       v2i8           =  20,   //  2 x i8
166>       v4i8           =  21,   //  4 x i8
167>       v8i8           =  22,   //  8 x i8
168>       v16i8          =  23,   // 16 x i8
169>       v32i8          =  24,   // 32 x i8
170>       v64i8          =  25,   // 64 x i8
171>       v1i16          =  26,   //  1 x i16
172>       v2i16          =  27,   //  2 x i16
173>       v4i16          =  28,   //  4 x i16
174>       v8i16          =  29,   //  8 x i16
175>       v16i16         =  30,   // 16 x i16
176>       v32i16         =  31,   // 32 x i16
177>       v1i32          =  32,   //  1 x i32
178>       v2i32          =  33,   //  2 x i32
179>       v4i32          =  34,   //  4 x i32
180>       v8i32          =  35,   //  8 x i32
181>       v16i32         =  36,   // 16 x i32
182>       v1i64          =  37,   //  1 x i64
183>       v2i64          =  38,   //  2 x i64
184>       v4i64          =  39,   //  4 x i64
185>       v8i64          =  40,   //  8 x i64
186>       v16i64         =  41,   // 16 x i64
187>
188>       FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
189>       LAST_INTEGER_VECTOR_VALUETYPE = v16i64,
190>
191>       v2f16          =  42,   //  2 x f16
192>       v4f16          =  43,   //  4 x f16
193>       v8f16          =  44,   //  8 x f16
194>       v1f32          =  45,   //  1 x f32
195>       v2f32          =  46,   //  2 x f32
196>       v4f32          =  47,   //  4 x f32
197>       v8f32          =  48,   //  8 x f32
198>       v16f32         =  49,   // 16 x f32
199>       v1f64          =  50,   //  1 x f64
200>       v2f64          =  51,   //  2 x f64
201>       v4f64          =  52,   //  4 x f64
202>       v8f64          =  53,   //  8 x f64
203>
204>       FIRST_FP_VECTOR_VALUETYPE = v2f16,
205>       LAST_FP_VECTOR_VALUETYPE = v8f64,
206>
207>       FIRST_VECTOR_VALUETYPE = v2i1,
208>       LAST_VECTOR_VALUETYPE  = v8f64,
209>
210>       x86mmx         =  54,   // This is an X86 MMX value
211>
212>       Glue           =  55,   // This glues nodes together during pre-RA sched
213>
214>       isVoid         =  56,   // This has no value
215>
216>       Untyped        =  57,   // This value takes a register, but has
217>                               // unspecified type.  The register class
218>                               // will be determined by the opcode.
219171c121
220<       LAST_VALUETYPE =  106,   // This always remains at the end of the list.
221---
222>       LAST_VALUETYPE =  58,   // This always remains at the end of the list.
223176c126
224<       MAX_ALLOWED_VALUETYPE = 128,
225---
226>       MAX_ALLOWED_VALUETYPE = 64,
227240,277d189
228<     /// Parabix helper functions here
229<     bool isParabixVector() const {
230<       return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v64i1 ||
231<               SimpleTy == MVT::v64i2 || SimpleTy == MVT::v32i4 ||
232<               SimpleTy == MVT::v128i1);
233<     }
234<
235<     bool isParabixValue() const {
236<       return (SimpleTy == MVT::v1i1 || SimpleTy == MVT::v1i2 || SimpleTy == MVT::v2i2 || SimpleTy == MVT::v1i4 || SimpleTy == MVT::v4i2 || SimpleTy == MVT::v2i4 || SimpleTy == MVT::v8i2 || SimpleTy == MVT::v4i4 || SimpleTy == MVT::v1f16 || SimpleTy == MVT::v16i2 || SimpleTy == MVT::v8i4 || SimpleTy == MVT::v32i2 || SimpleTy == MVT::v16i4 || SimpleTy == MVT::v128i1 || SimpleTy == MVT::v64i2 || SimpleTy == MVT::v32i4 || SimpleTy == MVT::v1i128 || SimpleTy == MVT::v256i1 || SimpleTy == MVT::v128i2 || SimpleTy == MVT::v64i4 || SimpleTy == MVT::v2i128 || SimpleTy == MVT::v1i256 || SimpleTy == MVT::v16f16 || SimpleTy == MVT::v512i1 || SimpleTy == MVT::v256i2 || SimpleTy == MVT::v128i4 || SimpleTy == MVT::v64i8 || SimpleTy == MVT::v4i128 || SimpleTy == MVT::v2i256 || SimpleTy == MVT::v1i512 || SimpleTy == MVT::v32f16 || SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v512i2 || SimpleTy == MVT::v256i4 || SimpleTy == MVT::v128i8 || SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 || SimpleTy == MVT::v8i128 || SimpleTy == MVT::v4i256 || SimpleTy == MVT::v2i512 || SimpleTy == MVT::v1i1024 || SimpleTy == MVT::v64f16 || SimpleTy == MVT::v32f32 || SimpleTy == MVT::v16f64);
237<     }
238<
239<
240<     /// is1BitVector - Return true if this is a 1-bit vector type.
241<     bool is1BitVector() const {
242<     return (SimpleTy == MVT::v1i1);
243<     }
244<
245<     /// is2BitVector - Return true if this is a 2-bit vector type.
246<     bool is2BitVector() const {
247<     return (SimpleTy == MVT::v2i1 ||
248<               SimpleTy == MVT::v1i2);
249<     }
250<
251<     /// is4BitVector - Return true if this is a 4-bit vector type.
252<     bool is4BitVector() const {
253<     return (SimpleTy == MVT::v4i1 ||
254<               SimpleTy == MVT::v2i2 ||
255<               SimpleTy == MVT::v1i4);
256<     }
257<
258<     /// is8BitVector - Return true if this is a 8-bit vector type.
259<     bool is8BitVector() const {
260<     return (SimpleTy == MVT::v8i1 ||
261<               SimpleTy == MVT::v4i2 ||
262<               SimpleTy == MVT::v2i4 ||
263<               SimpleTy == MVT::v1i8);
264<     }
265<
266280,285c192,193
267<     return (SimpleTy == MVT::v16i1 ||
268<               SimpleTy == MVT::v8i2 ||
269<               SimpleTy == MVT::v4i4 ||
270<               SimpleTy == MVT::v2i8 ||
271<               SimpleTy == MVT::v1i16 ||
272<               SimpleTy == MVT::v1f16);
273---
274>       return (SimpleTy == MVT::v2i8  || SimpleTy == MVT::v1i16 ||
275>               SimpleTy == MVT::v16i1);
276290,297c198,199
277<     return (SimpleTy == MVT::v32i1 ||
278<               SimpleTy == MVT::v16i2 ||
279<               SimpleTy == MVT::v8i4 ||
280<               SimpleTy == MVT::v4i8 ||
281<               SimpleTy == MVT::v2i16 ||
282<               SimpleTy == MVT::v1i32 ||
283<               SimpleTy == MVT::v2f16 ||
284<               SimpleTy == MVT::v1f32);
285---
286>       return (SimpleTy == MVT::v4i8  || SimpleTy == MVT::v2i16 ||
287>               SimpleTy == MVT::v1i32);
288302,311c204,206
289<     return (SimpleTy == MVT::v64i1 ||
290<               SimpleTy == MVT::v32i2 ||
291<               SimpleTy == MVT::v16i4 ||
292<               SimpleTy == MVT::v8i8 ||
293<               SimpleTy == MVT::v4i16 ||
294<               SimpleTy == MVT::v2i32 ||
295<               SimpleTy == MVT::v1i64 ||
296<               SimpleTy == MVT::v4f16 ||
297<               SimpleTy == MVT::v2f32 ||
298<               SimpleTy == MVT::v1f64);
299---
300>       return (SimpleTy == MVT::v8i8  || SimpleTy == MVT::v4i16 ||
301>               SimpleTy == MVT::v2i32 || SimpleTy == MVT::v1i64 ||
302>               SimpleTy == MVT::v1f64 || SimpleTy == MVT::v2f32);
303316,326c211,213
304<     return (SimpleTy == MVT::v128i1 ||
305<               SimpleTy == MVT::v64i2 ||
306<               SimpleTy == MVT::v32i4 ||
307<               SimpleTy == MVT::v16i8 ||
308<               SimpleTy == MVT::v8i16 ||
309<               SimpleTy == MVT::v4i32 ||
310<               SimpleTy == MVT::v2i64 ||
311<               SimpleTy == MVT::v1i128 ||
312<               SimpleTy == MVT::v8f16 ||
313<               SimpleTy == MVT::v4f32 ||
314<               SimpleTy == MVT::v2f64);
315---
316>       return (SimpleTy == MVT::v16i8 || SimpleTy == MVT::v8i16 ||
317>               SimpleTy == MVT::v4i32 || SimpleTy == MVT::v2i64 ||
318>               SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64);
319331,342c218,220
320<     return (SimpleTy == MVT::v256i1 ||
321<               SimpleTy == MVT::v128i2 ||
322<               SimpleTy == MVT::v64i4 ||
323<               SimpleTy == MVT::v32i8 ||
324<               SimpleTy == MVT::v16i16 ||
325<               SimpleTy == MVT::v8i32 ||
326<               SimpleTy == MVT::v4i64 ||
327<               SimpleTy == MVT::v2i128 ||
328<               SimpleTy == MVT::v1i256 ||
329<               SimpleTy == MVT::v16f16 ||
330<               SimpleTy == MVT::v8f32 ||
331<               SimpleTy == MVT::v4f64);
332---
333>       return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64  ||
334>               SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
335>               SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64);
336347,359c225,227
337<     return (SimpleTy == MVT::v512i1 ||
338<               SimpleTy == MVT::v256i2 ||
339<               SimpleTy == MVT::v128i4 ||
340<               SimpleTy == MVT::v64i8 ||
341<               SimpleTy == MVT::v32i16 ||
342<               SimpleTy == MVT::v16i32 ||
343<               SimpleTy == MVT::v8i64 ||
344<               SimpleTy == MVT::v4i128 ||
345<               SimpleTy == MVT::v2i256 ||
346<               SimpleTy == MVT::v1i512 ||
347<               SimpleTy == MVT::v32f16 ||
348<               SimpleTy == MVT::v16f32 ||
349<               SimpleTy == MVT::v8f64);
350---
351>       return (SimpleTy == MVT::v8f64 || SimpleTy == MVT::v16f32 ||
352>               SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
353>               SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32);
354364,377c232
355<     return (SimpleTy == MVT::v1024i1 ||
356<               SimpleTy == MVT::v512i2 ||
357<               SimpleTy == MVT::v256i4 ||
358<               SimpleTy == MVT::v128i8 ||
359<               SimpleTy == MVT::v64i16 ||
360<               SimpleTy == MVT::v32i32 ||
361<               SimpleTy == MVT::v16i64 ||
362<               SimpleTy == MVT::v8i128 ||
363<               SimpleTy == MVT::v4i256 ||
364<               SimpleTy == MVT::v2i512 ||
365<               SimpleTy == MVT::v1i1024 ||
366<               SimpleTy == MVT::v64f16 ||
367<               SimpleTy == MVT::v32f32 ||
368<               SimpleTy == MVT::v16f64);
369---
370>       return (SimpleTy == MVT::v16i64);
371380d234
372<
373410d263
374<
375412,429c265,308
376<         switch (SimpleTy) {
377<         default:
378<             llvm_unreachable("Not a vector MVT!");
379<               case v1i1:  case v2i1:  case v4i1:  case v8i1:  case v16i1:  case v32i1:  case v64i1:  case v128i1:  case v256i1:  case v512i1:  case v1024i1:  return i1;
380<       case v1i2:  case v2i2:  case v4i2:  case v8i2:  case v16i2:  case v32i2:  case v64i2:  case v128i2:  case v256i2:  case v512i2:  return i2;
381<       case v1i4:  case v2i4:  case v4i4:  case v8i4:  case v16i4:  case v32i4:  case v64i4:  case v128i4:  case v256i4:  return i4;
382<       case v1i8:  case v2i8:  case v4i8:  case v8i8:  case v16i8:  case v32i8:  case v64i8:  case v128i8:  return i8;
383<       case v1f16:  case v2f16:  case v4f16:  case v8f16:  case v16f16:  case v32f16:  case v64f16:  return f16;
384<       case v1i16:  case v2i16:  case v4i16:  case v8i16:  case v16i16:  case v32i16:  case v64i16:  return i16;
385<       case v1f32:  case v2f32:  case v4f32:  case v8f32:  case v16f32:  case v32f32:  return f32;
386<       case v1i32:  case v2i32:  case v4i32:  case v8i32:  case v16i32:  case v32i32:  return i32;
387<       case v1f64:  case v2f64:  case v4f64:  case v8f64:  case v16f64:  return f64;
388<       case v1i64:  case v2i64:  case v4i64:  case v8i64:  case v16i64:  return i64;
389<       case v1i128:  case v2i128:  case v4i128:  case v8i128:  return i128;
390<       case v1i256:  case v2i256:  case v4i256:  return i256;
391<       case v1i512:  case v2i512:  return i512;
392<       case v1i1024:  return i1024;
393<
394---
395>       switch (SimpleTy) {
396>       default:
397>         llvm_unreachable("Not a vector MVT!");
398>       case v2i1 :
399>       case v4i1 :
400>       case v8i1 :
401>       case v16i1 :
402>       case v32i1 :
403>       case v64i1: return i1;
404>       case v1i8 :
405>       case v2i8 :
406>       case v4i8 :
407>       case v8i8 :
408>       case v16i8:
409>       case v32i8:
410>       case v64i8: return i8;
411>       case v1i16:
412>       case v2i16:
413>       case v4i16:
414>       case v8i16:
415>       case v16i16:
416>       case v32i16: return i16;
417>       case v1i32:
418>       case v2i32:
419>       case v4i32:
420>       case v8i32:
421>       case v16i32: return i32;
422>       case v1i64:
423>       case v2i64:
424>       case v4i64:
425>       case v8i64:
426>       case v16i64: return i64;
427>       case v2f16:
428>       case v4f16:
429>       case v8f16: return f16;
430>       case v1f32:
431>       case v2f32:
432>       case v4f32:
433>       case v8f32:
434>       case v16f32: return f32;
435>       case v1f64:
436>       case v2f64:
437>       case v4f64:
438>       case v8f64: return f64;
439433,435d311
440<
441<
442<
443440,451c316,356
444<             case v1i1: case v1i2: case v1i4: case v1i8: case v1i16: case v1f16: case v1i32: case v1f32: case v1i64: case v1f64: case v1i128: case v1i256: case v1i512: case v1i1024:  return 1;
445<       case v1024i1:  return 1024;
446<       case v128i1: case v128i2: case v128i4: case v128i8:  return 128;
447<       case v16i1: case v16i2: case v16i4: case v16i8: case v16i16: case v16f16: case v16i32: case v16f32: case v16i64: case v16f64:  return 16;
448<       case v2i1: case v2i2: case v2i4: case v2i8: case v2i16: case v2f16: case v2i32: case v2f32: case v2i64: case v2f64: case v2i128: case v2i256: case v2i512:  return 2;
449<       case v256i1: case v256i2: case v256i4:  return 256;
450<       case v32i1: case v32i2: case v32i4: case v32i8: case v32i16: case v32f16: case v32i32: case v32f32:  return 32;
451<       case v4i1: case v4i2: case v4i4: case v4i8: case v4i16: case v4f16: case v4i32: case v4f32: case v4i64: case v4f64: case v4i128: case v4i256:  return 4;
452<       case v512i1: case v512i2:  return 512;
453<       case v64i1: case v64i2: case v64i4: case v64i8: case v64i16: case v64f16:  return 64;
454<       case v8i1: case v8i2: case v8i4: case v8i8: case v8i16: case v8f16: case v8i32: case v8f32: case v8i64: case v8f64: case v8i128:  return 8;
455<
456---
457>       case v32i1:
458>       case v32i8:
459>       case v32i16: return 32;
460>       case v64i1:
461>       case v64i8: return 64;
462>       case v16i1:
463>       case v16i8:
464>       case v16i16:
465>       case v16i32:
466>       case v16i64:
467>       case v16f32: return 16;
468>       case v8i1 :
469>       case v8i8 :
470>       case v8i16:
471>       case v8i32:
472>       case v8i64:
473>       case v8f16:
474>       case v8f32:
475>       case v8f64: return 8;
476>       case v4i1:
477>       case v4i8:
478>       case v4i16:
479>       case v4i32:
480>       case v4i64:
481>       case v4f16:
482>       case v4f32:
483>       case v4f64: return 4;
484>       case v2i1:
485>       case v2i8:
486>       case v2i16:
487>       case v2i32:
488>       case v2i64:
489>       case v2f16:
490>       case v2f32:
491>       case v2f64: return 2;
492>       case v1i8:
493>       case v1i16:
494>       case v1i32:
495>       case v1i64:
496>       case v1f32:
497>       case v1f64: return 1;
498455,457d359
499<
500<
501<
502459c361
503<     switch (SimpleTy) {
504---
505>       switch (SimpleTy) {
506473,485c375,404
507<             case i1: case v1i1: return 1;
508<       case i2: case v2i1: case v1i2: return 2;
509<       case i4: case v4i1: case v2i2: case v1i4: return 4;
510<       case i8: case v8i1: case v4i2: case v2i4: case v1i8: return 8;
511<       case i16: case f16: case v16i1: case v8i2: case v4i4: case v2i8: case v1i16: case v1f16: return 16;
512<       case i32: case f32: case v32i1: case v16i2: case v8i4: case v4i8: case v2i16: case v1i32: case v2f16: case v1f32: return 32;
513<       case i64: case f64: case v64i1: case v32i2: case v16i4: case v8i8: case v4i16: case v2i32: case v1i64: case v4f16: case v2f32: case v1f64: return 64;
514<       case i128: case v128i1: case v64i2: case v32i4: case v16i8: case v8i16: case v4i32: case v2i64: case v1i128: case v8f16: case v4f32: case v2f64: return 128;
515<       case i256: case v256i1: case v128i2: case v64i4: case v32i8: case v16i16: case v8i32: case v4i64: case v2i128: case v1i256: case v16f16: case v8f32: case v4f64: return 256;
516<       case i512: case v512i1: case v256i2: case v128i4: case v64i8: case v32i16: case v16i32: case v8i64: case v4i128: case v2i256: case v1i512: case v32f16: case v16f32: case v8f64: return 512;
517<       case i1024: case v1024i1: case v512i2: case v256i4: case v128i8: case v64i16: case v32i32: case v16i64: case v8i128: case v4i256: case v2i512: case v1i1024: case v64f16: case v32f32: case v16f64: return 1024;
518<
519<       case x86mmx: return 64;
520---
521>       case i1  :  return 1;
522>       case v2i1:  return 2;
523>       case v4i1:  return 4;
524>       case i8  :
525>       case v1i8:
526>       case v8i1: return 8;
527>       case i16 :
528>       case f16:
529>       case v16i1:
530>       case v2i8:
531>       case v1i16: return 16;
532>       case f32 :
533>       case i32 :
534>       case v32i1:
535>       case v4i8:
536>       case v2i16:
537>       case v2f16:
538>       case v1f32:
539>       case v1i32: return 32;
540>       case x86mmx:
541>       case f64 :
542>       case i64 :
543>       case v64i1:
544>       case v8i8:
545>       case v4i16:
546>       case v2i32:
547>       case v1i64:
548>       case v4f16:
549>       case v2f32:
550>       case v1f64: return 64;
551488c407,428
552<       case ppcf128: return 128;
553---
554>       case ppcf128:
555>       case i128:
556>       case v16i8:
557>       case v8i16:
558>       case v4i32:
559>       case v2i64:
560>       case v8f16:
561>       case v4f32:
562>       case v2f64: return 128;
563>       case v32i8:
564>       case v16i16:
565>       case v8i32:
566>       case v4i64:
567>       case v8f32:
568>       case v4f64: return 256;
569>       case v64i8:
570>       case v32i16:
571>       case v16i32:
572>       case v8i64:
573>       case v16f32:
574>       case v8f64: return 512;
575>       case v16i64:return 1024;
576492,493d431
577<
578<
579548d485
580<
581553c490
582<             case 1:
583---
584>       case 1:
585555,558d491
586<       case 2:
587<         return MVT::i2;
588<       case 4:
589<         return MVT::i4;
590569,575d501
591<       case 256:
592<         return MVT::i256;
593<       case 512:
594<         return MVT::i512;
595<       case 1024:
596<         return MVT::i1024;
597<
598579,580d504
599<
600<
601586d509
602<         if (NumElements == 1)  return MVT::v1i1;
603590,619c513,515
604<         if (NumElements == 16)  return MVT::v16i1;
605<         if (NumElements == 32)  return MVT::v32i1;
606<         if (NumElements == 64)  return MVT::v64i1;
607<         if (NumElements == 128)  return MVT::v128i1;
608<         if (NumElements == 256)  return MVT::v256i1;
609<         if (NumElements == 512)  return MVT::v512i1;
610<         if (NumElements == 1024)  return MVT::v1024i1;
611<         break;
612<       case MVT::i2:
613<         if (NumElements == 1)  return MVT::v1i2;
614<         if (NumElements == 2)  return MVT::v2i2;
615<         if (NumElements == 4)  return MVT::v4i2;
616<         if (NumElements == 8)  return MVT::v8i2;
617<         if (NumElements == 16)  return MVT::v16i2;
618<         if (NumElements == 32)  return MVT::v32i2;
619<         if (NumElements == 64)  return MVT::v64i2;
620<         if (NumElements == 128)  return MVT::v128i2;
621<         if (NumElements == 256)  return MVT::v256i2;
622<         if (NumElements == 512)  return MVT::v512i2;
623<         break;
624<       case MVT::i4:
625<         if (NumElements == 1)  return MVT::v1i4;
626<         if (NumElements == 2)  return MVT::v2i4;
627<         if (NumElements == 4)  return MVT::v4i4;
628<         if (NumElements == 8)  return MVT::v8i4;
629<         if (NumElements == 16)  return MVT::v16i4;
630<         if (NumElements == 32)  return MVT::v32i4;
631<         if (NumElements == 64)  return MVT::v64i4;
632<         if (NumElements == 128)  return MVT::v128i4;
633<         if (NumElements == 256)  return MVT::v256i4;
634---
635>         if (NumElements == 16) return MVT::v16i1;
636>         if (NumElements == 32) return MVT::v32i1;
637>         if (NumElements == 64) return MVT::v64i1;
638626,629c522,524
639<         if (NumElements == 16)  return MVT::v16i8;
640<         if (NumElements == 32)  return MVT::v32i8;
641<         if (NumElements == 64)  return MVT::v64i8;
642<         if (NumElements == 128)  return MVT::v128i8;
643---
644>         if (NumElements == 16) return MVT::v16i8;
645>         if (NumElements == 32) return MVT::v32i8;
646>         if (NumElements == 64) return MVT::v64i8;
647636,638c531,532
648<         if (NumElements == 16)  return MVT::v16i16;
649<         if (NumElements == 32)  return MVT::v32i16;
650<         if (NumElements == 64)  return MVT::v64i16;
651---
652>         if (NumElements == 16) return MVT::v16i16;
653>         if (NumElements == 32) return MVT::v32i16;
654645,646c539
655<         if (NumElements == 16)  return MVT::v16i32;
656<         if (NumElements == 32)  return MVT::v32i32;
657---
658>         if (NumElements == 16) return MVT::v16i32;
659653,671c546
660<         if (NumElements == 16)  return MVT::v16i64;
661<         break;
662<       case MVT::i128:
663<         if (NumElements == 1)  return MVT::v1i128;
664<         if (NumElements == 2)  return MVT::v2i128;
665<         if (NumElements == 4)  return MVT::v4i128;
666<         if (NumElements == 8)  return MVT::v8i128;
667<         break;
668<       case MVT::i256:
669<         if (NumElements == 1)  return MVT::v1i256;
670<         if (NumElements == 2)  return MVT::v2i256;
671<         if (NumElements == 4)  return MVT::v4i256;
672<         break;
673<       case MVT::i512:
674<         if (NumElements == 1)  return MVT::v1i512;
675<         if (NumElements == 2)  return MVT::v2i512;
676<         break;
677<       case MVT::i1024:
678<         if (NumElements == 1)  return MVT::v1i1024;
679---
680>         if (NumElements == 16) return MVT::v16i64;
681674d548
682<         if (NumElements == 1)  return MVT::v1f16;
683678,680d551
684<         if (NumElements == 16)  return MVT::v16f16;
685<         if (NumElements == 32)  return MVT::v32f16;
686<         if (NumElements == 64)  return MVT::v64f16;
687687,688c558
688<         if (NumElements == 16)  return MVT::v16f32;
689<         if (NumElements == 32)  return MVT::v32f32;
690---
691>         if (NumElements == 16) return MVT::v16f32;
692695d564
693<         if (NumElements == 16)  return MVT::v16f64;
694697d565
695<
696701d568
697<
698diff -r parabix-llvm/include/llvm/CodeGen/ValueTypes.h r209855/include/llvm/CodeGen/ValueTypes.h
699120,124d119
700<     /// isParabixVector - Return true if this is a parabix vector.
701<     bool isParabixVector() const {
702<       return isSimple() && V.isParabixVector();
703<     }
704<
705diff -r parabix-llvm/include/llvm/CodeGen/ValueTypes.td r209855/include/llvm/CodeGen/ValueTypes.td
70623,134c23,83
707< def i1        : ValueType<1 , 1>;
708< def i2        : ValueType<2 , 2>;
709< def i4        : ValueType<4 , 3>;
710< def i8        : ValueType<8 , 4>;
711< def i16       : ValueType<16 , 5>;
712< def i32       : ValueType<32 , 6>;
713< def i64       : ValueType<64 , 7>;
714< def i128      : ValueType<128 , 8>;
715< def i256      : ValueType<256 , 9>;
716< def i512      : ValueType<512 , 10>;
717< def i1024     : ValueType<1024 , 11>;
718<
719< def f16    : ValueType<16 , 12>;   // 16-bit floating point value
720< def f32    : ValueType<32 , 13>;   // 32-bit floating point value
721< def f64    : ValueType<64 , 14>;   // 64-bit floating point value
722< def f80    : ValueType<80 , 15>;   // 80-bit floating point value
723< def f128   : ValueType<128, 16>;   // 128-bit floating point value
724< def ppcf128: ValueType<128, 17>;   // PPC 128-bit floating point value
725<
726< def v1i1      : ValueType<1 , 18>;
727< def v2i1      : ValueType<2 , 19>;
728< def v4i1      : ValueType<4 , 20>;
729< def v8i1      : ValueType<8 , 21>;
730< def v16i1     : ValueType<16 , 22>;
731< def v32i1     : ValueType<32 , 23>;
732< def v64i1     : ValueType<64 , 24>;
733< def v128i1    : ValueType<128 , 25>;
734< def v256i1    : ValueType<256 , 26>;
735< def v512i1    : ValueType<512 , 27>;
736< def v1024i1   : ValueType<1024 , 28>;
737< def v1i2      : ValueType<2 , 29>;
738< def v2i2      : ValueType<4 , 30>;
739< def v4i2      : ValueType<8 , 31>;
740< def v8i2      : ValueType<16 , 32>;
741< def v16i2     : ValueType<32 , 33>;
742< def v32i2     : ValueType<64 , 34>;
743< def v64i2     : ValueType<128 , 35>;
744< def v128i2    : ValueType<256 , 36>;
745< def v256i2    : ValueType<512 , 37>;
746< def v512i2    : ValueType<1024 , 38>;
747< def v1i4      : ValueType<4 , 39>;
748< def v2i4      : ValueType<8 , 40>;
749< def v4i4      : ValueType<16 , 41>;
750< def v8i4      : ValueType<32 , 42>;
751< def v16i4     : ValueType<64 , 43>;
752< def v32i4     : ValueType<128 , 44>;
753< def v64i4     : ValueType<256 , 45>;
754< def v128i4    : ValueType<512 , 46>;
755< def v256i4    : ValueType<1024 , 47>;
756< def v1i8      : ValueType<8 , 48>;
757< def v2i8      : ValueType<16 , 49>;
758< def v4i8      : ValueType<32 , 50>;
759< def v8i8      : ValueType<64 , 51>;
760< def v16i8     : ValueType<128 , 52>;
761< def v32i8     : ValueType<256 , 53>;
762< def v64i8     : ValueType<512 , 54>;
763< def v128i8    : ValueType<1024 , 55>;
764< def v1i16     : ValueType<16 , 56>;
765< def v2i16     : ValueType<32 , 57>;
766< def v4i16     : ValueType<64 , 58>;
767< def v8i16     : ValueType<128 , 59>;
768< def v16i16    : ValueType<256 , 60>;
769< def v32i16    : ValueType<512 , 61>;
770< def v64i16    : ValueType<1024 , 62>;
771< def v1i32     : ValueType<32 , 63>;
772< def v2i32     : ValueType<64 , 64>;
773< def v4i32     : ValueType<128 , 65>;
774< def v8i32     : ValueType<256 , 66>;
775< def v16i32    : ValueType<512 , 67>;
776< def v32i32    : ValueType<1024 , 68>;
777< def v1i64     : ValueType<64 , 69>;
778< def v2i64     : ValueType<128 , 70>;
779< def v4i64     : ValueType<256 , 71>;
780< def v8i64     : ValueType<512 , 72>;
781< def v16i64    : ValueType<1024 , 73>;
782< def v1i128    : ValueType<128 , 74>;
783< def v2i128    : ValueType<256 , 75>;
784< def v4i128    : ValueType<512 , 76>;
785< def v8i128    : ValueType<1024 , 77>;
786< def v1i256    : ValueType<256 , 78>;
787< def v2i256    : ValueType<512 , 79>;
788< def v4i256    : ValueType<1024 , 80>;
789< def v1i512    : ValueType<512 , 81>;
790< def v2i512    : ValueType<1024 , 82>;
791< def v1i1024   : ValueType<1024 , 83>;
792<
793<
794< def v1f16     : ValueType<16 , 84>;
795< def v2f16     : ValueType<32 , 85>;
796< def v4f16     : ValueType<64 , 86>;
797< def v8f16     : ValueType<128 , 87>;
798< def v16f16    : ValueType<256 , 88>;
799< def v32f16    : ValueType<512 , 89>;
800< def v64f16    : ValueType<1024 , 90>;
801< def v1f32     : ValueType<32 , 91>;
802< def v2f32     : ValueType<64 , 92>;
803< def v4f32     : ValueType<128 , 93>;
804< def v8f32     : ValueType<256 , 94>;
805< def v16f32    : ValueType<512 , 95>;
806< def v32f32    : ValueType<1024 , 96>;
807< def v1f64     : ValueType<64 , 97>;
808< def v2f64     : ValueType<128 , 98>;
809< def v4f64     : ValueType<256 , 99>;
810< def v8f64     : ValueType<512 , 100>;
811< def v16f64    : ValueType<1024 , 101>;
812<
813<
814< def x86mmx : ValueType<64 , 102>;   // X86 MMX value
815< def FlagVT : ValueType<0  , 103>;   // Pre-RA sched glue
816< def isVoid : ValueType<0  , 104>;   // Produces no value
817< def untyped: ValueType<8  , 105>;   // Produces an untyped value
818<
819---
820> def i1     : ValueType<1  ,  1>;   // One bit boolean value
821> def i8     : ValueType<8  ,  2>;   // 8-bit integer value
822> def i16    : ValueType<16 ,  3>;   // 16-bit integer value
823> def i32    : ValueType<32 ,  4>;   // 32-bit integer value
824> def i64    : ValueType<64 ,  5>;   // 64-bit integer value
825> def i128   : ValueType<128,  6>;   // 128-bit integer value
826> def f16    : ValueType<16 ,  7>;   // 16-bit floating point value
827> def f32    : ValueType<32 ,  8>;   // 32-bit floating point value
828> def f64    : ValueType<64 ,  9>;   // 64-bit floating point value
829> def f80    : ValueType<80 , 10>;   // 80-bit floating point value
830> def f128   : ValueType<128, 11>;   // 128-bit floating point value
831> def ppcf128: ValueType<128, 12>;   // PPC 128-bit floating point value
832>
833> def v2i1   : ValueType<2 ,  13>;   //  2 x i1  vector value
834> def v4i1   : ValueType<4 ,  14>;   //  4 x i1  vector value
835> def v8i1   : ValueType<8 ,  15>;   //  8 x i1  vector value
836> def v16i1  : ValueType<16,  16>;   // 16 x i1  vector value
837> def v32i1  : ValueType<32 , 17>;   // 32 x i1  vector value
838> def v64i1  : ValueType<64 , 18>;   // 64 x i1  vector value
839> def v1i8   : ValueType<16, 19>;    //  1 x i8  vector value
840> def v2i8   : ValueType<16 , 20>;   //  2 x i8  vector value
841> def v4i8   : ValueType<32 , 21>;   //  4 x i8  vector value
842> def v8i8   : ValueType<64 , 22>;   //  8 x i8  vector value
843> def v16i8  : ValueType<128, 23>;   // 16 x i8  vector value
844> def v32i8  : ValueType<256, 24>;   // 32 x i8 vector value
845> def v64i8  : ValueType<512, 25>;   // 64 x i8 vector value
846> def v1i16  : ValueType<16 , 26>;   //  1 x i16 vector value
847> def v2i16  : ValueType<32 , 27>;   //  2 x i16 vector value
848> def v4i16  : ValueType<64 , 28>;   //  4 x i16 vector value
849> def v8i16  : ValueType<128, 29>;   //  8 x i16 vector value
850> def v16i16 : ValueType<256, 30>;   // 16 x i16 vector value
851> def v32i16 : ValueType<512, 31>;   // 32 x i16 vector value
852> def v1i32  : ValueType<32 , 32>;   //  1 x i32 vector value
853> def v2i32  : ValueType<64 , 33>;   //  2 x i32 vector value
854> def v4i32  : ValueType<128, 34>;   //  4 x i32 vector value
855> def v8i32  : ValueType<256, 35>;   //  8 x i32 vector value
856> def v16i32 : ValueType<512, 36>;   // 16 x i32 vector value
857> def v1i64  : ValueType<64 , 37>;   //  1 x i64 vector value
858> def v2i64  : ValueType<128, 38>;   //  2 x i64 vector value
859> def v4i64  : ValueType<256, 39>;   //  4 x i64 vector value
860> def v8i64  : ValueType<512, 40>;   //  8 x i64 vector value
861> def v16i64 : ValueType<1024,41>;   // 16 x i64 vector value
862>
863> def v2f16  : ValueType<32 , 42>;   //  2 x f16 vector value
864> def v4f16  : ValueType<64 , 43>;   //  4 x f16 vector value
865> def v8f16  : ValueType<128, 44>;   //  8 x f16 vector value
866> def v1f32  : ValueType<32 , 45>;   //  1 x f32 vector value
867> def v2f32  : ValueType<64 , 46>;   //  2 x f32 vector value
868> def v4f32  : ValueType<128, 47>;   //  4 x f32 vector value
869> def v8f32  : ValueType<256, 48>;   //  8 x f32 vector value
870> def v16f32 : ValueType<512, 49>;   // 16 x f32 vector value
871> def v1f64  : ValueType<64, 50>;    //  1 x f64 vector value
872> def v2f64  : ValueType<128, 51>;   //  2 x f64 vector value
873> def v4f64  : ValueType<256, 52>;   //  4 x f64 vector value
874> def v8f64  : ValueType<512, 53>;   //  8 x f64 vector value
875>
876>
877> def x86mmx : ValueType<64 , 54>;   // X86 MMX value
878> def FlagVT : ValueType<0  , 55>;   // Pre-RA sched glue
879> def isVoid : ValueType<0  , 56>;   // Produces no value
880> def untyped: ValueType<8  , 57>;   // Produces an untyped value
881diff -r parabix-llvm/include/llvm/IR/Intrinsics.td r209855/include/llvm/IR/Intrinsics.td
882437,440d436
883< def int_uadd_with_overflow_carryin : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
884<                                        [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
885<                                        [IntrNoMem]>;
886<
887diff -r parabix-llvm/include/llvm/IR/IntrinsicsX86.td r209855/include/llvm/IR/IntrinsicsX86.td
8881306a1307,1315
889>   def int_x86_avx_vbroadcast_ss :
890>         GCCBuiltin<"__builtin_ia32_vbroadcastss">,
891>         Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
892>   def int_x86_avx_vbroadcast_sd_256 :
893>         GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
894>         Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
895>   def int_x86_avx_vbroadcast_ss_256 :
896>         GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
897>         Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
898diff -r parabix-llvm/include/llvm/Target/TargetLowering.h r209855/include/llvm/Target/TargetLowering.h
899465,470d464
900<     //FIXME: Parabix hack here, i2/i4 is not extended now, but it
901<     //seems LLVM has an assumption that i2/i4 will always expand.
902<     if (VT.getSimpleVT() == MVT::i2 ||
903<         VT.getSimpleVT() == MVT::i4)
904<       return Expand;
905<
906478,492d471
907<   /// Parabix: similar to getOperationAction, but always use
908<   /// operand[0] as the VT here.
909<   LegalizeAction getOperand0Action(ISD::NodeType Op, EVT VT) const {
910<     // FIXME: we only use operand0action for simpleTy for now.
911<     if (VT.isExtended()) return Legal;
912<
913<     Operand0ActionsKeyTy key = std::make_pair(VT.getSimpleVT(), Op);
914<     auto iter = Operand0Actions.find(key);
915<     if (iter == Operand0Actions.end()) {
916<       return Legal;
917<     } else {
918<       return iter->second;
919<     }
920<   }
921<
9221098,1111d1076
923<   /// Parabix
924<   /// Indicate that the specified operation does not work with the
925<   /// specified operand type, and indicate what to do about it.
926<   void setOperand0Action(ISD::NodeType Op, MVT VT,
927<                         LegalizeAction Action) {
928<     assert(Op < array_lengthof(OpActions[0]) && "Op too large for the table");
929<     Operand0ActionsKeyTy key = std::make_pair(VT, Op);
930<     Operand0Actions[key] = Action;
931<   }
932<
933<   void resetOperand0Action() {
934<     Operand0Actions.clear();
935<   }
936<
9371599,1602d1563
938<
939<   /// Same with OpActions, but for Operand(0)
940<   typedef std::pair<MVT, ISD::NodeType> Operand0ActionsKeyTy;
941<   std::map<Operand0ActionsKeyTy, LegalizeAction> Operand0Actions;
942diff -r parabix-llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp r209855/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
9431292,1298c1292
944<       // Parabix: test if we want to custom it with operand0 action.
945<       if (Node->getNumOperands() > 0) {
946<         Action = TLI.getOperand0Action((ISD::NodeType) Node->getOpcode(), Node->getOperand(0).getValueType());
947<         if (Action == TargetLowering::Legal)
948<           Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
949<       } else
950<         Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
951---
952>       Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
9531389,1390d1382
954<
955<
9561868c1860
957<
958---
959>         
960diff -r parabix-llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp r209855/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
9615326,5334d5325
962<   case Intrinsic::uadd_with_overflow_carryin: {
963<     SDValue Op1 = getValue(I.getArgOperand(0));
964<     SDValue Op2 = getValue(I.getArgOperand(1));
965<     SDValue Op3 = getValue(I.getArgOperand(2));
966<
967<     SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
968<     setValue(&I, DAG.getNode(ISD::UADDE, sdl, VTs, Op1, Op2, Op3));
969<     return nullptr;
970<   }
971diff -r parabix-llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp r209855/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
972208d207
973<   case ISD::UADDE:                      return "uadde";
974diff -r parabix-llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp r209855/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
9752468c2468
976<         NodeToMatch->dumprFull(CurDAG);
977---
978>         NodeToMatch->dump(CurDAG);
979diff -r parabix-llvm/lib/CodeGen/TargetLoweringBase.cpp r209855/lib/CodeGen/TargetLoweringBase.cpp
980386c386
981<
982---
983>   
9841100,1101c1100
985<             isTypeLegal(SVT) && SVT.getScalarType().isInteger() &&
986<             !SVT.isParabixVector()) {
987---
988>             isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
9891118c1117
990<             isTypeLegal(SVT) && !SVT.isParabixVector()) {
991---
992>             isTypeLegal(SVT)) {
993diff -r parabix-llvm/lib/IR/AutoUpgrade.cpp r209855/lib/IR/AutoUpgrade.cpp
994117,119d116
995<         Name == "x86.avx.vbroadcast.ss" ||
996<         Name == "x86.avx.vbroadcast.ss.256" ||
997<         Name == "x86.avx.vbroadcast.sd.256" ||
998341,353d337
999<     } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
1000<       // Replace broadcasts with a series of insertelements.
1001<       Type *VecTy = CI->getType();
1002<       Type *EltTy = VecTy->getVectorElementType();
1003<       unsigned EltNum = VecTy->getVectorNumElements();
1004<       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1005<                                           EltTy->getPointerTo());
1006<       Value *Load = Builder.CreateLoad(Cast);
1007<       Type *I32Ty = Type::getInt32Ty(C);
1008<       Rep = UndefValue::get(VecTy);
1009<       for (unsigned I = 0; I < EltNum; ++I)
1010<         Rep = Builder.CreateInsertElement(Rep, Load,
1011<                                           ConstantInt::get(I32Ty, I));
1012diff -r parabix-llvm/lib/IR/LLVMContextImpl.cpp r209855/lib/IR/LLVMContextImpl.cpp
101374c74
1014<
1015---
1016>   
101795c95
1018<
1019---
1020>   
1021diff -r parabix-llvm/lib/IR/LLVMContextImpl.h r209855/lib/IR/LLVMContextImpl.h
102210c10
1023< //  This file declares LLVMContextImpl, the opaque implementation
1024---
1025> //  This file declares LLVMContextImpl, the opaque implementation
102686c86
1027<   static inline KeyTy getEmptyKey() {
1028---
1029>   static inline KeyTy getEmptyKey() {
103089,90c89,90
1031<   static inline KeyTy getTombstoneKey() {
1032<     return KeyTy(APFloat(APFloat::Bogus,2));
1033---
1034>   static inline KeyTy getTombstoneKey() {
1035>     return KeyTy(APFloat(APFloat::Bogus,2));
1036218c218
1037<
1038---
1039>   
1040227c227
1041<
1042---
1043>   
1044235c235
1045<
1046---
1047>   
1048241c241
1049<
1050---
1051>   
1052254,255c254,255
1053<
1054<   typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
1055---
1056>   
1057>   typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
1058272c272
1059<
1060---
1061>   
1062277c277
1063<
1064---
1065>   
1066280c280
1067<
1068---
1069>   
1070283c283
1071<
1072---
1073>   
1074287c287
1075<
1076---
1077>   
1078297c297
1079<
1080---
1081>   
1082300c300
1083<
1084---
1085>   
1086302c302
1087<
1088---
1089>   
1090308c308
1091<
1092---
1093>   
1094312c312
1095<
1096---
1097>   
1098314c314
1099<
1100---
1101>   
1102321c321
1103<
1104---
1105>     
1106333c333
1107<
1108---
1109>   
1110336c336
1111<
1112---
1113>   
1114343c343
1115<
1116---
1117>   
1118347c347
1119<
1120---
1121>   
1122352c352
1123<
1124---
1125>   
1126356c356
1127<
1128---
1129>   
1130379c379
1131<
1132---
1133>   
1134diff -r parabix-llvm/lib/IR/Type.cpp r209855/lib/IR/Type.cpp
113567c67
1136<   if (this == Ty)
1137---
1138>   if (this == Ty)
113969c69
1140<
1141---
1142>   
1143302c302
1144<
1145---
1146>   
1147310c310
1148<   default:
1149---
1150>   default:
1151313c313
1152<
1153---
1154>   
1155318c318
1156<
1157---
1158>   
1159399c399
1160< StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
1161---
1162> StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
1163422c422
1164<
1165---
1166>   
1167430c430
1168<
1169---
1170>   
1171455c455
1172<
1173---
1174>   
1175458c458
1176<
1177---
1178>   
1179465c465
1180<
1181---
1182>   
1183470c470
1184<
1185---
1186>       
1187605c605
1188<
1189---
1190>   
1191609c609
1192<
1193---
1194>   
1195639c639
1196<
1197---
1198>   
1199679c679
1200<
1201---
1202>     
1203681c681
1204<   ArrayType *&Entry =
1205---
1206>   ArrayType *&Entry =
1207708c708
1208<
1209---
1210>   
1211730c730
1212<
1213---
1214>   
1215732c732
1216<
1217---
1218>   
1219diff -r parabix-llvm/lib/IR/ValueTypes.cpp r209855/lib/IR/ValueTypes.cpp
122020d19
1221< #include "llvm/Support/Debug.h"
1222184d182
1223<     dbgs() << "EVT String: " << getEVTString() << '\n';
1224189,190d186
1225<   case MVT::i2:      return Type::getIntNTy(Context, 2);
1226<   case MVT::i4:      return Type::getIntNTy(Context, 4);
1227196,198d191
1228<   case MVT::i256:    return IntegerType::get(Context, 256);
1229<   case MVT::i512:    return IntegerType::get(Context, 512);
1230<   case MVT::i1024:   return IntegerType::get(Context, 1024);
1231206,216c199,201
1232<
1233<   case MVT::v1i1:   return VectorType::get(Type::getInt1Ty(Context), 1);
1234<   case MVT::v2i1:   return VectorType::get(Type::getInt1Ty(Context), 2);
1235<   case MVT::v1i2:   return VectorType::get(Type::getIntNTy(Context, 2), 1);
1236<   case MVT::v4i1:   return VectorType::get(Type::getInt1Ty(Context), 4);
1237<   case MVT::v2i2:   return VectorType::get(Type::getIntNTy(Context, 2), 2);
1238<   case MVT::v1i4:   return VectorType::get(Type::getIntNTy(Context, 4), 1);
1239<   case MVT::v8i1:   return VectorType::get(Type::getInt1Ty(Context), 8);
1240<   case MVT::v4i2:   return VectorType::get(Type::getIntNTy(Context, 2), 4);
1241<   case MVT::v2i4:   return VectorType::get(Type::getIntNTy(Context, 4), 2);
1242<   case MVT::v1i8:   return VectorType::get(Type::getInt8Ty(Context), 1);
1243---
1244>   case MVT::v2i1:    return VectorType::get(Type::getInt1Ty(Context), 2);
1245>   case MVT::v4i1:    return VectorType::get(Type::getInt1Ty(Context), 4);
1246>   case MVT::v8i1:    return VectorType::get(Type::getInt1Ty(Context), 8);
1247218,221d202
1248<   case MVT::v8i2:   return VectorType::get(Type::getIntNTy(Context, 2), 8);
1249<   case MVT::v4i4:   return VectorType::get(Type::getIntNTy(Context, 4), 4);
1250<   case MVT::v2i8:   return VectorType::get(Type::getInt8Ty(Context), 2);
1251<   case MVT::v1i16:   return VectorType::get(Type::getInt16Ty(Context), 1);
1252223,227d203
1253<   case MVT::v16i2:   return VectorType::get(Type::getIntNTy(Context, 2), 16);
1254<   case MVT::v8i4:   return VectorType::get(Type::getIntNTy(Context, 4), 8);
1255<   case MVT::v4i8:   return VectorType::get(Type::getInt8Ty(Context), 4);
1256<   case MVT::v2i16:   return VectorType::get(Type::getInt16Ty(Context), 2);
1257<   case MVT::v1i32:   return VectorType::get(Type::getInt32Ty(Context), 1);
1258229,237c205,208
1259<   case MVT::v32i2:   return VectorType::get(Type::getIntNTy(Context, 2), 32);
1260<   case MVT::v16i4:   return VectorType::get(Type::getIntNTy(Context, 4), 16);
1261<   case MVT::v8i8:   return VectorType::get(Type::getInt8Ty(Context), 8);
1262<   case MVT::v4i16:   return VectorType::get(Type::getInt16Ty(Context), 4);
1263<   case MVT::v2i32:   return VectorType::get(Type::getInt32Ty(Context), 2);
1264<   case MVT::v1i64:   return VectorType::get(Type::getInt64Ty(Context), 1);
1265<   case MVT::v128i1:   return VectorType::get(Type::getInt1Ty(Context), 128);
1266<   case MVT::v64i2:   return VectorType::get(Type::getIntNTy(Context, 2), 64);
1267<   case MVT::v32i4:   return VectorType::get(Type::getIntNTy(Context, 4), 32);
1268---
1269>   case MVT::v1i8:    return VectorType::get(Type::getInt8Ty(Context), 1);
1270>   case MVT::v2i8:    return VectorType::get(Type::getInt8Ty(Context), 2);
1271>   case MVT::v4i8:    return VectorType::get(Type::getInt8Ty(Context), 4);
1272>   case MVT::v8i8:    return VectorType::get(Type::getInt8Ty(Context), 8);
1273238a210,214
1274>   case MVT::v32i8:   return VectorType::get(Type::getInt8Ty(Context), 32);
1275>   case MVT::v64i8:   return VectorType::get(Type::getInt8Ty(Context), 64);
1276>   case MVT::v1i16:   return VectorType::get(Type::getInt16Ty(Context), 1);
1277>   case MVT::v2i16:   return VectorType::get(Type::getInt16Ty(Context), 2);
1278>   case MVT::v4i16:   return VectorType::get(Type::getInt16Ty(Context), 4);
1279239a216,219
1280>   case MVT::v16i16:  return VectorType::get(Type::getInt16Ty(Context), 16);
1281>   case MVT::v32i16:  return VectorType::get(Type::getInt16Ty(Context), 32);
1282>   case MVT::v1i32:   return VectorType::get(Type::getInt32Ty(Context), 1);
1283>   case MVT::v2i32:   return VectorType::get(Type::getInt32Ty(Context), 2);
1284241,247d220
1285<   case MVT::v2i64:   return VectorType::get(Type::getInt64Ty(Context), 2);
1286<   case MVT::v1i128:   return VectorType::get(Type::getIntNTy(Context, 128), 1);
1287<   case MVT::v256i1:   return VectorType::get(Type::getInt1Ty(Context), 256);
1288<   case MVT::v128i2:   return VectorType::get(Type::getIntNTy(Context, 2), 128);
1289<   case MVT::v64i4:   return VectorType::get(Type::getIntNTy(Context, 4), 64);
1290<   case MVT::v32i8:   return VectorType::get(Type::getInt8Ty(Context), 32);
1291<   case MVT::v16i16:   return VectorType::get(Type::getInt16Ty(Context), 16);
1292248a222,224
1293>   case MVT::v16i32:  return VectorType::get(Type::getInt32Ty(Context), 16);
1294>   case MVT::v1i64:   return VectorType::get(Type::getInt64Ty(Context), 1);
1295>   case MVT::v2i64:   return VectorType::get(Type::getInt64Ty(Context), 2);
1296250,257d225
1297<   case MVT::v2i128:   return VectorType::get(Type::getIntNTy(Context, 128), 2);
1298<   case MVT::v1i256:   return VectorType::get(Type::getIntNTy(Context, 256), 1);
1299<   case MVT::v512i1:   return VectorType::get(Type::getInt1Ty(Context), 512);
1300<   case MVT::v256i2:   return VectorType::get(Type::getIntNTy(Context, 2), 256);
1301<   case MVT::v128i4:   return VectorType::get(Type::getIntNTy(Context, 4), 128);
1302<   case MVT::v64i8:   return VectorType::get(Type::getInt8Ty(Context), 64);
1303<   case MVT::v32i16:   return VectorType::get(Type::getInt16Ty(Context), 32);
1304<   case MVT::v16i32:   return VectorType::get(Type::getInt32Ty(Context), 16);
1305259,274c227
1306<   case MVT::v4i128:   return VectorType::get(Type::getIntNTy(Context, 128), 4);
1307<   case MVT::v2i256:   return VectorType::get(Type::getIntNTy(Context, 256), 2);
1308<   case MVT::v1i512:   return VectorType::get(Type::getIntNTy(Context, 512), 1);
1309<   case MVT::v1024i1:   return VectorType::get(Type::getInt1Ty(Context), 1024);
1310<   case MVT::v512i2:   return VectorType::get(Type::getIntNTy(Context, 2), 512);
1311<   case MVT::v256i4:   return VectorType::get(Type::getIntNTy(Context, 4), 256);
1312<   case MVT::v128i8:   return VectorType::get(Type::getInt8Ty(Context), 128);
1313<   case MVT::v64i16:   return VectorType::get(Type::getInt16Ty(Context), 64);
1314<   case MVT::v32i32:   return VectorType::get(Type::getInt32Ty(Context), 32);
1315<   case MVT::v16i64:   return VectorType::get(Type::getInt64Ty(Context), 16);
1316<   case MVT::v8i128:   return VectorType::get(Type::getIntNTy(Context, 128), 8);
1317<   case MVT::v4i256:   return VectorType::get(Type::getIntNTy(Context, 256), 4);
1318<   case MVT::v2i512:   return VectorType::get(Type::getIntNTy(Context, 512), 2);
1319<   case MVT::v1i1024:   return VectorType::get(Type::getIntNTy(Context, 1024), 1);
1320<
1321<   case MVT::v1f16:   return VectorType::get(Type::getHalfTy(Context), 1);
1322---
1323>   case MVT::v16i64:  return VectorType::get(Type::getInt64Ty(Context), 16);
1324276d228
1325<   case MVT::v1f32:   return VectorType::get(Type::getFloatTy(Context), 1);
1326278,279d229
1327<   case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
1328<   case MVT::v1f64:   return VectorType::get(Type::getDoubleTy(Context), 1);
1329280a231,232
1330>   case MVT::v1f32:   return VectorType::get(Type::getFloatTy(Context), 1);
1331>   case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
1332282,283d233
1333<   case MVT::v2f64:   return VectorType::get(Type::getDoubleTy(Context), 2);
1334<   case MVT::v16f16:   return VectorType::get(Type::getHalfTy(Context), 16);
1335285,286d234
1336<   case MVT::v4f64:   return VectorType::get(Type::getDoubleTy(Context), 4);
1337<   case MVT::v32f16:   return VectorType::get(Type::getHalfTy(Context), 32);
1338288,293c236,239
1339<   case MVT::v8f64:   return VectorType::get(Type::getDoubleTy(Context), 8);
1340<   case MVT::v64f16:   return VectorType::get(Type::getHalfTy(Context), 64);
1341<   case MVT::v32f32:   return VectorType::get(Type::getFloatTy(Context), 32);
1342<   case MVT::v16f64:   return VectorType::get(Type::getDoubleTy(Context), 16);
1343<
1344<
1345---
1346>   case MVT::v1f64:   return VectorType::get(Type::getDoubleTy(Context), 1);
1347>   case MVT::v2f64:   return VectorType::get(Type::getDoubleTy(Context), 2);
1348>   case MVT::v4f64:   return VectorType::get(Type::getDoubleTy(Context), 4);
1349>   case MVT::v8f64:   return VectorType::get(Type::getDoubleTy(Context), 8);
1350diff -r parabix-llvm/lib/Target/X86/CMakeLists.txt r209855/lib/Target/X86/CMakeLists.txt
135123d22
1352<   X86ParabixISelLowering.cpp
1353Only in parabix-llvm/lib/Target/X86: ParabixGeneratedFuncs.h
1354diff -r parabix-llvm/lib/Target/X86/X86CallingConv.td r209855/lib/Target/X86/X86CallingConv.td
135540,43d39
1356<   //Parabix
1357<   CCIfType<[v32i1], CCAssignToReg<[EAX, EDX, ECX]>>,
1358<   CCIfType<[v64i1], CCAssignToReg<[RAX, RDX, RCX]>>,
1359<
136050,53d45
1361<   //Parabix 128-bit vectors
1362<   CCIfType<[v64i2, v32i4, v128i1],
1363<             CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
1364<
1365101,103d92
1366<   //Parabix
1367<   CCIfType<[v32i1], CCAssignToReg<[EAX, EDX, ECX]>>,
1368<
1369244,247d232
1370<   // Parabix
1371<   CCIfType<[v32i1], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
1372<   CCIfType<[v64i1], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
1373<
1374259,263d243
1375<   //Parabix
1376<   CCIfType<[v64i2, v32i4, v128i1],
1377<             CCIfSubtarget<"hasSSE2()",
1378<             CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
1379<
1380281c261
1381<   CCIfType<[i32, i64, v32i1, v64i1, f32, f64], CCAssignToStack<8, 8>>,
1382---
1383>   CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
1384288c268
1385<   CCIfType<[v16i8, v8i16, v4i32, v2i64, v64i2, v32i4, v4f32, v2f64, v128i1], CCAssignToStack<16, 16>>,
1386---
1387>   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
1388311c291
1389<   CCIfType<[v16i8, v8i16, v4i32, v2i64, v32i4, v64i2, v4f32, v2f64, v128i1], CCPassIndirect<i64>>,
1390---
1391>   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
1392337c317
1393<   CCIfType<[f32, f64, v32i4, v64i2, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v128i1],
1394---
1395>   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
1396358,360d337
1397<   CCIfType<[v64i1],
1398<             CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
1399<
1400362c339
1401<   CCIfType<[f32, f64, v64i2, v32i4, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v128i1],
1402---
1403>   CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
1404373d349
1405<   CCIfType<[v64i1], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>,
1406439c415
1407<   CCIfNotVarArg<CCIfType<[v64i2, v32i4, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v128i1],
1408---
1409>   CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
1410448c424
1411<   CCIfType<[v64i2, v32i4, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v128i1], CCAssignToStack<16, 16>>,
1412---
1413>   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
1414diff -r parabix-llvm/lib/Target/X86/X86InstrFragmentsSIMD.td r209855/lib/Target/X86/X86InstrFragmentsSIMD.td
1415227,230d226
1416< def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>;
1417< def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
1418< def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
1419<
1420diff -r parabix-llvm/lib/Target/X86/X86InstrInfo.cpp r209855/lib/Target/X86/X86InstrInfo.cpp
14211614d1613
1422<   {
14231618d1616
1424<   }
14253998c3996
1426<   case X86::KSET0B:
1427---
1428>   case X86::KSET0B:
1429diff -r parabix-llvm/lib/Target/X86/X86InstrInfo.td r209855/lib/Target/X86/X86InstrInfo.td
14302007c2007
1431<               (X86cmp GR16:$src, (i16 0))),
1432---
1433>               (X86cmp GR16:$src, (i16 0))),
14342026c2026
1435<               (X86cmp (loadi16 addr:$src), (i16 0))),
1436---
1437>               (X86cmp (loadi16 addr:$src), (i16 0))),
14382029c2029
1439<               (X86cmp (loadi32 addr:$src), (i32 0))),
1440---
1441>               (X86cmp (loadi32 addr:$src), (i32 0))),
14422032c2032
1443<               (X86cmp (loadi64 addr:$src), (i64 0))),
1444---
1445>               (X86cmp (loadi64 addr:$src), (i64 0))),
14462035c2035
1447<               (X86cmp (loadi16 addr:$src), (i16 0))),
1448---
1449>               (X86cmp (loadi16 addr:$src), (i16 0))),
14502038c2038
1451<               (X86cmp (loadi32 addr:$src), (i32 0))),
1452---
1453>               (X86cmp (loadi32 addr:$src), (i32 0))),
14542041c2041
1455<               (X86cmp (loadi64 addr:$src), (i64 0))),
1456---
1457>               (X86cmp (loadi64 addr:$src), (i64 0))),
14582142c2142
1459<               (X86cmp (loadi16 addr:$src), (i16 0))),
1460---
1461>               (X86cmp (loadi16 addr:$src), (i16 0))),
14622145c2145
1463<               (X86cmp (loadi32 addr:$src), (i32 0))),
1464---
1465>               (X86cmp (loadi32 addr:$src), (i32 0))),
14662148c2148
1467<               (X86cmp (loadi64 addr:$src), (i64 0))),
1468---
1469>               (X86cmp (loadi64 addr:$src), (i64 0))),
14702151c2151
1471<               (X86cmp (loadi16 addr:$src), (i16 0))),
1472---
1473>               (X86cmp (loadi16 addr:$src), (i16 0))),
14742154c2154
1475<               (X86cmp (loadi32 addr:$src), (i32 0))),
1476---
1477>               (X86cmp (loadi32 addr:$src), (i32 0))),
14782157c2157
1479<               (X86cmp (loadi64 addr:$src), (i64 0))),
1480---
1481>               (X86cmp (loadi64 addr:$src), (i64 0))),
14822221,2270d2220
1483<
1484< //TODO: isn't this redundant???
1485< //Parabix: bit convert v32i1 to i32, v64i1 to i64
1486< def : Pat <(i32 (bitconvert (v32i1 GR32X:$src))), (i32 GR32X:$src)>;
1487< def : Pat <(i64 (bitconvert (v64i1 GR64X:$src))), (i64 GR64X:$src)>;
1488< //Parabix: bitconvert from v64i2 vector
1489< def : Pat <(v16i8 (bitconvert (v64i2 VR128PX:$src))), (v16i8 VR128PX:$src)>;
1490< def : Pat <(v8i16 (bitconvert (v64i2 VR128PX:$src))), (v8i16 VR128PX:$src)>;
1491< def : Pat <(v4i32 (bitconvert (v64i2 VR128PX:$src))), (v4i32 VR128PX:$src)>;
1492< def : Pat <(v2i64 (bitconvert (v64i2 VR128PX:$src))), (v2i64 VR128PX:$src)>;
1493< def : Pat <(v4f32 (bitconvert (v64i2 VR128PX:$src))), (v4f32 VR128PX:$src)>;
1494< def : Pat <(v2f64 (bitconvert (v64i2 VR128PX:$src))), (v2f64 VR128PX:$src)>;
1495< def : Pat <(v64i2 (bitconvert (v2i64 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1496< def : Pat <(v64i2 (bitconvert (v4i32 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1497< def : Pat <(v64i2 (bitconvert (v8i16 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1498< def : Pat <(v64i2 (bitconvert (v16i8 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1499< def : Pat <(v64i2 (bitconvert (v2f64 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1500< def : Pat <(v64i2 (bitconvert (v4f32 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1501< //Parabix bitconvert from/to v32i4 vector
1502< def : Pat <(v64i2 (bitconvert (v32i4 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1503< def : Pat <(v16i8 (bitconvert (v32i4 VR128PX:$src))), (v16i8 VR128PX:$src)>;
1504< def : Pat <(v8i16 (bitconvert (v32i4 VR128PX:$src))), (v8i16 VR128PX:$src)>;
1505< def : Pat <(v4i32 (bitconvert (v32i4 VR128PX:$src))), (v4i32 VR128PX:$src)>;
1506< def : Pat <(v2i64 (bitconvert (v32i4 VR128PX:$src))), (v2i64 VR128PX:$src)>;
1507< def : Pat <(v4f32 (bitconvert (v32i4 VR128PX:$src))), (v4f32 VR128PX:$src)>;
1508< def : Pat <(v2f64 (bitconvert (v32i4 VR128PX:$src))), (v2f64 VR128PX:$src)>;
1509< def : Pat <(v32i4 (bitconvert (v2i64 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1510< def : Pat <(v32i4 (bitconvert (v4i32 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1511< def : Pat <(v32i4 (bitconvert (v8i16 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1512< def : Pat <(v32i4 (bitconvert (v16i8 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1513< def : Pat <(v32i4 (bitconvert (v64i2 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1514< def : Pat <(v32i4 (bitconvert (v2f64 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1515< def : Pat <(v32i4 (bitconvert (v4f32 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1516< //Parabix bitconvert from/to v128i1 vector
1517< def : Pat <(v64i2 (bitconvert (v128i1 VR128PX:$src))), (v64i2 VR128PX:$src)>;
1518< def : Pat <(v32i4 (bitconvert (v128i1 VR128PX:$src))), (v32i4 VR128PX:$src)>;
1519< def : Pat <(v16i8 (bitconvert (v128i1 VR128PX:$src))), (v16i8 VR128PX:$src)>;
1520< def : Pat <(v8i16 (bitconvert (v128i1 VR128PX:$src))), (v8i16 VR128PX:$src)>;
1521< def : Pat <(v4i32 (bitconvert (v128i1 VR128PX:$src))), (v4i32 VR128PX:$src)>;
1522< def : Pat <(v2i64 (bitconvert (v128i1 VR128PX:$src))), (v2i64 VR128PX:$src)>;
1523< def : Pat <(v4f32 (bitconvert (v128i1 VR128PX:$src))), (v4f32 VR128PX:$src)>;
1524< def : Pat <(v2f64 (bitconvert (v128i1 VR128PX:$src))), (v2f64 VR128PX:$src)>;
1525< def : Pat <(v128i1 (bitconvert (v2i64 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1526< def : Pat <(v128i1 (bitconvert (v4i32 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1527< def : Pat <(v128i1 (bitconvert (v8i16 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1528< def : Pat <(v128i1 (bitconvert (v16i8 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1529< def : Pat <(v128i1 (bitconvert (v32i4 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1530< def : Pat <(v128i1 (bitconvert (v64i2 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1531< def : Pat <(v128i1 (bitconvert (v2f64 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1532< def : Pat <(v128i1 (bitconvert (v4f32 VR128PX:$src))), (v128i1 VR128PX:$src)>;
1533diff -r parabix-llvm/lib/Target/X86/X86InstrSSE.td r209855/lib/Target/X86/X86InstrSSE.td
15344339a4340,4353
1535> // SSE2 - Packed Integer Pack Instructions
1536> //===---------------------------------------------------------------------===//
1537>
1538> defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
1539>                                   int_x86_avx2_packsswb,
1540>                                   SSE_INTALU_ITINS_SHUFF_P, 0>;
1541> defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
1542>                                   int_x86_avx2_packssdw,
1543>                                   SSE_INTALU_ITINS_SHUFF_P, 0>;
1544> defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
1545>                                   int_x86_avx2_packuswb,
1546>                                   SSE_INTALU_ITINS_SHUFF_P, 0>;
1547>
1548> //===---------------------------------------------------------------------===//
15494421,4550d4434
1550< // Packed Integer Pack Instructions (SSE & AVX)
1551< //===---------------------------------------------------------------------===//
1552<
1553< let ExeDomain = SSEPackedInt in {
1554< multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
1555<                      ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
1556<                      bit Is2Addr = 1> {
1557<   def rr : PDI<opc, MRMSrcReg,
1558<                (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1559<                !if(Is2Addr,
1560<                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1561<                    !strconcat(OpcodeStr,
1562<                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1563<                [(set VR128:$dst,
1564<                      (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
1565<                Sched<[WriteShuffle]>;
1566<   def rm : PDI<opc, MRMSrcMem,
1567<                (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
1568<                !if(Is2Addr,
1569<                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1570<                    !strconcat(OpcodeStr,
1571<                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1572<                [(set VR128:$dst,
1573<                      (OutVT (OpNode VR128:$src1,
1574<                                     (bc_frag (memopv2i64 addr:$src2)))))]>,
1575<                Sched<[WriteShuffleLd, ReadAfterLd]>;
1576< }
1577<
1578< multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
1579<                        ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
1580<   def Yrr : PDI<opc, MRMSrcReg,
1581<                 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
1582<                 !strconcat(OpcodeStr,
1583<                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1584<                 [(set VR256:$dst,
1585<                       (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
1586<                 Sched<[WriteShuffle]>;
1587<   def Yrm : PDI<opc, MRMSrcMem,
1588<                 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
1589<                 !strconcat(OpcodeStr,
1590<                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1591<                 [(set VR256:$dst,
1592<                       (OutVT (OpNode VR256:$src1,
1593<                                      (bc_frag (memopv4i64 addr:$src2)))))]>,
1594<                 Sched<[WriteShuffleLd, ReadAfterLd]>;
1595< }
1596<
1597< multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
1598<                      ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
1599<                      bit Is2Addr = 1> {
1600<   def rr : SS48I<opc, MRMSrcReg,
1601<                  (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1602<                  !if(Is2Addr,
1603<                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1604<                      !strconcat(OpcodeStr,
1605<                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1606<                  [(set VR128:$dst,
1607<                        (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
1608<                  Sched<[WriteShuffle]>;
1609<   def rm : SS48I<opc, MRMSrcMem,
1610<                  (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
1611<                  !if(Is2Addr,
1612<                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
1613<                      !strconcat(OpcodeStr,
1614<                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1615<                  [(set VR128:$dst,
1616<                        (OutVT (OpNode VR128:$src1,
1617<                                       (bc_frag (memopv2i64 addr:$src2)))))]>,
1618<                  Sched<[WriteShuffleLd, ReadAfterLd]>;
1619< }
1620<
1621< multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
1622<                      ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
1623<   def Yrr : SS48I<opc, MRMSrcReg,
1624<                   (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
1625<                   !strconcat(OpcodeStr,
1626<                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1627<                   [(set VR256:$dst,
1628<                         (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
1629<                   Sched<[WriteShuffle]>;
1630<   def Yrm : SS48I<opc, MRMSrcMem,
1631<                   (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
1632<                   !strconcat(OpcodeStr,
1633<                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1634<                   [(set VR256:$dst,
1635<                         (OutVT (OpNode VR256:$src1,
1636<                                        (bc_frag (memopv4i64 addr:$src2)))))]>,
1637<                   Sched<[WriteShuffleLd, ReadAfterLd]>;
1638< }
1639<
1640< let Predicates = [HasAVX] in {
1641<   defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
1642<                              bc_v8i16, 0>, VEX_4V;
1643<   defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
1644<                              bc_v4i32, 0>, VEX_4V;
1645<
1646<   defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
1647<                              bc_v8i16, 0>, VEX_4V;
1648<   defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
1649<                              bc_v4i32, 0>, VEX_4V;
1650< }
1651<
1652< let Predicates = [HasAVX2] in {
1653<   defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
1654<                                bc_v16i16>, VEX_4V, VEX_L;
1655<   defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
1656<                                bc_v8i32>, VEX_4V, VEX_L;
1657<
1658<   defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
1659<                                bc_v16i16>, VEX_4V, VEX_L;
1660<   defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
1661<                                bc_v8i32>, VEX_4V, VEX_L;
1662< }
1663<
1664< let Constraints = "$src1 = $dst" in {
1665<   defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
1666<                             bc_v8i16>;
1667<   defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
1668<                             bc_v4i32>;
1669<
1670<   defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
1671<                             bc_v8i16>;
1672<
1673<   let Predicates = [HasSSE41] in
1674<   defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
1675<                             bc_v4i32>;
1676< }
1677< } // ExeDomain = SSEPackedInt
1678<
1679< //===---------------------------------------------------------------------===//
16807171a7056,7057
1681>   defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
1682>                                       0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V;
16837202a7089,7091
1684>   defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
1685>                                         int_x86_avx2_packusdw, WriteShuffle>,
1686>                                         VEX_4V, VEX_L;
16877233a7123,7124
1688>   defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw,
1689>                                      1, DEFAULT_ITINS_SHUFFLESCHED>;
16908081,8090d7971
1691< class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
1692<                            X86MemOperand x86memop, ValueType VT,
1693<                            PatFrag ld_frag, SchedWrite Sched> :
1694<   AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
1695<         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1696<         [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
1697<         Sched<[Sched]>, VEX {
1698<     let mayLoad = 1;
1699< }
1700<
17018099,8103c7980,7984
1702<   def VBROADCASTSSrm  : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
1703<                                              f32mem, v4f32, loadf32, WriteLoad>;
1704<   def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
1705<                                              f32mem, v8f32, loadf32,
1706<                                              WriteFShuffleLd>, VEX_L;
1707---
1708>   def VBROADCASTSSrm  : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
1709>                                       int_x86_avx_vbroadcast_ss, WriteLoad>;
1710>   def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
1711>                                       int_x86_avx_vbroadcast_ss_256,
1712>                                       WriteFShuffleLd>, VEX_L;
17138106,8107c7987,7989
1714< def VBROADCASTSDYrm  : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
1715<                                     v4f64, loadf64, WriteFShuffleLd>, VEX_L;
1716---
1717> def VBROADCASTSDYrm  : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
1718>                                     int_x86_avx_vbroadcast_sd_256,
1719>                                     WriteFShuffleLd>, VEX_L;
17208663a8546,8552
1721> def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
1722>           (VBROADCASTSSYrm addr:$src)>;
1723> def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
1724>           (VBROADCASTSDYrm addr:$src)>;
1725> def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
1726>           (VBROADCASTSSrm addr:$src)>;
1727>
1728diff -r parabix-llvm/lib/Target/X86/X86ISelLowering.cpp r209855/lib/Target/X86/X86ISelLowering.cpp
172922d21
1730< #include "X86ParabixISelLowering.h"
1731213,214d211
1732< extern MVT getFullRegisterType(MVT VT);
1733<
1734298d294
1735<   {
1736300,315d295
1737<   }
1738<
1739<   // Parabix register class
1740<   static const MVT ParabixVTs[] = { MVT::v32i1, MVT::v64i1, MVT::v64i2, MVT::v32i4,
1741<                                     MVT::v128i1 };
1742<   for (unsigned i = 0; i != array_lengthof(ParabixVTs); ++i) {
1743<     if (ParabixVTs[i].is32BitVector()) {
1744<       addRegisterClass(ParabixVTs[i], &X86::GR32XRegClass);
1745<     }
1746<     else if (ParabixVTs[i].is64BitVector() && Subtarget->is64Bit()) {
1747<       addRegisterClass(ParabixVTs[i], &X86::GR64XRegClass);
1748<     }
1749<     else if (ParabixVTs[i].is128BitVector() && Subtarget->hasSSE2()) {
1750<       addRegisterClass(ParabixVTs[i], &X86::VR128PXRegClass);
1751<     }
1752<   }
17531561,1604d1540
1754<   // Should be a good place to put Parabix operations.
1755<   // Like Add on v64i1
1756<   // Clear std::map here.
1757<   resetOperand0Action();
1758<   setOperationAction(ISD::MULHU, MVT::v32i1, Custom);
1759<   if (Subtarget->is64Bit())
1760<       setOperationAction(ISD::MULHU, MVT::v64i1, Custom);
1761<   if (Subtarget->hasSSE2()) {
1762<     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1763<     setOperationAction(ISD::MULHU, MVT::v128i1, Custom);
1764<   }
1765<
1766<   for (unsigned i = 0; i != array_lengthof(ParabixVTs); ++i) {
1767<     // v64i1 is only added and lowered for 64bit subtarget
1768<     if (ParabixVTs[i].is64BitVector() && !Subtarget->is64Bit())
1769<       continue;
1770<
1771<     setOperationAction(ISD::ADD, ParabixVTs[i], Custom);
1772<     setOperationAction(ISD::SUB, ParabixVTs[i], Custom);
1773<     setOperationAction(ISD::MUL, ParabixVTs[i], Custom);
1774<     setOperationAction(ISD::AND, ParabixVTs[i], Custom);
1775<     setOperationAction(ISD::OR,  ParabixVTs[i], Custom);
1776<     setOperationAction(ISD::XOR, ParabixVTs[i], Custom);
1777<     setOperationAction(ISD::SHL, ParabixVTs[i], Custom);
1778<     setOperationAction(ISD::SRL, ParabixVTs[i], Custom);
1779<     setOperationAction(ISD::SRA, ParabixVTs[i], Custom);
1780<     setOperationAction(ISD::SETCC,              ParabixVTs[i], Custom);
1781<     setOperationAction(ISD::BUILD_VECTOR,       ParabixVTs[i], Custom);
1782<     setOperationAction(ISD::SCALAR_TO_VECTOR,   ParabixVTs[i], Custom);
1783<     setOperationAction(ISD::EXTRACT_VECTOR_ELT, ParabixVTs[i], Custom);
1784<     setOperationAction(ISD::INSERT_VECTOR_ELT,  ParabixVTs[i], Custom);
1785<
1786<     //Better way to lower LOAD/STORE
1787<     setOperationAction(ISD::STORE, ParabixVTs[i], Promote);
1788<     AddPromotedToType (ISD::STORE, ParabixVTs[i], getFullRegisterType(ParabixVTs[i]));
1789<     setOperationAction(ISD::LOAD,  ParabixVTs[i], Promote);
1790<     AddPromotedToType (ISD::LOAD,  ParabixVTs[i], getFullRegisterType(ParabixVTs[i]));
1791<   }
1792<   // Parabix: custom lowering ISD::UADDO for long stream addition.
1793<   // ref: LegalizeDAG.cpp 3693. UADDO is expanded to ADD and SetCC
1794<   // ref: this file, lowerXALUO, UADDO to X86ISD::ADD and SetCC X86::Cond_B
1795<   setTargetDAGCombine(ISD::UADDO);
1796<   setTargetDAGCombine(ISD::UADDE);
1797<
17981940c1876
1799<            "Unexpected FP-extend for return value.");
1800---
1801>            "Unexpected FP-extend for return value."); 
18022326,2327d2261
1803<       else if (RegVT.isParabixVector() && RegVT.is128BitVector())
1804<         RC = &X86::VR128PXRegClass;
18052338,2341d2271
1806<       else if (RegVT == MVT::v32i1)
1807<         RC = &X86::GR32XRegClass;
1808<       else if (RegVT == MVT::v64i1)
1809<         RC = &X86::GR64XRegClass;
18104946d4875
1811<
18126095c6024
1813<     else
1814---
1815>     else
18168423c8352
1817< SDValue
1818---
1819> SDValue
18208436c8365
1821<     SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
1822---
1823>     SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
18248459c8388
1825<
1826---
1827>   
18289669c9598
1829<
1830---
1831>     
183210334c10263
1833<
1834---
1835
183610337,10339c10266,10268
1837<     // Do the comparison at i32 if it's smaller, besides the Atom case.
1838<     // This avoids subregister aliasing issues. Keep the smaller reference
1839<     // if we're optimizing for size, however, as that'll allow better folding
1840---
1841>     // Do the comparison at i32 if it's smaller, besides the Atom case.
1842>     // This avoids subregister aliasing issues. Keep the smaller reference
1843>     // if we're optimizing for size, however, as that'll allow better folding
184412232,12245d12160
1845<   case Intrinsic::x86_sse2_packssdw_128:
1846<   case Intrinsic::x86_sse2_packsswb_128:
1847<   case Intrinsic::x86_avx2_packssdw:
1848<   case Intrinsic::x86_avx2_packsswb:
1849<     return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(),
1850<                        Op.getOperand(1), Op.getOperand(2));
1851<
1852<   case Intrinsic::x86_sse2_packuswb_128:
1853<   case Intrinsic::x86_sse41_packusdw:
1854<   case Intrinsic::x86_avx2_packuswb:
1855<   case Intrinsic::x86_avx2_packusdw:
1856<     return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(),
1857<                        Op.getOperand(1), Op.getOperand(2));
1858<
185912776c12691
1860<   if (Initialized)
1861---
1862>   if (Initialized)
186312788c12703
1864<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpi_512,
1865---
1866>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpi_512,
186712790c12705
1868<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpq_512,
1869---
1870>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpq_512,
187112792c12707
1872<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpi_512,
1873---
1874>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpi_512,
187512794c12709
1876<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpq_512,
1877---
1878>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpq_512,
187912799c12714
1880<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpd_512,
1881---
1882>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpd_512,
188312801c12716
1884<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpd_512,
1885---
1886>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpd_512,
188712803c12718
1888<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dps_512,
1889---
1890>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dps_512,
189112805c12720
1892<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpi_512,
1893---
1894>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpi_512,
189512807c12722
1896<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpq_512,
1897---
1898>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpq_512,
189912809c12724
1900<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpi_512,
1901---
1902>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpi_512,
190312811c12726
1904<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpq_512,
1905---
1906>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpq_512,
190712813,12814c12728,12729
1908<
1909<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qps_512,
1910---
1911>   
1912>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qps_512,
191312817c12732
1914<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qpd_512,
1915---
1916>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qpd_512,
191712820c12735
1918<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dpd_512,
1919---
1920>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dpd_512,
192112823c12738
1922<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dps_512,
1923---
1924>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dps_512,
192512826c12741
1926<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qps_512,
1927---
1928>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qps_512,
192912829c12744
1930<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qpd_512,
1931---
1932>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qpd_512,
193312832c12747
1934<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dpd_512,
1935---
1936>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dpd_512,
193712835c12750
1938<   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dps_512,
1939---
1940>   IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dps_512,
194113929c13844
1942<   if (Op.getOpcode() == ISD::SHL &&
1943---
1944>   if (Op.getOpcode() == ISD::SHL &&
194514021c13936
1946<
1947---
1948>     
194914026c13941
1950<       SDValue Splat1 =
1951---
1952>       SDValue Splat1 =
195314029c13944
1954<       SDValue Splat2 =
1955---
1956>       SDValue Splat2 =
195714530,14563d14444
1958<   // Redirect Parabix Operation Lowering
1959<   // TODO: Combine these logic together.
1960<   // SETCC would always return i1 vector, but it may not be parabix op
1961<   if (Op.getOpcode() != ISD::SETCC && Op.getValueType().isParabixVector())
1962<     return LowerParabixOperation(Op, DAG);
1963<   if (Op.getOpcode() == ISD::MUL && Op.getSimpleValueType() == MVT::v16i8)
1964<     return LowerParabixOperation(Op, DAG);
1965<   if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1966<       Op.getOperand(0).getValueType().isParabixVector())
1967<     return LowerParabixOperation(Op, DAG);
1968<   if (Op.getOpcode() == ISD::SETCC &&
1969<       Op.getOperand(0).getValueType().isParabixVector())
1970<     return LowerParabixOperation(Op, DAG);
1971<   if (Op.getOpcode() == ISD::VSELECT &&
1972<       Op.getOperand(0).getSimpleValueType() == MVT::v32i1 &&
1973<       Op.getOperand(1).getSimpleValueType() == MVT::v32i8 &&
1974<       Subtarget->hasAVX2()) {
1975<     //for VSELECT, if the mask is v32i1, zext it to v32i8, otherwise
1976<     //it's legal.
1977<     return LowerParabixOperation(Op, DAG);
1978<   }
1979<   if (Op.getOpcode() == ISD::SIGN_EXTEND &&
1980<       Op.getOperand(0).getSimpleValueType() == MVT::v32i1 &&
1981<       Op.getSimpleValueType() == MVT::v32i8 &&
1982<       Subtarget->hasAVX2()) {
1983<     return LowerParabixOperation(Op, DAG);
1984<   }
1985<
1986<   if (Op.getOpcode() == ISD::STORE)
1987<   {
1988<     dbgs() << "Store that are not redirected: \n";
1989<     Op.dumpr();
1990<   }
1991<
199215031,15032d14911
1993<   case X86ISD::PACKSS:             return "X86ISD::PACKSS";
1994<   case X86ISD::PACKUS:             return "X86ISD::PACKUS";
199517031c16910
1996< // to remove extra copies in the loop.
1997---
1998> // to remove extra copies in the loop.   
199918335c18214
2000<       CondVT == VT) {
2001---
2002>       CondVT == VT) {
200318386c18265
2004<
2005---
2006>       
200718859c18738
2008<
2009---
2010>     
201119640,19646d19518
2012<     // if Memsz is smaller than i8, no bigger interger type would devides the total
2013<     // loaded size. e.g. MemVT = v4i1, RegVT= v4i32, Numloads will be zero.
2014<     if (MemSz % SclrLoadTy.getSizeInBits() != 0) {
2015<       //Can't find such a scalar type
2016<       return SDValue();
2017<     }
2018<
201920713,20718d20584
2020<   //Parabix
2021<   //Redirect to Parabix combine logic first. If nothing changed, go over the
2022<   //original combine logic.
2023<   SDValue R = PerformParabixDAGCombine(N, DCI);
2024<   if (R.getNode()) return R;
2025<
202621555c21421
2027<   // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
2028---
2029>   // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.   
2030diff -r parabix-llvm/lib/Target/X86/X86ISelLowering.h r209855/lib/Target/X86/X86ISelLowering.h
2031318,319d317
2032<       PACKSS,
2033<       PACKUS,
2034590,592d587
2035<     /// LowerParabixOperation
2036<     SDValue LowerParabixOperation(SDValue Op, SelectionDAG &DAG) const;
2037<
2038602,603d596
2039<     SDValue PerformParabixDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
2040<
2041893d885
2042<     SDValue PXLowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
2043Only in parabix-llvm/lib/Target/X86: X86ParabixISelLowering.cpp
2044Only in parabix-llvm/lib/Target/X86: X86ParabixISelLowering.h
2045diff -r parabix-llvm/lib/Target/X86/X86RegisterInfo.td r209855/lib/Target/X86/X86RegisterInfo.td
2046335,339d334
2047< // GR32 Extended, used for parabix
2048< def GR32X : RegisterClass<"X86", [v32i1, i32], 32,
2049<                          (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
2050<                               R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
2051<
2052347,351d341
2053< // GR64 Extended, used for parabix
2054< def GR64X : RegisterClass<"X86", [v64i1, i64], 64,
2055<                          (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
2056<                               RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
2057<
2058450,454d439
2059<
2060< // Parabix
2061< def VR128PX : RegisterClass<"X86", [v128i1, v64i2, v32i4, v16i8, v8i16,
2062<                                     v4i32, v2i64, v4f32, v2f64],
2063<                             128, (add FR32)>;
2064diff -r parabix-llvm/lib/Transforms/InstCombine/InstructionCombining.cpp r209855/lib/Transforms/InstCombine/InstructionCombining.cpp
20651236,1238d1235
2066<       // Keep track of the type as we walk the GEP.
2067<       Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
2068<
20691251,1252c1248,1255
2070<             if (J > 1 && CurTy->isStructTy())
2071<               return nullptr;
2072---
2073>             if (J > 1) {
2074>               SmallVector<Value*, 8> Idxs(GEP.idx_begin(), GEP.idx_begin()+J-1);
2075>               Type *Ty =
2076>                 GetElementPtrInst::getIndexedType(Op1->getOperand(0)->getType(),
2077>                                                   Idxs);
2078>               if (Ty->isStructTy())
2079>                 return nullptr;
2080>             }
20811263,1271d1265
2082<           }
2083<         }
2084<
2085<         // Sink down a layer of the type for the next iteration.
2086<         if (J > 0) {
2087<           if (CompositeType *CT = dyn_cast<CompositeType>(CurTy)) {
2088<             CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
2089<           } else {
2090<             CurTy = nullptr;
2091diff -r parabix-llvm/lib/Transforms/IPO/FunctionAttrs.cpp r209855/lib/Transforms/IPO/FunctionAttrs.cpp
2092452,463d451
2093<       bool Captures = true;
2094<
2095<       if (I->getType()->isVoidTy())
2096<         Captures = false;
2097<
2098<       auto AddUsersToWorklistIfCapturing = [&] {
2099<         if (Captures)
2100<           for (Use &UU : I->uses())
2101<             if (Visited.insert(&UU))
2102<               Worklist.push_back(&UU);
2103<       };
2104<
2105465,466c453
2106<       if (CS.doesNotAccessMemory()) {
2107<         AddUsersToWorklistIfCapturing();
2108---
2109>       if (CS.doesNotAccessMemory())
2110468d454
2111<       }
2112474d459
2113<           AddUsersToWorklistIfCapturing();
2114489d473
2115<           Captures &= !CS.doesNotCapture(A - B);
2116498d481
2117<       AddUsersToWorklistIfCapturing();
2118Only in parabix-llvm: README.md
2119Only in r209855: README.txt
2120diff -r parabix-llvm/test/CodeGen/X86/avx-intrinsics-x86.ll r209855/test/CodeGen/X86/avx-intrinsics-x86.ll
21212221a2222,2229
2122> define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
2123>   ; CHECK: vbroadcastsd
2124>   %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
2125>   ret <4 x double> %res
2126> }
2127> declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
2128>
2129>
21302235a2244,2259
2131>
2132>
2133> define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
2134>   ; CHECK: vbroadcastss
2135>   %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
2136>   ret <4 x float> %res
2137> }
2138> declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
2139>
2140>
2141> define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
2142>   ; CHECK: vbroadcastss
2143>   %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
2144>   ret <8 x float> %res
2145> }
2146> declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
2147Only in parabix-llvm/test/CodeGen/X86: x86-upgrade-avx-vbroadcast.ll
2148Only in parabix-llvm/test: Parabix
2149diff -r parabix-llvm/test/TableGen/intrinsic-varargs.td r209855/test/TableGen/intrinsic-varargs.td
215026c26
2151< def isVoid : ValueType<0, 104>;   // Produces no value
2152---
2153> def isVoid : ValueType<0, 56>;   // Produces no value
2154diff -r parabix-llvm/test/Transforms/FunctionAttrs/nocapture.ll r209855/test/Transforms/FunctionAttrs/nocapture.ll
215571c71
2156< ; CHECK: define i1 @c7(i32* readonly %q, i32 %bitno)
2157---
2158> ; CHECK: define i1 @c7(i32* readnone %q, i32 %bitno)
2159diff -r parabix-llvm/test/Transforms/FunctionAttrs/readattrs.ll r209855/test/Transforms/FunctionAttrs/readattrs.ll
216054,67d53
2161<
2162< ; CHECK: define i32* @test8_1(i32* readnone %p)
2163< define i32* @test8_1(i32* %p) {
2164< entry:
2165<   ret i32* %p
2166< }
2167<
2168< ; CHECK: define void @test8_2(i32* %p)
2169< define void @test8_2(i32* %p) {
2170< entry:
2171<   %call = call i32* @test8_1(i32* %p)
2172<   store i32 10, i32* %call, align 4
2173<   ret void
2174< }
2175diff -r parabix-llvm/tools/CMakeLists.txt r209855/tools/CMakeLists.txt
217672c72
2177< #add_llvm_external_project(clang)
2178---
2179> add_llvm_external_project(clang)
2180diff -r parabix-llvm/tools/llc/llc.cpp r209855/tools/llc/llc.cpp
2181179,180d178
2182<   dbgs() << "WARN: Meng's build of llc running...\n";
2183<
2184diff -r parabix-llvm/utils/TableGen/CodeGenTarget.cpp r209855/utils/TableGen/CodeGenTarget.cpp
218554,60c54,58
2186<   case MVT::i1:     return "MVT::i1";
2187<   case MVT::i2:     return "MVT::i2";
2188<   case MVT::i4:     return "MVT::i4";
2189<   case MVT::i8:     return "MVT::i8";
2190<   case MVT::i16:     return "MVT::i16";
2191<   case MVT::i32:     return "MVT::i32";
2192<   case MVT::i64:     return "MVT::i64";
2193---
2194>   case MVT::i1:       return "MVT::i1";
2195>   case MVT::i8:       return "MVT::i8";
2196>   case MVT::i16:      return "MVT::i16";
2197>   case MVT::i32:      return "MVT::i32";
2198>   case MVT::i64:      return "MVT::i64";
219962,65d59
2200<   case MVT::i256:     return "MVT::i256";
2201<   case MVT::i512:     return "MVT::i512";
2202<   case MVT::i1024:     return "MVT::i1024";
2203<
220478d71
2205<   case MVT::v1i1:     return "MVT::v1i1";
220682,107c75,77
2207<   case MVT::v16i1:     return "MVT::v16i1";
2208<   case MVT::v32i1:     return "MVT::v32i1";
2209<   case MVT::v64i1:     return "MVT::v64i1";
2210<   case MVT::v128i1:     return "MVT::v128i1";
2211<   case MVT::v256i1:     return "MVT::v256i1";
2212<   case MVT::v512i1:     return "MVT::v512i1";
2213<   case MVT::v1024i1:     return "MVT::v1024i1";
2214<   case MVT::v1i2:     return "MVT::v1i2";
2215<   case MVT::v2i2:     return "MVT::v2i2";
2216<   case MVT::v4i2:     return "MVT::v4i2";
2217<   case MVT::v8i2:     return "MVT::v8i2";
2218<   case MVT::v16i2:     return "MVT::v16i2";
2219<   case MVT::v32i2:     return "MVT::v32i2";
2220<   case MVT::v64i2:     return "MVT::v64i2";
2221<   case MVT::v128i2:     return "MVT::v128i2";
2222<   case MVT::v256i2:     return "MVT::v256i2";
2223<   case MVT::v512i2:     return "MVT::v512i2";
2224<   case MVT::v1i4:     return "MVT::v1i4";
2225<   case MVT::v2i4:     return "MVT::v2i4";
2226<   case MVT::v4i4:     return "MVT::v4i4";
2227<   case MVT::v8i4:     return "MVT::v8i4";
2228<   case MVT::v16i4:     return "MVT::v16i4";
2229<   case MVT::v32i4:     return "MVT::v32i4";
2230<   case MVT::v64i4:     return "MVT::v64i4";
2231<   case MVT::v128i4:     return "MVT::v128i4";
2232<   case MVT::v256i4:     return "MVT::v256i4";
2233---
2234>   case MVT::v16i1:    return "MVT::v16i1";
2235>   case MVT::v32i1:    return "MVT::v32i1";
2236>   case MVT::v64i1:    return "MVT::v64i1";
2237112,163c82,112
2238<   case MVT::v16i8:     return "MVT::v16i8";
2239<   case MVT::v32i8:     return "MVT::v32i8";
2240<   case MVT::v64i8:     return "MVT::v64i8";
2241<   case MVT::v128i8:     return "MVT::v128i8";
2242<   case MVT::v1i16:     return "MVT::v1i16";
2243<   case MVT::v2i16:     return "MVT::v2i16";
2244<   case MVT::v4i16:     return "MVT::v4i16";
2245<   case MVT::v8i16:     return "MVT::v8i16";
2246<   case MVT::v16i16:     return "MVT::v16i16";
2247<   case MVT::v32i16:     return "MVT::v32i16";
2248<   case MVT::v64i16:     return "MVT::v64i16";
2249<   case MVT::v1i32:     return "MVT::v1i32";
2250<   case MVT::v2i32:     return "MVT::v2i32";
2251<   case MVT::v4i32:     return "MVT::v4i32";
2252<   case MVT::v8i32:     return "MVT::v8i32";
2253<   case MVT::v16i32:     return "MVT::v16i32";
2254<   case MVT::v32i32:     return "MVT::v32i32";
2255<   case MVT::v1i64:     return "MVT::v1i64";
2256<   case MVT::v2i64:     return "MVT::v2i64";
2257<   case MVT::v4i64:     return "MVT::v4i64";
2258<   case MVT::v8i64:     return "MVT::v8i64";
2259<   case MVT::v16i64:     return "MVT::v16i64";
2260<   case MVT::v1i128:     return "MVT::v1i128";
2261<   case MVT::v2i128:     return "MVT::v2i128";
2262<   case MVT::v4i128:     return "MVT::v4i128";
2263<   case MVT::v8i128:     return "MVT::v8i128";
2264<   case MVT::v1i256:     return "MVT::v1i256";
2265<   case MVT::v2i256:     return "MVT::v2i256";
2266<   case MVT::v4i256:     return "MVT::v4i256";
2267<   case MVT::v1i512:     return "MVT::v1i512";
2268<   case MVT::v2i512:     return "MVT::v2i512";
2269<   case MVT::v1i1024:     return "MVT::v1i1024";
2270<
2271<   case MVT::v1f16:     return "MVT::v1f16";
2272<   case MVT::v2f16:     return "MVT::v2f16";
2273<   case MVT::v4f16:     return "MVT::v4f16";
2274<   case MVT::v8f16:     return "MVT::v8f16";
2275<   case MVT::v16f16:     return "MVT::v16f16";
2276<   case MVT::v32f16:     return "MVT::v32f16";
2277<   case MVT::v64f16:     return "MVT::v64f16";
2278<   case MVT::v1f32:     return "MVT::v1f32";
2279<   case MVT::v2f32:     return "MVT::v2f32";
2280<   case MVT::v4f32:     return "MVT::v4f32";
2281<   case MVT::v8f32:     return "MVT::v8f32";
2282<   case MVT::v16f32:     return "MVT::v16f32";
2283<   case MVT::v32f32:     return "MVT::v32f32";
2284<   case MVT::v1f64:     return "MVT::v1f64";
2285<   case MVT::v2f64:     return "MVT::v2f64";
2286<   case MVT::v4f64:     return "MVT::v4f64";
2287<   case MVT::v8f64:     return "MVT::v8f64";
2288<   case MVT::v16f64:     return "MVT::v16f64";
2289<
2290---
2291>   case MVT::v16i8:    return "MVT::v16i8";
2292>   case MVT::v32i8:    return "MVT::v32i8";
2293>   case MVT::v64i8:    return "MVT::v64i8";
2294>   case MVT::v1i16:    return "MVT::v1i16";
2295>   case MVT::v2i16:    return "MVT::v2i16";
2296>   case MVT::v4i16:    return "MVT::v4i16";
2297>   case MVT::v8i16:    return "MVT::v8i16";
2298>   case MVT::v16i16:   return "MVT::v16i16";
2299>   case MVT::v32i16:   return "MVT::v32i16";
2300>   case MVT::v1i32:    return "MVT::v1i32";
2301>   case MVT::v2i32:    return "MVT::v2i32";
2302>   case MVT::v4i32:    return "MVT::v4i32";
2303>   case MVT::v8i32:    return "MVT::v8i32";
2304>   case MVT::v16i32:   return "MVT::v16i32";
2305>   case MVT::v1i64:    return "MVT::v1i64";
2306>   case MVT::v2i64:    return "MVT::v2i64";
2307>   case MVT::v4i64:    return "MVT::v4i64";
2308>   case MVT::v8i64:    return "MVT::v8i64";
2309>   case MVT::v16i64:   return "MVT::v16i64";
2310>   case MVT::v2f16:    return "MVT::v2f16";
2311>   case MVT::v4f16:    return "MVT::v4f16";
2312>   case MVT::v8f16:    return "MVT::v8f16";
2313>   case MVT::v1f32:    return "MVT::v1f32";
2314>   case MVT::v2f32:    return "MVT::v2f32";
2315>   case MVT::v4f32:    return "MVT::v4f32";
2316>   case MVT::v8f32:    return "MVT::v8f32";
2317>   case MVT::v16f32:   return "MVT::v16f32";
2318>   case MVT::v1f64:    return "MVT::v1f64";
2319>   case MVT::v2f64:    return "MVT::v2f64";
2320>   case MVT::v4f64:    return "MVT::v4f64";
2321>   case MVT::v8f64:    return "MVT::v8f64";
2322diff -r parabix-llvm/utils/vim/llvm.vim r209855/utils/vim/llvm.vim
23234c4
2324< " Version:      $Revision$
2325---
2326> " Version:      $Revision: 203866 $
2327diff -r parabix-llvm/utils/vim/tablegen.vim r209855/utils/vim/tablegen.vim
23284c4
2329< " Version:    $Revision$
2330---
2331> " Version:    $Revision: 151164 $
2332diff -r parabix-llvm/utils/vim/vimrc r209855/utils/vim/vimrc
23332c2
2334< " $Revision$
2335---
2336> " $Revision: 176235 $
Note: See TracBrowser for help on using the repository browser.