source: proto/Compiler/CCGO_HMCPS.py @ 2802

Last change on this file since 2802 was 2802, checked in by cameron, 7 years ago

Fast bitpack implementation

File size: 35.4 KB
#
# CCGO_HMCPS.py
#
# Carry Code Generator Object using Hierarchical Merging Carry Pack Strategy
#
# Robert D. Cameron
# November 26, 2012
# Licensed under Open Software License 3.0
#
import ast
import carryInfo, CCGO

#
# Helper functions
#
def TestHelper_Bitblock_Or(testExpr, bitBlockExpr):
    if isinstance(testExpr, ast.Call):
      assert isinstance(testExpr.func, ast.Name)
      assert testExpr.func.id == 'bitblock::any'
      testExpr.args[0] = make_call('simd_or', [bitBlockExpr, testExpr.args[0]])
      return testExpr
    else:
      return ast.BinOp(testExpr, ast.BitOr(), make_call('bitblock::any', [bitBlockExpr]))

def TestHelper_Integer_Or(testExpr, intExpr):
    return ast.BinOp(testExpr, ast.BitOr(), intExpr)

def mk_var(var, mode=ast.Load()):
  if isinstance(var, str):
        var = ast.Name(var, mode)
  return var

def make_assign(var, expr):
   if isinstance(var, str):
        var = ast.Name(var, ast.Store())
   return ast.Assign([var], expr)

def make_index(var, num, mode=ast.Load()):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Subscript(var, ast.Index(ast.Num(num)), mode)

def make_index_store(var, num):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Subscript(var, ast.Index(ast.Num(num)), ast.Store())

def make_att(var, att, mode=ast.Load()):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Attribute(var, att, mode)

def make_att_store(var, att):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Attribute(var, att, ast.Store())

def make_call(fn_name, args):
  if isinstance(fn_name, str):
        fn_name = ast.Name(fn_name, ast.Load())
  return ast.Call(fn_name, args, [], None, None)

def make_callStmt(fn_name, args):
  if isinstance(fn_name, str): fn_name = ast.Name(fn_name, ast.Load())
  return ast.Expr(ast.Call(fn_name, args, [], None, None))

def make_mergeh(fw, x, y):
  return make_call("esimd<%i>::mergeh" % fw, [mk_var(x), mk_var(y)])

def make_zero(fw):
  return make_call("simd<%i>::constant<0>" % fw, [])


#
#
# Carry Pack Assignment Strategy
#
# The hierarchical merging carry pack strategy packs carries
# into packs of 2, 4, 8 and 16.   For example, to pack
# 4 carries c0, c1, c2, and c3 into the 32-bit fields of
# a 128-bit register, the following operations are used.
#
# c0 = pablo.SomeCarryGeneratingFn(...)
# c1 = pablo.SomeCarryGeneratingFn(...)
# c1_0 = esimd::mergeh<32>(c1, c0)
# c2 = pablo.SomeCarryGeneratingFn(...)
# c3 = pablo.SomeCarryGeneratingFn(...)
# c3_2 = esimd::mergeh<32>(c3, c2)
# c3_0 = esimd::mergeh<64>(c3_2, c1_0)
#
#
# Packing operations are generated sequentially when
# the appropriate individual carries or subpacks become
# available.
#
# Generate the packing operations assuming that the
# carry_num carry has just been generated.
#
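#
# The merges above are interleaved with the carry-generating operations: c1_0
# is formed immediately after c1, nothing is packed after c2 (its partner c3
# is not yet available), and both c3_2 and c3_0 are formed after c3.  In
# gen_merges below this trigger is the (carry_last & size) test; an
# illustrative trace with the default '__c' temp prefix:
#
#   gen_merges(1, 1)  emits  __c1_0
#   gen_merges(2, 2)  emits  nothing
#   gen_merges(3, 3)  emits  __c3_2, then __c3_0
#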
# Smallest power of 2 >= n (for n >= 1).
def pow2ceil(n):
   c = 1
   while c < n: c *= 2
   return c

# Largest power of 2 <= n (for n >= 1).
def pow2floor(n):
   c = 1
   while c <= n: c *= 2
   return c/2

# Lowest set bit of n (0 if n == 0).
def low_bit(n):
   return n - (n & (n-1))

# Round n up to the next multiple of align_base.
def align(n, align_base):
  return ((n + align_base - 1) / align_base) * align_base

def determine_aligned_block_sizes(pack_size, cis, max_whiles_per_pack = 1, min_block_size = 1):
  aligned_size = {}
  for i in range(cis.block_count): aligned_size[i] = 0
  seen = []
  for i in range(cis.block_count):
    # Work backwards to process all child blocks before the parent
    # so that the parent incorporates the updated child counts.
    b = cis.block_count - i - 1
    b_carries = 0
    op = cis.block_first_op[b]
    while op < cis.block_first_op[b] + cis.block_op_count[b]:
      sb = cis.containing_block[op]
      if sb == b:
        if op not in cis.advance_amount.keys(): b_carries += 1
        elif cis.advance_amount[op] == 1: b_carries += 1
        op += 1
      else:
        align_base = aligned_size[sb]
        if align_base > pack_size: align_base = pack_size
        b_carries = align(b_carries, align_base)
        b_carries += aligned_size[sb]
        op += cis.block_op_count[sb]
    #
    # Align to min block size
    aligned_size[b] = align(b_carries, min_block_size)
    # Force whiles to use full packs; this possibly can be relaxed.
    if cis.whileblock[b]:
      aligned_size[b] = align(aligned_size[b], pack_size/max_whiles_per_pack)
    if aligned_size[b] > pack_size:
      aligned_size[b] = align(aligned_size[b], pack_size)
    else:
      aligned_size[b] = pow2ceil(aligned_size[b])
  return aligned_size

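#
# For illustration (hypothetical counts, pack_size = 8): an if-block with 3
# simple carries is rounded up to pow2ceil(3) = 4; a while-block with 5 carries
# is first forced to a full pack, align(5, 8) = 8; and a block whose total
# reaches 13 after child alignment is padded to a whole number of packs,
# align(13, 8) = 16.
#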
MAX_LINE_LENGTH = 80

def BitBlock_decls_from_vars(varlist):
  global MAX_LINE_LENGTH
  decls =  ""
  if not len(varlist) == 0:
          decls = "             BitBlock"
          pending = ""
          linelgth = 10
          for v in varlist:
            if linelgth + len(v) + 2 <= MAX_LINE_LENGTH:
              decls += pending + " " + v
              linelgth += len(pending + v) + 1
            else:
              decls += ";\n             BitBlock " + v
              linelgth = 11 + len(v)
            pending = ","
          decls += ";"
  return decls

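#
# For illustration (hypothetical variable list):
# BitBlock_decls_from_vars(['__c0', '__c1', '__c1_0']) returns a single
# indented declaration line "BitBlock __c0, __c1, __c1_0;", starting a new
# declaration line whenever MAX_LINE_LENGTH would be exceeded.
#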
def block_contains(b0, b1, parent_block_map):
  if b0 == b1: return True
  elif b1 == 0: return False
  else: return block_contains(b0, parent_block_map[b1], parent_block_map)

class HMCPS_CCGO(CCGO.CCGO):
    def __init__(self, BLOCK_SIZE, fw, carryInfoSet, carryPackVarName='carryG', temp_prefix='__c'):
        self.BLOCK_SIZE = BLOCK_SIZE
        self.fw = fw
        self.field_count = self.BLOCK_SIZE/fw
        self.carryInfoSet = carryInfoSet
        self.carryPackVar = carryPackVarName
        self.temp_prefix = temp_prefix

    def allocate_all(self):
        self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet)
        self.carryPack_count = (self.aligned_size[0] + self.field_count - 1) / self.field_count
        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.adv_n_map = {}
        self.block_base = {}
        self.allocate_ops()
        # carry_offset is used within the inner body of while loops to access local carries.
        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
        self.carry_offset = 0
#
# Carry Storage/Access
#
# Carries are stored in one or more ubitblocks as byte values.
# For each block, the carry count is rounded up to its power-of-2 ceiling P,
# so that the carry test for that block is accessible as a single value of P bytes.
# Packs of 1, 2, 4 or 8 carries are respectively represented
# as one or more _8, _16, _32 or _64 values.  (Members of ubitblock union.)
#
#
# Allocation phase determines the ubitblock_no and count for each block.

#  carry-in access is a byte load  carryG[packno]._8[offset]
#  carryout store is to a local pack var until we get to the final byte of a pack
#
#  if-test: let P be pack_size in {1,2,4,8,...}
#    if P <= 8, use an integer test expression cG[packno]._<8*P>[block_offset]
#
#  while test similar
#    local while decl: use a copy of carryPack
#    while finalize  carry combine:   round up and |= into structure
#
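#
#  For illustration (assuming fw = 8, so field_count = 16 on a 128-bit block):
#  the carry-in for the operation allocated at position 21 is read as
#  convert(carryG[1]._8[5]), and the if-test for a block of 4 carries starting
#  at position 8 reads the single integer value carryG[0]._32[2].
#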
    def carry_pack_full(self, ub, v = None, mode = ast.Load()):
       if v == None: v = self.carryPackVar
       return make_att(make_index(v, ub), '_128', mode)

    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
       return make_index(make_att(make_index(self.carryPackVar, ub), '_%i' % fw), rp, mode)

    def local_pack_full(self, ub, mode = ast.Load()):
       return self.carry_pack_full(ub, "sub" + self.carryPackVar, mode)

    def local_pack_index(self, fw, ub, rp, mode = ast.Load()):
       v = "sub" + self.carryPackVar
       return make_index(make_att(make_index(v, ub), '_%i' % fw), rp, mode)


    def cg_temp(self, hi_carry, lo_carry = None):
      if lo_carry == None or hi_carry == lo_carry: return "%s%i" % (self.temp_prefix, hi_carry)
      else: return "%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)

    def local_temp(self, hi_carry, lo_carry = None):
      if lo_carry == None or hi_carry == lo_carry: return "sub%s%i" % (self.temp_prefix, hi_carry)
      else: return "sub%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)

    def gen_merges(self, carry_last, carry_base, add_decl = False):
      size = carry_last - carry_base + 1
      if carry_last & size:
        v1 = mk_var(self.cg_temp(carry_last, carry_base))
        v0 = mk_var(self.cg_temp(carry_last - size, carry_base - size))
        v2 = mk_var(self.cg_temp(carry_last, carry_base - size), ast.Store())
        assigs = []
        if add_decl: assigs.append(make_callStmt('BitBlock_declare', [v2]))
        assigs.append(make_assign(v2, make_mergeh(self.fw * size, v1, v0)))
        return assigs + self.gen_merges(carry_last, carry_base - size, add_decl)
      else: return []

    #
    #  Given that carry_num carries have been generated and packed,
    #  add zero_count additional carry zero values and pack.
    #  Use shifts to introduce multiple zeroes, where possible.
    #
    def gen_multiple_carry_zero_then_pack(self, carry_num, zero_count, add_decl = False):
      stmts = []
      if zero_count == 0: return stmts
      pending_carry_pack_size = low_bit(carry_num)
      pending_carry_base = carry_num - pending_carry_pack_size
      # We may be able to fill zeroes by shifting.
      # But the shift is limited by any further pending carry pack and
      # the constraint that the result must produce a well-formed pack
      # having a power-of-2 number of entries.
      #
      final_num = carry_num + zero_count
      pack_size2 = low_bit(pending_carry_base)
      if pending_carry_base == 0:
        shift = pow2floor(final_num) - pending_carry_pack_size
      else:
        shift = min(low_bit(pending_carry_base), low_bit(final_num)) - pending_carry_pack_size
      if pending_carry_pack_size == 0 or shift == 0:
        # There is either no pending pack or we are not generating enough
        # carry zeroes to combine into the pending pack, so we can only add new
        # packs.
        #
        if zero_count == 1:
          v = mk_var(self.cg_temp(carry_num))
          if add_decl: stmts.append(make_callStmt('BitBlock_declare', [v]))
          stmts.append(make_assign(v, make_zero(self.fw)))
          return stmts
        else:
          zero_count_floor = pow2floor(zero_count)
          hi_num = carry_num + zero_count_floor
          v = mk_var(self.cg_temp(hi_num - 1, carry_num))
          if add_decl: stmts.append(make_callStmt('BitBlock_declare', [v]))
          stmts.append(make_assign(v, make_zero(self.fw)))
          remaining_zeroes = zero_count - zero_count_floor
          return stmts + self.gen_multiple_carry_zero_then_pack(hi_num, remaining_zeroes, add_decl)
      #
      shift_result = mk_var(self.cg_temp(carry_num + shift - 1, pending_carry_base))
      pending = self.cg_temp(carry_num - 1, pending_carry_base)
      #a1 = make_assign(shift_result, make_call('bitblock::srli<%i>' % (self.fw * shift), [mk_var(pending)]))
      if add_decl: stmts.append(make_callStmt('BitBlock_declare', [shift_result]))
      stmts.append(make_assign(shift_result, make_call('mvmd<%i>::srli<%i>' % (self.fw, shift), [mk_var(pending)])))
      # Do any necessary merges
      m = self.gen_merges(carry_num + shift - 1,  pending_carry_base, add_decl)
      return stmts + m + self.gen_multiple_carry_zero_then_pack(carry_num + shift, zero_count - shift, add_decl)

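    #
    #  An illustrative trace (assuming fw = 8 and the default '__c' temp prefix):
    #  gen_multiple_carry_zero_then_pack(5, 3) pads the pending single carry
    #  __c4 with three zero carry positions via a field shift and then merges
    #  the resulting 4-pack with the completed pack __c3_0:
    #
    #    __c7_4 = mvmd<8>::srli<3>(__c4)
    #    __c7_0 = esimd<32>::mergeh(__c7_4, __c3_0)
    #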

    def allocate_ops(self):
      carry_count = 0
      adv_n_count = 0
      for op in range(self.carryInfoSet.operation_count):
        b = self.carryInfoSet.containing_block[op]
        if op != 0:
          # If we've just left a block, ensure that we are aligned.
          b_last = self.carryInfoSet.containing_block[op-1]
          if not block_contains(b_last, b, self.carryInfoSet.parent_block):
            # find the max-sized block just exited.
            while not block_contains(self.carryInfoSet.parent_block[b_last], b, self.carryInfoSet.parent_block):
              b_last = self.carryInfoSet.parent_block[b_last]
            align_base = self.aligned_size[b_last]
            if align_base > self.field_count: align_base = self.field_count
            carry_count = align(carry_count, align_base)
        if self.carryInfoSet.block_first_op[b] == op:
          # If we're just entering a block, ensure that we are aligned.
          align_base = self.aligned_size[b]
          if align_base > self.field_count: align_base = self.field_count
          carry_count = align(carry_count, align_base)
        if op not in self.carryInfoSet.advance_amount.keys():
          self.alloc_map[op] = carry_count
          carry_count += 1
        elif self.carryInfoSet.advance_amount[op] == 1:
          self.alloc_map[op] = carry_count
          carry_count += 1
        else:
          # Advance_n op, carry_count does not change.
          self.alloc_map[op] = carry_count
          self.adv_n_map[op] = adv_n_count
          adv_n_count += 1
      # When processing the last operation, make sure that the "next" operation
      # appears to start a new pack.
      self.alloc_map[self.carryInfoSet.operation_count] = align(carry_count, self.field_count)
      for b in range(self.carryInfoSet.block_count):
         self.block_base[b] = self.alloc_map[self.carryInfoSet.block_first_op[b]]

    def GenerateCarryDecls(self):
        return "  ubitblock %s [%i];\n" % (self.carryPackVar, self.totalPack_count)
    def GenerateInitializations(self):
        v = self.carryPackVar
        inits = ""
        for i in range(0, self.totalPack_count):
          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            ub = posn/self.field_count
            rp = posn%self.field_count
            inits += "%s[%i]._%i[%i] = 1;\n" % (self.carryPackVar, ub, self.fw, rp)
        return inits
    def GenerateStreamFunctionDecls(self):
        f = self.field_count
        s = 1
        decls = []
        while f > 0:
          decls += [self.cg_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        return BitBlock_decls_from_vars(decls)

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        return make_call("convert", [self.carry_pack_index(self.fw, ub, rp)])
    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        # Save the carry in the carry temp variable and then merge
        # pending carry temps as far as possible.
        assigs = [make_assign(self.temp_prefix + repr(rp), carry_out_expr)]
        assigs += self.gen_merges(rp, rp)
        # Only generate an actual store for the last carryout in a pack.
        next_op = operation_no + 1
        while self.adv_n_map.has_key(next_op): next_op += 1
        next_posn = self.alloc_map[next_op] - self.carry_offset
        skip = next_posn - posn - 1
        if skip > 0:
          assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip)
        if next_posn % self.field_count == 0:
          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
          storable_carry_in_form = make_call(shift_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(make_assign(self.carry_pack_full(ub, mode = ast.Store()), storable_carry_in_form))
        return assigs
    def GenerateAdvanceInAccess(self, operation_no):
        return self.carry_pack_full(self.carryPack_count + self.adv_n_map[operation_no])
    def GenerateAdvanceOutStore(self, operation_no, adv_out_expr):
        return [ast.Assign([self.carry_pack_full(self.carryPack_count + self.adv_n_map[operation_no], mode=ast.Store())],
                           make_call("bitblock::srli<64>", [adv_out_expr]))]
    def GenerateTestAll(self, instance_name):
        if self.totalPack_count == 0: return ast.Num(0)
        else:
            v = make_att(instance_name, self.carryPackVar)
            t = self.carry_pack_full(0, v)
            for i in range(1, self.totalPack_count):
              t2 = self.carry_pack_full(i, v)
              t = make_call('simd_or', [t, t2])
            return make_call('bitblock::any', [t])
    def GenerateTest(self, block_no, testExpr):
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        count = self.aligned_size[block_no]
        width = count * self.fw
        if count < self.field_count:
            t = self.carry_pack_index(width, ub, rp/count)
            return TestHelper_Integer_Or(testExpr, t)
        else:
            t = self.carry_pack_full(ub)
            for i in range(1, count/self.field_count):
              v2 = self.carry_pack_full(ub + i)
              t = make_call('simd_or', [t, v2])
            return TestHelper_Bitblock_Or(testExpr, t)
    def GenerateCarryIfTest(self, block_no, ifTest):
        return self.GenerateTest(block_no, ifTest)

    def GenerateCarryElseFinalization(self, block_no):
        # if the block consists of full carry packs, then
        # no action need be taken: the corresponding carry-in packs
        # must already be zero, or the then branch would have been taken.
        count = self.aligned_size[block_no]
        if count % self.field_count == 0: return []
        # The block has half a carry-pack or less.
        assigs = []
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn / self.field_count
        rp = posn % self.field_count
        next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no]
        end_pos = (self.alloc_map[next_op]  - self.carry_offset - 1) % self.field_count
        assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1)
        if (end_pos + 1) % self.field_count == 0:
          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
          storable_carry_in_form = make_call(shift_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(make_assign(self.carry_pack_full(ub, mode = ast.Store()), storable_carry_in_form))
        return assigs

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        if count >= self.field_count:
          ub_count = count / self.field_count
          decls = [make_callStmt('ubitblock_declare', [mk_var('sub' + self.carryPackVar), ast.Num(ub_count)])]
          count = self.field_count
        else: decls = []
        # Generate carry pack temps.
        f = count
        s = 1
        temps = []
        while f > 0:
          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        #return BitBlock_decls_from_vars(decls)
        return decls + [make_callStmt('BitBlock_declare', [mk_var(t)]) for t in temps]

    def GenerateCarryWhileTest(self, block_no, testExpr):
        return self.GenerateTest(block_no, testExpr)

    def EnterLocalWhileBlock(self, operation_offset):
        self.carryPackVar = "sub" + self.carryPackVar
        self.temp_prefix = "sub" + self.temp_prefix
        self.carry_offset = self.alloc_map[operation_offset]

    def ExitLocalWhileBlock(self):
        self.carryPackVar = self.carryPackVar[3:]
        self.temp_prefix = self.temp_prefix[3:]
        self.carry_offset = 0

    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.block_base[block_no]
        ub = posn/self.field_count
        rp = posn%self.field_count
        count = self.aligned_size[block_no]
        if count < self.field_count:
          v0 = self.cg_temp(rp + count - 1, rp)
          lv0 = self.local_temp(count - 1, 0)
          return [make_assign(v0, make_call('simd_or', [mk_var(v0), mk_var(lv0)]))]
        n = (count+self.field_count-1)/self.field_count
        assigs = []
        for i in range(n):
          assigs.append(make_assign(self.carry_pack_full(ub + i, mode = ast.Store()), make_call('simd_or', [self.carry_pack_full(ub + i), self.local_pack_full(i)])))
        return assigs
    def GenerateStreamFunctionFinalization(self):
        return []

#
#  A version of HMCPS_CCGO eliminating use of "convert"
#
class HMCPS_CCGO2(HMCPS_CCGO):


    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        #return make_call("convert", [self.carry_pack_index(self.fw, ub, rp)])
        if rp == 0: e = self.carry_pack_full(ub)
        else: e = make_call("mvmd<%i>::srli<%i>" %(self.fw, rp), [self.carry_pack_full(ub)])
        if rp == self.field_count - 1:
          return e
        else: return make_call('simd_and', [e, mk_var("simd_const_1")])

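    #
    # For illustration (assuming fw = 8): the carry at pack position
    # (ub, rp) = (1, 5) is now read as
    # simd_and(mvmd<8>::srli<5>(carryG[1]._128), simd_const_1)
    # instead of convert(carryG[1]._8[5]).
    #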
#
#  Eliminating ubitblock
#
class HMCPS_CCGO3(HMCPS_CCGO2):

    def carry_pack_full(self, ub, v = None, mode = ast.Load()):
       if v == None: v = self.carryPackVar
       return make_index(v, ub, mode)

    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
       return make_call("mvmd<%i>::extract<%i>" % (fw, rp), [self.carry_pack_full(ub)])

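    #
    # For illustration (continuing the byte-carry example above, fw = 8): with
    # plain BitBlock packs, the pack field that HMCPS_CCGO read as
    # carryG[0]._32[2] is instead obtained as mvmd<32>::extract<2>(carryG[0]).
    #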
    def GenerateCarryDecls(self):
        return "  BitBlock %s [%i];\n" % (self.carryPackVar, self.totalPack_count)

    def GenerateInitializations(self):
        v = self.carryPackVar
        inits = ""
        for i in range(0, self.totalPack_count):
          inits += "%s[%i] = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            ub = posn/self.field_count
            rp = posn%self.field_count
            v = "%s[%i]" % (self.carryPackVar, ub)
            inits += "%s = simd_or(%s, mvmd<%i>::slli<%i>(simd_const_1)) ;\n" % (v, v, self.fw, rp)
        return inits

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        if count >= self.field_count:
          ub_count = count / self.field_count
          decls = [make_callStmt('BitBlock_declare', [self.local_pack_full(ub_count)])]
          decls += [make_assign(self.local_pack_full(i, ast.Store()), make_zero(self.fw)) for i in range(ub_count)]
          count = self.field_count
        else: decls = []
        # Generate carry pack temps.
        f = count
        s = 1
        temps = []
        while f > 0:
          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        #return BitBlock_decls_from_vars(decls)
        return decls + [make_callStmt('BitBlock_declare', [mk_var(t)]) for t in temps]

#
#  A version of HMCPS_CCGO with bit packing using hsimd::signmask
#
class HMCPS_CCGO_BitPack(HMCPS_CCGO):

    def allocate_all(self):
        self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet, min_block_size=8)
        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.adv_n_map = {}
        self.block_base = {}
        self.allocate_ops()
        # carry_offset is used within the inner body of while loops to access local carries.
        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
        self.carry_offset = 0

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        pk = posn/self.BLOCK_SIZE
        rp = posn%self.BLOCK_SIZE
        if rp == 0: e = self.carry_pack_full(pk)
        elif rp < self.BLOCK_SIZE/2: e = make_call("simd<%i>::srli<%i>" %(self.BLOCK_SIZE/2, rp), [self.carry_pack_full(pk)])
        else: e = make_call("bitblock::srli<%i>" %(rp), [self.carry_pack_full(pk)])
        if rp == self.BLOCK_SIZE - 1:
          return e
        else: return make_call('simd_and', [e, mk_var("simd_const_1")])

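    #
    # For illustration (assuming BLOCK_SIZE = 128): the carry stored at bit
    # position 5 of pack 0 is read as
    # simd_and(simd<64>::srli<5>(carryG[0]._128), simd_const_1), while a carry
    # at bit position 70 uses the full-register shift bitblock::srli<70>.
    #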

    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        rp = posn%self.field_count
        # Save the carry in the carry temp variable and then merge
        # pending carry temps as far as possible.
        assigs = [make_assign(self.temp_prefix + repr(rp), carry_out_expr)]
        assigs += self.gen_merges(rp, rp)
        # Only generate an actual store for the last carryout in a pack.
        next_op = operation_no + 1
        while self.adv_n_map.has_key(next_op): next_op += 1
        next_posn = self.alloc_map[next_op] - self.carry_offset
        skip = next_posn - posn - 1
        if skip > 0:
          assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip)
        #print (posn, skip)
        if next_posn % self.field_count == 0:
          pk = posn/self.BLOCK_SIZE
          fd = (posn%self.BLOCK_SIZE)/self.field_count
          mask_op = "hsimd<%i>::signmask" % (self.fw)
          storable_carry_in_form = make_call(mask_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(make_assign(self.carry_pack_index(self.field_count, pk, fd, mode = ast.Store()), storable_carry_in_form))
        return assigs


    def GenerateTest(self, block_no, testExpr):
        int_size = self.BLOCK_SIZE/2
        posn = self.block_base[block_no] - self.carry_offset
        pk = posn/self.BLOCK_SIZE
        fd = (posn%self.BLOCK_SIZE)/int_size
        rp = posn%int_size
        sz = self.aligned_size[block_no]
        if sz in [8, 16, 32, 64] and align(posn, sz) == posn:
            fd = (posn%self.BLOCK_SIZE)/sz
            t = self.carry_pack_index(sz, pk, fd)
            return TestHelper_Integer_Or(testExpr, t)
        elif sz == self.BLOCK_SIZE and align(posn, sz) == posn:
            fd = (posn%self.BLOCK_SIZE)/sz
            t = self.carry_pack_full(pk)
            return TestHelper_Bitblock_Or(testExpr, t)
        elif rp + sz <= int_size:
            e = self.carry_pack_index(int_size, pk, fd)
            t = ast.BinOp(e, ast.BitAnd(), ast.Num(((1<<sz) - 1)<<rp))
            return TestHelper_Integer_Or(testExpr, t)
        else:
            e = self.carry_pack_index(int_size, pk, fd)
            t = ast.BinOp(e, ast.BitAnd(), ast.Num(((1<<(int_size-rp)) - 1)<<rp))
            sz -= (int_size-rp)
            posn += (int_size-rp)
            pk = posn/self.BLOCK_SIZE
            fd = (posn%self.BLOCK_SIZE)/int_size
            while sz >= int_size:
              t = ast.BinOp(t, ast.BitOr(), self.carry_pack_index(int_size, pk, fd))
              sz -= int_size
              posn += int_size
              pk = posn/self.BLOCK_SIZE
              fd = (posn%self.BLOCK_SIZE)/int_size
            if sz > 0:
              e = self.carry_pack_index(int_size, pk, fd)
              t = ast.BinOp(t, ast.BitOr(), ast.BinOp(e, ast.BitAnd(), ast.Num((1<<sz) -1)))
            return TestHelper_Integer_Or(testExpr, t)

    def GenerateInitializations(self):
        v = self.carryPackVar
        inits = ""
        for i in range(0, self.totalPack_count):
          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            pk = posn/self.BLOCK_SIZE
            fd = (posn%self.BLOCK_SIZE)/self.field_count
            rp = posn%self.BLOCK_SIZE
            inits += "%s[%i]._%i[%i] |= 1 << %i;\n" % (self.carryPackVar, pk, self.fw, fd, rp)
        return inits


    def GenerateCarryElseFinalization(self, block_no):
        # if the block consists of full carry packs, then
        # no action need be taken: the corresponding carry-in packs
        # must already be zero, or the then branch would have been taken.
        count = self.aligned_size[block_no]
        if count % self.field_count == 0: return []
        # The block has half a carry-pack or less.
        assigs = []
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn / self.field_count
        rp = posn % self.field_count
        next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no]
        end_pos = (self.alloc_map[next_op]  - self.carry_offset - 1) % self.field_count
        #print rp, next_op,self.alloc_map[next_op]
        #assigs = [make_assign(self.cg_temp(end_pos, rp), make_zero(self.fw))]
        assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1)
        if (end_pos + 1) % self.field_count == 0:
          pk = posn/self.BLOCK_SIZE
          fd = (posn%self.BLOCK_SIZE)/self.field_count
          mask_op = "hsimd<%i>::signmask" % (self.fw)
          storable_carry_in_form = make_call(mask_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(make_assign(self.carry_pack_index(self.field_count, pk, fd, mode = ast.Store()), storable_carry_in_form))
        return assigs

#
    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.block_base[block_no]
        sz = self.aligned_size[block_no]
        if sz < self.field_count:
          rp = posn%self.field_count
          v0 = self.cg_temp(rp + sz - 1, rp)
          lv0 = self.local_temp(sz - 1, 0)
          return [make_assign(v0, make_call('simd_or', [mk_var(v0), mk_var(lv0)]))]
        local_posn = 0
        pk = posn/self.BLOCK_SIZE
        assigs = []
        for i in range((sz + self.field_count -1)/self.field_count):
          pk = posn/self.BLOCK_SIZE
          fd = (posn%self.BLOCK_SIZE)/self.field_count
          local_pk = local_posn/self.BLOCK_SIZE
          local_fd = (local_posn%self.BLOCK_SIZE)/self.field_count
          v0 = self.carry_pack_index(self.field_count, pk, fd)
          lv0 = self.local_pack_index(self.field_count, local_pk, local_fd)
          assigs.append(make_assign(self.carry_pack_index(self.field_count, pk, fd, ast.Store()), ast.BinOp(v0, ast.BitOr(), lv0)))
          posn += self.field_count
          local_posn += self.field_count
        return assigs



class HMCPS_CCGO_BitPack2(HMCPS_CCGO_BitPack):

    def allocate_all(self):
        self.aligned_size = determine_aligned_block_sizes(self.BLOCK_SIZE, self.carryInfoSet, max_whiles_per_pack = 8, min_block_size = self.field_count)
        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.last_carry_map = {}
        self.adv_n_map = {}
        self.block_base = {}
        self.allocate_ops()
        # carry_offset is used within the inner body of while loops to access local carries.
        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
        self.carry_offset = 0

    def allocate_block_positions(self):
        # First allocate the base position of each block relative to its
        # parent block, such that the relative position is a multiple
        # of its aligned_size or the pack_size, whichever is smaller.
        rel_block_posns = [0 for b in range(self.carryInfoSet.block_count)]
        self.direct_carries = carryInfo.direct_block_carries(self.carryInfoSet)
        self.aligned_direct = [align(d, max(min(pow2ceil(d), self.BLOCK_SIZE), self.field_count)) for d in self.direct_carries]
        working_allocation_bitmap = [((1 << a) - 1) for a in self.aligned_direct]
        for b in range(1, self.carryInfoSet.block_count):
            prnt = self.carryInfoSet.parent_block[b]
            sz = self.aligned_size[b]
            sz_map = (1 << sz) - 1
            posn = 0
            while sz_map & working_allocation_bitmap[prnt] != 0:
                posn += sz
                sz_map <<= sz
            working_allocation_bitmap[prnt] |= sz_map
            rel_block_posns[b] = posn
        # Now compute absolute positions
        self.block_base[0] = 0
        for b in range(1, self.carryInfoSet.block_count):
            self.block_base[b] = self.block_base[self.carryInfoSet.parent_block[b]] + rel_block_posns[b]

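    #
    # For illustration (hypothetical sizes): a parent block with aligned_direct
    # of 4 starts with bitmap 0b1111; a child of aligned_size 4 is then placed
    # at relative position 4 (bits 4-7), and a later child of aligned_size 8 at
    # relative position 8 (bits 8-15), each scanning upward in steps of its own
    # size until a free slot is found.
    #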
#
#  Given the relative base positions of each block, allocate
#  its carries.
#
    def allocate_ops(self):
        self.allocate_block_positions()
        adv_n_count = 0
        carry_posn = [self.block_base[b] for b in range(self.carryInfoSet.block_count)]
        for op in range(self.carryInfoSet.operation_count):
            b = self.carryInfoSet.containing_block[op]
            self.alloc_map[op] = carry_posn[b]
            if op not in self.carryInfoSet.advance_amount.keys():
                carry_posn[b] += 1
                self.last_carry_map[b] = op
            elif self.carryInfoSet.advance_amount[op] == 1:
                carry_posn[b] += 1
                self.last_carry_map[b] = op
            else:
                self.adv_n_map[op] = adv_n_count
                adv_n_count += 1
        # When processing the last operation, make sure that the "next" operation
        # appears to start a new pack.
        self.alloc_map[self.carryInfoSet.operation_count] = self.aligned_size[0]

    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        add_decl = self.alloc_map[operation_no] - self.block_base[block_no] <= self.field_count
        rp = posn%self.field_count
        # Save the carry in the carry temp variable and then merge
        # pending carry temps as far as possible.
        v = mk_var(self.temp_prefix + repr(rp))
        assigs = []
        if add_decl: assigs.append(make_callStmt('BitBlock_declare', [v]))
        assigs.append(make_assign(v, carry_out_expr))
        assigs += self.gen_merges(rp, rp, add_decl)
        # Only generate an actual store for the last carryout in a pack.
        if operation_no == self.last_carry_map[block_no]:
          skip = self.block_base[block_no] + self.aligned_direct[block_no] - self.alloc_map[operation_no] - 1
          if skip > 0:
            assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip, add_decl)
          #print (posn, skip)
          pk = posn/self.BLOCK_SIZE
          mask_blk = (posn%self.BLOCK_SIZE)/self.field_count
          mask_op = "hsimd<%i>::signmask" % (self.fw)
          storable_carry_in_form = make_call(mask_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(make_assign(self.carry_pack_index(self.field_count, pk, mask_blk, mode = ast.Store()), storable_carry_in_form))
        return assigs

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        ub_count = (count + self.BLOCK_SIZE - 1)/ self.BLOCK_SIZE
        decls = [make_callStmt('ubitblock_declare', [mk_var("sub" + self.carryPackVar), ast.Num(ub_count)])]
        return decls

    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.block_base[block_no]
        pk = posn/self.BLOCK_SIZE
        count = self.aligned_size[block_no]
        assigs = []
        if count >= self.BLOCK_SIZE:
          for i in range(count/self.BLOCK_SIZE):
            assigs.append(make_assign(self.carry_pack_full(pk + i, mode = ast.Store()), make_call('simd_or', [self.carry_pack_full(pk + i), self.local_pack_full(i)])))

        else:
          rp = (posn%self.BLOCK_SIZE)/count
          expr = ast.BinOp(self.carry_pack_index(count, pk, rp), ast.BitOr(), self.local_pack_index(count, 0, 0))
          assigs.append(make_assign(self.carry_pack_index(count, pk, rp, mode = ast.Store()), expr))
        return assigs


    def GenerateCarryElseFinalization(self, block_no):  return []

    def GenerateStreamFunctionDecls(self): return ""