source: proto/Compiler/CCGO_HMCPS.py @ 3095

Last change on this file since 3095 was 3017, checked in by cameron, 6 years ago

Factor out AST helpers to mkast.py

File size: 34.3 KB
Rev  Line
[2700]1#
2# CCGO_HMCPS.py
3#
4# Carry Code Generator Object using Hierarchical Merging Carry Pack Strategy
5#
6# Robert D. Cameron
7# November 26, 2012
8# Licensed under Open Software License 3.0
9#
[3017]10import ast, mkast
[2802]11import carryInfo, CCGO
[2700]12
13#
14# Helper functions
15#
16def TestHelper_Bitblock_Or(testExpr, bitBlockExpr):
[2786]17    if isinstance(testExpr, ast.Call):
18      assert isinstance(testExpr.func, ast.Name)
19      assert testExpr.func.id == 'bitblock::any'
[3017]20      testExpr.args[0] = mkast.call('simd_or', [bitBlockExpr, testExpr.args[0]])
[2786]21      return testExpr
22    else:
[3017]23      return ast.BinOp(testExpr, ast.BitOr(), mkast.call('bitblock::any', [bitBlockExpr]))
[2700]24
25def TestHelper_Integer_Or(testExpr, intExpr):
26    return ast.BinOp(testExpr, ast.BitOr(), intExpr)
27
28
29#
30#
31# Carry Pack Assignment Strategy
32#
33# The hierarchical merging carry pack strategy packs carries
[2798]34# into packs of 2, 4, 8 and 16.   For example, to pack
[2700]35# 4 carries c0, c1, c2, and c3 into the 32-bit fields of
36# a 128-bit register, the following operations are used.
37#
38# c0 = pablo.SomeCarryGeneratingFn(...)
39# c1 = pablo.SomeCarryGeneratingFn(...)
40# c1_0 = esimd::mergeh<32>(c1, c0)
41# c2 = pablo.SomeCarryGeneratingFn(...)
42# c3 = pablo.SomeCarryGeneratingFn(...)
43# c3_2 = esimd::mergeh<32>(c3, c2)
44# c3_0 = esimd::mergeh<64>(c3_2, c1_0)
45#
46#
47# Packing operations are generated sequentially when
48# the appropriate individual carries or subpacks become
49# available.   
50#
51# Generate the packing operations, assuming that carry number carry_num
52# has just been generated.
53#
54
55def pow2ceil(n):
56   c = 1
57   while c < n: c *= 2 
58   return c
59
60def pow2floor(n):
61   c = 1
62   while c <= n: c *= 2 
63   return c/2
[2703]64
65def low_bit(n):
66   return n - (n & (n-1))
[2700]67   
68def align(n, align_base):
69  return ((n + align_base - 1) / align_base) * align_base
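
# Illustrative values for the helpers above (integer division, Python 2):
#   pow2ceil(5) == 8      pow2floor(5) == 4
#   low_bit(12) == 4      (12 = 0b1100; lowest set bit)
#   align(5, 4) == 8      (round 5 up to a multiple of 4)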
70
[2798]71def determine_aligned_block_sizes(pack_size, cis, max_whiles_per_pack = 1, min_block_size = 1):
[2700]72  aligned_size = {}
73  for i in range(cis.block_count): aligned_size[i] = 0
74  seen = []
75  for i in range(cis.block_count):
76    # Work backwards to process all child blocks before the parent
77    # so that the parent incorporates the updated child counts.
78    b = cis.block_count - i - 1
79    b_carries = 0
80    op = cis.block_first_op[b]
81    while op < cis.block_first_op[b] + cis.block_op_count[b]:
82      sb = cis.containing_block[op]
83      if sb == b:
84        if op not in cis.advance_amount.keys(): b_carries += 1
85        elif cis.advance_amount[op] == 1: b_carries += 1
86        op += 1
87      else: 
88        align_base = aligned_size[sb]
89        if align_base > pack_size: align_base = pack_size
90        b_carries = align(b_carries, align_base)
91        b_carries += aligned_size[sb]
92        op += cis.block_op_count[sb]
[2798]93    #
94    # Align to min block size
95    aligned_size[b] = align(b_carries, min_block_size)
96    # Force whiles to use full packs; this possibly can be relaxed.
97    if cis.whileblock[b]:
98      aligned_size[b] = align(aligned_size[b], pack_size/max_whiles_per_pack)
99    if aligned_size[b] > pack_size:
100      aligned_size[b] = align(aligned_size[b], pack_size)
[2700]101    else:
[2798]102      aligned_size[b] = pow2ceil(aligned_size[b])
[2700]103  return aligned_size
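
# For example, with pack_size = 16 a leaf block containing 5 unit-advance
# carries is rounded up to pow2ceil(5) = 8 aligned slots, while a block
# needing 20 carries exceeds the pack size and is rounded to align(20, 16) = 32.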
104 
105MAX_LINE_LENGTH = 80
106
107def BitBlock_decls_from_vars(varlist):
108  global MAX_LINE_LENGTH
109  decls =  ""
110  if not len(varlist) == 0:
111          decls = "             BitBlock"
112          pending = ""
113          linelgth = 10
114          for v in varlist:
115            if linelgth + len(v) + 2 <= MAX_LINE_LENGTH:
116              decls += pending + " " + v
117              linelgth += len(pending + v) + 1
118            else:
119              decls += ";\n             BitBlock " + v
120              linelgth = 11 + len(v)
121            pending = ","
122          decls += ";"
123  return decls
124 
[2701]125def block_contains(b0, b1, parent_block_map):
126  if b0 == b1: return True
127  elif b1 == 0: return False
128  else: return block_contains(b0, parent_block_map[b1], parent_block_map)
129 
[2700]130class HMCPS_CCGO(CCGO.CCGO):
[2799]131    def __init__(self, BLOCK_SIZE, fw, carryInfoSet, carryPackVarName='carryG', temp_prefix='__c'):
132        self.BLOCK_SIZE = BLOCK_SIZE
[2700]133        self.fw = fw
[2799]134        self.field_count = self.BLOCK_SIZE/fw
[2700]135        self.carryInfoSet = carryInfoSet
[2798]136        self.carryPackVar = carryPackVarName
[2700]137        self.temp_prefix = temp_prefix
[2799]138
139    def allocate_all(self):
140        self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet)
[2798]141        self.carryPack_count = (self.aligned_size[0] + self.field_count - 1) / self.field_count
[2799]142        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
[2700]143        self.alloc_map = {}
144        self.alloc_map[0] = 0
[2790]145        self.adv_n_map = {}
[2700]146        self.block_base = {}
147        self.allocate_ops()
[2701]148        # carry_offset is used within the inner body of while loops to access local carries.
[2798]149        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
[2701]150        self.carry_offset = 0
[2707]151#
152# Carry Storage/Access
153#
154# Carries are stored in one or more ubitblocks as byte values.
155# For each block, the carry count is rounded up to the nearest power of 2 ceiling P,
156# so that the carry test for that block is accessible as a single value of P bytes.
157# Packs of 1, 2, 4 or 8 carries are respectively represented
158# as one or more _8, _16, _32 or _64 values.  (Members of ubitblock union.)
159#
160#
161# Allocation phase determines the ubitblock_no and count for each block.
162
[2707]163#  carry-in access is a byte load:  carryG[packno]._8[offset]
164#  carry-out store goes to a local pack variable until the final byte of a pack is reached
165#
[2707]166#  if-test: let P be the pack size in {1,2,4,8,...}
167#    if P <= 8, use an integer test expression cG[packno]._<P*8>[block_offset]
168#
169#  while test: similar
[2798]170#    local while decl: use a local copy of the carry pack
[2707]171#    while finalization carry combine: round up and |= into the structure
172#
[2787]173    def carry_pack_full(self, ub, v = None, mode = ast.Load()):
[2798]174       if v == None: v = self.carryPackVar
[3017]175       return mkast.att(mkast.index(v, ub), '_128', mode)
[2707]176
177    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
[3017]178       return mkast.index(mkast.att(mkast.index(self.carryPackVar, ub), '_%i' % fw), rp, mode)
[2707]179
180    def local_pack_full(self, ub, mode = ast.Load()):
[2798]181       return self.carry_pack_full(ub, "sub" + self.carryPackVar, mode)
[2707]182
[2799]183    def local_pack_index(self, fw, ub, rp, mode = ast.Load()):
184       v = "sub" + self.carryPackVar
[3017]185       return mkast.index(mkast.att(mkast.index(v, ub), '_%i' % fw), rp, mode)
[2799]186 
[2707]187
[2703]188    def cg_temp(self, hi_carry, lo_carry = None):
189      if lo_carry == None or hi_carry == lo_carry: return "%s%i" % (self.temp_prefix, hi_carry)
190      else: return "%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)
191   
192    def local_temp(self, hi_carry, lo_carry = None):
193      if lo_carry == None or hi_carry == lo_carry: return "sub%s%i" % (self.temp_prefix, hi_carry)
[2705]194      else: return "sub%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)
[2703]195   
[2802]196    def gen_merges(self, carry_last, carry_base, add_decl = False):
[2703]197      size = carry_last - carry_base + 1
198      if carry_last & size: 
[3017]199        v1 = mkast.var(self.cg_temp(carry_last, carry_base))
200        v0 = mkast.var(self.cg_temp(carry_last - size, carry_base - size))
201        v2 = mkast.var(self.cg_temp(carry_last, carry_base - size), ast.Store())
[2802]202        assigs = []
[3017]203        if add_decl: assigs.append(mkast.callStmt('BitBlock_declare', [v2]))
204        assigs.append(mkast.assign(v2, mkast.mergeh(self.fw * size, v1, v0)))
[2802]205        return assigs + self.gen_merges(carry_last, carry_base - size, add_decl)
[2703]206      else: return []
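    # For example, after carry 3 of a pack is saved (gen_merges(3, 3), fw = 32),
    # two merges are emitted:
    #   __c3_2 = esimd::mergeh<32>(__c3, __c2)
    #   __c3_0 = esimd::mergeh<64>(__c3_2, __c1_0)
    # No merges follow even-numbered carries, since carry_last & size == 0.
    # (mkast.mergeh is assumed to render as esimd::mergeh<n>, matching the
    # strategy example at the top of this file.)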
207
208    #
209    #  Given that carry_num carries have been generated and packed,
210    #  add zero_count additional carry zero values and pack.
211    #  Use shifts to introduce multiple zeroes, where possible.
212    #
[2802]213    def gen_multiple_carry_zero_then_pack(self, carry_num, zero_count, add_decl = False):
214      stmts = []
215      if zero_count == 0: return stmts
[2703]216      pending_carry_pack_size = low_bit(carry_num)
217      pending_carry_base = carry_num - pending_carry_pack_size
218      # We may be able to fill zeroes by shifting.
219      # But the shift is limited by any further pending carry pack and
220      # the constraint that the result must produce a well-formed pack
221      # having a power-of-2 entries.
222      #
223      final_num = carry_num + zero_count
224      pack_size2 = low_bit(pending_carry_base)
225      if pending_carry_base == 0:
226        shift = pow2floor(final_num) - pending_carry_pack_size
227      else:
228        shift = min(low_bit(pending_carry_base), low_bit(final_num)) - pending_carry_pack_size
229      if pending_carry_pack_size == 0 or shift == 0:
230        # There is either no pending pack or we are not generating enough
231        # carry zeroes to combine into the pending pack, so we can only add new
232        # packs.
233        #
[2802]234        if zero_count == 1: 
[3017]235          v = mkast.var(self.cg_temp(carry_num))
236          if add_decl: stmts.append(mkast.callStmt('BitBlock_declare', [v]))
237          stmts.append(mkast.assign(v, mkast.zero(self.fw)))
[2802]238          return stmts
[2703]239        else: 
240          zero_count_floor = pow2floor(zero_count)
241          hi_num = carry_num + zero_count_floor
[3017]242          v = mkast.var(self.cg_temp(hi_num - 1, carry_num))
243          if add_decl: stmts.append(mkast.callStmt('BitBlock_declare', [v]))
244          stmts.append(mkast.assign(v, mkast.zero(self.fw)))
[2703]245          remaining_zeroes = zero_count - zero_count_floor
[2802]246          return stmts + self.gen_multiple_carry_zero_then_pack(hi_num, remaining_zeroes, add_decl) 
[2703]247      #
[3017]248      shift_result = mkast.var(self.cg_temp(carry_num + shift - 1, pending_carry_base))
[2703]249      pending = self.cg_temp(carry_num - 1, pending_carry_base)
[3017]250      #a1 = mkast.assign(shift_result, mkast.call('bitblock::srli<%i>' % (self.fw * shift), [mkast.var(pending)]))
251      if add_decl: stmts.append(mkast.callStmt('BitBlock_declare', [shift_result]))
252      stmts.append(mkast.assign(shift_result, mkast.call('mvmd<%i>::srli<%i>' % (self.fw, shift), [mkast.var(pending)])))
[2703]253      # Do any necessary merges
[2802]254      m = self.gen_merges(carry_num + shift - 1,  pending_carry_base, add_decl)
255      return stmts + m + self.gen_multiple_carry_zero_then_pack(carry_num + shift, zero_count - shift, add_decl)
[2703]256
257
[2700]258    def allocate_ops(self):
259      carry_count = 0
[2790]260      adv_n_count = 0
[2700]261      for op in range(self.carryInfoSet.operation_count):
[2701]262        b = self.carryInfoSet.containing_block[op]
263        if op != 0: 
264          # If we've just left a block, ensure that we are aligned.
265          b_last = self.carryInfoSet.containing_block[op-1]
266          if not block_contains(b_last, b, self.carryInfoSet.parent_block):
267            # find the max-sized block just exited.
268            while not block_contains(self.carryInfoSet.parent_block[b_last], b, self.carryInfoSet.parent_block):
269              b_last = self.carryInfoSet.parent_block[b_last]
270            align_base = self.aligned_size[b_last]
[2700]271            if align_base > self.field_count: align_base = self.field_count
[2701]272            carry_count = align(carry_count, align_base)         
273        if self.carryInfoSet.block_first_op[b] == op:
274          # If we're just entering a block, ensure that we are aligned.
275          align_base = self.aligned_size[b]
276          if align_base > self.field_count: align_base = self.field_count
277          carry_count = align(carry_count, align_base)
[2700]278        if op not in self.carryInfoSet.advance_amount.keys():
279          self.alloc_map[op] = carry_count
280          carry_count += 1
281        elif self.carryInfoSet.advance_amount[op] == 1: 
282          self.alloc_map[op] = carry_count
283          carry_count += 1
[2790]284        else:
285          # Advance_n op, carry_count does not change.
286          self.alloc_map[op] = carry_count
287          self.adv_n_map[op] = adv_n_count
288          adv_n_count += 1
[2701]289      # When processing the last operation, make sure that the "next" operation
290      # appears to start a new pack.
291      self.alloc_map[self.carryInfoSet.operation_count] = align(carry_count, self.field_count)
[2718]292      for b in range(self.carryInfoSet.block_count): 
293         self.block_base[b] = self.alloc_map[self.carryInfoSet.block_first_op[b]]
[2700]294     
295    def GenerateCarryDecls(self):
[2798]296        return "  ubitblock %s [%i];\n" % (self.carryPackVar, self.totalPack_count)
[2700]297    def GenerateInitializations(self):
[2798]298        v = self.carryPackVar       
[2700]299        inits = ""
[2798]300        for i in range(0, self.totalPack_count):
[2700]301          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
302        for op_no in range(self.carryInfoSet.block_op_count[0]):
303          if op_no in self.carryInfoSet.init_one_list: 
304            posn = self.alloc_map[op_no]
305            ub = posn/self.field_count
306            rp = posn%self.field_count
[2798]307            inits += "%s[%i]._%i[%i] = 1;\n" % (self.carryPackVar, ub, self.fw, rp)
[2700]308        return inits
309    def GenerateStreamFunctionDecls(self):
310        f = self.field_count
[2703]311        s = 1
312        decls = []
313        while f > 0:
314          decls += [self.cg_temp(s*(i+1)-1, s*i) for i in range(f)]
[2700]315          f = f/2
[2703]316          s = s * 2
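        # With field_count = 4 (fw = 32) this declares the temps
        #   __c0 __c1 __c2 __c3  __c1_0 __c3_2  __c3_0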
[2700]317        return BitBlock_decls_from_vars(decls)
318
319    def GenerateCarryInAccess(self, operation_no):
320        block_no = self.carryInfoSet.containing_block[operation_no]
[2701]321        posn = self.alloc_map[operation_no] - self.carry_offset
[2700]322        ub = posn/self.field_count
323        rp = posn%self.field_count
[3017]324        return mkast.call("convert", [self.carry_pack_index(self.fw, ub, rp)])
[2700]325    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
326        block_no = self.carryInfoSet.containing_block[operation_no]
[2701]327        posn = self.alloc_map[operation_no] - self.carry_offset
[2700]328        ub = posn/self.field_count
329        rp = posn%self.field_count
[2791]330        # Save the carry in the carry temp variable and then merge
331        # pending carry temps as far as possible.
[3017]332        assigs = [mkast.assign(self.temp_prefix + repr(rp), carry_out_expr)] 
[2703]333        assigs += self.gen_merges(rp, rp)
[2791]334        # Only generate an actual store for the last carryout in a pack.
335        next_op = operation_no + 1
336        while self.adv_n_map.has_key(next_op): next_op += 1
337        next_posn = self.alloc_map[next_op] - self.carry_offset
[2701]338        skip = next_posn - posn - 1
[2700]339        if skip > 0: 
[2703]340          assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip)
[2701]341        if next_posn % self.field_count == 0:
[2700]342          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
[3017]343          storable_carry_in_form = mkast.call(shift_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
344          assigs.append(mkast.assign(self.carry_pack_full(ub, mode = ast.Store()), storable_carry_in_form))
[2700]345        return assigs
[2790]346    def GenerateAdvanceInAccess(self, operation_no):
[2798]347        return self.carry_pack_full(self.carryPack_count + self.adv_n_map[operation_no])
[2790]348    def GenerateAdvanceOutStore(self, operation_no, adv_out_expr):
[2798]349        return [ast.Assign([self.carry_pack_full(self.carryPack_count + self.adv_n_map[operation_no], mode=ast.Store())], 
[3017]350                           mkast.call("bitblock::srli<64>", [adv_out_expr]))]
[2786]351    def GenerateTestAll(self, instance_name):
[2798]352        if self.totalPack_count == 0: return ast.Num(0)
[2786]353        else:
[3017]354            v = mkast.att(instance_name, self.carryPackVar)
[2787]355            t = self.carry_pack_full(0, v)
[2798]356            for i in range(1, self.totalPack_count): 
[2787]357              t2 = self.carry_pack_full(i, v)
[3017]358              t = mkast.call('simd_or', [t, t2])
359            return mkast.call('bitblock::any', [t])
[2700]360    def GenerateTest(self, block_no, testExpr):
[2701]361        posn = self.block_base[block_no] - self.carry_offset
[2700]362        ub = posn/self.field_count
363        rp = posn%self.field_count
364        count = self.aligned_size[block_no] 
365        width = count * self.fw
[2708]366        if count < self.field_count:
[2707]367            t = self.carry_pack_index(width, ub, rp/count)
[2700]368            return TestHelper_Integer_Or(testExpr, t)
369        else:
[2707]370            t = self.carry_pack_full(ub)
[2700]371            for i in range(1, count/self.field_count): 
[2707]372              v2 = self.carry_pack_full(ub + i)
[3017]373              t = mkast.call('simd_or', [t, v2])
[2700]374            return TestHelper_Bitblock_Or(testExpr, t)
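    # Illustrative generated tests (assuming fw = 8, field_count = 16; mkast
    # rendering assumed):
    #   count = 4  (sub-pack):   testExpr | carryG[ub]._32[rp/4]
    #   count = 16 (full pack):  testExpr | bitblock::any(carryG[ub]._128)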
375    def GenerateCarryIfTest(self, block_no, ifTest):
376        return self.GenerateTest(block_no, ifTest)
377
378    def GenerateCarryElseFinalization(self, block_no):
379        # if the block consists of full carry packs, then
380        # no action need be taken: the corresponding carry-in packs
381        # must already be zero, or the then branch would have been taken.
382        count = self.aligned_size[block_no]
383        if count % self.field_count == 0: return []
[2701]384        # The block has half a carry-pack or less.
[2700]385        assigs = []
[2701]386        posn = self.block_base[block_no] - self.carry_offset
387        ub = posn / self.field_count
388        rp = posn % self.field_count
389        next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no]
[2703]390        end_pos = (self.alloc_map[next_op]  - self.carry_offset - 1) % self.field_count
391        assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1)
392        if (end_pos + 1) % self.field_count == 0:
393          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
[3017]394          storable_carry_in_form = mkast.call(shift_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
395          assigs.append(mkast.assign(self.carry_pack_full(ub, mode = ast.Store()), storable_carry_in_form))
[2700]396        return assigs
397
398    def GenerateLocalDeclare(self, block_no):
399        if self.carryInfoSet.block_op_count[block_no] == 0: return []
[2701]400        count = self.aligned_size[block_no] 
401        if count >= self.field_count:
402          ub_count = count / self.field_count
[3017]403          decls = [mkast.callStmt('ubitblock_declare', [mkast.var('sub' + self.carryPackVar), ast.Num(ub_count)])]
[2701]404          count = self.field_count
405        else: decls = []
406        # Generate carry pack temps.
[2703]407        f = count
408        s = 1
409        temps = []
410        while f > 0:
411          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
[2700]412          f = f/2
[2703]413          s = s * 2
[2700]414        #return BitBlock_decls_from_vars(decls)
[3017]415        return decls + [mkast.callStmt('BitBlock_declare', [mkast.var(t)]) for t in temps]
[2700]416   
417    def GenerateCarryWhileTest(self, block_no, testExpr):
418        return self.GenerateTest(block_no, testExpr)
419
420    def EnterLocalWhileBlock(self, operation_offset): 
[2798]421        self.carryPackVar = "sub" + self.carryPackVar
[2701]422        self.temp_prefix = "sub" + self.temp_prefix
423        self.carry_offset = self.alloc_map[operation_offset]
[2799]424
[2700]425    def ExitLocalWhileBlock(self): 
[2798]426        self.carryPackVar = self.carryPackVar[3:]
[2701]427        self.temp_prefix = self.temp_prefix[3:]
428        self.carry_offset = 0
[2700]429       
430    def GenerateCarryWhileFinalization(self, block_no):
[2701]431        posn = self.block_base[block_no]
[2700]432        ub = posn/self.field_count
[2701]433        rp = posn%self.field_count
[2802]434        count = self.aligned_size[block_no]
[2701]435        if count < self.field_count:
[2703]436          v0 = self.cg_temp(rp + count - 1, rp)
437          lv0 = self.local_temp(count - 1, 0)
[3017]438          return [mkast.assign(v0, mkast.call('simd_or', [mkast.var(v0), mkast.var(lv0)]))]
[2700]439        n = (count+self.field_count-1)/self.field_count
440        assigs = []
441        for i in range(n):
[3017]442          assigs.append(mkast.assign(self.carry_pack_full(ub + i, mode = ast.Store()), mkast.call('simd_or', [self.carry_pack_full(ub + i), self.local_pack_full(i)])))
[2700]443        return assigs
444    def GenerateStreamFunctionFinalization(self):
[2701]445        return []
[2700]446
[2708]447#
[2795]448#  A version of HMCPS_CCGO eliminating use of "convert"
[2708]449#
450class HMCPS_CCGO2(HMCPS_CCGO):
451
[2795]452
453    def GenerateCarryInAccess(self, operation_no):
454        block_no = self.carryInfoSet.containing_block[operation_no]
455        posn = self.alloc_map[operation_no] - self.carry_offset
456        ub = posn/self.field_count
457        rp = posn%self.field_count
[3017]458        #return mkast.call("convert", [self.carry_pack_index(self.fw, ub, rp)])
[2795]459        if rp == 0: e = self.carry_pack_full(ub)
[3017]460        else: e = mkast.call("mvmd<%i>::srli<%i>" %(self.fw, rp), [self.carry_pack_full(ub)])
[2795]461        if rp == self.field_count - 1:
462          return e
[3017]463        else: return mkast.call('simd_and', [e, mkast.var("simd_const_1")])
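    # Illustrative convert-free accesses (assuming fw = 8, field_count = 16):
    #   rp = 0:   simd_and(carryG[ub]._128, simd_const_1)
    #   rp = 5:   simd_and(mvmd<8>::srli<5>(carryG[ub]._128), simd_const_1)
    #   rp = 15:  mvmd<8>::srli<15>(carryG[ub]._128)   (last field; no mask needed)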
[2795]464
465#
466#  Eliminating ubitblock
467#
468class HMCPS_CCGO3(HMCPS_CCGO2):
469
[2787]470    def carry_pack_full(self, ub, v = None, mode = ast.Load()):
[2798]471       if v == None: v = self.carryPackVar
[3017]472       return mkast.index(v, ub, mode)
[2708]473
474    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
[3017]475       return mkast.call("mvmd<%i>::extract<%i>" % (fw, rp), [self.carry_pack_full(ub)])
[2708]476
477    def GenerateCarryDecls(self):
[2798]478        return "  BitBlock %s [%i];\n" % (self.carryPackVar, self.totalPack_count)
[2708]479
480    def GenerateInitializations(self):
[2798]481        v = self.carryPackVar       
[2795]482        inits = ""
[2798]483        for i in range(0, self.totalPack_count):
[2708]484          inits += "%s[%i] = simd<%i>::constant<0>();\n" % (v, i, self.fw)
485        for op_no in range(self.carryInfoSet.block_op_count[0]):
486          if op_no in self.carryInfoSet.init_one_list: 
487            posn = self.alloc_map[op_no]
488            ub = posn/self.field_count
489            rp = posn%self.field_count
[2798]490            v = "%s[%i]" % (self.carryPackVar, ub)
[2708]491            inits += "%s = simd_or(%s, mvmd<%i>::slli<%i>(simd_const_1)) ;\n" % (v, v, self.fw, rp)
492        return inits
493
494    def GenerateLocalDeclare(self, block_no):
495        if self.carryInfoSet.block_op_count[block_no] == 0: return []
496        count = self.aligned_size[block_no] 
497        if count >= self.field_count:
498          ub_count = count / self.field_count
[3017]499          decls = [mkast.callStmt('BitBlock_declare', [self.local_pack_full(ub_count)])]
500          decls += [mkast.assign(self.local_pack_full(i, ast.Store()), mkast.zero(self.fw)) for i in range(ub_count)]
[2708]501          count = self.field_count
502        else: decls = []
503        # Generate carry pack temps.
504        f = count
505        s = 1
506        temps = []
507        while f > 0:
508          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
509          f = f/2
510          s = s * 2
511        #return BitBlock_decls_from_vars(decls)
[3017]512        return decls + [mkast.callStmt('BitBlock_declare', [mkast.var(t)]) for t in temps]
[2795]513
[2800]514#
515#  A version of HMCPS_CCGO with bit packing using hsimd:signmask
516#
517class HMCPS_CCGO_BitPack(HMCPS_CCGO):
518
519    def allocate_all(self):
[2802]520        self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet, min_block_size=8)
[2800]521        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
522        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
523        self.alloc_map = {}
524        self.alloc_map[0] = 0
525        self.adv_n_map = {}
526        self.block_base = {}
527        self.allocate_ops()
528        # carry_offset is used within the inner body of while loops to access local carries.
529        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
530        self.carry_offset = 0
531
532    def GenerateCarryInAccess(self, operation_no):
533        block_no = self.carryInfoSet.containing_block[operation_no]
534        posn = self.alloc_map[operation_no] - self.carry_offset
535        pk = posn/self.BLOCK_SIZE
536        rp = posn%self.BLOCK_SIZE
537        if rp == 0: e = self.carry_pack_full(pk)
[3017]538        elif rp < self.BLOCK_SIZE/2: e = mkast.call("simd<%i>::srli<%i>" %(self.BLOCK_SIZE/2, rp), [self.carry_pack_full(pk)])
539        else: e = mkast.call("bitblock::srli<%i>" %(rp), [self.carry_pack_full(pk)])
[2800]540        if rp == self.BLOCK_SIZE - 1:
541          return e
[3017]542        else: return mkast.call('simd_and', [e, mkast.var("simd_const_1")])
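    # Illustrative bit-packed accesses (BLOCK_SIZE = 128):
    #   rp = 0:   simd_and(carryG[pk]._128, simd_const_1)
    #   rp = 20:  simd_and(simd<64>::srli<20>(carryG[pk]._128), simd_const_1)
    #   rp = 90:  simd_and(bitblock::srli<90>(carryG[pk]._128), simd_const_1)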
[2800]543
544
545    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
546        block_no = self.carryInfoSet.containing_block[operation_no]
547        posn = self.alloc_map[operation_no] - self.carry_offset
548        rp = posn%self.field_count
549        # Save the carry in the carry temp variable and then merge
550        # pending carry temps as far as possible.
[3017]551        assigs = [mkast.assign(self.temp_prefix + repr(rp), carry_out_expr)] 
[2800]552        assigs += self.gen_merges(rp, rp)
553        # Only generate an actual store for the last carryout in a pack.
554        next_op = operation_no + 1
555        while self.adv_n_map.has_key(next_op): next_op += 1
556        next_posn = self.alloc_map[next_op] - self.carry_offset
557        skip = next_posn - posn - 1
558        if skip > 0: 
559          assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip)
560        #print (posn, skip)
561        if next_posn % self.field_count == 0:
562          pk = posn/self.BLOCK_SIZE
563          fd = (posn%self.BLOCK_SIZE)/self.field_count
564          mask_op = "hsimd<%i>::signmask" % (self.fw)
[3017]565          storable_carry_in_form = mkast.call(mask_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
566          assigs.append(mkast.assign(self.carry_pack_index(self.field_count, pk, fd, mode = ast.Store()), storable_carry_in_form))
[2800]567        return assigs
568
569
570    def GenerateTest(self, block_no, testExpr):
571        int_size = self.BLOCK_SIZE/2
572        posn = self.block_base[block_no] - self.carry_offset
573        pk = posn/self.BLOCK_SIZE
574        fd = (posn%self.BLOCK_SIZE)/int_size
575        rp = posn%int_size
576        sz = self.aligned_size[block_no]
577        if sz in [8, 16, 32, 64] and align(posn, sz) == posn:
578            fd = (posn%self.BLOCK_SIZE)/sz
579            t = self.carry_pack_index(sz, pk, fd)
580            return TestHelper_Integer_Or(testExpr, t)
[2802]581        elif sz == self.BLOCK_SIZE and align(posn, sz) == posn:
582            fd = (posn%self.BLOCK_SIZE)/sz
583            t = self.carry_pack_full(pk)
584            return TestHelper_Bitblock_Or(testExpr, t)
[2800]585        elif rp + sz <= int_size:
586            e = self.carry_pack_index(int_size, pk, fd)
587            t = ast.BinOp(e, ast.BitAnd(), ast.Num(((1<<sz) - 1)<<rp))
588            return TestHelper_Integer_Or(testExpr, t)
589        else:
590            e = self.carry_pack_index(int_size, pk, fd)
591            t = ast.BinOp(e, ast.BitAnd(), ast.Num(((1<<(int_size-rp)) - 1)<<rp))
592            sz -= (int_size-rp)
593            posn += (int_size-rp)
594            pk = posn/self.BLOCK_SIZE
595            fd = (posn%self.BLOCK_SIZE)/int_size
596            while sz >= int_size:
597              t = ast.BinOp(t, ast.BitOr(), self.carry_pack_index(int_size, pk, fd))
598              sz -= int_size
599              posn += int_size
600              pk = posn/self.BLOCK_SIZE
601              fd = (posn%self.BLOCK_SIZE)/int_size
602            if sz > 0:
603              e = self.carry_pack_index(int_size, pk, fd)
604              t = ast.BinOp(t, ast.BitOr(), ast.BinOp(e, ast.BitAnd(), ast.Num((1<<sz) -1)))
605            return TestHelper_Integer_Or(testExpr, t)
606           
607    def GenerateInitializations(self):
608        v = self.carryPackVar       
609        inits = ""
610        for i in range(0, self.totalPack_count):
611          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
612        for op_no in range(self.carryInfoSet.block_op_count[0]):
613          if op_no in self.carryInfoSet.init_one_list: 
614            posn = self.alloc_map[op_no]
615            pk = posn/self.BLOCK_SIZE
616            fd = (posn%self.BLOCK_SIZE)/self.field_count
617            rp = posn%self.BLOCK_SIZE
618            inits += "%s[%i]._%i[%i] |= 1 << %i;\n" % (self.carryPackVar, pk, self.fw, fd, rp)
619        return inits
620
621
622    def GenerateCarryElseFinalization(self, block_no):
623        # if the block consists of full carry packs, then
624        # no action need be taken: the corresponding carry-in packs
625        # must already be zero, or the then branch would have been taken.
626        count = self.aligned_size[block_no]
627        if count % self.field_count == 0: return []
628        # The block has half a carry-pack or less.
629        assigs = []
630        posn = self.block_base[block_no] - self.carry_offset
631        ub = posn / self.field_count
632        rp = posn % self.field_count
633        next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no]
634        end_pos = (self.alloc_map[next_op]  - self.carry_offset - 1) % self.field_count
635        #print rp, next_op,self.alloc_map[next_op]
[3017]636        #assigs = [mkast.assign(self.cg_temp(end_pos, rp), mkast.zero(self.fw))]
[2800]637        assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1)
638        if (end_pos + 1) % self.field_count == 0:
639          pk = posn/self.BLOCK_SIZE
640          fd = (posn%self.BLOCK_SIZE)/self.field_count
641          mask_op = "hsimd<%i>::signmask" % (self.fw)
[3017]642          storable_carry_in_form = mkast.call(mask_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
643          assigs.append(mkast.assign(self.carry_pack_index(self.field_count, pk, fd, mode = ast.Store()), storable_carry_in_form))
[2800]644        return assigs
645
646#
647    def GenerateCarryWhileFinalization(self, block_no):
648        posn = self.block_base[block_no]
649        sz = self.aligned_size[block_no] 
650        if sz < self.field_count:
651          rp = posn%self.field_count
652          v0 = self.cg_temp(rp + sz - 1, rp)
653          lv0 = self.local_temp(sz - 1, 0)
[3017]654          return [mkast.assign(v0, mkast.call('simd_or', [mkast.var(v0), mkast.var(lv0)]))]
[2800]655        local_posn = 0
656        pk = posn/self.BLOCK_SIZE
657        assigs = []
658        for i in range((sz + self.field_count -1)/self.field_count): 
659          pk = posn/self.BLOCK_SIZE
660          fd = (posn%self.BLOCK_SIZE)/self.field_count
661          local_pk = local_posn/self.BLOCK_SIZE
662          local_fd = (local_posn%self.BLOCK_SIZE)/self.field_count
663          v0 = self.carry_pack_index(self.field_count, pk, fd)
664          lv0 = self.local_pack_index(self.field_count, local_pk, local_fd)
[3017]665          assigs.append(mkast.assign([self.carry_pack_index(self.field_count, pk, fd, ast.Store())], ast.BinOp(v0, ast.BitOr(), lv0)))
[2800]666          posn += self.field_count
667          local_posn += self.field_count
668        return assigs
[2802]669
670
671
672class HMCPS_CCGO_BitPack2(HMCPS_CCGO_BitPack):
673
674    def allocate_all(self):
675        self.aligned_size = determine_aligned_block_sizes(self.BLOCK_SIZE, self.carryInfoSet, max_whiles_per_pack = 8, min_block_size = self.field_count)
676        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
677        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
678        self.alloc_map = {}
679        self.alloc_map[0] = 0
680        self.last_carry_map = {}
681        self.adv_n_map = {}
682        self.block_base = {}
683        self.allocate_ops()
684        # carry_offset is used within the inner body of while loops to access local carries.
685        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
686        self.carry_offset = 0
687
688
689    def allocate_block_positions(self):
690        # First allocate the base position of each block relative to its
691        # parent block, such that the relative position is a multiple
692        # of its aligned_size or the pack_size, whichever is smallest.
693        rel_block_posns = [0 for b in range(self.carryInfoSet.block_count)]
694        self.direct_carries = carryInfo.direct_block_carries(self.carryInfoSet)
695        self.aligned_direct = [align(d, max(min(pow2ceil(d), self.BLOCK_SIZE), self.field_count)) for d in self.direct_carries]
696        working_allocation_bitmap = [((1 << a) - 1) for a in self.aligned_direct]
697        for b in range(1, self.carryInfoSet.block_count):
698            prnt = self.carryInfoSet.parent_block[b]
699            sz = self.aligned_size[b]
700            sz_map = (1 << sz) - 1
701            posn = 0
702            while sz_map & working_allocation_bitmap[prnt] != 0:
703                posn += sz
704                sz_map <<= sz
705            working_allocation_bitmap[prnt] |= sz_map
706            rel_block_posns[b] = posn
707        # Now compute absolute positions
708        self.block_base[0] = 0
709        for b in range(1, self.carryInfoSet.block_count): 
710            self.block_base[b] = self.block_base[self.carryInfoSet.parent_block[b]] + rel_block_posns[b]
711
712#
713#  Given the relative base positions of each block, allocate
714#  its carries.
715#
716    def allocate_ops(self):
717        self.allocate_block_positions()
718        adv_n_count = 0
719        carry_posn = [self.block_base[b] for b in range(self.carryInfoSet.block_count)]
720        for op in range(self.carryInfoSet.operation_count):
721            b = self.carryInfoSet.containing_block[op]
722            self.alloc_map[op] = carry_posn[b]
723            if op not in self.carryInfoSet.advance_amount.keys():
724                carry_posn[b] += 1
725                self.last_carry_map[b] = op
726            elif self.carryInfoSet.advance_amount[op] == 1: 
727                carry_posn[b] += 1
728                self.last_carry_map[b] = op
729            else:
730                self.adv_n_map[op] = adv_n_count
731                adv_n_count += 1
732        # When processing the last operation, make sure that the "next" operation
733        # appears to start a new pack.
734        self.alloc_map[self.carryInfoSet.operation_count] = self.aligned_size[0]
735
736    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
737        block_no = self.carryInfoSet.containing_block[operation_no]
738        posn = self.alloc_map[operation_no] - self.carry_offset
739        add_decl = self.alloc_map[operation_no] - self.block_base[block_no] <= self.field_count
740        rp = posn%self.field_count
741        # Save the carry in the carry temp variable and then merge
742        # pending carry temps as far as possible.
[3017]743        v = mkast.var(self.temp_prefix + repr(rp))
[2802]744        assigs = []
[3017]745        if add_decl: assigs.append(mkast.callStmt('BitBlock_declare', [v]))
746        assigs.append(mkast.assign(v, carry_out_expr))
[2802]747        assigs += self.gen_merges(rp, rp, add_decl)
748        # Only generate an actual store for the last carryout in a pack.
749        if operation_no == self.last_carry_map[block_no]:
750          skip = self.block_base[block_no] + self.aligned_direct[block_no] - self.alloc_map[operation_no] - 1
751          if skip > 0: 
752            assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip, add_decl)
753          #print (posn, skip)
754          pk = posn/self.BLOCK_SIZE
755          mask_blk = (posn%self.BLOCK_SIZE)/self.field_count
756          mask_op = "hsimd<%i>::signmask" % (self.fw)
[3017]757          storable_carry_in_form = mkast.call(mask_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
758          assigs.append(mkast.assign(self.carry_pack_index(self.field_count, pk, mask_blk, mode = ast.Store()), storable_carry_in_form))
[2802]759        return assigs
760
761    def GenerateLocalDeclare(self, block_no):
762        if self.carryInfoSet.block_op_count[block_no] == 0: return []
763        count = self.aligned_size[block_no] 
764        ub_count = (count + self.BLOCK_SIZE - 1)/ self.BLOCK_SIZE
[3017]765        decls = [mkast.callStmt('ubitblock_declare', [mkast.var("sub" + self.carryPackVar), ast.Num(ub_count)])]
[2802]766        return decls
767
768    def GenerateCarryWhileFinalization(self, block_no):
769        posn = self.block_base[block_no]
770        pk = posn/self.BLOCK_SIZE
771        count = self.aligned_size[block_no]
772        assigs = []
773        if count >= self.BLOCK_SIZE:
774          for i in range(count/self.BLOCK_SIZE):
[3017]775            assigs.append(mkast.assign(self.carry_pack_full(pk + i, mode = ast.Store()), mkast.call('simd_or', [self.carry_pack_full(pk + i), self.local_pack_full(i)])))
[2802]776
777        else:
778          rp = (posn%self.BLOCK_SIZE)/count
779          expr = ast.BinOp(self.carry_pack_index(count, pk, rp), ast.BitOr(), self.local_pack_index(count, 0, 0))
[3017]780          assigs.append(mkast.assign(self.carry_pack_index(count, pk, rp, mode = ast.Store()), expr))
[2802]781        return assigs
782
783
784    def GenerateCarryElseFinalization(self, block_no):  return []
785
786    def GenerateStreamFunctionDecls(self): return ""
787
788