source: proto/Compiler/CCGO_HMCPS.py @ 3095

Last change on this file since 3095 was 3017, checked in by cameron, 6 years ago

Factor out AST helpers to mkast.py

#
# CCGO_HMCPS.py
#
# Carry Code Generator Object using Hierarchical Merging Carry Pack Strategy
#
# Robert D. Cameron
# November 26, 2012
# Licensed under Open Software License 3.0
#
import ast, mkast
import carryInfo, CCGO

#
# Helper functions
#
def TestHelper_Bitblock_Or(testExpr, bitBlockExpr):
    if isinstance(testExpr, ast.Call):
      assert isinstance(testExpr.func, ast.Name)
      assert testExpr.func.id == 'bitblock::any'
      testExpr.args[0] = mkast.call('simd_or', [bitBlockExpr, testExpr.args[0]])
      return testExpr
    else:
      return ast.BinOp(testExpr, ast.BitOr(), mkast.call('bitblock::any', [bitBlockExpr]))

def TestHelper_Integer_Or(testExpr, intExpr):
    return ast.BinOp(testExpr, ast.BitOr(), intExpr)

#
#
# Carry Pack Assignment Strategy
#
# The hierarchical merging carry pack strategy packs carries
# into packs of 2, 4, 8 and 16.  For example, to pack
# 4 carries c0, c1, c2, and c3 into the 32-bit fields of
# a 128-bit register, the following operations are used.
#
# c0 = pablo.SomeCarryGeneratingFn(...)
# c1 = pablo.SomeCarryGeneratingFn(...)
# c1_0 = esimd::mergeh<32>(c1, c0)
# c2 = pablo.SomeCarryGeneratingFn(...)
# c3 = pablo.SomeCarryGeneratingFn(...)
# c3_2 = esimd::mergeh<32>(c3, c2)
# c3_0 = esimd::mergeh<64>(c3_2, c1_0)
#
#
# Packing operations are generated sequentially, as the
# appropriate individual carries or subpacks become available.
#
# The functions below generate the packing operations, assuming
# that carry number carry_num has just been generated.
#

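# For illustration (editor's sketch, following the same scheme): packing eight
# carries c0..c7 into 16-bit fields proceeds in the same incremental fashion,
# with a merge emitted each time a same-sized sibling subpack already exists:
#
# c1_0 = esimd::mergeh<16>(c1, c0)
# c3_2 = esimd::mergeh<16>(c3, c2)
# c3_0 = esimd::mergeh<32>(c3_2, c1_0)
# c5_4 = esimd::mergeh<16>(c5, c4)
# c7_6 = esimd::mergeh<16>(c7, c6)
# c7_4 = esimd::mergeh<32>(c7_6, c5_4)
# c7_0 = esimd::mergeh<64>(c7_4, c3_0)
#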
def pow2ceil(n):
   # Smallest power of 2 that is >= n (1 if n <= 1).
   c = 1
   while c < n: c *= 2
   return c

def pow2floor(n):
   # Largest power of 2 that is <= n, for n >= 1.
   c = 1
   while c <= n: c *= 2
   return c/2

def low_bit(n):
   # The lowest set bit of n (0 if n == 0).
   return n - (n & (n-1))

def align(n, align_base):
  # Round n up to the next multiple of align_base.
  return ((n + align_base - 1) / align_base) * align_base

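# Editor's illustration (hedged): a few concrete values for the helpers above.
# These checks run only if this module is executed directly.
if __name__ == '__main__':
    assert pow2ceil(5) == 8 and pow2ceil(8) == 8
    assert pow2floor(5) == 4 and pow2floor(8) == 8
    assert low_bit(12) == 4
    assert align(5, 4) == 8 and align(8, 4) == 8
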
def determine_aligned_block_sizes(pack_size, cis, max_whiles_per_pack = 1, min_block_size = 1):
  aligned_size = {}
  for i in range(cis.block_count): aligned_size[i] = 0
  seen = []
  for i in range(cis.block_count):
    # Work backwards to process all child blocks before the parent
    # so that the parent incorporates the updated child counts.
    b = cis.block_count - i - 1
    b_carries = 0
    op = cis.block_first_op[b]
    while op < cis.block_first_op[b] + cis.block_op_count[b]:
      sb = cis.containing_block[op]
      if sb == b:
        if op not in cis.advance_amount.keys(): b_carries += 1
        elif cis.advance_amount[op] == 1: b_carries += 1
        op += 1
      else:
        align_base = aligned_size[sb]
        if align_base > pack_size: align_base = pack_size
        b_carries = align(b_carries, align_base)
        b_carries += aligned_size[sb]
        op += cis.block_op_count[sb]
    #
    # Align to min block size
    aligned_size[b] = align(b_carries, min_block_size)
    # Force whiles to use full packs; this possibly can be relaxed.
    if cis.whileblock[b]:
      aligned_size[b] = align(aligned_size[b], pack_size/max_whiles_per_pack)
    if aligned_size[b] > pack_size:
      aligned_size[b] = align(aligned_size[b], pack_size)
    else:
      aligned_size[b] = pow2ceil(aligned_size[b])
  return aligned_size

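# Worked example (editor's sketch): with pack_size = 16, a leaf block containing
# 5 unit-advance carries is padded to pow2ceil(5) = 8 positions, so its carry
# test can later be read as a single 64-bit value; a block whose own carries and
# aligned sub-blocks total 20 positions exceeds the pack size and is instead
# rounded up to align(20, 16) = 32, i.e. two full packs.
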
MAX_LINE_LENGTH = 80

def BitBlock_decls_from_vars(varlist):
  global MAX_LINE_LENGTH
  decls = ""
  if len(varlist) != 0:
    decls = "             BitBlock"
    pending = ""
    linelgth = 10
    for v in varlist:
      if linelgth + len(v) + 2 <= MAX_LINE_LENGTH:
        decls += pending + " " + v
        linelgth += len(pending + v) + 1
      else:
        decls += ";\n             BitBlock " + v
        linelgth = 11 + len(v)
      pending = ","
    decls += ";"
  return decls

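# For example (editor's sketch), BitBlock_decls_from_vars(['__c0', '__c1_0'])
# returns the single declaration line "BitBlock __c0, __c1_0;" (prefixed with
# the indentation used in the generated stream function); a longer variable
# list is wrapped onto additional "BitBlock ..." lines once MAX_LINE_LENGTH
# is reached.
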
def block_contains(b0, b1, parent_block_map):
  # True if block b0 is b1 itself or an ancestor of b1 in the block nesting.
  if b0 == b1: return True
  elif b1 == 0: return False
  else: return block_contains(b0, parent_block_map[b1], parent_block_map)

class HMCPS_CCGO(CCGO.CCGO):
    def __init__(self, BLOCK_SIZE, fw, carryInfoSet, carryPackVarName='carryG', temp_prefix='__c'):
        self.BLOCK_SIZE = BLOCK_SIZE
        self.fw = fw
        self.field_count = self.BLOCK_SIZE/fw
        self.carryInfoSet = carryInfoSet
        self.carryPackVar = carryPackVarName
        self.temp_prefix = temp_prefix

    def allocate_all(self):
        self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet)
        self.carryPack_count = (self.aligned_size[0] + self.field_count - 1) / self.field_count
        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.adv_n_map = {}
        self.block_base = {}
        self.allocate_ops()
        # carry_offset is used within the inner body of while loops to access local carries.
        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
        self.carry_offset = 0
#
# Carry Storage/Access
#
# Carries are stored in one or more ubitblocks as byte values.
# For each block, the carry count is rounded up to a power-of-2 count P,
# so that the carry test for that block is accessible as a single value of P bytes.
# Packs of 1, 2, 4 or 8 carries are respectively represented
# as one or more _8, _16, _32 or _64 values (members of the ubitblock union).
#
#
# The allocation phase determines the ubitblock number and count for each block.

#  Carry-in access is a byte load:  carryG[packno]._8[offset]
#  A carry-out is stored into a local pack temp until the final byte of a pack is reached.
#
#  If-test: let P be the pack size in {1, 2, 4, 8, ...};
#    if P <= 8, use the integer test expression carryG[packno]._<P*8>[block_offset].
#
#  The while test is similar.
#    Local while declaration: use a copy of the carry pack.
#    While finalization (carry combine): round up and OR (|=) the local carries back into the structure.
#
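# For example (editor's illustration, assuming fw = 8): a block whose carries
# occupy a 4-byte subpack at pack 0, offset 0 yields an if-test of the form
#
#   if (<user test expr> | carryG[0]._32[0]) { ... }
#
# whereas a block owning one or more full packs is tested with
# bitblock::any(simd_or(...)) over the packs it occupies.
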
    # AST for accessing a full carry pack:  carryG[ub]._128
    def carry_pack_full(self, ub, v = None, mode = ast.Load()):
       if v == None: v = self.carryPackVar
       return mkast.att(mkast.index(v, ub), '_128', mode)

    # AST for accessing field rp of pack ub at field width fw:  carryG[ub]._fw[rp]
    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
       return mkast.index(mkast.att(mkast.index(self.carryPackVar, ub), '_%i' % fw), rp, mode)

    def local_pack_full(self, ub, mode = ast.Load()):
       return self.carry_pack_full(ub, "sub" + self.carryPackVar, mode)

    def local_pack_index(self, fw, ub, rp, mode = ast.Load()):
       v = "sub" + self.carryPackVar
       return mkast.index(mkast.att(mkast.index(v, ub), '_%i' % fw), rp, mode)

    # Name of the temp that holds carries lo_carry..hi_carry, e.g. __c3_0.
    def cg_temp(self, hi_carry, lo_carry = None):
      if lo_carry == None or hi_carry == lo_carry: return "%s%i" % (self.temp_prefix, hi_carry)
      else: return "%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)

    def local_temp(self, hi_carry, lo_carry = None):
      if lo_carry == None or hi_carry == lo_carry: return "sub%s%i" % (self.temp_prefix, hi_carry)
      else: return "sub%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)

    def gen_merges(self, carry_last, carry_base, add_decl = False):
      # Merge the just-completed subpack [carry_base .. carry_last] with the
      # equally sized sibling subpack below it, whenever that sibling exists
      # (i.e. whenever bit 'size' of carry_last is set), then recurse.
      size = carry_last - carry_base + 1
      if carry_last & size:
        v1 = mkast.var(self.cg_temp(carry_last, carry_base))
        v0 = mkast.var(self.cg_temp(carry_last - size, carry_base - size))
        v2 = mkast.var(self.cg_temp(carry_last, carry_base - size), ast.Store())
        assigs = []
        if add_decl: assigs.append(mkast.callStmt('BitBlock_declare', [v2]))
        assigs.append(mkast.assign(v2, mkast.mergeh(self.fw * size, v1, v0)))
        return assigs + self.gen_merges(carry_last, carry_base - size, add_decl)
      else: return []

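    # For example (editor's sketch, fw = 32): once carry 3 is produced,
    # gen_merges(3, 3) emits the cascade
    #
    #   __c3_2 = esimd::mergeh<32>(__c3, __c2)
    #   __c3_0 = esimd::mergeh<64>(__c3_2, __c1_0)
    #
    # mirroring the packing example at the top of this file (temp names shown
    # with the default '__c' prefix).
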
    #
    #  Given that carry_num carries have been generated and packed,
    #  add zero_count additional carry zero values and pack.
    #  Use shifts to introduce multiple zeroes, where possible.
    #
    def gen_multiple_carry_zero_then_pack(self, carry_num, zero_count, add_decl = False):
      stmts = []
      if zero_count == 0: return stmts
      pending_carry_pack_size = low_bit(carry_num)
      pending_carry_base = carry_num - pending_carry_pack_size
      # We may be able to fill zeroes by shifting.
      # But the shift is limited by any further pending carry pack and
      # the constraint that the result must produce a well-formed pack
      # having a power-of-2 number of entries.
      #
      final_num = carry_num + zero_count
      if pending_carry_base == 0:
        shift = pow2floor(final_num) - pending_carry_pack_size
      else:
        shift = min(low_bit(pending_carry_base), low_bit(final_num)) - pending_carry_pack_size
      if pending_carry_pack_size == 0 or shift == 0:
        # There is either no pending pack or we are not generating enough
        # carry zeroes to combine into the pending pack, so we can only add new
        # packs.
        #
        if zero_count == 1:
          v = mkast.var(self.cg_temp(carry_num))
          if add_decl: stmts.append(mkast.callStmt('BitBlock_declare', [v]))
          stmts.append(mkast.assign(v, mkast.zero(self.fw)))
          return stmts
        else:
          zero_count_floor = pow2floor(zero_count)
          hi_num = carry_num + zero_count_floor
          v = mkast.var(self.cg_temp(hi_num - 1, carry_num))
          if add_decl: stmts.append(mkast.callStmt('BitBlock_declare', [v]))
          stmts.append(mkast.assign(v, mkast.zero(self.fw)))
          remaining_zeroes = zero_count - zero_count_floor
          return stmts + self.gen_multiple_carry_zero_then_pack(hi_num, remaining_zeroes, add_decl)
      #
      shift_result = mkast.var(self.cg_temp(carry_num + shift - 1, pending_carry_base))
      pending = self.cg_temp(carry_num - 1, pending_carry_base)
      #a1 = mkast.assign(shift_result, mkast.call('bitblock::srli<%i>' % (self.fw * shift), [mkast.var(pending)]))
      if add_decl: stmts.append(mkast.callStmt('BitBlock_declare', [shift_result]))
      stmts.append(mkast.assign(shift_result, mkast.call('mvmd<%i>::srli<%i>' % (self.fw, shift), [mkast.var(pending)])))
      # Do any necessary merges
      m = self.gen_merges(carry_num + shift - 1, pending_carry_base, add_decl)
      return stmts + m + self.gen_multiple_carry_zero_then_pack(carry_num + shift, zero_count - shift, add_decl)

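    # Worked example (editor's sketch, fw = 32): a call with carry_num = 3 and
    # zero_count = 1 (carries 0..2 already packed, carry position 3 must be a
    # zero) finds the pending single-carry pack __c2, computes
    # shift = min(low_bit(2), low_bit(4)) - 1 = 1, and emits roughly
    #
    #   __c3_2 = mvmd<32>::srli<1>(__c2)          // zero field above carry 2
    #   __c3_0 = esimd::mergeh<64>(__c3_2, __c1_0)
    #
    # the second statement coming from the gen_merges call that follows the shift.
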
    def allocate_ops(self):
      carry_count = 0
      adv_n_count = 0
      for op in range(self.carryInfoSet.operation_count):
        b = self.carryInfoSet.containing_block[op]
        if op != 0:
          # If we've just left a block, ensure that we are aligned.
          b_last = self.carryInfoSet.containing_block[op-1]
          if not block_contains(b_last, b, self.carryInfoSet.parent_block):
            # find the max-sized block just exited.
            while not block_contains(self.carryInfoSet.parent_block[b_last], b, self.carryInfoSet.parent_block):
              b_last = self.carryInfoSet.parent_block[b_last]
            align_base = self.aligned_size[b_last]
            if align_base > self.field_count: align_base = self.field_count
            carry_count = align(carry_count, align_base)
        if self.carryInfoSet.block_first_op[b] == op:
          # If we're just entering a block, ensure that we are aligned.
          align_base = self.aligned_size[b]
          if align_base > self.field_count: align_base = self.field_count
          carry_count = align(carry_count, align_base)
        if op not in self.carryInfoSet.advance_amount.keys():
          self.alloc_map[op] = carry_count
          carry_count += 1
        elif self.carryInfoSet.advance_amount[op] == 1:
          self.alloc_map[op] = carry_count
          carry_count += 1
        else:
          # Advance_n op, carry_count does not change.
          self.alloc_map[op] = carry_count
          self.adv_n_map[op] = adv_n_count
          adv_n_count += 1
      # When processing the last operation, make sure that the "next" operation
      # appears to start a new pack.
      self.alloc_map[self.carryInfoSet.operation_count] = align(carry_count, self.field_count)
      for b in range(self.carryInfoSet.block_count):
         self.block_base[b] = self.alloc_map[self.carryInfoSet.block_first_op[b]]

    def GenerateCarryDecls(self):
        return "  ubitblock %s [%i];\n" % (self.carryPackVar, self.totalPack_count)
    def GenerateInitializations(self):
        v = self.carryPackVar
        inits = ""
        for i in range(0, self.totalPack_count):
          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            ub = posn/self.field_count
            rp = posn%self.field_count
            inits += "%s[%i]._%i[%i] = 1;\n" % (self.carryPackVar, ub, self.fw, rp)
        return inits
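    # For example (editor's illustration): with fw = 8 and totalPack_count = 4,
    # GenerateCarryDecls() emits "  ubitblock carryG [4];" and
    # GenerateInitializations() emits one
    #   carryG[i]._128 = simd<8>::constant<0>();
    # line per pack, plus "carryG[ub]._8[rp] = 1;" for each top-level carry
    # listed in init_one_list.
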
    def GenerateStreamFunctionDecls(self):
        f = self.field_count
        s = 1
        decls = []
        while f > 0:
          decls += [self.cg_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        return BitBlock_decls_from_vars(decls)

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        return mkast.call("convert", [self.carry_pack_index(self.fw, ub, rp)])
    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        # Save the carry in the carry temp variable and then merge
        # pending carry temps as far as possible.
        assigs = [mkast.assign(self.temp_prefix + repr(rp), carry_out_expr)]
        assigs += self.gen_merges(rp, rp)
        # Only generate an actual store for the last carryout in a pack.
        next_op = operation_no + 1
        while self.adv_n_map.has_key(next_op): next_op += 1
        next_posn = self.alloc_map[next_op] - self.carry_offset
        skip = next_posn - posn - 1
        if skip > 0:
          assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip)
        if next_posn % self.field_count == 0:
          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
          storable_carry_in_form = mkast.call(shift_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(mkast.assign(self.carry_pack_full(ub, mode = ast.Store()), storable_carry_in_form))
        return assigs
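    # For example (editor's illustration, fw = 8, so field_count = 16): a carry
    # allocated at position 10 is read back as convert(carryG[0]._8[10]); its
    # carry-out is first saved in the temp __c10 and merged with pending temps,
    # and only when the pack's last position is filled does a real store occur:
    #   carryG[0]._128 = simd<8>::srli<7>(__c15_0);
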
    def GenerateAdvanceInAccess(self, operation_no):
        return self.carry_pack_full(self.carryPack_count + self.adv_n_map[operation_no])
    def GenerateAdvanceOutStore(self, operation_no, adv_out_expr):
        return [ast.Assign([self.carry_pack_full(self.carryPack_count + self.adv_n_map[operation_no], mode=ast.Store())],
                           mkast.call("bitblock::srli<64>", [adv_out_expr]))]
    def GenerateTestAll(self, instance_name):
        if self.totalPack_count == 0: return ast.Num(0)
        else:
            v = mkast.att(instance_name, self.carryPackVar)
            t = self.carry_pack_full(0, v)
            for i in range(1, self.totalPack_count):
              t2 = self.carry_pack_full(i, v)
              t = mkast.call('simd_or', [t, t2])
            return mkast.call('bitblock::any', [t])
    def GenerateTest(self, block_no, testExpr):
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        count = self.aligned_size[block_no]
        width = count * self.fw
        if count < self.field_count:
            t = self.carry_pack_index(width, ub, rp/count)
            return TestHelper_Integer_Or(testExpr, t)
        else:
            t = self.carry_pack_full(ub)
            for i in range(1, count/self.field_count):
              v2 = self.carry_pack_full(ub + i)
              t = mkast.call('simd_or', [t, v2])
            return TestHelper_Bitblock_Or(testExpr, t)
    def GenerateCarryIfTest(self, block_no, ifTest):
        return self.GenerateTest(block_no, ifTest)

    def GenerateCarryElseFinalization(self, block_no):
        # if the block consists of full carry packs, then
        # no action need be taken: the corresponding carry-in packs
        # must already be zero, or the then branch would have been taken.
        count = self.aligned_size[block_no]
        if count % self.field_count == 0: return []
        # The block has half a carry-pack or less.
        assigs = []
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn / self.field_count
        rp = posn % self.field_count
        next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no]
        end_pos = (self.alloc_map[next_op] - self.carry_offset - 1) % self.field_count
        assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1)
        if (end_pos + 1) % self.field_count == 0:
          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
          storable_carry_in_form = mkast.call(shift_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(mkast.assign(self.carry_pack_full(ub, mode = ast.Store()), storable_carry_in_form))
        return assigs

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        if count >= self.field_count:
          ub_count = count / self.field_count
          decls = [mkast.callStmt('ubitblock_declare', [mkast.var('sub' + self.carryPackVar), ast.Num(ub_count)])]
          count = self.field_count
        else: decls = []
        # Generate carry pack temps.
        f = count
        s = 1
        temps = []
        while f > 0:
          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        #return BitBlock_decls_from_vars(decls)
        return decls + [mkast.callStmt('BitBlock_declare', [mkast.var(t)]) for t in temps]

    def GenerateCarryWhileTest(self, block_no, testExpr):
        return self.GenerateTest(block_no, testExpr)

    # Within a while body, carry accesses are redirected to the local "sub"
    # copies and re-based so that the block's first carry is at position 0.
    def EnterLocalWhileBlock(self, operation_offset):
        self.carryPackVar = "sub" + self.carryPackVar
        self.temp_prefix = "sub" + self.temp_prefix
        self.carry_offset = self.alloc_map[operation_offset]

    def ExitLocalWhileBlock(self):
        self.carryPackVar = self.carryPackVar[3:]
        self.temp_prefix = self.temp_prefix[3:]
        self.carry_offset = 0

    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.block_base[block_no]
        ub = posn/self.field_count
        rp = posn%self.field_count
        count = self.aligned_size[block_no]
        if count < self.field_count:
          v0 = self.cg_temp(rp + count - 1, rp)
          lv0 = self.local_temp(count - 1, 0)
          return [mkast.assign(v0, mkast.call('simd_or', [mkast.var(v0), mkast.var(lv0)]))]
        n = (count+self.field_count-1)/self.field_count
        assigs = []
        for i in range(n):
          assigs.append(mkast.assign(self.carry_pack_full(ub + i, mode = ast.Store()), mkast.call('simd_or', [self.carry_pack_full(ub + i), self.local_pack_full(i)])))
        return assigs
    def GenerateStreamFunctionFinalization(self):
        return []

#
#  A version of HMCPS_CCGO eliminating use of "convert"
#
class HMCPS_CCGO2(HMCPS_CCGO):

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        #return mkast.call("convert", [self.carry_pack_index(self.fw, ub, rp)])
        if rp == 0: e = self.carry_pack_full(ub)
        else: e = mkast.call("mvmd<%i>::srli<%i>" % (self.fw, rp), [self.carry_pack_full(ub)])
        if rp == self.field_count - 1:
          return e
        else: return mkast.call('simd_and', [e, mkast.var("simd_const_1")])
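    # For example (editor's illustration, fw = 8): the carry at position 10 of
    # pack 0 is fetched without a convert call as
    #   simd_and(mvmd<8>::srli<10>(carryG[0]._128), simd_const_1)
    # the final simd_and being dropped when the carry sits in the top field.
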

#
#  Eliminating ubitblock
#
class HMCPS_CCGO3(HMCPS_CCGO2):

    def carry_pack_full(self, ub, v = None, mode = ast.Load()):
       if v == None: v = self.carryPackVar
       return mkast.index(v, ub, mode)

    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
       return mkast.call("mvmd<%i>::extract<%i>" % (fw, rp), [self.carry_pack_full(ub)])

    def GenerateCarryDecls(self):
        return "  BitBlock %s [%i];\n" % (self.carryPackVar, self.totalPack_count)

    def GenerateInitializations(self):
        v = self.carryPackVar
        inits = ""
        for i in range(0, self.totalPack_count):
          inits += "%s[%i] = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            ub = posn/self.field_count
            rp = posn%self.field_count
            v = "%s[%i]" % (self.carryPackVar, ub)
            inits += "%s = simd_or(%s, mvmd<%i>::slli<%i>(simd_const_1)) ;\n" % (v, v, self.fw, rp)
        return inits

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        if count >= self.field_count:
          ub_count = count / self.field_count
          decls = [mkast.callStmt('BitBlock_declare', [self.local_pack_full(ub_count)])]
          decls += [mkast.assign(self.local_pack_full(i, ast.Store()), mkast.zero(self.fw)) for i in range(ub_count)]
          count = self.field_count
        else: decls = []
        # Generate carry pack temps.
        f = count
        s = 1
        temps = []
        while f > 0:
          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        #return BitBlock_decls_from_vars(decls)
        return decls + [mkast.callStmt('BitBlock_declare', [mkast.var(t)]) for t in temps]

#
#  A version of HMCPS_CCGO with bit packing using hsimd::signmask
#
class HMCPS_CCGO_BitPack(HMCPS_CCGO):

    def allocate_all(self):
        self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet, min_block_size=8)
        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.adv_n_map = {}
        self.block_base = {}
        self.allocate_ops()
        # carry_offset is used within the inner body of while loops to access local carries.
        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
        self.carry_offset = 0

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        pk = posn/self.BLOCK_SIZE
        rp = posn%self.BLOCK_SIZE
        if rp == 0: e = self.carry_pack_full(pk)
        elif rp < self.BLOCK_SIZE/2: e = mkast.call("simd<%i>::srli<%i>" % (self.BLOCK_SIZE/2, rp), [self.carry_pack_full(pk)])
        else: e = mkast.call("bitblock::srli<%i>" % (rp), [self.carry_pack_full(pk)])
        if rp == self.BLOCK_SIZE - 1:
          return e
        else: return mkast.call('simd_and', [e, mkast.var("simd_const_1")])

    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        rp = posn%self.field_count
        # Save the carry in the carry temp variable and then merge
        # pending carry temps as far as possible.
        assigs = [mkast.assign(self.temp_prefix + repr(rp), carry_out_expr)]
        assigs += self.gen_merges(rp, rp)
        # Only generate an actual store for the last carryout in a pack.
        next_op = operation_no + 1
        while self.adv_n_map.has_key(next_op): next_op += 1
        next_posn = self.alloc_map[next_op] - self.carry_offset
        skip = next_posn - posn - 1
        if skip > 0:
          assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip)
        #print (posn, skip)
        if next_posn % self.field_count == 0:
          pk = posn/self.BLOCK_SIZE
          fd = (posn%self.BLOCK_SIZE)/self.field_count
          mask_op = "hsimd<%i>::signmask" % (self.fw)
          storable_carry_in_form = mkast.call(mask_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(mkast.assign(self.carry_pack_index(self.field_count, pk, fd, mode = ast.Store()), storable_carry_in_form))
        return assigs

    def GenerateTest(self, block_no, testExpr):
        int_size = self.BLOCK_SIZE/2
        posn = self.block_base[block_no] - self.carry_offset
        pk = posn/self.BLOCK_SIZE
        fd = (posn%self.BLOCK_SIZE)/int_size
        rp = posn%int_size
        sz = self.aligned_size[block_no]
        if sz in [8, 16, 32, 64] and align(posn, sz) == posn:
            fd = (posn%self.BLOCK_SIZE)/sz
            t = self.carry_pack_index(sz, pk, fd)
            return TestHelper_Integer_Or(testExpr, t)
        elif sz == self.BLOCK_SIZE and align(posn, sz) == posn:
            fd = (posn%self.BLOCK_SIZE)/sz
            t = self.carry_pack_full(pk)
            return TestHelper_Bitblock_Or(testExpr, t)
        elif rp + sz <= int_size:
            e = self.carry_pack_index(int_size, pk, fd)
            t = ast.BinOp(e, ast.BitAnd(), ast.Num(((1<<sz) - 1)<<rp))
            return TestHelper_Integer_Or(testExpr, t)
        else:
            e = self.carry_pack_index(int_size, pk, fd)
            t = ast.BinOp(e, ast.BitAnd(), ast.Num(((1<<(int_size-rp)) - 1)<<rp))
            sz -= (int_size-rp)
            posn += (int_size-rp)
            pk = posn/self.BLOCK_SIZE
            fd = (posn%self.BLOCK_SIZE)/int_size
            while sz >= int_size:
              t = ast.BinOp(t, ast.BitOr(), self.carry_pack_index(int_size, pk, fd))
              sz -= int_size
              posn += int_size
              pk = posn/self.BLOCK_SIZE
              fd = (posn%self.BLOCK_SIZE)/int_size
            if sz > 0:
              e = self.carry_pack_index(int_size, pk, fd)
              t = ast.BinOp(t, ast.BitOr(), ast.BinOp(e, ast.BitAnd(), ast.Num((1<<sz) -1)))
            return TestHelper_Integer_Or(testExpr, t)

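    # For example (editor's illustration, fw = 8): a block of 16 bit-packed
    # carries that begins on a 16-bit boundary is tested directly as an integer,
    #   if (<user test expr> | carryG[pk]._16[fd]) ...
    # while an unaligned group of carries falls through to the masking branches,
    # which AND the containing 64-bit words with the appropriate bit masks and
    # OR the pieces together.
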
    def GenerateInitializations(self):
        v = self.carryPackVar
        inits = ""
        for i in range(0, self.totalPack_count):
          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            pk = posn/self.BLOCK_SIZE
            fd = (posn%self.BLOCK_SIZE)/self.field_count
            rp = posn%self.BLOCK_SIZE
            inits += "%s[%i]._%i[%i] |= 1 << %i;\n" % (self.carryPackVar, pk, self.fw, fd, rp)
        return inits

    def GenerateCarryElseFinalization(self, block_no):
        # if the block consists of full carry packs, then
        # no action need be taken: the corresponding carry-in packs
        # must already be zero, or the then branch would have been taken.
        count = self.aligned_size[block_no]
        if count % self.field_count == 0: return []
        # The block has half a carry-pack or less.
        assigs = []
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn / self.field_count
        rp = posn % self.field_count
        next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no]
        end_pos = (self.alloc_map[next_op] - self.carry_offset - 1) % self.field_count
        #print rp, next_op,self.alloc_map[next_op]
        #assigs = [mkast.assign(self.cg_temp(end_pos, rp), mkast.zero(self.fw))]
        assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1)
        if (end_pos + 1) % self.field_count == 0:
          pk = posn/self.BLOCK_SIZE
          fd = (posn%self.BLOCK_SIZE)/self.field_count
          mask_op = "hsimd<%i>::signmask" % (self.fw)
          storable_carry_in_form = mkast.call(mask_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(mkast.assign(self.carry_pack_index(self.field_count, pk, fd, mode = ast.Store()), storable_carry_in_form))
        return assigs

#
    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.block_base[block_no]
        sz = self.aligned_size[block_no]
        if sz < self.field_count:
          rp = posn%self.field_count
          v0 = self.cg_temp(rp + sz - 1, rp)
          lv0 = self.local_temp(sz - 1, 0)
          return [mkast.assign(v0, mkast.call('simd_or', [mkast.var(v0), mkast.var(lv0)]))]
        local_posn = 0
        pk = posn/self.BLOCK_SIZE
        assigs = []
        for i in range((sz + self.field_count -1)/self.field_count):
          pk = posn/self.BLOCK_SIZE
          fd = (posn%self.BLOCK_SIZE)/self.field_count
          local_pk = local_posn/self.BLOCK_SIZE
          local_fd = (local_posn%self.BLOCK_SIZE)/self.field_count
          v0 = self.carry_pack_index(self.field_count, pk, fd)
          lv0 = self.local_pack_index(self.field_count, local_pk, local_fd)
          assigs.append(mkast.assign([self.carry_pack_index(self.field_count, pk, fd, ast.Store())], ast.BinOp(v0, ast.BitOr(), lv0)))
          posn += self.field_count
          local_posn += self.field_count
        return assigs

class HMCPS_CCGO_BitPack2(HMCPS_CCGO_BitPack):

    def allocate_all(self):
        self.aligned_size = determine_aligned_block_sizes(self.BLOCK_SIZE, self.carryInfoSet, max_whiles_per_pack = 8, min_block_size = self.field_count)
        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.last_carry_map = {}
        self.adv_n_map = {}
        self.block_base = {}
        self.allocate_ops()
        # carry_offset is used within the inner body of while loops to access local carries.
        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
        self.carry_offset = 0

    def allocate_block_positions(self):
        # First allocate the base position of each block relative to its
        # parent block, such that the relative position is a multiple
        # of its aligned_size or the pack_size, whichever is smallest.
        rel_block_posns = [0 for b in range(self.carryInfoSet.block_count)]
        self.direct_carries = carryInfo.direct_block_carries(self.carryInfoSet)
        self.aligned_direct = [align(d, max(min(pow2ceil(d), self.BLOCK_SIZE), self.field_count)) for d in self.direct_carries]
        working_allocation_bitmap = [((1 << a) - 1) for a in self.aligned_direct]
        for b in range(1, self.carryInfoSet.block_count):
            prnt = self.carryInfoSet.parent_block[b]
            sz = self.aligned_size[b]
            sz_map = (1 << sz) - 1
            posn = 0
            while sz_map & working_allocation_bitmap[prnt] != 0:
                posn += sz
                sz_map <<= sz
            working_allocation_bitmap[prnt] |= sz_map
            rel_block_posns[b] = posn
        # Now compute absolute positions.
        self.block_base[0] = 0
        for b in range(1, self.carryInfoSet.block_count):
            self.block_base[b] = self.block_base[self.carryInfoSet.parent_block[b]] + rel_block_posns[b]
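    # Worked example (editor's sketch, field_count = 16): a parent block with 5
    # direct carries reserves positions 0..15 (aligned_direct = 16, bitmap
    # 0xFFFF), so a child block of aligned size 4 probes its 4-bit window
    # upward in steps of 4 and lands at relative position 16, the first free
    # size-aligned slot above the parent's own carries.
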

#
#  Given the relative base position of each block, allocate
#  its carries.
#
    def allocate_ops(self):
        self.allocate_block_positions()
        adv_n_count = 0
        carry_posn = [self.block_base[b] for b in range(self.carryInfoSet.block_count)]
        for op in range(self.carryInfoSet.operation_count):
            b = self.carryInfoSet.containing_block[op]
            self.alloc_map[op] = carry_posn[b]
            if op not in self.carryInfoSet.advance_amount.keys():
                carry_posn[b] += 1
                self.last_carry_map[b] = op
            elif self.carryInfoSet.advance_amount[op] == 1:
                carry_posn[b] += 1
                self.last_carry_map[b] = op
            else:
                self.adv_n_map[op] = adv_n_count
                adv_n_count += 1
        # When processing the last operation, make sure that the "next" operation
        # appears to start a new pack.
        self.alloc_map[self.carryInfoSet.operation_count] = self.aligned_size[0]

    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        add_decl = self.alloc_map[operation_no] - self.block_base[block_no] <= self.field_count
        rp = posn%self.field_count
        # Save the carry in the carry temp variable and then merge
        # pending carry temps as far as possible.
        v = mkast.var(self.temp_prefix + repr(rp))
        assigs = []
        if add_decl: assigs.append(mkast.callStmt('BitBlock_declare', [v]))
        assigs.append(mkast.assign(v, carry_out_expr))
        assigs += self.gen_merges(rp, rp, add_decl)
        # Only generate an actual store for the last carryout in a pack.
        if operation_no == self.last_carry_map[block_no]:
          skip = self.block_base[block_no] + self.aligned_direct[block_no] - self.alloc_map[operation_no] - 1
          if skip > 0:
            assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip, add_decl)
          #print (posn, skip)
          pk = posn/self.BLOCK_SIZE
          mask_blk = (posn%self.BLOCK_SIZE)/self.field_count
          mask_op = "hsimd<%i>::signmask" % (self.fw)
          storable_carry_in_form = mkast.call(mask_op, [mkast.var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(mkast.assign(self.carry_pack_index(self.field_count, pk, mask_blk, mode = ast.Store()), storable_carry_in_form))
        return assigs

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        ub_count = (count + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
        decls = [mkast.callStmt('ubitblock_declare', [mkast.var("sub" + self.carryPackVar), ast.Num(ub_count)])]
        return decls

    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.block_base[block_no]
        pk = posn/self.BLOCK_SIZE
        count = self.aligned_size[block_no]
        assigs = []
        if count >= self.BLOCK_SIZE:
          for i in range(count/self.BLOCK_SIZE):
            assigs.append(mkast.assign(self.carry_pack_full(pk + i, mode = ast.Store()), mkast.call('simd_or', [self.carry_pack_full(pk + i), self.local_pack_full(i)])))
        else:
          rp = (posn%self.BLOCK_SIZE)/count
          expr = ast.BinOp(self.carry_pack_index(count, pk, rp), ast.BitOr(), self.local_pack_index(count, 0, 0))
          assigs.append(mkast.assign(self.carry_pack_index(count, pk, rp, mode = ast.Store()), expr))
        return assigs

    def GenerateCarryElseFinalization(self, block_no): return []

    def GenerateStreamFunctionDecls(self): return ""