source: proto/Compiler/CCGO_HMCPS.py @ 2708

Last change on this file since 2708 was 2708, checked in by cameron, 7 years ago

New experimental version eliminating ubitblock and convert; much faster

File size: 20.7 KB
#
# CCGO_HMCPS.py
#
# Carry Code Generator Object using Hierarchical Merging Carry Pack Strategy
#
# Robert D. Cameron
# November 26, 2012
# Licensed under Open Software License 3.0
#
import ast
import CCGO

# Copyright 2012, Robert D. Cameron
# All rights reserved.
#
# Helper functions
#
def TestHelper_Bitblock_Or(testExpr, bitBlockExpr):
    assert isinstance(testExpr, ast.Call)
    assert isinstance(testExpr.func, ast.Name)
    assert testExpr.func.id == 'bitblock::any'
    testExpr.args[0] = make_call('simd_or', [bitBlockExpr, testExpr.args[0]])
    return testExpr

def TestHelper_Integer_Or(testExpr, intExpr):
    return ast.BinOp(testExpr, ast.BitOr(), intExpr)

def mk_var(var, mode=ast.Load()):
  if isinstance(var, str):
        var = ast.Name(var, mode)
  return var

def make_assign(var, expr):
   if isinstance(var, str):
        var = ast.Name(var, ast.Store())
   return ast.Assign([var], expr)

def make_index(var, num, mode=ast.Load()):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Subscript(var, ast.Index(ast.Num(num)), mode)

def make_index_store(var, num):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Subscript(var, ast.Index(ast.Num(num)), ast.Store())

def make_att(var, att, mode=ast.Load()):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Attribute(var, att, mode)

def make_att_store(var, att):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Attribute(var, att, ast.Store())

def make_call(fn_name, args):
  if isinstance(fn_name, str):
        fn_name = ast.Name(fn_name, ast.Load())
  return ast.Call(fn_name, args, [], None, None)

def make_callStmt(fn_name, args):
  if isinstance(fn_name, str): fn_name = ast.Name(fn_name, ast.Load())
  return ast.Expr(ast.Call(fn_name, args, [], None, None))

def make_mergeh(fw, x, y):
  return make_call("esimd<%i>::mergeh" % fw, [mk_var(x), mk_var(y)])

def make_zero(fw):
  return make_call("simd<%i>::constant<0>" % fw, [])

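# Illustrative sketch (hypothetical names, not used elsewhere in this module):
# the helpers above compose Python AST fragments for the generated code.
# For example, the statement  x = simd_or(a, b)  can be built as follows.
def _example_helper_usage():
  return make_assign('x', make_call('simd_or', [mk_var('a'), mk_var('b')]))
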
#
#
# Carry Pack Assignment Strategy
#
# The hierarchical merging carry pack strategy packs carries
# into groups of 2, 4, 8 and 16.   For example, to pack
# 4 carries c0, c1, c2, and c3 into the 32-bit fields of
# a 128-bit register, the following operations are used.
#
# c0 = pablo.SomeCarryGeneratingFn(...)
# c1 = pablo.SomeCarryGeneratingFn(...)
# c1_0 = esimd::mergeh<32>(c1, c0)
# c2 = pablo.SomeCarryGeneratingFn(...)
# c3 = pablo.SomeCarryGeneratingFn(...)
# c3_2 = esimd::mergeh<32>(c3, c2)
# c3_0 = esimd::mergeh<64>(c3_2, c1_0)
#
#
# Packing operations are generated sequentially when
# the appropriate individual carries or subpacks become
# available.
#
# Generate the packing operations assuming that the
# carry_num carry has just been generated.
#

def pow2ceil(n):
   c = 1
   while c < n: c *= 2
   return c

def pow2floor(n):
   c = 1
   while c <= n: c *= 2
   return c/2

def low_bit(n):
   return n - (n & (n-1))

def align(n, align_base):
  return ((n + align_base - 1) / align_base) * align_base

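# Illustrative sketch (assumes the Python 2 integer division used throughout
# this module): small worked examples of the rounding helpers above.
def _example_rounding_helpers():
  assert pow2ceil(5) == 8        # next power of two at or above 5
  assert pow2floor(5) == 4       # largest power of two not exceeding 5
  assert low_bit(12) == 4        # lowest set bit of 0b1100
  assert align(5, 4) == 8        # 5 rounded up to a multiple of 4
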
def determine_aligned_block_sizes(pack_size, cis):
  aligned_size = {}
  for i in range(cis.block_count): aligned_size[i] = 0
  seen = []
  for i in range(cis.block_count):
    # Work backwards to process all child blocks before the parent
    # so that the parent incorporates the updated child counts.
    b = cis.block_count - i - 1
    b_carries = 0
    op = cis.block_first_op[b]
    while op < cis.block_first_op[b] + cis.block_op_count[b]:
      sb = cis.containing_block[op]
      if sb == b:
        if op not in cis.advance_amount.keys(): b_carries += 1
        elif cis.advance_amount[op] == 1: b_carries += 1
        op += 1
      else:
        align_base = aligned_size[sb]
        if align_base > pack_size: align_base = pack_size
        b_carries = align(b_carries, align_base)
        b_carries += aligned_size[sb]
        op += cis.block_op_count[sb]
    # Force whiles to use full blocks; this possibly can be relaxed.
    if cis.whileblock[b] or b_carries > pack_size:
      aligned_size[b] = align(b_carries, pack_size)
    else:
      aligned_size[b] = pow2ceil(b_carries)
  return aligned_size

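# Illustrative sketch (hypothetical carryInfoSet stand-in; the real class is
# defined elsewhere): a single top-level block holding five ordinary carry
# operations, with a pack size of 8, is rounded up to the power-of-2 size 8.
def _example_aligned_block_sizes():
  class FakeCIS(object):
    block_count = 1
    block_first_op = {0: 0}
    block_op_count = {0: 5}
    containing_block = dict((op, 0) for op in range(5))
    advance_amount = {}          # no multi-position advances
    whileblock = {0: False}
  assert determine_aligned_block_sizes(8, FakeCIS()) == {0: 8}
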
MAX_LINE_LENGTH = 80

def BitBlock_decls_from_vars(varlist):
  global MAX_LINE_LENGTH
  decls = ""
  if not len(varlist) == 0:
          decls = "             BitBlock"
          pending = ""
          linelgth = 10
          for v in varlist:
            if linelgth + len(v) + 2 <= MAX_LINE_LENGTH:
              decls += pending + " " + v
              linelgth += len(pending + v) + 1
            else:
              decls += ";\n             BitBlock " + v
              linelgth = 11 + len(v)
            pending = ","
          decls += ";"
  return decls

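# Illustrative note: BitBlock_decls_from_vars emits a single C++ declaration
# statement for the given temporaries, wrapping at MAX_LINE_LENGTH.  For
# example, BitBlock_decls_from_vars(['__c0', '__c1']) returns (with leading
# indentation) a string of the form "BitBlock __c0, __c1;".
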
def block_contains(b0, b1, parent_block_map):
  if b0 == b1: return True
  elif b1 == 0: return False
  else: return block_contains(b0, parent_block_map[b1], parent_block_map)

class HMCPS_CCGO(CCGO.CCGO):
    def __init__(self, fw, carryInfoSet, carryGroupVarName='carryG', temp_prefix='__c'):
        self.fw = fw
        self.field_count = 128/fw
        self.carryInfoSet = carryInfoSet
        self.carryGroupVar = carryGroupVarName
        self.temp_prefix = temp_prefix
        self.aligned_size = determine_aligned_block_sizes(self.field_count, carryInfoSet)
        self.ubitblock_count = (self.aligned_size[0] + self.field_count - 1) / self.field_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.block_base = {}
        self.allocate_ops()
        # carry_offset is used within the inner body of while loops to access local carries.
        # The calculated (ub, rp) value is reduced by this amount for the local carry group(s).
        self.carry_offset = 0

#
# Carry Storage/Access
#
# Carries are stored in one or more ubitblocks as byte values.
# For each block, the carry count is rounded up to a power-of-2 ceiling P,
# so that the carry test for that block can be read as a single value of P bytes.
# Packs of 1, 2, 4 or 8 carries are respectively represented
# as one or more _8, _16, _32 or _64 values (members of the ubitblock union).
#
#
# The allocation phase determines the ubitblock number and count for each block.

#  carry-in access is a byte load:  carryG[packno]._8[offset]
#  carry-out store goes to a local pack variable until the final byte of a pack is reached
#
#  if-test: let P be the pack size in {1,2,4,8,...};
#    if P <= 8, use an integer test expression  carryG[packno]._<P*8>[block_offset]
#
#  while-test: similar
#    local while declaration: use a copy of the carry group
#    while finalization (carry combine): round up and |= into the structure
#
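# Illustrative note (assuming fw = 8, so field_count = 16): a block whose
# aligned carry count is 4 and whose carries occupy byte positions 4..7 of
# carryG[0] is if-tested with the single integer load carryG[0]._32[1],
# rather than by inspecting four separate bytes.
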
    def carry_pack_full(self, ub, mode = ast.Load()):
       return make_att(make_index(self.carryGroupVar, ub), '_128', mode)

    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
       return make_index(make_att(make_index(self.carryGroupVar, ub), '_%i' % fw), rp, mode)

    def local_pack_full(self, ub, mode = ast.Load()):
       return make_att(make_index("sub" + self.carryGroupVar, ub), '_128', mode)


    def cg_temp(self, hi_carry, lo_carry = None):
      if lo_carry == None or hi_carry == lo_carry: return "%s%i" % (self.temp_prefix, hi_carry)
      else: return "%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)

    def local_temp(self, hi_carry, lo_carry = None):
      if lo_carry == None or hi_carry == lo_carry: return "sub%s%i" % (self.temp_prefix, hi_carry)
      else: return "sub%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)

    def gen_merges(self, carry_last, carry_base):
      size = carry_last - carry_base + 1
      if carry_last & size:
        v1 = mk_var(self.cg_temp(carry_last, carry_base))
        v0 = mk_var(self.cg_temp(carry_last - size, carry_base - size))
        v2 = mk_var(self.cg_temp(carry_last, carry_base - size), ast.Store())
        return [make_assign(v2, make_mergeh(self.fw * size, v1, v0))] + self.gen_merges(carry_last, carry_base - size)
      else: return []

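    # Illustrative note: with the default temp_prefix '__c', gen_merges(3, 3)
    # produces the tail of the packing sequence shown in the strategy comment
    # above, i.e. assignments equivalent to
    #   __c3_2 = esimd<fw>::mergeh(__c3, __c2)
    #   __c3_0 = esimd<2*fw>::mergeh(__c3_2, __c1_0)
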
    #
    #  Given that carry_num carries have been generated and packed,
    #  add zero_count additional carry zero values and pack.
    #  Use shifts to introduce multiple zeroes, where possible.
    #
    def gen_multiple_carry_zero_then_pack(self, carry_num, zero_count):
      if zero_count == 0: return []
      pending_carry_pack_size = low_bit(carry_num)
      pending_carry_base = carry_num - pending_carry_pack_size
      # We may be able to fill zeroes by shifting.
      # But the shift is limited by any further pending carry pack and
      # the constraint that the result must produce a well-formed pack
      # having a power-of-2 number of entries.
      #
      final_num = carry_num + zero_count
      pack_size2 = low_bit(pending_carry_base)
      if pending_carry_base == 0:
        shift = pow2floor(final_num) - pending_carry_pack_size
      else:
        shift = min(low_bit(pending_carry_base), low_bit(final_num)) - pending_carry_pack_size
      if pending_carry_pack_size == 0 or shift == 0:
        # There is either no pending pack or we are not generating enough
        # carry zeroes to combine into the pending pack, so we can only add new
        # packs.
        #
        if zero_count == 1:  return [make_assign(self.cg_temp(carry_num), make_zero(self.fw))]
        else:
          zero_count_floor = pow2floor(zero_count)
          hi_num = carry_num + zero_count_floor
          a1 = make_assign(self.cg_temp(hi_num - 1, carry_num), make_zero(self.fw))
          remaining_zeroes = zero_count - zero_count_floor
          return [a1] + self.gen_multiple_carry_zero_then_pack(hi_num, remaining_zeroes)
      #
      shift_result = self.cg_temp(carry_num + shift - 1, pending_carry_base)
      pending = self.cg_temp(carry_num - 1, pending_carry_base)
      #print shift_result, " by shift ", pending, shift
      a1 = make_assign(shift_result, make_call('bitblock::srli<%i>' % (self.fw * shift), [mk_var(pending)]))
      # Do any necessary merges
      m = self.gen_merges(carry_num + shift - 1,  pending_carry_base)
      return [a1] + m + self.gen_multiple_carry_zero_then_pack(carry_num + shift, zero_count - shift)

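    # Illustrative note: if two carries are already packed in __c1_0 and the
    # next two carry positions are known to be zero, then (with the default
    # prefix) gen_multiple_carry_zero_then_pack(2, 2) emits the single shift
    #   __c3_0 = bitblock::srli<2*fw>(__c1_0)
    # rather than generating explicit zero constants and merges.
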

    def allocate_ops(self):
      carry_count = 0
      for op in range(self.carryInfoSet.operation_count):
        b = self.carryInfoSet.containing_block[op]
        if op != 0:
          # If we've just left a block, ensure that we are aligned.
          b_last = self.carryInfoSet.containing_block[op-1]
          if not block_contains(b_last, b, self.carryInfoSet.parent_block):
            # Find the max-sized block just exited.
            while not block_contains(self.carryInfoSet.parent_block[b_last], b, self.carryInfoSet.parent_block):
              b_last = self.carryInfoSet.parent_block[b_last]
            align_base = self.aligned_size[b_last]
            if align_base > self.field_count: align_base = self.field_count
            carry_count = align(carry_count, align_base)
        if self.carryInfoSet.block_first_op[b] == op:
          # If we're just entering a block, ensure that we are aligned.
          align_base = self.aligned_size[b]
          if align_base > self.field_count: align_base = self.field_count
          carry_count = align(carry_count, align_base)
          self.block_base[b] = carry_count
        if op not in self.carryInfoSet.advance_amount.keys():
          self.alloc_map[op] = carry_count
          carry_count += 1
        elif self.carryInfoSet.advance_amount[op] == 1:
          self.alloc_map[op] = carry_count
          carry_count += 1
      # When processing the last operation, make sure that the "next" operation
      # appears to start a new pack.
      self.alloc_map[self.carryInfoSet.operation_count] = align(carry_count, self.field_count)

    def GenerateCarryDecls(self):
        return "  ubitblock %s [%i];\n" % (self.carryGroupVar, self.ubitblock_count)
    def GenerateInitializations(self):
        v = self.carryGroupVar
        inits = ""
        for i in range(0, self.ubitblock_count):
          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            ub = posn/self.field_count
            rp = posn%self.field_count
            inits += "%s[%i]._%i[%i] = 1;\n" % (self.carryGroupVar, ub, self.fw, rp)
        return inits
    def GenerateStreamFunctionDecls(self):
        f = self.field_count
        s = 1
        decls = []
        while f > 0:
          decls += [self.cg_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        return BitBlock_decls_from_vars(decls)

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        return make_call("convert", [self.carry_pack_index(self.fw, ub, rp)])
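    # Illustrative note (assuming fw = 8, field_count = 16): a carry allocated
    # at position 21 gives ub = 1, rp = 5, so the generated carry-in expression
    # is  convert(carryG[1]._8[5]).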
    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        # Only generate an actual store for the last carryout
        assigs = [make_assign(self.temp_prefix + repr(rp), carry_out_expr)]
        assigs += self.gen_merges(rp, rp)
        next_posn = self.alloc_map[operation_no + 1] - self.carry_offset
        skip = next_posn - posn - 1
        if skip > 0:
          assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip)
        #print (posn, skip)
        if next_posn % self.field_count == 0:
          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
          storable_carry_in_form = make_call(shift_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(make_assign(self.carry_pack_full(ub, ast.Store()), storable_carry_in_form))
        return assigs
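    # Illustrative note (assuming fw = 8, field_count = 16): when the stored
    # carry completes a 16-byte pack, the final append above generates
    #   carryG[ub]._128 = simd<8>::srli<7>(__c15_0)
    # converting the packed carries into storable carry-in form.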
    def GenerateAdvanceInAccess(self, operation_no): pass
        #adv_index = self.advIndex[operation_no - self.operation_offset]
        #return mkCall(self.carryGroupVar + "." + 'get_pending64', [ast.Num(adv_index)])
    def GenerateAdvanceOutStore(self, operation_no, adv_out_expr): pass
        #adv_index = self.advIndex[operation_no - self.operation_offset]
        #cq_index = adv_index + self.carry_count
        #return [ast.Assign([ast.Subscript(self.CarryGroupAtt('cq'), ast.Index(ast.Num(cq_index)), ast.Store())],
                           #mkCall("bitblock::srli<64>", [adv_out_expr]))]
    def GenerateTest(self, block_no, testExpr):
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        count = self.aligned_size[block_no]
        width = count * self.fw
        if count < self.field_count:
            t = self.carry_pack_index(width, ub, rp/count)
            return TestHelper_Integer_Or(testExpr, t)
        else:
            t = self.carry_pack_full(ub)
            for i in range(1, count/self.field_count):
              v2 = self.carry_pack_full(ub + i)
              t = make_call('simd_or', [t, v2])
            return TestHelper_Bitblock_Or(testExpr, t)
    def GenerateCarryIfTest(self, block_no, ifTest):
        return self.GenerateTest(block_no, ifTest)
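    # Illustrative note: for a block whose aligned carry count spans two full
    # ubitblocks (e.g. 32 carries with field_count = 16), the generated test ORs
    # carryG[ub]._128 with carryG[ub+1]._128 and folds the result into the
    # existing bitblock::any(...) test expression.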

    def GenerateCarryElseFinalization(self, block_no):
        # If the block consists of full carry packs, then
        # no action need be taken: the corresponding carry-in packs
        # must already be zero, or the then branch would have been taken.
        count = self.aligned_size[block_no]
        if count % self.field_count == 0: return []
        # The block has half a carry-pack or less.
        assigs = []
        posn = self.block_base[block_no] - self.carry_offset
        ub = posn / self.field_count
        rp = posn % self.field_count
        next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no]
        end_pos = (self.alloc_map[next_op] - self.carry_offset - 1) % self.field_count
        #print rp, next_op, self.alloc_map[next_op]
        #assigs = [make_assign(self.cg_temp(end_pos, rp), make_zero(self.fw))]
        assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1)
        if (end_pos + 1) % self.field_count == 0:
          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
          storable_carry_in_form = make_call(shift_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
          assigs.append(make_assign(self.carry_pack_full(ub, ast.Store()), storable_carry_in_form))
        return assigs

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        if count >= self.field_count:
          ub_count = count / self.field_count
          decls = [make_callStmt('ubitblock_declare', [mk_var('sub' + self.carryGroupVar), ast.Num(ub_count)])]
          count = self.field_count
        else: decls = []
        # Generate carry pack temps.
        f = count
        s = 1
        temps = []
        while f > 0:
          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        #return BitBlock_decls_from_vars(decls)
        return decls + [make_callStmt('BitBlock_declare', [mk_var(t)]) for t in temps]

    def GenerateCarryWhileTest(self, block_no, testExpr):
        return self.GenerateTest(block_no, testExpr)

    def EnterLocalWhileBlock(self, operation_offset):
        self.carryGroupVar = "sub" + self.carryGroupVar
        self.temp_prefix = "sub" + self.temp_prefix
        self.carry_offset = self.alloc_map[operation_offset]
        #print "self.carry_offset = %i" % self.carry_offset
    def ExitLocalWhileBlock(self):
        self.carryGroupVar = self.carryGroupVar[3:]
        self.temp_prefix = self.temp_prefix[3:]
        self.carry_offset = 0

    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.block_base[block_no]
        ub = posn/self.field_count
        rp = posn%self.field_count
        count = self.aligned_size[block_no]
        if count < self.field_count:
          v0 = self.cg_temp(rp + count - 1, rp)
          lv0 = self.local_temp(count - 1, 0)
          return [make_assign(v0, make_call('simd_or', [mk_var(v0), mk_var(lv0)]))]
        n = (count+self.field_count-1)/self.field_count
        assigs = []
        for i in range(n):
          assigs.append(make_assign(self.carry_pack_full(ub + i, ast.Store()), make_call('simd_or', [self.carry_pack_full(ub + i), self.local_pack_full(i)])))
        return assigs
    def GenerateStreamFunctionFinalization(self):
        return []

#
#  A version of HMCPS_CCGO eliminating ubitblocks
#
class HMCPS_CCGO2(HMCPS_CCGO):

    def carry_pack_full(self, ub, mode = ast.Load()):
       return make_index(self.carryGroupVar, ub, mode)

    def carry_pack_index(self, fw, ub, rp, mode = ast.Load()):
       return make_call("mvmd<%i>::extract<%i>" % (fw, rp), [self.carry_pack_full(ub)])

    def local_pack_full(self, ub, mode = ast.Load()):
       return make_index("sub" + self.carryGroupVar, ub, mode)

    def GenerateCarryDecls(self):
        return "  BitBlock simd_const_1;\n  BitBlock %s [%i];\n" % (self.carryGroupVar, self.ubitblock_count)

    def GenerateInitializations(self):
        v = self.carryGroupVar
        inits = "simd_const_1 = mvmd<16>::srli<7>(simd<16>::constant<1>());\n"
        for i in range(0, self.ubitblock_count):
          inits += "%s[%i] = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            ub = posn/self.field_count
            rp = posn%self.field_count
            v = "%s[%i]" % (self.carryGroupVar, ub)
            inits += "%s = simd_or(%s, mvmd<%i>::slli<%i>(simd_const_1));\n" % (v, v, self.fw, rp)
        return inits

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no] - self.carry_offset
        ub = posn/self.field_count
        rp = posn%self.field_count
        #return make_call("convert", [self.carry_pack_index(self.fw, ub, rp)])
        return make_call('simd_and', [make_call("mvmd<%i>::srli<%i>" % (self.fw, rp), [self.carry_pack_full(ub)]), mk_var("simd_const_1")])
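    # Illustrative note (assuming fw = 8, field_count = 16): a carry allocated at
    # position 21 gives ub = 1, rp = 5, so the carry-in expression becomes
    #   simd_and(mvmd<8>::srli<5>(carryG[1]), simd_const_1)
    # avoiding both the ubitblock union and the convert() call used by HMCPS_CCGO.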

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        count = self.aligned_size[block_no]
        if count >= self.field_count:
          ub_count = count / self.field_count
          decls = [make_callStmt('BitBlock_declare', [self.local_pack_full(ub_count)])]
          decls += [make_assign(self.local_pack_full(i, ast.Store()), make_zero(self.fw)) for i in range(ub_count)]
          count = self.field_count
        else: decls = []
        # Generate carry pack temps.
        f = count
        s = 1
        temps = []
        while f > 0:
          temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)]
          f = f/2
          s = s * 2
        #return BitBlock_decls_from_vars(decls)
        return decls + [make_callStmt('BitBlock_declare', [mk_var(t)]) for t in temps]

513