source: proto/Compiler/CCGO_HMCPS.py @ 2700

Last change on this file since 2700 was 2700, checked in by cameron, 6 years ago

Initial check-in for Hierarchical Merging Carry Pack Strategy

#
# CCGO_HMCPS.py
#
# Carry Code Generator Object using Hierarchical Merging Carry Pack Strategy
#
# Robert D. Cameron
# November 26, 2012
# Licensed under Open Software License 3.0
#
import ast
import CCGO

# Copyright 2012, Robert D. Cameron
# All rights reserved.
#
# Helper functions
#
def TestHelper_Bitblock_Or(testExpr, bitBlockExpr):
    assert isinstance(testExpr, ast.Call)
    assert isinstance(testExpr.func, ast.Name)
    assert testExpr.func.id == 'bitblock::any'
    testExpr.args[0] = make_call('simd_or', [bitBlockExpr, testExpr.args[0]])
    return testExpr

def TestHelper_Integer_Or(testExpr, intExpr):
    return ast.BinOp(testExpr, ast.BitOr(), intExpr)

def mk_var(var, mode=ast.Load()):
  if isinstance(var, str):
        var = ast.Name(var, mode)
  return var

def make_mergeh(fw, x, y):
  #return "esimd<%i>::mergeh(%s, %s)" % (fw, x, y)
  return make_call("esimd<%i>::mergeh" % fw, [mk_var(x), mk_var(y)])

def make_assign(var, expr):
  #return "%s = %s;\n" % (var, expr)
  if isinstance(var, str):
        var = ast.Name(var, ast.Store())
  return ast.Assign([var], expr)

def make_zero(fw):
  #return "simd<%i>::constant<0>()" % fw
  return make_call("simd<%i>::constant<0>" % fw, [])

def make_index_load(var, num):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Subscript(var, ast.Index(ast.Num(num)), ast.Load())

def make_index_store(var, num):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Subscript(var, ast.Index(ast.Num(num)), ast.Store())

def make_att_load(var, att):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Attribute(var, att, ast.Load())

def make_att_store(var, att):
  if isinstance(var, str):
        var = ast.Name(var, ast.Load())
  return ast.Attribute(var, att, ast.Store())

def make_call(fn_name, args):
  if isinstance(fn_name, str):
        fn_name = ast.Name(fn_name, ast.Load())
  return ast.Call(fn_name, args, [], None, None)

def make_callStmt(fn_name, args):
  if isinstance(fn_name, str): fn_name = ast.Name(fn_name, ast.Load())
  return ast.Expr(ast.Call(fn_name, args, [], None, None))

#
#
# Carry Pack Assignment Strategy
#
# The hierarchical merging carry pack strategy packs carries
# into groups of 2, 4, 8 and 16.  For example, to pack
# 4 carries c0, c1, c2, and c3 into the 32-bit fields of
# a 128-bit register, the following operations are used.
#
# c0 = pablo.SomeCarryGeneratingFn(...)
# c1 = pablo.SomeCarryGeneratingFn(...)
# c1_0 = esimd::mergeh<32>(c1, c0)
# c2 = pablo.SomeCarryGeneratingFn(...)
# c3 = pablo.SomeCarryGeneratingFn(...)
# c3_2 = esimd::mergeh<32>(c3, c2)
# c3_0 = esimd::mergeh<64>(c3_2, c1_0)
#
#
# Packing operations are generated sequentially when
# the appropriate individual carries or subpacks become
# available.
#
# Generate the packing operations assuming that the
# carry_num carry has just been generated.
#
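# Example (illustrative, using the default temp prefix '__c'):
# gen_carry_pack(32, 3, '__c') yields assignments equivalent to
#    __c3_2 = esimd<32>::mergeh(__c3, __c2)
#    __c3_0 = esimd<64>::mergeh(__c3_2, __c1_0)
# because carry 3 (binary 11) completes both a pack of 2 and a pack of 4.
#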
def gen_carry_pack(pack_fw, carry_num, temp_pfx):
  # The range of carries now to be packed depends on
  # the number of rightmost contiguous 1 bits
  carry_range_size = (carry_num + 1) &~ carry_num
  assign_list = []
  i = 2
  v1 = temp_pfx + repr(carry_num)
  v0 = temp_pfx + repr(carry_num ^ 1)
  fw = pack_fw
  while i <= carry_range_size:
    p = '%s%i_%i' % (temp_pfx, carry_num, carry_num - i + 1)
    assign_list.append(make_assign(p, make_mergeh(fw, v1, v0)))
    v1 = p
    v0 = '%s%i_%i' % (temp_pfx, carry_num - i, carry_num - 2*i+1)
    i *= 2
    fw *= 2
  return assign_list

#
# Pack in a zero carry value
#
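# Example (illustrative): gen_carry_zero_then_pack(32, 3, '__c') first emits
#    __c3 = simd<32>::constant<0>()
# and then the same merge sequence that gen_carry_pack(32, 3, '__c') produces.
#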
def gen_carry_zero_then_pack(pack_fw, carry_num, temp_pfx):
  # The range of carries now to be packed depends on
  # the number of rightmost contiguous 1 bits
  carry_range_size = (carry_num + 1) &~ carry_num
  i = 2
  v1 = temp_pfx + repr(carry_num)
  v0 = temp_pfx + repr(carry_num ^ 1)
  fw = pack_fw
  assign_list = [make_assign(v1, make_zero(fw))]
  while i <= carry_range_size:
    p = '%s%i_%i' % (temp_pfx, carry_num, carry_num - i + 1)
    assign_list.append(make_assign(p, make_mergeh(fw, v1, v0)))
    v1 = p
    v0 = '%s%i_%i' % (temp_pfx, carry_num - i, carry_num - 2*i+1)
    i *= 2
    fw *= 2
  return assign_list

#
# Generate multiple zero carries to complete a carry pack.
#
#
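# Example (illustrative): gen_multiple_carry_zero_then_pack(32, 2, 2, '__c') emits
#    __c3_0 = mvmd<32>::slli<2>(__c1_0)
# shifting the already-packed pair __c1_0 up by two fields to supply zero
# values for carries 2 and 3.
#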
def gen_multiple_carry_zero_then_pack(pack_fw, carry_num, carry_count, temp_pfx):
  assign_list = []
  last = carry_num + carry_count
  p2f = pow2floor(last)
  if carry_num == 0:
    assign_list.append(make_assign('%s%i_0' % (temp_pfx, p2f-1), make_zero(pack_fw)))
    carry_num = p2f
    carry_count -= p2f
  else:
    low_bit = carry_num &~ (carry_num - 1)
    base = carry_num - low_bit
    if low_bit == 1: pending = '%s%i' % (temp_pfx, carry_num - 1)
    else: pending = '%s%i_%i' % (temp_pfx, carry_num - 1, base)
    while base != 0 and carry_num < p2f:
       next_bit = base &~ (base - 1)
       shift = next_bit - low_bit
       shift_result = '%s%i_%i' % (temp_pfx, carry_num - 1 + shift, base)
       assign_list.append(make_assign(shift_result, make_call('mvmd<%i>::slli<%i>' % (pack_fw, shift), [mk_var(pending)])))
       pending2 = '%s%i_%i' % (temp_pfx, base - 1, base - next_bit)
       merge_result = '%s%i_%i' % (temp_pfx, carry_num - 1 + shift, base - next_bit)
       assign_list.append(make_assign(merge_result, make_mergeh(pack_fw * next_bit, shift_result, pending2)))
       carry_count -= shift
       carry_num += shift
       low_bit = carry_num &~ (carry_num - 1)
       base = carry_num - low_bit
       pending = merge_result
    shift = p2f - low_bit
    if shift != 0:
       shift_result = '%s%i_0' % (temp_pfx, carry_num - 1 + shift)
       assign_list.append(make_assign(shift_result, make_call('mvmd<%i>::slli<%i>' % (pack_fw, shift), [mk_var(pending)])))
       carry_count -= shift
       carry_num += shift

  for i in range(carry_count):
    assign_list += gen_carry_zero_then_pack(pack_fw, carry_num + i, temp_pfx)
  return assign_list


#
# Carry Storage/Access
#
# Carries are stored in one or more ubitblocks as byte values.
# For each block, the carry count is rounded up to the nearest power-of-2 ceiling P,
# so that the carry test for that block is accessible as a single value of P bytes.
# Packs of 1, 2, 4 or 8 carries are respectively represented
# as one or more _8, _16, _32 or _64 values.  (Members of the ubitblock union.)
#
#
# The allocation phase determines the ubitblock_no and count for each block.

#  carry-in access is a byte load:  carryG[packno]._8[offset]
#  carry-out store is to a local pack var until we get to the final byte of a pack
#
#  if-test: let P be pack_size in {1,2,4,8,...}
#    if P <= 8, use an integer test expression carryG[packno]._(P*8)[block_offset]
#
#  while test: similar
#    local while decl: use a copy of carryGroup
#    while finalize carry combine: round up and |= into the structure
#
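# Example (illustrative, assuming byte-sized carry fields, i.e. fw = 8):
# a block whose aligned carry count is P = 4 is tested through the 32-bit
# union member, e.g. carryG[packno]._32[block_offset], as generated by
# HMCPS_CCGO.GenerateTest below.
#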
def pow2ceil(n):
   c = 1
   while c < n: c *= 2
   return c

def pow2floor(n):
   c = 1
   while c <= n: c *= 2
   return c/2

def align(n, align_base):
  return ((n + align_base - 1) / align_base) * align_base
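# Illustrative values: pow2ceil(5) == 8, pow2floor(5) == 4, align(5, 4) == 8.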

def old_determine_aligned_block_sizes(pack_size, cis):
  aligned_size = {}
  for i in range(cis.block_count): aligned_size[i] = 0
  seen = []
  for i in range(cis.block_count):
    # Work backwards to process all child blocks before the parent
    # so that the parent incorporates the updated child counts.
    b = cis.block_count - i - 1
    # Start with the inherited carry count from processing children.
    if not aligned_size.has_key(b): carry_count = 0
    else: carry_count = aligned_size[b]
    for op in range(cis.block_first_op[b], cis.block_first_op[b] + cis.block_op_count[b]):
      if op in seen: continue
      if op not in cis.advance_amount.keys(): carry_count += 1
      elif cis.advance_amount[op] == 1: carry_count += 1
      seen.append(op)
    if carry_count > pack_size:
      aligned_size[b] = ((carry_count - 1) / pack_size) * (pack_size + 1)
    elif cis.whileblock[b]:
      aligned_size[b] = pack_size
    else:
      aligned_size[b] = pow2ceil(carry_count)
    if b != 0: aligned_size[cis.parent_block[b]] += aligned_size[b]
  print aligned_size
  return aligned_size

def determine_aligned_block_sizes(pack_size, cis):
  aligned_size = {}
  for i in range(cis.block_count): aligned_size[i] = 0
  seen = []
  for i in range(cis.block_count):
    # Work backwards to process all child blocks before the parent
    # so that the parent incorporates the updated child counts.
    b = cis.block_count - i - 1
    b_carries = 0
    op = cis.block_first_op[b]
    while op < cis.block_first_op[b] + cis.block_op_count[b]:
      sb = cis.containing_block[op]
      if sb == b:
        if op not in cis.advance_amount.keys(): b_carries += 1
        elif cis.advance_amount[op] == 1: b_carries += 1
        op += 1
      else:
        align_base = aligned_size[sb]
        if align_base > pack_size: align_base = pack_size
        b_carries = align(b_carries, align_base)
        b_carries += aligned_size[sb]
        op += cis.block_op_count[sb]
    if cis.whileblock[b] or aligned_size[b] > pack_size:
      aligned_size[b] = align(b_carries, pack_size)
    else:
      aligned_size[b] = pow2ceil(b_carries)
  return aligned_size

MAX_LINE_LENGTH = 80

def BitBlock_decls_from_vars(varlist):
  global MAX_LINE_LENGTH
  decls = ""
  if not len(varlist) == 0:
          decls = "             BitBlock"
          pending = ""
          linelgth = 10
          for v in varlist:
            if linelgth + len(v) + 2 <= MAX_LINE_LENGTH:
              decls += pending + " " + v
              linelgth += len(pending + v) + 1
            else:
              decls += ";\n             BitBlock " + v
              linelgth = 11 + len(v)
            pending = ","
          decls += ";"
  return decls
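# Illustrative output: for ['__c0', '__c1'] this returns a single indented
# declaration of the form "BitBlock __c0, __c1;"; longer variable lists are
# wrapped onto additional BitBlock declaration lines.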


class HMCPS_CCGO(CCGO.CCGO):
    def __init__(self, fw, carryInfoSet, carryGroupVarName='carryG', temp_prefix='__c'):
        self.fw = fw
        self.field_count = 128/fw
        self.carryInfoSet = carryInfoSet
        self.carryGroupVar = carryGroupVarName
        self.temp_prefix = temp_prefix
        self.aligned_size = determine_aligned_block_sizes(self.field_count, carryInfoSet)
        self.ubitblock_count = (self.aligned_size[0] + self.field_count - 1) / self.field_count
        self.alloc_map = {}
        self.alloc_map[0] = 0
        self.block_base = {}
        self.allocate_ops()

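    # allocate_ops assigns a position in the packed carry storage to every
    # operation that needs a single carry (no advance amount recorded, or an
    # advance amount of 1).  At the first operation of each non-top-level
    # block, the running position is aligned to the block's aligned size
    # (capped at field_count), so each block's carries form a contiguous
    # sub-pack whose start is recorded in block_base.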
    def allocate_ops(self):
      carry_count = 0
      for op in range(self.carryInfoSet.operation_count):
        b = self.carryInfoSet.containing_block[op]
        if b != 0:
          if self.carryInfoSet.block_first_op[b] == op:
            align_base = self.aligned_size[b]
            if align_base > self.field_count: align_base = self.field_count
            carry_count = align(carry_count, align_base)
            self.block_base[b] = carry_count
        if op not in self.carryInfoSet.advance_amount.keys():
          self.alloc_map[op] = carry_count
          carry_count += 1
        elif self.carryInfoSet.advance_amount[op] == 1:
          self.alloc_map[op] = carry_count
          carry_count += 1

    def GenerateCarryDecls(self):
        return "  ubitblock %s [%i];\n" % (self.carryGroupVar, self.ubitblock_count)
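    # Example (illustrative): with the default variable name and a
    # ubitblock_count of 4, the declaration is "  ubitblock carryG [4];\n".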
    def GenerateInitializations(self):
        v = self.carryGroupVar
        #const_0 = make_zero(self.fw)
        #inits = [make_assign(make_index_store(v, i), const_0) for i in range(0, self.ubitblock_count)]
        inits = ""
        for i in range(0, self.ubitblock_count):
          inits += "%s[%i]._128 = simd<%i>::constant<0>();\n" % (v, i, self.fw)
        for op_no in range(self.carryInfoSet.block_op_count[0]):
          if op_no in self.carryInfoSet.init_one_list:
            posn = self.alloc_map[op_no]
            ub = posn/self.field_count
            rp = posn%self.field_count
            inits += "%s[%i]._%i[%i] = 1;\n" % (self.carryGroupVar, ub, self.fw, rp)
            #v_ub = make_index_load(self.carryGroupVar, ub)
            #v_ub_fw = make_att_load(v_ub, '_%i' % self.fw)
            #inits.append(make_assign(make_index_store(v_ub_fw, rp), ast.Num(1)))
        return inits
    def GenerateStreamFunctionDecls(self):
        f = self.field_count
        decls = [self.temp_prefix + repr(i) for i in range(self.field_count)]
        while f > 1:
          f = f/2
          s = self.field_count/f
          decls += [self.temp_prefix + "%i_%i" % (s*(i+1)-1, s*i) for i in range(f)]
        return BitBlock_decls_from_vars(decls)

    def GenerateCarryInAccess(self, operation_no):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no]
        ub = posn/self.field_count
        rp = posn%self.field_count
        v_ub = make_index_load(self.carryGroupVar, ub)
        v_ub_fw = make_att_load(v_ub, '_%i' % self.fw)
        return make_call("convert", [make_index_load(v_ub_fw, rp)])
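    # Example (illustrative, assuming fw = 8 so field_count = 16): an operation
    # allocated at position 21 reads its carry-in as convert(carryG[1]._8[5]).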
    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
        block_no = self.carryInfoSet.containing_block[operation_no]
        posn = self.alloc_map[operation_no]
        ub = posn/self.field_count
        rp = posn%self.field_count
        # Only generate an actual store for the last carryout
        assigs = [make_assign(self.temp_prefix + repr(rp), carry_out_expr)]
        assigs += gen_carry_pack(self.fw, rp, self.temp_prefix)
        skip = 0
        if operation_no + 1 == self.carryInfoSet.operation_count:
          # Last operation
          skip = self.field_count - rp - 1
        else:
          skip = self.alloc_map[operation_no + 1] - posn - 1
        if skip > 0:
          assigs += gen_multiple_carry_zero_then_pack(self.fw, rp+1, skip, self.temp_prefix)
        print (posn, skip)
        if skip + posn + 1 == self.field_count:
          v_ub = make_index_load(self.carryGroupVar, ub)
          shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
          assigs.append(make_assign(make_att_store(v_ub, '_128'), make_call(shift_op, [mk_var(self.temp_prefix + '%i_0' % (self.field_count - 1))])))
        return assigs
    def GenerateAdvanceInAccess(self, operation_no): pass
        #adv_index = self.advIndex[operation_no - self.operation_offset]
        #return mkCall(self.carryGroupVar + "." + 'get_pending64', [ast.Num(adv_index)])
    def GenerateAdvanceOutStore(self, operation_no, adv_out_expr): pass
        #adv_index = self.advIndex[operation_no - self.operation_offset]
        #cq_index = adv_index + self.carry_count
        #return [ast.Assign([ast.Subscript(self.CarryGroupAtt('cq'), ast.Index(ast.Num(cq_index)), ast.Store())],
                           #mkCall("bitblock::srli<64>", [adv_out_expr]))]
    def GenerateTest(self, block_no, testExpr):
        posn = self.block_base[block_no]
        ub = posn/self.field_count
        rp = posn%self.field_count
        count = self.aligned_size[block_no]
        width = count * self.fw
        v_ub = make_index_load(self.carryGroupVar, ub)
        if width <= 64:
            t = make_index_load(make_att_load(v_ub, '_%i' % width), rp/count)
            return TestHelper_Integer_Or(testExpr, t)
        else:
            t = make_att_load(v_ub, '_128')
            for i in range(1, count/self.field_count):
              v2 = make_att_load(make_index_load(self.carryGroupVar, ub + i), '_128')
              t = make_call('simd_or', [t, v2])
            return TestHelper_Bitblock_Or(testExpr, t)
    def GenerateCarryIfTest(self, block_no, ifTest):
        return self.GenerateTest(block_no, ifTest)

    def GenerateCarryElseFinalization(self, block_no):
        # if the block consists of full carry packs, then
        # no action need be taken: the corresponding carry-in packs
        # must already be zero, or the then branch would have been taken.
        count = self.aligned_size[block_no]
        if count % self.field_count == 0: return []
        assigs = []
        posn = self.block_base[block_no]
        ub = posn/self.field_count
        rp = posn%self.field_count
        assigs = gen_multiple_carry_zero_then_pack(self.fw, rp, count, self.temp_prefix)
        return assigs

    def GenerateLocalDeclare(self, block_no):
        if self.carryInfoSet.block_op_count[block_no] == 0: return []
        ub_count = (self.carryInfoSet.block_op_count[block_no] + self.field_count - 1) / self.field_count
        decls = [make_callStmt('ubitblock_declare', [mk_var('sub' + self.carryGroupVar), ast.Num(ub_count)])]
        # Generate all carry pack temps in case they are needed.
        # This might be overkill in some cases.
        f = self.field_count
        temps = [self.temp_prefix + repr(i) for i in range(self.field_count)]
        while f > 1:
          f = f/2
          s = self.field_count/f
          temps += [self.temp_prefix + "%i_%i" % (s*(i+1)-1, s*i) for i in range(f)]
        #return BitBlock_decls_from_vars(decls)
        return decls + [make_callStmt('BitBlock_declare', [mk_var(t)]) for t in temps]

    def GenerateCarryWhileTest(self, block_no, testExpr):
        return self.GenerateTest(block_no, testExpr)

    def EnterLocalWhileBlock(self, operation_offset):
        self.carryGroupVar = "sub" + self.carryGroupVar
    def ExitLocalWhileBlock(self):
        self.carryGroupVar = self.carryGroupVar[3:]

    def GenerateCarryWhileFinalization(self, block_no):
        posn = self.alloc_map[block_no]
        ub = posn/self.field_count
        count = self.aligned_size[block_no]
        v = self.carryGroupVar
        lv = "sub" + v
        n = (count+self.field_count-1)/self.field_count
        assigs = []
        for i in range(n):
          v_ub_i = make_index_load(v, ub + i)
          assigs.append(make_assign(make_att_store(v_ub_i, '_128'), make_call('simd_or', [make_att_load(v_ub_i, '_128'), make_att_load(make_index_load(lv, i), '_128')])))
        return assigs
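    # Note on the finalization below (editorial reading of the code): the packed
    # carry-out bit for each operation ends up in the high bit of its field, so
    # simd<fw>::srli<fw-1> moves it to bit 0, giving the 0/1 field values that
    # GenerateCarryInAccess reads back as carry-ins on the next invocation.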
    def GenerateStreamFunctionFinalization(self):
        if self.carryInfoSet.carry_count == 0: return []
        # Generate statements to shift all carries from carry-out form to carry-in form.
        v = self.carryGroupVar
        n = (self.aligned_size[0] + self.field_count - 1)/self.field_count
        shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1)
        return [make_assign(make_att_store(make_index_load(v, i), '_128'), make_call(shift_op, [make_att_load(make_index_load(v, i), '_128')])) for i in range(n)]