Changeset 2802 for proto


Ignore:
Timestamp:
Dec 31, 2012, 11:56:37 AM (7 years ago)
Author:
cameron
Message:

Fast bitpack implementation

Location:
proto/Compiler
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • proto/Compiler/CCGO_HMCPS.py

    r2801 r2802  
    99#
    1010import ast
    11 import CCGO
     11import carryInfo, CCGO
    1212
    1313#
     
    239239      else: return "sub%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry)
    240240   
    241     def gen_merges(self, carry_last, carry_base):
     241    def gen_merges(self, carry_last, carry_base, add_decl = False):
    242242      size = carry_last - carry_base + 1
    243243      if carry_last & size: 
     
    245245        v0 = mk_var(self.cg_temp(carry_last - size, carry_base - size))
    246246        v2 = mk_var(self.cg_temp(carry_last, carry_base - size), ast.Store())
    247         return [make_assign(v2, make_mergeh(self.fw * size, v1, v0))] + self.gen_merges(carry_last, carry_base - size)
     247        assigs = []
     248        if add_decl: assigs.append(make_callStmt('BitBlock_declare', [v2]))
     249        assigs.append(make_assign(v2, make_mergeh(self.fw * size, v1, v0)))
     250        return assigs + self.gen_merges(carry_last, carry_base - size, add_decl)
    248251      else: return []
    249252
     
    253256    #  Use shifts to introduce multiple zeroes, where possible.
    254257    #
    255     def gen_multiple_carry_zero_then_pack(self, carry_num, zero_count):
    256       if zero_count == 0: return []
     258    def gen_multiple_carry_zero_then_pack(self, carry_num, zero_count, add_decl = False):
     259      stmts = []
     260      if zero_count == 0: return stmts
    257261      pending_carry_pack_size = low_bit(carry_num)
    258262      pending_carry_base = carry_num - pending_carry_pack_size
     
    273277        # packs.
    274278        #
    275         if zero_count == 1:  return [make_assign(self.cg_temp(carry_num), make_zero(self.fw))]
     279        if zero_count == 1: 
     280          v = mk_var(self.cg_temp(carry_num))
     281          if add_decl: stmts.append(make_callStmt('BitBlock_declare', [v]))
     282          stmts.append(make_assign(v, make_zero(self.fw)))
     283          return stmts
    276284        else:
    277285          zero_count_floor = pow2floor(zero_count)
    278286          hi_num = carry_num + zero_count_floor
    279           a1 = make_assign(self.cg_temp(hi_num - 1, carry_num), make_zero(self.fw))
     287          v = mk_var(self.cg_temp(hi_num - 1, carry_num))
     288          if add_decl: stmts.append(make_callStmt('BitBlock_declare', [v]))
     289          stmts.append(make_assign(v, make_zero(self.fw)))
    280290          remaining_zeroes = zero_count - zero_count_floor
    281           return [a1] + self.gen_multiple_carry_zero_then_pack(hi_num, remaining_zeroes
     291          return stmts + self.gen_multiple_carry_zero_then_pack(hi_num, remaining_zeroes, add_decl
    282292      #
    283       shift_result = self.cg_temp(carry_num + shift - 1, pending_carry_base)
     293      shift_result = mk_var(self.cg_temp(carry_num + shift - 1, pending_carry_base))
    284294      pending = self.cg_temp(carry_num - 1, pending_carry_base)
    285295      #a1 = make_assign(shift_result, make_call('bitblock::srli<%i>' % (self.fw * shift), [mk_var(pending)]))
    286       a1 = make_assign(shift_result, make_call('mvmd<%i>::srli<%i>' % (self.fw, shift), [mk_var(pending)]))
     296      if add_decl: stmts.append(make_callStmt('BitBlock_declare', [shift_result]))
     297      stmts.append(make_assign(shift_result, make_call('mvmd<%i>::srli<%i>' % (self.fw, shift), [mk_var(pending)])))
    287298      # Do any necessary merges
    288       m = self.gen_merges(carry_num + shift - 1,  pending_carry_base)
    289       return [a1] + m + self.gen_multiple_carry_zero_then_pack(carry_num + shift, zero_count - shift)
     299      m = self.gen_merges(carry_num + shift - 1,  pending_carry_base, add_decl)
     300      return stmts + m + self.gen_multiple_carry_zero_then_pack(carry_num + shift, zero_count - shift, add_decl)
    290301
    291302
     
    466477        ub = posn/self.field_count
    467478        rp = posn%self.field_count
    468         count = self.aligned_size[block_no] 
     479        count = self.aligned_size[block_no]
    469480        if count < self.field_count:
    470481          v0 = self.cg_temp(rp + count - 1, rp)
     
    552563
    553564    def allocate_all(self):
    554         self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet, min_block_size = 8)
     565        self.aligned_size = determine_aligned_block_sizes(self.field_count, self.carryInfoSet, min_block_size=8)
    555566        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
    556567        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
     
    613624            t = self.carry_pack_index(sz, pk, fd)
    614625            return TestHelper_Integer_Or(testExpr, t)
     626        elif sz == self.BLOCK_SIZE and align(posn, sz) == posn:
     627            fd = (posn%self.BLOCK_SIZE)/sz
     628            t = self.carry_pack_full(pk)
     629            return TestHelper_Bitblock_Or(testExpr, t)
    615630        elif rp + sz <= int_size:
    616631            e = self.carry_pack_index(int_size, pk, fd)
     
    697712          local_posn += self.field_count
    698713        return assigs
     714
     715
     716
     717class HMCPS_CCGO_BitPack2(HMCPS_CCGO_BitPack):
     718
     719    def allocate_all(self):
     720        self.aligned_size = determine_aligned_block_sizes(self.BLOCK_SIZE, self.carryInfoSet, max_whiles_per_pack = 8, min_block_size = self.field_count)
     721        self.carryPack_count = (self.aligned_size[0] + self.BLOCK_SIZE - 1) / self.BLOCK_SIZE
     722        self.totalPack_count = self.carryPack_count + self.carryInfoSet.adv_n_count
     723        self.alloc_map = {}
     724        self.alloc_map[0] = 0
     725        self.last_carry_map = {}
     726        self.adv_n_map = {}
     727        self.block_base = {}
     728        self.allocate_ops()
     729        # carry_offset is used within the inner body of while loops to access local carries.
     730        # The calculated (ub, rp) value is reduced by this amount for the local carry Pack(s).
     731        self.carry_offset = 0
     732
     733
     734    def allocate_block_positions(self):
     735        # First allocate the base position of each block relative to its
     736        # parent block, such that the relative position is a multiple
     737        # of its aligned_size or the pack_size, whichever is smallest.
     738        rel_block_posns = [0 for b in range(self.carryInfoSet.block_count)]
     739        self.direct_carries = carryInfo.direct_block_carries(self.carryInfoSet)
     740        self.aligned_direct = [align(d, max(min(pow2ceil(d), self.BLOCK_SIZE), self.field_count)) for d in self.direct_carries]
     741        working_allocation_bitmap = [((1 << a) - 1) for a in self.aligned_direct]
     742        for b in range(1, self.carryInfoSet.block_count):
     743            prnt = self.carryInfoSet.parent_block[b]
     744            sz = self.aligned_size[b]
     745            sz_map = (1 << sz) - 1
     746            posn = 0
     747            while sz_map & working_allocation_bitmap[prnt] != 0:
     748                posn += sz
     749                sz_map <<= sz
     750            working_allocation_bitmap[prnt] |= sz_map
     751            rel_block_posns[b] = posn
     752        # Now compute absolute positions
     753        self.block_base[0] = 0
     754        for b in range(1, self.carryInfoSet.block_count):
     755            self.block_base[b] = self.block_base[self.carryInfoSet.parent_block[b]] + rel_block_posns[b]
     756
     757#
     758#  Given the relative base positions of each block, allocate
     759#  its carries.
     760#
     761    def allocate_ops(self):
     762        self.allocate_block_positions()
     763        adv_n_count = 0
     764        carry_posn = [self.block_base[b] for b in range(self.carryInfoSet.block_count)]
     765        for op in range(self.carryInfoSet.operation_count):
     766            b = self.carryInfoSet.containing_block[op]
     767            self.alloc_map[op] = carry_posn[b]
     768            if op not in self.carryInfoSet.advance_amount.keys():
     769                carry_posn[b] += 1
     770                self.last_carry_map[b] = op
     771            elif self.carryInfoSet.advance_amount[op] == 1:
     772                carry_posn[b] += 1
     773                self.last_carry_map[b] = op
     774            else:
     775                self.adv_n_map[op] = adv_n_count
     776                adv_n_count += 1
     777        # When processing the last operation, make sure that the "next" operation
     778        # appears to start a new pack.
     779        self.alloc_map[self.carryInfoSet.operation_count] = self.aligned_size[0]
     780
     781    def GenerateCarryOutStore(self, operation_no, carry_out_expr):
     782        block_no = self.carryInfoSet.containing_block[operation_no]
     783        posn = self.alloc_map[operation_no] - self.carry_offset
     784        add_decl = self.alloc_map[operation_no] - self.block_base[block_no] <= self.field_count
     785        rp = posn%self.field_count
     786        # Save the carry in the carry temp variable and then merge
     787        # pending carry temps as far as possible.
     788        v = mk_var(self.temp_prefix + repr(rp))
     789        assigs = []
     790        if add_decl: assigs.append(make_callStmt('BitBlock_declare', [v]))
     791        assigs.append(make_assign(v, carry_out_expr))
     792        assigs += self.gen_merges(rp, rp, add_decl)
     793        # Only generate an actual store for the last carryout in a pack.
     794        if operation_no == self.last_carry_map[block_no]:
     795          skip = self.block_base[block_no] + self.aligned_direct[block_no] - self.alloc_map[operation_no] - 1
     796          if skip > 0:
     797            assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip, add_decl)
     798          #print (posn, skip)
     799          pk = posn/self.BLOCK_SIZE
     800          mask_blk = (posn%self.BLOCK_SIZE)/self.field_count
     801          mask_op = "hsimd<%i>::signmask" % (self.fw)
     802          storable_carry_in_form = make_call(mask_op, [mk_var(self.cg_temp(self.field_count - 1, 0))])
     803          assigs.append(make_assign(self.carry_pack_index(self.field_count, pk, mask_blk, mode = ast.Store()), storable_carry_in_form))
     804        return assigs
     805
     806    def GenerateLocalDeclare(self, block_no):
     807        if self.carryInfoSet.block_op_count[block_no] == 0: return []
     808        count = self.aligned_size[block_no]
     809        ub_count = (count + self.BLOCK_SIZE - 1)/ self.BLOCK_SIZE
     810        decls = [make_callStmt('ubitblock_declare', [mk_var("sub" + self.carryPackVar), ast.Num(ub_count)])]
     811        return decls
     812
     813    def GenerateCarryWhileFinalization(self, block_no):
     814        posn = self.block_base[block_no]
     815        pk = posn/self.BLOCK_SIZE
     816        count = self.aligned_size[block_no]
     817        assigs = []
     818        if count >= self.BLOCK_SIZE:
     819          for i in range(count/self.BLOCK_SIZE):
     820            assigs.append(make_assign(self.carry_pack_full(pk + i, mode = ast.Store()), make_call('simd_or', [self.carry_pack_full(pk + i), self.local_pack_full(i)])))
     821
     822        else:
     823          rp = (posn%self.BLOCK_SIZE)/count
     824          expr = ast.BinOp(self.carry_pack_index(count, pk, rp), ast.BitOr(), self.local_pack_index(count, 0, 0))
     825          assigs.append(make_assign(self.carry_pack_index(count, pk, rp, mode = ast.Store()), expr))
     826        return assigs
     827
     828
     829    def GenerateCarryElseFinalization(self, block_no):  return []
     830
     831    def GenerateStreamFunctionDecls(self): return ""
     832
     833
  • proto/Compiler/carryInfo.py

    r2699 r2802  
    114114    self.block_visit(whileNode)
    115115
     116
     117
     118
     119#
     120#
     121# Determine the number of carries directly within each block.
     122#
     123def direct_block_carries(carryInfoSet, count_advance_1_as_carry = True, count_advance_n_as_n = False):
     124    carries = [0 for i in range(carryInfoSet.block_count)]
     125    for op in range(carryInfoSet.operation_count):
     126        b = carryInfoSet.containing_block[op]
     127        if op not in carryInfoSet.advance_amount.keys():
     128          carries[b] += 1
     129        elif carryInfoSet.advance_amount[op] == 1 and count_advance_1_as_carry:
     130          carries[b] += 1
     131        elif count_advance_n_as_n:
     132          carries[b] += carryInfoSet.advance_amount[op]
     133    return carries
     134
     135
    116136#
    117137#####################################
  • proto/Compiler/pablo.py

    r2801 r2802  
    330330    #elif ops <= 8: ccgo = CCGO_HMCPS.HMCPS_CCGO(BLOCK_SIZE, 16, carryInfoSet, 'carryG', '__c')
    331331    #else: ccgo = CCGO_HMCPS.HMCPS_CCGO2(BLOCK_SIZE, 16, carryInfoSet, 'carryG', '__c')
    332     else: ccgo = CCGO_HMCPS.HMCPS_CCGO_BitPack(BLOCK_SIZE, 8, carryInfoSet, 'carryG', '__c')
     332    else: ccgo = CCGO_HMCPS.HMCPS_CCGO_BitPack2(BLOCK_SIZE, 8, carryInfoSet, 'carryG', '__c')
    333333  else:
    334334    ccgo = CCGO.testCCGO(BLOCK_SIZE, carryInfoSet, 'carryQ')
Note: See TracChangeset for help on using the changeset viewer.