Changeset 2703 for proto/Compiler/CCGO_HMCPS.py
 Timestamp:
 Nov 30, 2012, 6:03:21 PM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/Compiler/CCGO_HMCPS.py
r2701 r2703 138 138 return assign_list 139 139 140 #141 # Generate multiple zero carries to complete a carry pack.142 #143 #144 def gen_multiple_carry_zero_then_pack(pack_fw, carry_num, carry_count, temp_pfx):145 assign_list = []146 last = carry_num + carry_count147 p2f = pow2floor(last)148 if carry_num == 0:149 assign_list.append(make_assign('%s%i_0' % (temp_pfx, p2f1), make_zero(pack_fw)))150 carry_num = p2f151 carry_count = p2f152 else:153 low_bit = carry_num &~ (carry_num  1)154 base = carry_num  low_bit155 if low_bit == 1: pending = '%s%i' % (temp_pfx, carry_num  1)156 else: pending = '%s%i_%i' % (temp_pfx, carry_num  1, base)157 while base != 0 and carry_num <= p2f:158 next_bit = base &~ (base  1)159 shift = next_bit  low_bit160 shift_result = '%s%i_%i' % (temp_pfx, carry_num  1 + shift, base)161 assign_list.append(make_assign(shift_result, make_call('mvmd<%i>::slli<%i>' % (pack_fw, shift), [mk_var(pending)])))162 pending2 = '%s%i_%i' % (temp_pfx, base  1, base  next_bit)163 merge_result = '%s%i_%i' % (temp_pfx, carry_num  1 + shift, base  next_bit)164 assign_list.append(make_assign(merge_result, make_mergeh(pack_fw * next_bit, shift_result, pending2)))165 carry_count = shift166 carry_num += shift167 low_bit = carry_num &~ (carry_num  1)168 base = carry_num  low_bit169 pending = merge_result170 shift = p2f  low_bit171 if shift != 0:172 shift_result = '%s%i_%i' % (temp_pfx, carry_num  1 + shift, base)173 assign_list.append(make_assign(shift_result, make_call('mvmd<%i>::slli<%i>' % (pack_fw, shift), [mk_var(pending)])))174 carry_count = shift175 carry_num += shift176 #177 for i in range(carry_count):178 assign_list += gen_carry_zero_then_pack(pack_fw, carry_num + i, temp_pfx)179 return assign_list180 181 140 182 141 … … 212 171 while c <= n: c *= 2 213 172 return c/2 173 174 def low_bit(n): 175 return n  (n & (n1)) 214 176 215 177 def align(n, align_base): … … 243 205 else: 244 206 aligned_size[b] = pow2ceil(b_carries) 245 print aligned_size246 207 return 
aligned_size 247 208 … … 288 249 self.carry_offset = 0 289 250 251 def cg_temp(self, hi_carry, lo_carry = None): 252 if lo_carry == None or hi_carry == lo_carry: return "%s%i" % (self.temp_prefix, hi_carry) 253 else: return "%s%i_%i" % (self.temp_prefix, hi_carry, lo_carry) 254 255 def local_temp(self, hi_carry, lo_carry = None): 256 if lo_carry == None or hi_carry == lo_carry: return "sub%s%i" % (self.temp_prefix, hi_carry) 257 else: return "sub%s_%i_%i" % (self.temp_prefix, hi_carry, lo_carry) 258 259 def gen_merges(self, carry_last, carry_base): 260 size = carry_last - carry_base + 1 261 if carry_last & size: 262 v1 = mk_var(self.cg_temp(carry_last, carry_base)) 263 v0 = mk_var(self.cg_temp(carry_last - size, carry_base - size)) 264 v2 = mk_var(self.cg_temp(carry_last, carry_base - size), ast.Store()) 265 return [make_assign(v2, make_mergeh(self.fw * size, v1, v0))] + self.gen_merges(carry_last, carry_base - size) 266 else: return [] 267 268 # 269 # Given that carry_num carries have been generated and packed, 270 # add zero_count additional carry zero values and pack. 271 # Use shifts to introduce multiple zeroes, where possible. 272 # 273 def gen_multiple_carry_zero_then_pack(self, carry_num, zero_count): 274 if zero_count == 0: return [] 275 pending_carry_pack_size = low_bit(carry_num) 276 pending_carry_base = carry_num - pending_carry_pack_size 277 # We may be able to fill zeroes by shifting. 278 # But the shift is limited by any further pending carry pack and 279 # the constraint that the result must produce a well-formed pack 280 # having a power-of-2 entries.
281 # 282 final_num = carry_num + zero_count 283 pack_size2 = low_bit(pending_carry_base) 284 if pending_carry_base == 0: 285 shift = pow2floor(final_num) - pending_carry_pack_size 286 else: 287 shift = min(low_bit(pending_carry_base), low_bit(final_num)) - pending_carry_pack_size 288 if pending_carry_pack_size == 0 or shift == 0: 289 # There is either no pending pack or we are not generating enough 290 # carry zeroes to combine into the pending pack, so we can only add new 291 # packs. 292 # 293 if zero_count == 1: return [make_assign(self.cg_temp(carry_num), make_zero(self.fw))] 294 else: 295 zero_count_floor = pow2floor(zero_count) 296 hi_num = carry_num + zero_count_floor 297 a1 = make_assign(self.cg_temp(hi_num - 1, carry_num), make_zero(self.fw)) 298 remaining_zeroes = zero_count - zero_count_floor 299 return [a1] + self.gen_multiple_carry_zero_then_pack(hi_num, remaining_zeroes) 300 # 301 shift_result = self.cg_temp(carry_num + shift - 1, pending_carry_base) 302 pending = self.cg_temp(carry_num - 1, pending_carry_base) 303 #print shift_result, " by shift ", pending, shift 304 a1 = make_assign(shift_result, make_call('mvmd<%i>::slli<%i>' % (self.fw, shift), [mk_var(pending)])) 305 # Do any necessary merges 306 m = self.gen_merges(carry_num + shift - 1, pending_carry_base) 307 return [a1] + m + self.gen_multiple_carry_zero_then_pack(carry_num + shift, zero_count - shift) 308 309 290 310 def allocate_ops(self): 291 311 carry_count = 0 … … 317 337 # appears to start a new pack.
318 338 self.alloc_map[self.carryInfoSet.operation_count] = align(carry_count, self.field_count) 319 print self.alloc_map 320 339 321 340 def GenerateCarryDecls(self): … … 340 359 def GenerateStreamFunctionDecls(self): 341 360 f = self.field_count 342 decls = [self.temp_prefix + repr(i) for i in range(self.field_count)] 343 while f > 1: 361 s = 1 362 decls = [] 363 while f > 0: 364 decls += [self.cg_temp(s*(i+1)-1, s*i) for i in range(f)] 344 365 f = f/2 345 s = self.field_count/f 346 decls += [self.temp_prefix + "%i_%i" % (s*(i+1)-1, s*i) for i in range(f)] 366 s = s * 2 347 367 return BitBlock_decls_from_vars(decls) 348 368 … … 362 382 # Only generate an actual store for the last carryout 363 383 assigs = [make_assign(self.temp_prefix + repr(rp), carry_out_expr)] 364 assigs += gen_carry_pack(self.fw, rp, self.temp_prefix) 384 assigs += self.gen_merges(rp, rp) 365 385 next_posn = self.alloc_map[operation_no + 1] - self.carry_offset 366 386 skip = next_posn - posn - 1 367 387 if skip > 0: 368 assigs += gen_multiple_carry_zero_then_pack(self.fw, rp+1, skip, self.temp_prefix) 388 assigs += self.gen_multiple_carry_zero_then_pack(rp+1, skip) 369 389 #print (posn, skip) 370 390 if next_posn % self.field_count == 0: 371 391 v_ub = make_index_load(self.carryGroupVar, ub) 372 392 shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1) 373 storable_carry_in_form = make_call(shift_op, [mk_var(self.
temp_prefix + '%i_0' %(self.field_count - 1))]) 393 storable_carry_in_form = make_call(shift_op, [mk_var(self.cg_temp(self.field_count - 1))]) 374 394 assigs.append(make_assign(make_att_store(v_ub, '_128'), storable_carry_in_form)) 375 395 return assigs … … 413 433 rp = posn % self.field_count 414 434 next_op = self.carryInfoSet.block_first_op[block_no] + self.carryInfoSet.block_op_count[block_no] 415 end_pos = (self.alloc_map[next_op] - 1) % self.field_count 416 print rp, next_op,self.alloc_map[next_op] 417 if rp == end_pos: v = mk_var('%s%i' % (self.temp_prefix, rp)) 418 else: v = mk_var('%s%i_%i' % (self.temp_prefix, end_pos, rp)) 419 assigs = [make_assign(v, make_zero(self.fw))] 420 #assigs = gen_multiple_carry_zero_then_pack(self.fw, rp, end_pos - rp + 1, self.temp_prefix) 435 end_pos = (self.alloc_map[next_op] - self.carry_offset - 1) % self.field_count 436 #print rp, next_op,self.alloc_map[next_op] 437 #assigs = [make_assign(self.cg_temp(end_pos, rp), make_zero(self.fw))] 438 assigs = self.gen_multiple_carry_zero_then_pack(rp, end_pos - rp + 1) 439 if (end_pos + 1) % self.field_count == 0: 440 v_ub = make_index_load(self.carryGroupVar, ub) 441 shift_op = "simd<%i>::srli<%i>" % (self.fw, self.fw-1) 442 storable_carry_in_form = make_call(shift_op, [mk_var(self.cg_temp(self.field_count - 1))]) 443 assigs.append(make_assign(make_att_store(v_ub, '_128'), storable_carry_in_form)) 444 421 445 return assigs 422 446 … … 430 454 else: decls = [] 431 455 # Generate carry pack temps.
432 temps = ["sub" + self.temp_prefix + repr(i) for i in range(count)] 433 f = count 434 while f > 1: 456 f = count 457 s = 1 458 temps = [] 459 while f > 0: 460 temps += [self.local_temp(s*(i+1)-1, s*i) for i in range(f)] 435 461 f = f/2 436 s = count/f 437 temps += ["sub" + self.temp_prefix + "%i_%i" % (s*(i+1)-1, s*i) for i in range(f)] 462 s = s * 2 438 463 #return BitBlock_decls_from_vars(decls) 439 464 return decls + [make_callStmt('BitBlock_declare', [mk_var(t)]) for t in temps] … … 446 471 self.temp_prefix = "sub" + self.temp_prefix 447 472 self.carry_offset = self.alloc_map[operation_offset] 448 print "self.carry_offset = %i" % self.carry_offset 473 #print "self.carry_offset = %i" % self.carry_offset 449 474 def ExitLocalWhileBlock(self): 450 475 self.carryGroupVar = self.carryGroupVar[3:] … … 460 485 lv = "sub" + v 461 486 if count < self.field_count: 462 if count == 1: 463 v0 = '%s%i' % (self.temp_prefix, rp) 464 lv0 = '%s%0' % ("sub" + self.temp_prefix) 465 else: 466 v0 = '%s%i_%i' % (self.temp_prefix, rp + count - 1, rp) 467 lv0 = '%s%i_0' % ("sub" + self.temp_prefix, count - 1) 487 v0 = self.cg_temp(rp + count - 1, rp) 488 lv0 = self.local_temp(count - 1, 0) 468 489 return [make_assign(v0, make_call('simd_or', [mk_var(v0), mk_var(lv0)]))] 469 490 n = (count+self.field_count-1)/self.field_count
Note: See TracChangeset
for help on using the changeset viewer.