Changeset 5534 for proto

Ignore:
Timestamp:
Jun 28, 2017, 3:05:39 PM (19 months ago)
Message:

lz4d prototype: convert ScanThru() and position() calls to count-forward-zero/one operations; add a prototype for basic sequential pablo

Location:
proto/lz4d
Files:
1 added
1 edited

Unmodified
Added
Removed
• proto/lz4d/lz4d_e_d.py

 r5530 import sys from Basis_Bits import Basis_bits import pablo ''' Bitwise Operation ''' import sequential_pablo # Bitwise Operation def bit_test(bit, index): return bit >> index & 1 def bit_clear(bit, index): mask = ~0 - (1 << index); return bit & mask ''' Methods that should be implemented in Pablo ''' def position(bits) : return len(bin(bits)) - 3 # can not use log2 since log2 will cause some inprecise results when the data is large # return int(math.log(bits, 2)) def singletonStream(bits): return bits - ((bits - 1) & bits) def singleton(pos): return 1 << pos # Advance by variable ''' Methods that has already been supported ''' def count_forward_zero(bits): if bits == 0 : return -1 return position(singletonStream(bits)) def extend_match_length(fromIndex, extender_bits): return count_forward_zero(~(extender_bits >> fromIndex)) ''' Helper Functions for extract_m0_and_d ''' def min_position(a,b,c,d): for i in [a,b,c,d]: if i > 0: temp = i break for i in [a,b,c,d]: if i > 0 and i < temp: temp = i return temp ''' Kernels ''' return bit & ~(1 << index); def bit_set(bit, index): return bit | (1 << index) def bit_set_value(bit, value, index): return bit & ~(1 << index) | (value << index) # Kernels def extract_blocks(file_content): offset = 0 def extract_E1_M0(file_content, basis_bits, block_data, extender): # CC_0x0X = (~basis_bits.bit_0) & (~basis_bits.bit_1) & (~basis_bits.bit_2) & (~basis_bits.bit_3) CC_0xFX = basis_bits.bit_0 & basis_bits.bit_1 & basis_bits.bit_2 & basis_bits.bit_3 CC_0xXF = basis_bits.bit_4 & basis_bits.bit_5 & basis_bits.bit_6 & basis_bits.bit_7 # Only count_forward_one/zero base on extender stream E1_marker = 0 M0_marker = 0 while token_pos < block_end_pos: token_marker = 1 << token_pos token = ord(file_content[token_pos]) #print(token_pos) # print(token_pos) # print(ord(file_content[token_pos])) literal_length_base = token >> 4 match_length_base = token & 0x0f extended_literal = token_marker & CC_0xFX extended_match = token_marker & CC_0xXF 
literal_length_end = pablo.ScanThru(token_marker, extended_literal | extender) literal_length_end_pos = position(literal_length_end) extended_literal_value = token & 0xf0 == 0xf0 literal_length_end_pos = sequential_pablo.position_of_next_zero(bit_set_value(extender, extended_literal_value, token_pos), token_pos) literal_extension_size = literal_length_end_pos - token_pos literal_length += (literal_extension_size - 1) * 255 + final_length_byte literal_marker_bit = pablo.Advance(literal_length_end) literal_marker_end_bit = pablo.AdvancebyPos(literal_marker_bit, literal_length) # handle e1 bits new_e1_bits = literal_marker_end_bit - literal_length_end # Assume we have enough output buffer new_e1_bits = sequential_pablo.generate_bit_one_mask(literal_length_end_pos, literal_length_end_pos + 1 + literal_length) E1_marker |= new_e1_bits # This branch will almost always be taken, there will just one misprediction penalty at the end. if offset_pos < block_end_pos: c = offset_pos - token_pos + 1 match_length_start = pablo.AdvancebyPos(token_marker, c)  # second bit of offset match_length_extend_start = pablo.AdvancebyPos(extended_match, c) match_length_end = pablo.ScanThru(match_length_start, match_length_extend_start | extender) match_extension_size = position(match_length_end) - position(match_length_start) match_length_start_pos = offset_pos + 1 extended_match_value = token & 0xf == 0xf match_extension_size = sequential_pablo.count_forward_one( bit_set_value(extender, extended_match_value, match_length_start_pos), match_length_start_pos) match_length = match_length_base + 4 match_length += (match_extension_size - 1) * 255 + ord(file_content[offset_pos + 1 + match_extension_size]) new_M0_bits = ((1 << (match_length - 1)) - 1) << output_pos # Handle M0 Bits # Assume we have enough output buffer new_M0_bits = sequential_pablo.generate_bit_one_mask(output_pos, output_pos + match_length - 1) output_pos += match_length M0_marker |= new_M0_bits #token_marker = 
pablo.Advance(match_length_end) token_pos = offset_pos + match_extension_size + 2 else: def deposit_compressed(file_content, block_data, e1_marker, m0_marker, outputStrBuffer): d_marker = ~(m0_marker | pablo.Advance(m0_marker)) # Maybe we need to calculate d_marker in other pablo kernel d_marker = ~(m0_marker | sequential_pablo.Advance(m0_marker)) output_pos = 0 continue output_pos_marker = pablo.ScanTo(singleton(output_pos), d_marker) output_pos = position(output_pos_marker) compressed_start_bit = singleton(block_start_pos) token_mark = compressed_start_bit last_match_len_marker = pablo.ScanTo(token_mark, e1_marker)  # first e1 bit last_match_len_marker_pos = position(last_match_len_marker) literal_end_mark = pablo.ScanThru(last_match_len_marker, e1_marker) literal_end_pos = position(literal_end_mark) output_pos = sequential_pablo.position_of_next_one(d_marker, output_pos) # There should be another cursor for d_marker # Position of first e1 bit last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, block_start_pos) literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos) literal_length = literal_end_pos - last_match_len_marker_pos - 1 match_offset = ord(file_content[offset_pos]) + (ord(file_content[offset_pos+ 1]) << 8) match_copy_pos = output_pos - match_offset output_mark = singleton(output_pos) match_end_mark = pablo.ScanThru(output_mark, m0_marker) match_length = position(match_end_mark) - output_pos + 1 match_end_mark_pos = sequential_pablo.position_of_next_zero(m0_marker, output_pos) match_length = match_end_mark_pos - output_pos + 1 # Match Copy #  /* next token */ last_match_len_marker = pablo.ScanTo(literal_end_mark, e1_marker) last_match_len_marker_pos = position(last_match_len_marker) literal_end_mark = pablo.ScanThru(last_match_len_marker, e1_marker) literal_end_pos = position(literal_end_mark) literal_length = literal_end_pos - position(last_match_len_marker) - 1 last_match_len_marker_pos = 
sequential_pablo.position_of_next_one(e1_marker, literal_end_pos) literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos) literal_length = literal_end_pos - last_match_len_marker_pos - 1 # Literal Copy output_pos += literal_length def deposit_uncompressed(file_content, uncompressed_block_data, outputStrBuffer): file_content = f.read() basis_bits = Basis_bits() pablo.EOF_mask = pablo.transpose_streams(file_content, basis_bits) sequential_pablo.transpose_streams(file_content, basis_bits) outputStr = ''.join(outputStrBuffer) # print(outputStr) print(outputStr) with open(outputFile, 'w') as f: f.write(outputStr)
Note: See TracChangeset for help on using the changeset viewer.