Changeset 5534


Ignore:
Timestamp:
Jun 28, 2017, 3:05:39 PM (22 months ago)
Author:
xwa163
Message:

lz4d prototype: convert scanThru(), position() to count forward zero/one, add prototype for basic sequential pablo

Location:
proto/lz4d
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • proto/lz4d/lz4d_e_d.py

    r5530 r5534  
    1 
    21import sys
    32from Basis_Bits import Basis_bits
    4 import pablo
    5 
    6 '''
    7 Bitwise Operation
    8 '''
     3import sequential_pablo
     4
     5# Bitwise Operation
    96def bit_test(bit, index):
    107    return bit >> index & 1
     8
     9
    1110def bit_clear(bit, index):
    12     mask = ~0 - (1 << index);
    13     return bit & mask
    14 
    15 
    16 '''
    17 Methods that should be implemented in Pablo
    18 '''
    19 def position(bits) :
    20     return len(bin(bits)) - 3
    21     # cannot use log2 since log2 will give imprecise results when the data is large
    22     # return int(math.log(bits, 2))
    23 def singletonStream(bits):
    24     return bits - ((bits - 1) & bits)
    25 def singleton(pos):
    26     return 1 << pos
    27 
    28 # Advance by variable
    29 
    30 
    31 '''
    32 Methods that has already been supported
    33 '''
    34 def count_forward_zero(bits):
    35     if bits == 0 :
    36         return -1
    37     return position(singletonStream(bits))
    38 
    39 def extend_match_length(fromIndex, extender_bits):
    40     return count_forward_zero(~(extender_bits >> fromIndex))
    41 
    42 '''
    43 Helper Functions for extract_m0_and_d
    44 '''
    45 def min_position(a,b,c,d):
    46     for i in [a,b,c,d]:
    47         if i > 0:
    48             temp = i
    49             break
    50     for i in [a,b,c,d]:
    51         if i > 0 and i < temp:
    52             temp = i
    53     return temp
    54 
    55 '''
    56 Kernels
    57 '''
    58 
     11    return bit & ~(1 << index);
     12
     13
     14def bit_set(bit, index):
     15    return bit | (1 << index)
     16
     17
     18def bit_set_value(bit, value, index):
     19    return bit & ~(1 << index) | (value << index)
     20
     21
     22# Kernels
    5923def extract_blocks(file_content):
    6024    offset = 0
     
    178142
    179143def extract_E1_M0(file_content, basis_bits, block_data, extender):
    180     # CC_0x0X = (~basis_bits.bit_0) & (~basis_bits.bit_1) & (~basis_bits.bit_2) & (~basis_bits.bit_3)
    181 
    182     CC_0xFX = basis_bits.bit_0 & basis_bits.bit_1 & basis_bits.bit_2 & basis_bits.bit_3
    183     CC_0xXF = basis_bits.bit_4 & basis_bits.bit_5 & basis_bits.bit_6 & basis_bits.bit_7
    184 
     144    # Only count_forward_one/zero based on the extender stream
    185145    E1_marker = 0
    186146    M0_marker = 0
     
    194154
    195155            while token_pos < block_end_pos:
    196                 token_marker = 1 << token_pos
    197 
    198156                token = ord(file_content[token_pos])
    199                 #print(token_pos)
     157                # print(token_pos)
    200158                # print(ord(file_content[token_pos]))
    201159                literal_length_base = token >> 4
    202160                match_length_base = token & 0x0f
    203161
    204                 extended_literal = token_marker & CC_0xFX
    205                 extended_match = token_marker & CC_0xXF
    206 
    207                 literal_length_end = pablo.ScanThru(token_marker, extended_literal | extender)
    208 
    209                 literal_length_end_pos = position(literal_length_end)
     162                extended_literal_value = token & 0xf0 == 0xf0
     163
     164                literal_length_end_pos = sequential_pablo.position_of_next_zero(bit_set_value(extender, extended_literal_value, token_pos), token_pos)
    210165
    211166                literal_extension_size = literal_length_end_pos - token_pos
     
    219174                    literal_length += (literal_extension_size - 1) * 255 + final_length_byte
    220175
    221                 literal_marker_bit = pablo.Advance(literal_length_end)
    222                 literal_marker_end_bit = pablo.AdvancebyPos(literal_marker_bit, literal_length)
    223 
    224176                # handle e1 bits
    225                 new_e1_bits = literal_marker_end_bit - literal_length_end
     177                # Assume we have enough output buffer
     178                new_e1_bits = sequential_pablo.generate_bit_one_mask(literal_length_end_pos, literal_length_end_pos + 1 + literal_length)
    226179                E1_marker |= new_e1_bits
    227180               
     
    232185                # This branch will almost always be taken; there will be just one misprediction penalty at the end.
    233186                if offset_pos < block_end_pos: 
    234                     c = offset_pos - token_pos + 1
    235 
    236                     match_length_start = pablo.AdvancebyPos(token_marker, c)  # second bit of offset
    237                     match_length_extend_start = pablo.AdvancebyPos(extended_match, c)
    238                     match_length_end = pablo.ScanThru(match_length_start, match_length_extend_start | extender)
    239                     match_extension_size = position(match_length_end) - position(match_length_start)
     187                    match_length_start_pos = offset_pos + 1
     188                    extended_match_value = token & 0xf == 0xf
     189                    match_extension_size = sequential_pablo.count_forward_one(
     190                        bit_set_value(extender, extended_match_value, match_length_start_pos), match_length_start_pos)
    240191
    241192                    match_length = match_length_base + 4
     
    245196                        match_length += (match_extension_size - 1) * 255 + ord(file_content[offset_pos + 1 + match_extension_size])
    246197
    247                     new_M0_bits = ((1 << (match_length - 1)) - 1) << output_pos
     198                    # Handle M0 Bits
     199                    # Assume we have enough output buffer
     200                    new_M0_bits = sequential_pablo.generate_bit_one_mask(output_pos, output_pos + match_length - 1)
    248201                    output_pos += match_length
    249202                   
    250203                    M0_marker |= new_M0_bits
    251204                   
    252                     #token_marker = pablo.Advance(match_length_end)
    253205                    token_pos = offset_pos + match_extension_size + 2
    254206                else:
     
    263215
    264216def deposit_compressed(file_content, block_data, e1_marker, m0_marker, outputStrBuffer):
    265    
    266     d_marker = ~(m0_marker | pablo.Advance(m0_marker))
     217    # Maybe we need to calculate d_marker in other pablo kernel
     218    d_marker = ~(m0_marker | sequential_pablo.Advance(m0_marker))
    267219    output_pos = 0
    268220
     
    275227            continue
    276228
    277         output_pos_marker = pablo.ScanTo(singleton(output_pos), d_marker)
    278         output_pos = position(output_pos_marker)
    279 
    280         compressed_start_bit = singleton(block_start_pos)
    281         token_mark = compressed_start_bit
    282 
    283         last_match_len_marker = pablo.ScanTo(token_mark, e1_marker)  # first e1 bit
    284         last_match_len_marker_pos = position(last_match_len_marker)
    285         literal_end_mark = pablo.ScanThru(last_match_len_marker, e1_marker)
    286         literal_end_pos = position(literal_end_mark)
     229        output_pos = sequential_pablo.position_of_next_one(d_marker, output_pos) # There should be another cursor for d_marker
     230        # Position of first e1 bit
     231        last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, block_start_pos)
     232
     233        literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos)
    287234        literal_length = literal_end_pos - last_match_len_marker_pos - 1
    288235
     
    296243            match_offset = ord(file_content[offset_pos]) + (ord(file_content[offset_pos+ 1]) << 8)
    297244            match_copy_pos = output_pos - match_offset
    298 
    299             output_mark = singleton(output_pos)
    300             match_end_mark = pablo.ScanThru(output_mark, m0_marker)
    301 
    302             match_length = position(match_end_mark) - output_pos + 1
     245            match_end_mark_pos = sequential_pablo.position_of_next_zero(m0_marker, output_pos)
     246            match_length = match_end_mark_pos - output_pos + 1
    303247
    304248            # Match Copy
     
    309253
    310254            #  /* next token */
    311             last_match_len_marker = pablo.ScanTo(literal_end_mark, e1_marker)
    312             last_match_len_marker_pos = position(last_match_len_marker)
    313             literal_end_mark = pablo.ScanThru(last_match_len_marker, e1_marker)
    314             literal_end_pos = position(literal_end_mark)
    315             literal_length = literal_end_pos - position(last_match_len_marker) - 1
     255            last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, literal_end_pos)
     256
     257            literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos)
     258
     259            literal_length = literal_end_pos - last_match_len_marker_pos - 1
    316260
    317261            # Literal Copy
     
    320264
    321265            output_pos += literal_length
     266
    322267
    323268def deposit_uncompressed(file_content, uncompressed_block_data, outputStrBuffer):
     
    345290            file_content = f.read()
    346291        basis_bits = Basis_bits()
    347         pablo.EOF_mask = pablo.transpose_streams(file_content, basis_bits)
     292        sequential_pablo.transpose_streams(file_content, basis_bits)
    348293
    349294
     
    366311
    367312        outputStr = ''.join(outputStrBuffer)
    368         # print(outputStr)
     313        print(outputStr)
    369314        with open(outputFile, 'w') as f:
    370315            f.write(outputStr)
Note: See TracChangeset for help on using the changeset viewer.