Changeset 5539


Ignore:
Timestamp:
Jun 30, 2017, 12:18:34 PM (23 months ago)
Author:
xwa163
Message:

lz4d_prototype: add prototype for sequential_pablo_kernel, modify lz4d prototype to use sequential_pablo_kernel

Location:
proto/lz4d
Files:
4 added
1 edited

Legend:

Unmodified
Added
Removed
  • proto/lz4d/lz4d_e_d.py

    r5534 r5539  
    22from Basis_Bits import Basis_bits
    33import sequential_pablo
    4 
    5 # Bitwise Operation
    6 def bit_test(bit, index):
    7     return bit >> index & 1
    8 
    9 
    10 def bit_clear(bit, index):
    11     return bit & ~(1 << index);
    12 
    13 
    14 def bit_set(bit, index):
    15     return bit | (1 << index)
    16 
    17 
    18 def bit_set_value(bit, value, index):
    19     return bit & ~(1 << index) | (value << index)
    20 
     4import pablo
     5from deposit_compressed_kernel import DepositCompressedKernel
     6from extract_e1_m0_kernel import ExtractE1M0Kernel
     7from bitwise_op_helper import *
    218
    229# Kernels
     
    7562    return block_data, uncompressed_mask
    7663
    77 #
    78 #  === The E1 and M0 Bitstreams for LZ4 Decoding ===
    79 #
    80 #  Suppose Z is a LZ4-compressed text stream and T is the equivalent uncompressed text.
    81 #
    82 #  First define the basic extract and deposit streams E and D.
    83 #
    84 #  The bit stream E is a bit stream of length |Z| such that E[i] = 1 iff Z[i] is a
    85 #  literal character to be copied directly to T.
    86 #
    87 #  The bit stream D is a bit stream of length |T| such that D[i] = 1 iff T[i] is a
    88 #  a character copied directly from Z[j], where Z[j] = 1, and popcnt(T[0..i-1]) = popcnt(Z[0..j-1])
    89 #
    90 #  So suppose we have a compressed stream Z with literal characters marked by E
    91 #
    92 #  Z    -ab---pqrstu------FGH---JK
    93 #  E    .11...111111......111...11
    94 #
    95 #  The uncompressed stream T and deposit stream D could look like:
    96 #
    97 #  T    ab------------pqrstu------------fgh----------jk
    98 #  D    11............111111............111..........11
    99 #
    100 #
    101 #  But during decoding, it is better to compute the bitstream E1, which
    102 #  is defined by E1[i] = E[i] | E[i+1].
    103 #
    104 #  E1 is the stream marking the byte prior to a literal plus the literal.  The
    105 #  byte prior to the literal is the token or the last length byte after length
    106 #  extenders.
    107 #
    108 #  Z    -ab---pqrstu------fgh---jk
    109 #  E1   111..1111111..1..1111..111
    110 #
    111 #  Given the E1 stream, the E stream can easily be computed as E = E1 & Advance(E1).
    112 #
    113 #  The advantage of the E1 stream is that we mark the position of every literal,
    114 #  even if it is of length 0.   This allows us to compute the bit stream marking
    115 #  all the offsets:
    116 #  Offset_stream = Advance(E1) &~ E1
    117 #
    118 #  In our example, we might have had a length 0 literal between the PQRSTU and the FGH
    119 #  literals
    120 #
    121 #  Z    -ab---pqrstu------fgh---jk
    122 #  E1   111..1111111..1..1111..111
    123 #
    124 #  Similarly, instead of directly storing the D stream, I think it is useful
    125 #  to store the M0 stream which consists of the output positions for all
    126 #  the nonfinal bytes of match copies. 
    127 #
    128 #  T    ab------------pqrstu------------fgh----------jk
    129 #  M0   ..11111111111.......1111.111111....111111111...
    130 #
    131 #  The advantage of the M0 stream is that it lets us compute the length
    132 #  of match copies even if we have two consecutive copies in the output
    133 #  (because there was a 0-length literal).  The length is always one more
    134 #  than the length of the run of one bits plus 1.
    135 #
    136 #  We can also easily compute the D stream from the M0 stream
    137 #
    138 #  D = ~(M0 | Advance(M0))
    139 #  (Assuming that we are confined to the compressed stream region).
    140 
    141 
    142 
    143 def extract_E1_M0(file_content, basis_bits, block_data, extender):
    144     # Only count_forward_one/zero base on extender stream
    145     E1_marker = 0
    146     M0_marker = 0
    147     uncompressed_data = []
    148     output_pos = 0
    149 
    150     for (isCompressed, block_start_pos, block_end_pos) in block_data:
    151 
    152         if isCompressed:
    153             token_pos = block_start_pos
    154 
    155             while token_pos < block_end_pos:
    156                 token = ord(file_content[token_pos])
    157                 # print(token_pos)
    158                 # print(ord(file_content[token_pos]))
    159                 literal_length_base = token >> 4
    160                 match_length_base = token & 0x0f
    161 
    162                 extended_literal_value = token & 0xf0 == 0xf0
    163 
    164                 literal_length_end_pos = sequential_pablo.position_of_next_zero(bit_set_value(extender, extended_literal_value, token_pos), token_pos)
    165 
    166                 literal_extension_size = literal_length_end_pos - token_pos
    167                
    168                 final_length_byte = ord(file_content[literal_length_end_pos])
    169 
    170                 literal_length = literal_length_base
    171                
    172                 # Will be a Select Instruction
    173                 if literal_extension_size > 0:
    174                     literal_length += (literal_extension_size - 1) * 255 + final_length_byte
    175 
    176                 # handle e1 bits
    177                 # Assume we have enough output buffer
    178                 new_e1_bits = sequential_pablo.generate_bit_one_mask(literal_length_end_pos, literal_length_end_pos + 1 + literal_length)
    179                 E1_marker |= new_e1_bits
    180                
    181                 output_pos += literal_length
    182 
    183                 offset_pos = literal_length_end_pos + literal_length + 1
    184 
    185                 # This branch will almost always be taken, there will just one misprediction penalty at the end.
    186                 if offset_pos < block_end_pos: 
    187                     match_length_start_pos = offset_pos + 1
    188                     extended_match_value = token & 0xf == 0xf
    189                     match_extension_size = sequential_pablo.count_forward_one(
    190                         bit_set_value(extender, extended_match_value, match_length_start_pos), match_length_start_pos)
    191 
    192                     match_length = match_length_base + 4
    193 
    194                     # Will Be a Select Instruction
    195                     if match_extension_size > 0:
    196                         match_length += (match_extension_size - 1) * 255 + ord(file_content[offset_pos + 1 + match_extension_size])
    197 
    198                     # Handle M0 Bits
    199                     # Assume we have enough output buffer
    200                     new_M0_bits = sequential_pablo.generate_bit_one_mask(output_pos, output_pos + match_length - 1)
    201                     output_pos += match_length
    202                    
    203                     M0_marker |= new_M0_bits
    204                    
    205                     token_pos = offset_pos + match_extension_size + 2
    206                 else:
    207                     token_pos = offset_pos
    208         else:
    209             block_length = block_end_pos - block_start_pos
    210             uncompressed_data.append(block_start_pos, block_length, output_pos)
    211             output_pos += block_length
    212 
    213     return E1_marker, M0_marker, uncompressed_data, output_pos
    214 
    215 
    216 def deposit_compressed(file_content, block_data, e1_marker, m0_marker, outputStrBuffer):
    217     # Maybe we need to calculate d_marker in other pablo kernel
    218     d_marker = ~(m0_marker | sequential_pablo.Advance(m0_marker))
    219     output_pos = 0
    220 
    221     # Outer loop will iterate throught all of the compressed block
    222     # while (compressed_start_marker):
    223     for (isCompressed, block_start_pos, block_end_pos) in block_data:
    224         if not isCompressed:
    225             # There will be much fewer uncompressed block than compressed block
    226             output_pos = output_pos + block_end_pos - block_start_pos
    227             continue
    228 
    229         output_pos = sequential_pablo.position_of_next_one(d_marker, output_pos) # There should be another cursor for d_marker
    230         # Position of first e1 bit
    231         last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, block_start_pos)
    232 
    233         literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos)
    234         literal_length = literal_end_pos - last_match_len_marker_pos - 1
    235 
    236         # Output
    237         outputStrBuffer[output_pos: output_pos + literal_length] = list(file_content[last_match_len_marker_pos + 1: last_match_len_marker_pos + 1 + literal_length])
    238 
    239         output_pos += literal_length
    240         while literal_end_pos < block_end_pos:
    241             offset_pos = literal_end_pos
    242 
    243             match_offset = ord(file_content[offset_pos]) + (ord(file_content[offset_pos+ 1]) << 8)
    244             match_copy_pos = output_pos - match_offset
    245             match_end_mark_pos = sequential_pablo.position_of_next_zero(m0_marker, output_pos)
    246             match_length = match_end_mark_pos - output_pos + 1
    247 
    248             # Match Copy
    249             for i in range(match_length):
    250                 outputStrBuffer[output_pos + i] = outputStrBuffer[match_copy_pos + i]
    251 
    252             output_pos += match_length
    253 
    254             #  /* next token */
    255             last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, literal_end_pos)
    256 
    257             literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos)
    258 
    259             literal_length = literal_end_pos - last_match_len_marker_pos - 1
    260 
    261             # Literal Copy
    262             outputStrBuffer[output_pos: output_pos + literal_length] = list(
    263                 file_content[last_match_len_marker_pos + 1: last_match_len_marker_pos + 1 + literal_length])
    264 
    265             output_pos += literal_length
    266 
    267 
    26864def deposit_uncompressed(file_content, uncompressed_block_data, outputStrBuffer):
    26965    for (uncompressed_start_pos, block_length, output_pos) in uncompressed_block_data:
     
    30197
    30298        # Extract E1  and M0 marker streams as wells as data for uncompressed blocks and total length
    303         E1_marker, M0_marker, uncompressed_data, output_length = extract_E1_M0(file_content, basis_bits, block_data, extender_bits)
     99        extract_e1_m0_k = ExtractE1M0Kernel({
     100            'file_content' : file_content,
     101            'block_data' : block_data,
     102            'extender' : extender_bits
     103        })
     104        E1_marker, M0_marker, uncompressed_data, output_length = extract_e1_m0_k.process()
    304105
    305106        # Use Array of char to imitate output string buffer of fixed length
    306107        outputStrBuffer = ["" for i in range(output_length)]
    307108
     109        # D_marker Will Be Calculated by a pablo kernel
     110        d_marker = ~(M0_marker | pablo.Advance(M0_marker))
     111
    308112        # Deposit compressed and uncompressed blocks
    309         deposit_compressed(file_content, block_data, E1_marker, M0_marker, outputStrBuffer)
     113        # deposit_compressed(file_content, block_data, E1_marker, M0_marker, outputStrBuffer)
     114        deposit_compressed_k = DepositCompressedKernel({
     115            'file_content' : file_content,
     116            'block_data' : block_data,
     117            'e1_marker': E1_marker,
     118            'm0_marker': M0_marker,
     119            'd_marker' : d_marker
     120        }, {
     121            'outputStrBuffer': outputStrBuffer
     122        })
     123        deposit_compressed_k.process()
    310124        deposit_uncompressed(file_content, uncompressed_data, outputStrBuffer)
    311125
    312126        outputStr = ''.join(outputStrBuffer)
    313         print(outputStr)
     127        # print(outputStr)
    314128        with open(outputFile, 'w') as f:
    315129            f.write(outputStr)
Note: See TracChangeset for help on using the changeset viewer.