source: proto/lz4d/extract_e1_m0_kernel.py @ 5539

Last change on this file since 5539 was 5539, checked in by xwa163, 22 months ago

lz4d_prototype: add prototype for sequential_pablo_kernel, modify lz4d prototype to use sequential_pablo_kernel

File size: 6.4 KB
Line 
1from deposit_compressed_kernel import DepositCompressedKernel
2from bitwise_op_helper import *
3import sequential_pablo
4
5#
6#  === The E1 and M0 Bitstreams for LZ4 Decoding ===
7#
8#  Suppose Z is a LZ4-compressed text stream and T is the equivalent uncompressed text.
9#
10#  First define the basic extract and deposit streams E and D.
11#
12#  The bit stream E is a bit stream of length |Z| such that E[i] = 1 iff Z[i] is a
13#  literal character to be copied directly to T.
14#
15#  The bit stream D is a bit stream of length |T| such that D[i] = 1 iff T[i] is a
16#  a character copied directly from Z[j], where Z[j] = 1, and popcnt(T[0..i-1]) = popcnt(Z[0..j-1])
17#
18#  So suppose we have a compressed stream Z with literal characters marked by E
19#
20#  Z    -ab---pqrstu------FGH---JK
21#  E    .11...111111......111...11
22#
23#  The uncompressed stream T and deposit stream D could look like:
24#
25#  T    ab------------pqrstu------------fgh----------jk
26#  D    11............111111............111..........11
27#
28#
29#  But during decoding, it is better to compute the bitstream E1, which
30#  is defined by E1[i] = E[i] | E[i+1].
31#
32#  E1 is the stream marking the byte prior to a literal plus the literal.  The
33#  byte prior to the literal is the token or the last length byte after length
34#  extenders.
35#
36#  Z    -ab---pqrstu------fgh---jk
37#  E1   111..1111111..1..1111..111
38#
39#  Given the E1 stream, the E stream can easily be computed as E = E1 & Advance(E1).
40#
41#  The advantage of the E1 stream is that we mark the position of every literal,
42#  even if it is of length 0.   This allows us to compute the bit stream marking
43#  all the offsets:
44#  Offset_stream = Advance(E1) &~ E1
45#
46#  In our example, we might have had a length 0 literal between the PQRSTU and the FGH
47#  literals
48#
49#  Z    -ab---pqrstu------fgh---jk
50#  E1   111..1111111..1..1111..111
51#
52#  Similarly, instead of directly storing the D stream, I think it is useful
53#  to store the M0 stream which consists of the output positions for all
54#  the nonfinal bytes of match copies.
55#
56#  T    ab------------pqrstu------------fgh----------jk
57#  M0   ..11111111111.......1111.111111....111111111...
58#
59#  The advantage of the M0 stream is that it lets us compute the length
60#  of match copies even if we have two consecutive copies in the output
61#  (because there was a 0-length literal).  The length is always one more
62#  than the length of the run of one bits plus 1.
63#
64#  We can also easily compute the D stream from the M0 stream
65#
66#  D = ~(M0 | Advance(M0))
67#  (Assuming that we are confined to the compressed stream region).
68
69class ExtractE1M0Kernel(DepositCompressedKernel):
70    def __init__(self, input_buffers):
71        DepositCompressedKernel.__init__(self, input_buffers, {
72            'e1_marker' : 0,
73            'm0_marker' : 0
74        })
75
76    def process(self):
77        self.init_buffer_cursor(['extender'])
78        file_content = self._input_buffers['file_content']
79        block_data = self._input_buffers['block_data']
80
81        uncompressed_data = []
82        output_pos = 0
83
84        for (isCompressed, block_start_pos, block_end_pos) in block_data:
85
86            if isCompressed:
87                self.advance_cursor_until_pos('extender', block_start_pos)
88                # token_pos = block_start_pos
89
90                while self.get_cursor_value('extender') < block_end_pos:
91                    token = ord(file_content[self.get_cursor_value('extender')])
92
93                    # print(token_pos)
94                    # print(ord(file_content[token_pos]))
95                    literal_length_base = token >> 4
96                    match_length_base = token & 0x0f
97
98                    extended_literal_value = token & 0xf0 == 0xf0
99
100                    token_pos = self.get_cursor_value('extender')
101
102                    self.modify_input_bitstream('extender', 'extender', extended_literal_value)
103                    self.advance_cursor_until_next_zero('extender', 'extender')
104
105                    literal_length_end_pos = self.get_cursor_value('extender')
106
107                    literal_extension_size = literal_length_end_pos - token_pos
108
109                    final_length_byte = ord(file_content[literal_length_end_pos])
110
111                    literal_length = literal_length_base
112
113                    # Will be a Select Instruction
114                    if literal_extension_size > 0:
115                        literal_length += (literal_extension_size - 1) * 255 + final_length_byte
116
117                    # handle e1 bits
118                    # Assume we have enough output buffer
119                    self.mark_output_bitstream_one('e1_marker', literal_length_end_pos, literal_length_end_pos + 1 + literal_length )
120
121                    output_pos += literal_length
122                    offset_pos = literal_length_end_pos + literal_length + 1
123
124                    # This branch will almost always be taken, there will just one misprediction penalty at the end.
125                    if offset_pos < block_end_pos:
126                        match_length_start_pos = offset_pos + 1
127                        self.advance_cursor_until_pos('extender', match_length_start_pos)
128                        extended_match_value = token & 0xf == 0xf
129                        self.modify_input_bitstream('extender', 'extender', extended_match_value)
130
131                        self.advance_cursor_until_next_zero('extender', 'extender')
132                        match_extension_size = self.get_cursor_value('extender') - match_length_start_pos
133
134                        match_length = match_length_base + 4
135
136                        # Will Be a Select Instruction
137                        if match_extension_size > 0:
138                            match_length += (match_extension_size - 1) * 255 + ord(
139                                file_content[offset_pos + 1 + match_extension_size])
140
141                        # Handle M0 Bits
142                        # Assume we have enough output buffer
143                        self.mark_output_bitstream_one('m0_marker', output_pos, output_pos + match_length - 1)
144                        output_pos += match_length
145
146                        self.advance_cursor('extender', 1)
147                    else:
148                        self.advance_cursor_until_pos('extender', offset_pos)
149            else:
150                block_length = block_end_pos - block_start_pos
151                uncompressed_data.append(block_start_pos, block_length, output_pos)
152                output_pos += block_length
153
154        return self._output_buffers['e1_marker'], self._output_buffers['m0_marker'], uncompressed_data, output_pos
Note: See TracBrowser for help on using the repository browser.