source: proto/lz4d/lz4d_e_d.py @ 5534

Last change on this file since 5534 was 5534, checked in by xwa163, 22 months ago

lz4d prototype: convert scanThru(), position() to count forward zero/one, add prototype for basic sequential pablo

File size: 11.5 KB
Line 
1import sys
2from Basis_Bits import Basis_bits
3import sequential_pablo
4
5# Bitwise Operation
6def bit_test(bit, index):
7    return bit >> index & 1
8
9
10def bit_clear(bit, index):
11    return bit & ~(1 << index);
12
13
14def bit_set(bit, index):
15    return bit | (1 << index)
16
17
18def bit_set_value(bit, value, index):
19    return bit & ~(1 << index) | (value << index)
20
21
22# Kernels
23def extract_blocks(file_content):
24    offset = 0
25    block_data = []
26    uncompressed_mask = 0
27
28    offset += 4  #Magicc Number
29
30    flag = ord(file_content[offset])
31
32    has_content_checksum = bit_test(flag, 2)
33    has_content_size = bit_test(flag, 3)
34    has_block_checksum = bit_test(flag, 4)
35
36    frame_descriptor_size = 3
37    if has_content_size:
38        frame_descriptor_size = 11
39    offset += frame_descriptor_size
40
41    # Process Data Block
42    while True:
43        blockSize = 0
44        for i in range(4):
45            blockSize += (ord(file_content[offset + i]) << (i * 8))
46            pass
47
48        if blockSize == 0:
49            # End Mark
50            break
51
52        highestIndex = 4 * 8 - 1
53        isCompressed = not bit_test(blockSize, highestIndex)
54        realBlockSize = bit_clear(blockSize, highestIndex)
55
56        offset = offset + 4
57
58        if realBlockSize == 0: #End Mark
59            break
60
61        block_start = offset
62
63        offset = offset + realBlockSize
64
65        block_end = offset
66
67        block_data.append((isCompressed, block_start, block_end))
68       
69        if not isCompressed: uncompressed_mask |= (1 << block_end) - (1 << block_start)
70
71        if has_block_checksum:
72            offset = offset + 4
73
74    # Ignore Content Check Sum
75    return block_data, uncompressed_mask
76
77#
78#  === The E1 and M0 Bitstreams for LZ4 Decoding ===
79#
80#  Suppose Z is a LZ4-compressed text stream and T is the equivalent uncompressed text.
81#
82#  First define the basic extract and deposit streams E and D.
83#
84#  The bit stream E is a bit stream of length |Z| such that E[i] = 1 iff Z[i] is a
85#  literal character to be copied directly to T.
86#
87#  The bit stream D is a bit stream of length |T| such that D[i] = 1 iff T[i] is a
88#  a character copied directly from Z[j], where Z[j] = 1, and popcnt(T[0..i-1]) = popcnt(Z[0..j-1])
89#
90#  So suppose we have a compressed stream Z with literal characters marked by E
91#
92#  Z    -ab---pqrstu------FGH---JK
93#  E    .11...111111......111...11
94#
95#  The uncompressed stream T and deposit stream D could look like:
96#
97#  T    ab------------pqrstu------------fgh----------jk
98#  D    11............111111............111..........11
99#
100#
101#  But during decoding, it is better to compute the bitstream E1, which
102#  is defined by E1[i] = E[i] | E[i+1].
103#
104#  E1 is the stream marking the byte prior to a literal plus the literal.  The
105#  byte prior to the literal is the token or the last length byte after length
106#  extenders.
107#
108#  Z    -ab---pqrstu------fgh---jk
109#  E1   111..1111111..1..1111..111
110#
111#  Given the E1 stream, the E stream can easily be computed as E = E1 & Advance(E1).
112#
113#  The advantage of the E1 stream is that we mark the position of every literal,
114#  even if it is of length 0.   This allows us to compute the bit stream marking
115#  all the offsets:
116#  Offset_stream = Advance(E1) &~ E1
117#
118#  In our example, we might have had a length 0 literal between the PQRSTU and the FGH
119#  literals
120#
121#  Z    -ab---pqrstu------fgh---jk
122#  E1   111..1111111..1..1111..111
123#
124#  Similarly, instead of directly storing the D stream, I think it is useful
125#  to store the M0 stream which consists of the output positions for all
126#  the nonfinal bytes of match copies. 
127#
128#  T    ab------------pqrstu------------fgh----------jk
129#  M0   ..11111111111.......1111.111111....111111111...
130#
131#  The advantage of the M0 stream is that it lets us compute the length
132#  of match copies even if we have two consecutive copies in the output
133#  (because there was a 0-length literal).  The length is always one more
134#  than the length of the run of one bits plus 1.
135#
136#  We can also easily compute the D stream from the M0 stream
137#
138#  D = ~(M0 | Advance(M0))
139#  (Assuming that we are confined to the compressed stream region).
140
141
142
143def extract_E1_M0(file_content, basis_bits, block_data, extender):
144    # Only count_forward_one/zero base on extender stream
145    E1_marker = 0
146    M0_marker = 0
147    uncompressed_data = []
148    output_pos = 0
149
150    for (isCompressed, block_start_pos, block_end_pos) in block_data:
151
152        if isCompressed:
153            token_pos = block_start_pos
154
155            while token_pos < block_end_pos:
156                token = ord(file_content[token_pos])
157                # print(token_pos)
158                # print(ord(file_content[token_pos]))
159                literal_length_base = token >> 4
160                match_length_base = token & 0x0f
161
162                extended_literal_value = token & 0xf0 == 0xf0
163
164                literal_length_end_pos = sequential_pablo.position_of_next_zero(bit_set_value(extender, extended_literal_value, token_pos), token_pos)
165
166                literal_extension_size = literal_length_end_pos - token_pos
167               
168                final_length_byte = ord(file_content[literal_length_end_pos])
169
170                literal_length = literal_length_base
171               
172                # Will be a Select Instruction
173                if literal_extension_size > 0:
174                    literal_length += (literal_extension_size - 1) * 255 + final_length_byte
175
176                # handle e1 bits
177                # Assume we have enough output buffer
178                new_e1_bits = sequential_pablo.generate_bit_one_mask(literal_length_end_pos, literal_length_end_pos + 1 + literal_length)
179                E1_marker |= new_e1_bits
180               
181                output_pos += literal_length
182
183                offset_pos = literal_length_end_pos + literal_length + 1
184
185                # This branch will almost always be taken, there will just one misprediction penalty at the end.
186                if offset_pos < block_end_pos: 
187                    match_length_start_pos = offset_pos + 1
188                    extended_match_value = token & 0xf == 0xf
189                    match_extension_size = sequential_pablo.count_forward_one(
190                        bit_set_value(extender, extended_match_value, match_length_start_pos), match_length_start_pos)
191
192                    match_length = match_length_base + 4
193
194                    # Will Be a Select Instruction
195                    if match_extension_size > 0:
196                        match_length += (match_extension_size - 1) * 255 + ord(file_content[offset_pos + 1 + match_extension_size])
197
198                    # Handle M0 Bits
199                    # Assume we have enough output buffer
200                    new_M0_bits = sequential_pablo.generate_bit_one_mask(output_pos, output_pos + match_length - 1)
201                    output_pos += match_length
202                   
203                    M0_marker |= new_M0_bits
204                   
205                    token_pos = offset_pos + match_extension_size + 2
206                else:
207                    token_pos = offset_pos
208        else:
209            block_length = block_end_pos - block_start_pos
210            uncompressed_data.append(block_start_pos, block_length, output_pos)
211            output_pos += block_length
212
213    return E1_marker, M0_marker, uncompressed_data, output_pos
214
215
216def deposit_compressed(file_content, block_data, e1_marker, m0_marker, outputStrBuffer):
217    # Maybe we need to calculate d_marker in other pablo kernel
218    d_marker = ~(m0_marker | sequential_pablo.Advance(m0_marker))
219    output_pos = 0
220
221    # Outer loop will iterate throught all of the compressed block
222    # while (compressed_start_marker):
223    for (isCompressed, block_start_pos, block_end_pos) in block_data:
224        if not isCompressed:
225            # There will be much fewer uncompressed block than compressed block
226            output_pos = output_pos + block_end_pos - block_start_pos
227            continue
228
229        output_pos = sequential_pablo.position_of_next_one(d_marker, output_pos) # There should be another cursor for d_marker
230        # Position of first e1 bit
231        last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, block_start_pos)
232
233        literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos)
234        literal_length = literal_end_pos - last_match_len_marker_pos - 1
235
236        # Output
237        outputStrBuffer[output_pos: output_pos + literal_length] = list(file_content[last_match_len_marker_pos + 1: last_match_len_marker_pos + 1 + literal_length])
238
239        output_pos += literal_length
240        while literal_end_pos < block_end_pos:
241            offset_pos = literal_end_pos
242
243            match_offset = ord(file_content[offset_pos]) + (ord(file_content[offset_pos+ 1]) << 8)
244            match_copy_pos = output_pos - match_offset
245            match_end_mark_pos = sequential_pablo.position_of_next_zero(m0_marker, output_pos)
246            match_length = match_end_mark_pos - output_pos + 1
247
248            # Match Copy
249            for i in range(match_length):
250                outputStrBuffer[output_pos + i] = outputStrBuffer[match_copy_pos + i]
251
252            output_pos += match_length
253
254            #  /* next token */
255            last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, literal_end_pos)
256
257            literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos)
258
259            literal_length = literal_end_pos - last_match_len_marker_pos - 1
260
261            # Literal Copy
262            outputStrBuffer[output_pos: output_pos + literal_length] = list(
263                file_content[last_match_len_marker_pos + 1: last_match_len_marker_pos + 1 + literal_length])
264
265            output_pos += literal_length
266
267
268def deposit_uncompressed(file_content, uncompressed_block_data, outputStrBuffer):
269    for (uncompressed_start_pos, block_length, output_pos) in uncompressed_block_data:
270        uncompressed_end_pos = uncompressed_start_pos + block_length
271        outputStrBuffer[output_pos: output_pos + block_length] = list(
272            file_content[uncompressed_start_pos: uncompressed_end_pos]
273        )
274
275'''
276Main
277'''
278
279if __name__ == '__main__':
280    if len(sys.argv) < 3:
281        print("""
282Usage:
283python lz4d_e_d.py [input_file_path] [output_file_path]
284        """)
285    else:
286        # Prepare Input
287        inputFile = sys.argv[1]
288        outputFile = sys.argv[2]
289        with open(inputFile, "rb") as f:
290            file_content = f.read()
291        basis_bits = Basis_bits()
292        sequential_pablo.transpose_streams(file_content, basis_bits)
293
294
295        # Extract start and end marker of compressed and uncompressed blocks
296        block_data, uncompressed_mask = extract_blocks(file_content)
297        #print block_data
298
299        # Imitate ParabixCharacterClassKernelBuilder to produce extenders stream
300        extender_bits = basis_bits.bit_0 & basis_bits.bit_1 & basis_bits.bit_2 & basis_bits.bit_3 & basis_bits.bit_4 & basis_bits.bit_5 & basis_bits.bit_6 & basis_bits.bit_7
301
302        # Extract E1  and M0 marker streams as wells as data for uncompressed blocks and total length
303        E1_marker, M0_marker, uncompressed_data, output_length = extract_E1_M0(file_content, basis_bits, block_data, extender_bits)
304
305        # Use Array of char to imitate output string buffer of fixed length
306        outputStrBuffer = ["" for i in range(output_length)]
307
308        # Deposit compressed and uncompressed blocks
309        deposit_compressed(file_content, block_data, E1_marker, M0_marker, outputStrBuffer)
310        deposit_uncompressed(file_content, uncompressed_data, outputStrBuffer)
311
312        outputStr = ''.join(outputStrBuffer)
313        print(outputStr)
314        with open(outputFile, 'w') as f:
315            f.write(outputStr)
316
Note: See TracBrowser for help on using the repository browser.