Changeset 5534
- Timestamp:
- Jun 28, 2017, 3:05:39 PM (20 months ago)
- Location:
- proto/lz4d
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
proto/lz4d/lz4d_e_d.py
r5530 r5534 1 2 1 import sys 3 2 from Basis_Bits import Basis_bits 4 import pablo 5 6 ''' 7 Bitwise Operation 8 ''' 3 import sequential_pablo 4 5 # Bitwise Operation 9 6 def bit_test(bit, index): 10 7 return bit >> index & 1 8 9 11 10 def bit_clear(bit, index): 12 mask = ~0 - (1 << index); 13 return bit & mask 14 15 16 ''' 17 Methods that should be implemented in Pablo 18 ''' 19 def position(bits) : 20 return len(bin(bits)) - 3 21 # can not use log2 since log2 will cause some inprecise results when the data is large 22 # return int(math.log(bits, 2)) 23 def singletonStream(bits): 24 return bits - ((bits - 1) & bits) 25 def singleton(pos): 26 return 1 << pos 27 28 # Advance by variable 29 30 31 ''' 32 Methods that has already been supported 33 ''' 34 def count_forward_zero(bits): 35 if bits == 0 : 36 return -1 37 return position(singletonStream(bits)) 38 39 def extend_match_length(fromIndex, extender_bits): 40 return count_forward_zero(~(extender_bits >> fromIndex)) 41 42 ''' 43 Helper Functions for extract_m0_and_d 44 ''' 45 def min_position(a,b,c,d): 46 for i in [a,b,c,d]: 47 if i > 0: 48 temp = i 49 break 50 for i in [a,b,c,d]: 51 if i > 0 and i < temp: 52 temp = i 53 return temp 54 55 ''' 56 Kernels 57 ''' 58 11 return bit & ~(1 << index); 12 13 14 def bit_set(bit, index): 15 return bit | (1 << index) 16 17 18 def bit_set_value(bit, value, index): 19 return bit & ~(1 << index) | (value << index) 20 21 22 # Kernels 59 23 def extract_blocks(file_content): 60 24 offset = 0 … … 178 142 179 143 def extract_E1_M0(file_content, basis_bits, block_data, extender): 180 # CC_0x0X = (~basis_bits.bit_0) & (~basis_bits.bit_1) & (~basis_bits.bit_2) & (~basis_bits.bit_3) 181 182 CC_0xFX = basis_bits.bit_0 & basis_bits.bit_1 & basis_bits.bit_2 & basis_bits.bit_3 183 CC_0xXF = basis_bits.bit_4 & basis_bits.bit_5 & basis_bits.bit_6 & basis_bits.bit_7 184 144 # Only count_forward_one/zero base on extender stream 185 145 E1_marker = 0 186 146 M0_marker = 0 … … 194 154 195 155 while token_pos < block_end_pos: 196 token_marker = 1 << token_pos197 198 156 token = ord(file_content[token_pos]) 199 # print(token_pos)157 # print(token_pos) 200 158 # print(ord(file_content[token_pos])) 201 159 literal_length_base = token >> 4 202 160 match_length_base = token & 0x0f 203 161 204 extended_literal = token_marker & CC_0xFX 205 extended_match = token_marker & CC_0xXF 206 207 literal_length_end = pablo.ScanThru(token_marker, extended_literal | extender) 208 209 literal_length_end_pos = position(literal_length_end) 162 extended_literal_value = token & 0xf0 == 0xf0 163 164 literal_length_end_pos = sequential_pablo.position_of_next_zero(bit_set_value(extender, extended_literal_value, token_pos), token_pos) 210 165 211 166 literal_extension_size = literal_length_end_pos - token_pos … … 219 174 literal_length += (literal_extension_size - 1) * 255 + final_length_byte 220 175 221 literal_marker_bit = pablo.Advance(literal_length_end)222 literal_marker_end_bit = pablo.AdvancebyPos(literal_marker_bit, literal_length)223 224 176 # handle e1 bits 225 new_e1_bits = literal_marker_end_bit - literal_length_end 177 # Assume we have enough output buffer 178 new_e1_bits = sequential_pablo.generate_bit_one_mask(literal_length_end_pos, literal_length_end_pos + 1 + literal_length) 226 179 E1_marker |= new_e1_bits 227 180 … … 232 185 # This branch will almost always be taken, there will just one misprediction penalty at the end. 233 186 if offset_pos < block_end_pos: 234 c = offset_pos - token_pos + 1 235 236 match_length_start = pablo.AdvancebyPos(token_marker, c) # second bit of offset 237 match_length_extend_start = pablo.AdvancebyPos(extended_match, c) 238 match_length_end = pablo.ScanThru(match_length_start, match_length_extend_start | extender) 239 match_extension_size = position(match_length_end) - position(match_length_start) 187 match_length_start_pos = offset_pos + 1 188 extended_match_value = token & 0xf == 0xf 189 match_extension_size = sequential_pablo.count_forward_one( 190 bit_set_value(extender, extended_match_value, match_length_start_pos), match_length_start_pos) 240 191 241 192 match_length = match_length_base + 4 … … 245 196 match_length += (match_extension_size - 1) * 255 + ord(file_content[offset_pos + 1 + match_extension_size]) 246 197 247 new_M0_bits = ((1 << (match_length - 1)) - 1) << output_pos 198 # Handle M0 Bits 199 # Assume we have enough output buffer 200 new_M0_bits = sequential_pablo.generate_bit_one_mask(output_pos, output_pos + match_length - 1) 248 201 output_pos += match_length 249 202 250 203 M0_marker |= new_M0_bits 251 204 252 #token_marker = pablo.Advance(match_length_end)253 205 token_pos = offset_pos + match_extension_size + 2 254 206 else: … … 263 215 264 216 def deposit_compressed(file_content, block_data, e1_marker, m0_marker, outputStrBuffer): 265 266 d_marker = ~(m0_marker | pablo.Advance(m0_marker))217 # Maybe we need to calculate d_marker in other pablo kernel 218 d_marker = ~(m0_marker | sequential_pablo.Advance(m0_marker)) 267 219 output_pos = 0 268 220 … … 275 227 continue 276 228 277 output_pos_marker = pablo.ScanTo(singleton(output_pos), d_marker) 278 output_pos = position(output_pos_marker) 279 280 compressed_start_bit = singleton(block_start_pos) 281 token_mark = compressed_start_bit 282 283 last_match_len_marker = pablo.ScanTo(token_mark, e1_marker) # first e1 bit 284 last_match_len_marker_pos = position(last_match_len_marker) 285 literal_end_mark = pablo.ScanThru(last_match_len_marker, e1_marker) 286 literal_end_pos = position(literal_end_mark) 229 output_pos = sequential_pablo.position_of_next_one(d_marker, output_pos) # There should be another cursor for d_marker 230 # Position of first e1 bit 231 last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, block_start_pos) 232 233 literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos) 287 234 literal_length = literal_end_pos - last_match_len_marker_pos - 1 288 235 … … 296 243 match_offset = ord(file_content[offset_pos]) + (ord(file_content[offset_pos+ 1]) << 8) 297 244 match_copy_pos = output_pos - match_offset 298 299 output_mark = singleton(output_pos) 300 match_end_mark = pablo.ScanThru(output_mark, m0_marker) 301 302 match_length = position(match_end_mark) - output_pos + 1 245 match_end_mark_pos = sequential_pablo.position_of_next_zero(m0_marker, output_pos) 246 match_length = match_end_mark_pos - output_pos + 1 303 247 304 248 # Match Copy … … 309 253 310 254 # /* next token */ 311 last_match_len_marker = pablo.ScanTo(literal_end_mark, e1_marker)312 last_match_len_marker_pos = position(last_match_len_marker) 313 literal_end_ mark = pablo.ScanThru(last_match_len_marker, e1_marker)314 literal_end_pos = position(literal_end_mark) 315 literal_length = literal_end_pos - position(last_match_len_marker)- 1255 last_match_len_marker_pos = sequential_pablo.position_of_next_one(e1_marker, literal_end_pos) 256 257 literal_end_pos = sequential_pablo.position_of_next_zero(e1_marker, last_match_len_marker_pos) 258 259 literal_length = literal_end_pos - last_match_len_marker_pos - 1 316 260 317 261 # Literal Copy … … 320 264 321 265 output_pos += literal_length 266 322 267 323 268 def deposit_uncompressed(file_content, uncompressed_block_data, outputStrBuffer): … … 345 290 file_content = f.read() 346 291 basis_bits = Basis_bits() 347 pablo.EOF_mask =pablo.transpose_streams(file_content, basis_bits)292 sequential_pablo.transpose_streams(file_content, basis_bits) 348 293 349 294 … … 366 311 367 312 outputStr = ''.join(outputStrBuffer) 368 #print(outputStr)313 print(outputStr) 369 314 with open(outputFile, 'w') as f: 370 315 f.write(outputStr)
Note: See TracChangeset
for help on using the changeset viewer.