[120] | 1 | /* |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
---|
| 3 | * contributor license agreements. See the NOTICE file distributed with |
---|
| 4 | * this work for additional information regarding copyright ownership. |
---|
| 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
---|
| 6 | * (the "License"); you may not use this file except in compliance with |
---|
| 7 | * the License. You may obtain a copy of the License at |
---|
| 8 | * |
---|
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 10 | * |
---|
| 11 | * Unless required by applicable law or agreed to in writing, software |
---|
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
| 14 | * See the License for the specific language governing permissions and |
---|
| 15 | * limitations under the License. |
---|
| 16 | * |
---|
| 17 | */ |
---|
| 18 | |
---|
| 19 | /* |
---|
| 20 | * This package is based on the work done by Keiron Liddle, Aftex Software |
---|
| 21 | * <keiron@aftexsw.com> to whom the Ant project is very grateful for his |
---|
| 22 | * great code. |
---|
| 23 | */ |
---|
| 24 | package org.apache.hadoop.io.compress.bzip2; |
---|
| 25 | |
---|
| 26 | import java.io.InputStream; |
---|
| 27 | import java.io.IOException; |
---|
| 28 | |
---|
| 29 | /** |
---|
| 30 | * An input stream that decompresses from the BZip2 format (without the file |
---|
| 31 | * header chars) to be read as any other stream. |
---|
| 32 | * |
---|
| 33 | * <p> |
---|
| 34 | * The decompression requires large amounts of memory. Thus you should call the |
---|
| 35 | * {@link #close() close()} method as soon as possible, to force |
---|
| 36 | * <tt>CBZip2InputStream</tt> to release the allocated memory. See |
---|
| 37 | * {@link CBZip2OutputStream CBZip2OutputStream} for information about memory |
---|
| 38 | * usage. |
---|
| 39 | * </p> |
---|
| 40 | * |
---|
| 41 | * <p> |
---|
| 42 | * <tt>CBZip2InputStream</tt> reads bytes from the compressed source stream via |
---|
| 43 | * the single byte {@link java.io.InputStream#read() read()} method exclusively. |
---|
| 44 | * Thus you should consider to use a buffered source stream. |
---|
| 45 | * </p> |
---|
| 46 | * |
---|
| 47 | * <p> |
---|
| 48 | * Instances of this class are not threadsafe. |
---|
| 49 | * </p> |
---|
| 50 | */ |
---|
| 51 | public class CBZip2InputStream extends InputStream implements BZip2Constants { |
---|
| 52 | |
---|
| 53 | private static void reportCRCError() throws IOException { |
---|
| 54 | |
---|
| 55 | throw new IOException("BZip2 CRC error"); |
---|
| 56 | |
---|
| 57 | } |
---|
| 58 | |
---|
| 59 | private void makeMaps() { |
---|
| 60 | final boolean[] inUse = this.data.inUse; |
---|
| 61 | final byte[] seqToUnseq = this.data.seqToUnseq; |
---|
| 62 | |
---|
| 63 | int nInUseShadow = 0; |
---|
| 64 | |
---|
| 65 | for (int i = 0; i < 256; i++) { |
---|
| 66 | if (inUse[i]) |
---|
| 67 | seqToUnseq[nInUseShadow++] = (byte) i; |
---|
| 68 | } |
---|
| 69 | |
---|
| 70 | this.nInUse = nInUseShadow; |
---|
| 71 | } |
---|
| 72 | |
---|
| 73 | /** |
---|
| 74 | * Index of the last char in the block, so the block size == last + 1. |
---|
| 75 | */ |
---|
| 76 | private int last; |
---|
| 77 | |
---|
| 78 | /** |
---|
| 79 | * Index in zptr[] of original string after sorting. |
---|
| 80 | */ |
---|
| 81 | private int origPtr; |
---|
| 82 | |
---|
| 83 | /** |
---|
| 84 | * always: in the range 0 .. 9. The current block size is 100000 * this |
---|
| 85 | * number. |
---|
| 86 | */ |
---|
| 87 | private int blockSize100k; |
---|
| 88 | |
---|
| 89 | private boolean blockRandomised; |
---|
| 90 | |
---|
| 91 | private int bsBuff; |
---|
| 92 | private int bsLive; |
---|
| 93 | private final CRC crc = new CRC(); |
---|
| 94 | |
---|
| 95 | private int nInUse; |
---|
| 96 | |
---|
| 97 | private InputStream in; |
---|
| 98 | |
---|
| 99 | private int currentChar = -1; |
---|
| 100 | |
---|
| 101 | private static final int EOF = 0; |
---|
| 102 | private static final int START_BLOCK_STATE = 1; |
---|
| 103 | private static final int RAND_PART_A_STATE = 2; |
---|
| 104 | private static final int RAND_PART_B_STATE = 3; |
---|
| 105 | private static final int RAND_PART_C_STATE = 4; |
---|
| 106 | private static final int NO_RAND_PART_A_STATE = 5; |
---|
| 107 | private static final int NO_RAND_PART_B_STATE = 6; |
---|
| 108 | private static final int NO_RAND_PART_C_STATE = 7; |
---|
| 109 | |
---|
| 110 | private int currentState = START_BLOCK_STATE; |
---|
| 111 | |
---|
| 112 | private int storedBlockCRC, storedCombinedCRC; |
---|
| 113 | private int computedBlockCRC, computedCombinedCRC; |
---|
| 114 | |
---|
| 115 | // Variables used by setup* methods exclusively |
---|
| 116 | |
---|
| 117 | private int su_count; |
---|
| 118 | private int su_ch2; |
---|
| 119 | private int su_chPrev; |
---|
| 120 | private int su_i2; |
---|
| 121 | private int su_j2; |
---|
| 122 | private int su_rNToGo; |
---|
| 123 | private int su_rTPos; |
---|
| 124 | private int su_tPos; |
---|
| 125 | private char su_z; |
---|
| 126 | |
---|
| 127 | /** |
---|
| 128 | * All memory intensive stuff. This field is initialized by initBlock(). |
---|
| 129 | */ |
---|
| 130 | private CBZip2InputStream.Data data; |
---|
| 131 | |
---|
| 132 | /** |
---|
| 133 | * Constructs a new CBZip2InputStream which decompresses bytes read from the |
---|
| 134 | * specified stream. |
---|
| 135 | * |
---|
| 136 | * <p> |
---|
| 137 | * Although BZip2 headers are marked with the magic <tt>"Bz"</tt> this |
---|
| 138 | * constructor expects the next byte in the stream to be the first one after |
---|
| 139 | * the magic. Thus callers have to skip the first two bytes. Otherwise this |
---|
| 140 | * constructor will throw an exception. |
---|
| 141 | * </p> |
---|
| 142 | * |
---|
| 143 | * @throws IOException |
---|
| 144 | * if the stream content is malformed or an I/O error occurs. |
---|
| 145 | * @throws NullPointerException |
---|
| 146 | * if <tt>in == null</tt> |
---|
| 147 | */ |
---|
| 148 | public CBZip2InputStream(final InputStream in) throws IOException { |
---|
| 149 | super(); |
---|
| 150 | |
---|
| 151 | this.in = in; |
---|
| 152 | init(); |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | public int read() throws IOException { |
---|
| 156 | if (this.in != null) { |
---|
| 157 | return read0(); |
---|
| 158 | } else { |
---|
| 159 | throw new IOException("stream closed"); |
---|
| 160 | } |
---|
| 161 | } |
---|
| 162 | |
---|
| 163 | public int read(final byte[] dest, final int offs, final int len) |
---|
| 164 | throws IOException { |
---|
| 165 | if (offs < 0) { |
---|
| 166 | throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); |
---|
| 167 | } |
---|
| 168 | if (len < 0) { |
---|
| 169 | throw new IndexOutOfBoundsException("len(" + len + ") < 0."); |
---|
| 170 | } |
---|
| 171 | if (offs + len > dest.length) { |
---|
| 172 | throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" |
---|
| 173 | + len + ") > dest.length(" + dest.length + ")."); |
---|
| 174 | } |
---|
| 175 | if (this.in == null) { |
---|
| 176 | throw new IOException("stream closed"); |
---|
| 177 | } |
---|
| 178 | |
---|
| 179 | final int hi = offs + len; |
---|
| 180 | int destOffs = offs; |
---|
| 181 | for (int b; (destOffs < hi) && ((b = read0()) >= 0);) { |
---|
| 182 | dest[destOffs++] = (byte) b; |
---|
| 183 | } |
---|
| 184 | |
---|
| 185 | return (destOffs == offs) ? -1 : (destOffs - offs); |
---|
| 186 | } |
---|
| 187 | |
---|
| 188 | private int read0() throws IOException { |
---|
| 189 | final int retChar = this.currentChar; |
---|
| 190 | |
---|
| 191 | switch (this.currentState) { |
---|
| 192 | case EOF: |
---|
| 193 | return -1; |
---|
| 194 | |
---|
| 195 | case START_BLOCK_STATE: |
---|
| 196 | throw new IllegalStateException(); |
---|
| 197 | |
---|
| 198 | case RAND_PART_A_STATE: |
---|
| 199 | throw new IllegalStateException(); |
---|
| 200 | |
---|
| 201 | case RAND_PART_B_STATE: |
---|
| 202 | setupRandPartB(); |
---|
| 203 | break; |
---|
| 204 | |
---|
| 205 | case RAND_PART_C_STATE: |
---|
| 206 | setupRandPartC(); |
---|
| 207 | break; |
---|
| 208 | |
---|
| 209 | case NO_RAND_PART_A_STATE: |
---|
| 210 | throw new IllegalStateException(); |
---|
| 211 | |
---|
| 212 | case NO_RAND_PART_B_STATE: |
---|
| 213 | setupNoRandPartB(); |
---|
| 214 | break; |
---|
| 215 | |
---|
| 216 | case NO_RAND_PART_C_STATE: |
---|
| 217 | setupNoRandPartC(); |
---|
| 218 | break; |
---|
| 219 | |
---|
| 220 | default: |
---|
| 221 | throw new IllegalStateException(); |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | return retChar; |
---|
| 225 | } |
---|
| 226 | |
---|
| 227 | private void init() throws IOException { |
---|
| 228 | int magic2 = this.in.read(); |
---|
| 229 | if (magic2 != 'h') { |
---|
| 230 | throw new IOException("Stream is not BZip2 formatted: expected 'h'" |
---|
| 231 | + " as first byte but got '" + (char) magic2 + "'"); |
---|
| 232 | } |
---|
| 233 | |
---|
| 234 | int blockSize = this.in.read(); |
---|
| 235 | if ((blockSize < '1') || (blockSize > '9')) { |
---|
| 236 | throw new IOException("Stream is not BZip2 formatted: illegal " |
---|
| 237 | + "blocksize " + (char) blockSize); |
---|
| 238 | } |
---|
| 239 | |
---|
| 240 | this.blockSize100k = blockSize - '0'; |
---|
| 241 | |
---|
| 242 | initBlock(); |
---|
| 243 | setupBlock(); |
---|
| 244 | } |
---|
| 245 | |
---|
| 246 | private void initBlock() throws IOException { |
---|
| 247 | char magic0 = bsGetUByte(); |
---|
| 248 | char magic1 = bsGetUByte(); |
---|
| 249 | char magic2 = bsGetUByte(); |
---|
| 250 | char magic3 = bsGetUByte(); |
---|
| 251 | char magic4 = bsGetUByte(); |
---|
| 252 | char magic5 = bsGetUByte(); |
---|
| 253 | |
---|
| 254 | if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45 |
---|
| 255 | && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) { |
---|
| 256 | complete(); // end of file |
---|
| 257 | } else if (magic0 != 0x31 || // '1' |
---|
| 258 | magic1 != 0x41 || // ')' |
---|
| 259 | magic2 != 0x59 || // 'Y' |
---|
| 260 | magic3 != 0x26 || // '&' |
---|
| 261 | magic4 != 0x53 || // 'S' |
---|
| 262 | magic5 != 0x59 // 'Y' |
---|
| 263 | ) { |
---|
| 264 | this.currentState = EOF; |
---|
| 265 | throw new IOException("bad block header"); |
---|
| 266 | } else { |
---|
| 267 | this.storedBlockCRC = bsGetInt(); |
---|
| 268 | this.blockRandomised = bsR(1) == 1; |
---|
| 269 | |
---|
| 270 | /** |
---|
| 271 | * Allocate data here instead in constructor, so we do not allocate |
---|
| 272 | * it if the input file is empty. |
---|
| 273 | */ |
---|
| 274 | if (this.data == null) { |
---|
| 275 | this.data = new Data(this.blockSize100k); |
---|
| 276 | } |
---|
| 277 | |
---|
| 278 | // currBlockNo++; |
---|
| 279 | getAndMoveToFrontDecode(); |
---|
| 280 | |
---|
| 281 | this.crc.initialiseCRC(); |
---|
| 282 | this.currentState = START_BLOCK_STATE; |
---|
| 283 | } |
---|
| 284 | } |
---|
| 285 | |
---|
| 286 | private void endBlock() throws IOException { |
---|
| 287 | this.computedBlockCRC = this.crc.getFinalCRC(); |
---|
| 288 | |
---|
| 289 | // A bad CRC is considered a fatal error. |
---|
| 290 | if (this.storedBlockCRC != this.computedBlockCRC) { |
---|
| 291 | // make next blocks readable without error |
---|
| 292 | // (repair feature, not yet documented, not tested) |
---|
| 293 | this.computedCombinedCRC = (this.storedCombinedCRC << 1) |
---|
| 294 | | (this.storedCombinedCRC >>> 31); |
---|
| 295 | this.computedCombinedCRC ^= this.storedBlockCRC; |
---|
| 296 | |
---|
| 297 | reportCRCError(); |
---|
| 298 | } |
---|
| 299 | |
---|
| 300 | this.computedCombinedCRC = (this.computedCombinedCRC << 1) |
---|
| 301 | | (this.computedCombinedCRC >>> 31); |
---|
| 302 | this.computedCombinedCRC ^= this.computedBlockCRC; |
---|
| 303 | } |
---|
| 304 | |
---|
| 305 | private void complete() throws IOException { |
---|
| 306 | this.storedCombinedCRC = bsGetInt(); |
---|
| 307 | this.currentState = EOF; |
---|
| 308 | this.data = null; |
---|
| 309 | |
---|
| 310 | if (this.storedCombinedCRC != this.computedCombinedCRC) { |
---|
| 311 | reportCRCError(); |
---|
| 312 | } |
---|
| 313 | } |
---|
| 314 | |
---|
| 315 | public void close() throws IOException { |
---|
| 316 | InputStream inShadow = this.in; |
---|
| 317 | if (inShadow != null) { |
---|
| 318 | try { |
---|
| 319 | if (inShadow != System.in) { |
---|
| 320 | inShadow.close(); |
---|
| 321 | } |
---|
| 322 | } finally { |
---|
| 323 | this.data = null; |
---|
| 324 | this.in = null; |
---|
| 325 | } |
---|
| 326 | } |
---|
| 327 | } |
---|
| 328 | |
---|
| 329 | private int bsR(final int n) throws IOException { |
---|
| 330 | int bsLiveShadow = this.bsLive; |
---|
| 331 | int bsBuffShadow = this.bsBuff; |
---|
| 332 | |
---|
| 333 | if (bsLiveShadow < n) { |
---|
| 334 | final InputStream inShadow = this.in; |
---|
| 335 | do { |
---|
| 336 | int thech = inShadow.read(); |
---|
| 337 | |
---|
| 338 | if (thech < 0) { |
---|
| 339 | throw new IOException("unexpected end of stream"); |
---|
| 340 | } |
---|
| 341 | |
---|
| 342 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
| 343 | bsLiveShadow += 8; |
---|
| 344 | } while (bsLiveShadow < n); |
---|
| 345 | |
---|
| 346 | this.bsBuff = bsBuffShadow; |
---|
| 347 | } |
---|
| 348 | |
---|
| 349 | this.bsLive = bsLiveShadow - n; |
---|
| 350 | return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1); |
---|
| 351 | } |
---|
| 352 | |
---|
| 353 | private boolean bsGetBit() throws IOException { |
---|
| 354 | int bsLiveShadow = this.bsLive; |
---|
| 355 | int bsBuffShadow = this.bsBuff; |
---|
| 356 | |
---|
| 357 | if (bsLiveShadow < 1) { |
---|
| 358 | int thech = this.in.read(); |
---|
| 359 | |
---|
| 360 | if (thech < 0) { |
---|
| 361 | throw new IOException("unexpected end of stream"); |
---|
| 362 | } |
---|
| 363 | |
---|
| 364 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
| 365 | bsLiveShadow += 8; |
---|
| 366 | this.bsBuff = bsBuffShadow; |
---|
| 367 | } |
---|
| 368 | |
---|
| 369 | this.bsLive = bsLiveShadow - 1; |
---|
| 370 | return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; |
---|
| 371 | } |
---|
| 372 | |
---|
| 373 | private char bsGetUByte() throws IOException { |
---|
| 374 | return (char) bsR(8); |
---|
| 375 | } |
---|
| 376 | |
---|
| 377 | private int bsGetInt() throws IOException { |
---|
| 378 | return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); |
---|
| 379 | } |
---|
| 380 | |
---|
| 381 | /** |
---|
| 382 | * Called by createHuffmanDecodingTables() exclusively. |
---|
| 383 | */ |
---|
| 384 | private static void hbCreateDecodeTables(final int[] limit, |
---|
| 385 | final int[] base, final int[] perm, final char[] length, |
---|
| 386 | final int minLen, final int maxLen, final int alphaSize) { |
---|
| 387 | for (int i = minLen, pp = 0; i <= maxLen; i++) { |
---|
| 388 | for (int j = 0; j < alphaSize; j++) { |
---|
| 389 | if (length[j] == i) { |
---|
| 390 | perm[pp++] = j; |
---|
| 391 | } |
---|
| 392 | } |
---|
| 393 | } |
---|
| 394 | |
---|
| 395 | for (int i = MAX_CODE_LEN; --i > 0;) { |
---|
| 396 | base[i] = 0; |
---|
| 397 | limit[i] = 0; |
---|
| 398 | } |
---|
| 399 | |
---|
| 400 | for (int i = 0; i < alphaSize; i++) { |
---|
| 401 | base[length[i] + 1]++; |
---|
| 402 | } |
---|
| 403 | |
---|
| 404 | for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { |
---|
| 405 | b += base[i]; |
---|
| 406 | base[i] = b; |
---|
| 407 | } |
---|
| 408 | |
---|
| 409 | for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { |
---|
| 410 | final int nb = base[i + 1]; |
---|
| 411 | vec += nb - b; |
---|
| 412 | b = nb; |
---|
| 413 | limit[i] = vec - 1; |
---|
| 414 | vec <<= 1; |
---|
| 415 | } |
---|
| 416 | |
---|
| 417 | for (int i = minLen + 1; i <= maxLen; i++) { |
---|
| 418 | base[i] = ((limit[i - 1] + 1) << 1) - base[i]; |
---|
| 419 | } |
---|
| 420 | } |
---|
| 421 | |
---|
| 422 | private void recvDecodingTables() throws IOException { |
---|
| 423 | final Data dataShadow = this.data; |
---|
| 424 | final boolean[] inUse = dataShadow.inUse; |
---|
| 425 | final byte[] pos = dataShadow.recvDecodingTables_pos; |
---|
| 426 | final byte[] selector = dataShadow.selector; |
---|
| 427 | final byte[] selectorMtf = dataShadow.selectorMtf; |
---|
| 428 | |
---|
| 429 | int inUse16 = 0; |
---|
| 430 | |
---|
| 431 | /* Receive the mapping table */ |
---|
| 432 | for (int i = 0; i < 16; i++) { |
---|
| 433 | if (bsGetBit()) { |
---|
| 434 | inUse16 |= 1 << i; |
---|
| 435 | } |
---|
| 436 | } |
---|
| 437 | |
---|
| 438 | for (int i = 256; --i >= 0;) { |
---|
| 439 | inUse[i] = false; |
---|
| 440 | } |
---|
| 441 | |
---|
| 442 | for (int i = 0; i < 16; i++) { |
---|
| 443 | if ((inUse16 & (1 << i)) != 0) { |
---|
| 444 | final int i16 = i << 4; |
---|
| 445 | for (int j = 0; j < 16; j++) { |
---|
| 446 | if (bsGetBit()) { |
---|
| 447 | inUse[i16 + j] = true; |
---|
| 448 | } |
---|
| 449 | } |
---|
| 450 | } |
---|
| 451 | } |
---|
| 452 | |
---|
| 453 | makeMaps(); |
---|
| 454 | final int alphaSize = this.nInUse + 2; |
---|
| 455 | |
---|
| 456 | /* Now the selectors */ |
---|
| 457 | final int nGroups = bsR(3); |
---|
| 458 | final int nSelectors = bsR(15); |
---|
| 459 | |
---|
| 460 | for (int i = 0; i < nSelectors; i++) { |
---|
| 461 | int j = 0; |
---|
| 462 | while (bsGetBit()) { |
---|
| 463 | j++; |
---|
| 464 | } |
---|
| 465 | selectorMtf[i] = (byte) j; |
---|
| 466 | } |
---|
| 467 | |
---|
| 468 | /* Undo the MTF values for the selectors. */ |
---|
| 469 | for (int v = nGroups; --v >= 0;) { |
---|
| 470 | pos[v] = (byte) v; |
---|
| 471 | } |
---|
| 472 | |
---|
| 473 | for (int i = 0; i < nSelectors; i++) { |
---|
| 474 | int v = selectorMtf[i] & 0xff; |
---|
| 475 | final byte tmp = pos[v]; |
---|
| 476 | while (v > 0) { |
---|
| 477 | // nearly all times v is zero, 4 in most other cases |
---|
| 478 | pos[v] = pos[v - 1]; |
---|
| 479 | v--; |
---|
| 480 | } |
---|
| 481 | pos[0] = tmp; |
---|
| 482 | selector[i] = tmp; |
---|
| 483 | } |
---|
| 484 | |
---|
| 485 | final char[][] len = dataShadow.temp_charArray2d; |
---|
| 486 | |
---|
| 487 | /* Now the coding tables */ |
---|
| 488 | for (int t = 0; t < nGroups; t++) { |
---|
| 489 | int curr = bsR(5); |
---|
| 490 | final char[] len_t = len[t]; |
---|
| 491 | for (int i = 0; i < alphaSize; i++) { |
---|
| 492 | while (bsGetBit()) { |
---|
| 493 | curr += bsGetBit() ? -1 : 1; |
---|
| 494 | } |
---|
| 495 | len_t[i] = (char) curr; |
---|
| 496 | } |
---|
| 497 | } |
---|
| 498 | |
---|
| 499 | // finally create the Huffman tables |
---|
| 500 | createHuffmanDecodingTables(alphaSize, nGroups); |
---|
| 501 | } |
---|
| 502 | |
---|
| 503 | /** |
---|
| 504 | * Called by recvDecodingTables() exclusively. |
---|
| 505 | */ |
---|
| 506 | private void createHuffmanDecodingTables(final int alphaSize, |
---|
| 507 | final int nGroups) { |
---|
| 508 | final Data dataShadow = this.data; |
---|
| 509 | final char[][] len = dataShadow.temp_charArray2d; |
---|
| 510 | final int[] minLens = dataShadow.minLens; |
---|
| 511 | final int[][] limit = dataShadow.limit; |
---|
| 512 | final int[][] base = dataShadow.base; |
---|
| 513 | final int[][] perm = dataShadow.perm; |
---|
| 514 | |
---|
| 515 | for (int t = 0; t < nGroups; t++) { |
---|
| 516 | int minLen = 32; |
---|
| 517 | int maxLen = 0; |
---|
| 518 | final char[] len_t = len[t]; |
---|
| 519 | for (int i = alphaSize; --i >= 0;) { |
---|
| 520 | final char lent = len_t[i]; |
---|
| 521 | if (lent > maxLen) { |
---|
| 522 | maxLen = lent; |
---|
| 523 | } |
---|
| 524 | if (lent < minLen) { |
---|
| 525 | minLen = lent; |
---|
| 526 | } |
---|
| 527 | } |
---|
| 528 | hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, |
---|
| 529 | maxLen, alphaSize); |
---|
| 530 | minLens[t] = minLen; |
---|
| 531 | } |
---|
| 532 | } |
---|
| 533 | |
---|
| 534 | private void getAndMoveToFrontDecode() throws IOException { |
---|
| 535 | this.origPtr = bsR(24); |
---|
| 536 | recvDecodingTables(); |
---|
| 537 | |
---|
| 538 | final InputStream inShadow = this.in; |
---|
| 539 | final Data dataShadow = this.data; |
---|
| 540 | final byte[] ll8 = dataShadow.ll8; |
---|
| 541 | final int[] unzftab = dataShadow.unzftab; |
---|
| 542 | final byte[] selector = dataShadow.selector; |
---|
| 543 | final byte[] seqToUnseq = dataShadow.seqToUnseq; |
---|
| 544 | final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; |
---|
| 545 | final int[] minLens = dataShadow.minLens; |
---|
| 546 | final int[][] limit = dataShadow.limit; |
---|
| 547 | final int[][] base = dataShadow.base; |
---|
| 548 | final int[][] perm = dataShadow.perm; |
---|
| 549 | final int limitLast = this.blockSize100k * 100000; |
---|
| 550 | |
---|
| 551 | /* |
---|
| 552 | * Setting up the unzftab entries here is not strictly necessary, but it |
---|
| 553 | * does save having to do it later in a separate pass, and so saves a |
---|
| 554 | * block's worth of cache misses. |
---|
| 555 | */ |
---|
| 556 | for (int i = 256; --i >= 0;) { |
---|
| 557 | yy[i] = (char) i; |
---|
| 558 | unzftab[i] = 0; |
---|
| 559 | } |
---|
| 560 | |
---|
| 561 | int groupNo = 0; |
---|
| 562 | int groupPos = G_SIZE - 1; |
---|
| 563 | final int eob = this.nInUse + 1; |
---|
| 564 | int nextSym = getAndMoveToFrontDecode0(0); |
---|
| 565 | int bsBuffShadow = this.bsBuff; |
---|
| 566 | int bsLiveShadow = this.bsLive; |
---|
| 567 | int lastShadow = -1; |
---|
| 568 | int zt = selector[groupNo] & 0xff; |
---|
| 569 | int[] base_zt = base[zt]; |
---|
| 570 | int[] limit_zt = limit[zt]; |
---|
| 571 | int[] perm_zt = perm[zt]; |
---|
| 572 | int minLens_zt = minLens[zt]; |
---|
| 573 | |
---|
| 574 | while (nextSym != eob) { |
---|
| 575 | if ((nextSym == RUNA) || (nextSym == RUNB)) { |
---|
| 576 | int s = -1; |
---|
| 577 | |
---|
| 578 | for (int n = 1; true; n <<= 1) { |
---|
| 579 | if (nextSym == RUNA) { |
---|
| 580 | s += n; |
---|
| 581 | } else if (nextSym == RUNB) { |
---|
| 582 | s += n << 1; |
---|
| 583 | } else { |
---|
| 584 | break; |
---|
| 585 | } |
---|
| 586 | |
---|
| 587 | if (groupPos == 0) { |
---|
| 588 | groupPos = G_SIZE - 1; |
---|
| 589 | zt = selector[++groupNo] & 0xff; |
---|
| 590 | base_zt = base[zt]; |
---|
| 591 | limit_zt = limit[zt]; |
---|
| 592 | perm_zt = perm[zt]; |
---|
| 593 | minLens_zt = minLens[zt]; |
---|
| 594 | } else { |
---|
| 595 | groupPos--; |
---|
| 596 | } |
---|
| 597 | |
---|
| 598 | int zn = minLens_zt; |
---|
| 599 | |
---|
| 600 | // Inlined: |
---|
| 601 | // int zvec = bsR(zn); |
---|
| 602 | while (bsLiveShadow < zn) { |
---|
| 603 | final int thech = inShadow.read(); |
---|
| 604 | if (thech >= 0) { |
---|
| 605 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
| 606 | bsLiveShadow += 8; |
---|
| 607 | continue; |
---|
| 608 | } else { |
---|
| 609 | throw new IOException("unexpected end of stream"); |
---|
| 610 | } |
---|
| 611 | } |
---|
| 612 | int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) |
---|
| 613 | & ((1 << zn) - 1); |
---|
| 614 | bsLiveShadow -= zn; |
---|
| 615 | |
---|
| 616 | while (zvec > limit_zt[zn]) { |
---|
| 617 | zn++; |
---|
| 618 | while (bsLiveShadow < 1) { |
---|
| 619 | final int thech = inShadow.read(); |
---|
| 620 | if (thech >= 0) { |
---|
| 621 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
| 622 | bsLiveShadow += 8; |
---|
| 623 | continue; |
---|
| 624 | } else { |
---|
| 625 | throw new IOException( |
---|
| 626 | "unexpected end of stream"); |
---|
| 627 | } |
---|
| 628 | } |
---|
| 629 | bsLiveShadow--; |
---|
| 630 | zvec = (zvec << 1) |
---|
| 631 | | ((bsBuffShadow >> bsLiveShadow) & 1); |
---|
| 632 | } |
---|
| 633 | nextSym = perm_zt[zvec - base_zt[zn]]; |
---|
| 634 | } |
---|
| 635 | |
---|
| 636 | final byte ch = seqToUnseq[yy[0]]; |
---|
| 637 | unzftab[ch & 0xff] += s + 1; |
---|
| 638 | |
---|
| 639 | while (s-- >= 0) { |
---|
| 640 | ll8[++lastShadow] = ch; |
---|
| 641 | } |
---|
| 642 | |
---|
| 643 | if (lastShadow >= limitLast) { |
---|
| 644 | throw new IOException("block overrun"); |
---|
| 645 | } |
---|
| 646 | } else { |
---|
| 647 | if (++lastShadow >= limitLast) { |
---|
| 648 | throw new IOException("block overrun"); |
---|
| 649 | } |
---|
| 650 | |
---|
| 651 | final char tmp = yy[nextSym - 1]; |
---|
| 652 | unzftab[seqToUnseq[tmp] & 0xff]++; |
---|
| 653 | ll8[lastShadow] = seqToUnseq[tmp]; |
---|
| 654 | |
---|
| 655 | /* |
---|
| 656 | * This loop is hammered during decompression, hence avoid |
---|
| 657 | * native method call overhead of System.arraycopy for very |
---|
| 658 | * small ranges to copy. |
---|
| 659 | */ |
---|
| 660 | if (nextSym <= 16) { |
---|
| 661 | for (int j = nextSym - 1; j > 0;) { |
---|
| 662 | yy[j] = yy[--j]; |
---|
| 663 | } |
---|
| 664 | } else { |
---|
| 665 | System.arraycopy(yy, 0, yy, 1, nextSym - 1); |
---|
| 666 | } |
---|
| 667 | |
---|
| 668 | yy[0] = tmp; |
---|
| 669 | |
---|
| 670 | if (groupPos == 0) { |
---|
| 671 | groupPos = G_SIZE - 1; |
---|
| 672 | zt = selector[++groupNo] & 0xff; |
---|
| 673 | base_zt = base[zt]; |
---|
| 674 | limit_zt = limit[zt]; |
---|
| 675 | perm_zt = perm[zt]; |
---|
| 676 | minLens_zt = minLens[zt]; |
---|
| 677 | } else { |
---|
| 678 | groupPos--; |
---|
| 679 | } |
---|
| 680 | |
---|
| 681 | int zn = minLens_zt; |
---|
| 682 | |
---|
| 683 | // Inlined: |
---|
| 684 | // int zvec = bsR(zn); |
---|
| 685 | while (bsLiveShadow < zn) { |
---|
| 686 | final int thech = inShadow.read(); |
---|
| 687 | if (thech >= 0) { |
---|
| 688 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
| 689 | bsLiveShadow += 8; |
---|
| 690 | continue; |
---|
| 691 | } else { |
---|
| 692 | throw new IOException("unexpected end of stream"); |
---|
| 693 | } |
---|
| 694 | } |
---|
| 695 | int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) |
---|
| 696 | & ((1 << zn) - 1); |
---|
| 697 | bsLiveShadow -= zn; |
---|
| 698 | |
---|
| 699 | while (zvec > limit_zt[zn]) { |
---|
| 700 | zn++; |
---|
| 701 | while (bsLiveShadow < 1) { |
---|
| 702 | final int thech = inShadow.read(); |
---|
| 703 | if (thech >= 0) { |
---|
| 704 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
| 705 | bsLiveShadow += 8; |
---|
| 706 | continue; |
---|
| 707 | } else { |
---|
| 708 | throw new IOException("unexpected end of stream"); |
---|
| 709 | } |
---|
| 710 | } |
---|
| 711 | bsLiveShadow--; |
---|
| 712 | zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); |
---|
| 713 | } |
---|
| 714 | nextSym = perm_zt[zvec - base_zt[zn]]; |
---|
| 715 | } |
---|
| 716 | } |
---|
| 717 | |
---|
| 718 | this.last = lastShadow; |
---|
| 719 | this.bsLive = bsLiveShadow; |
---|
| 720 | this.bsBuff = bsBuffShadow; |
---|
| 721 | } |
---|
| 722 | |
---|
| 723 | private int getAndMoveToFrontDecode0(final int groupNo) throws IOException { |
---|
| 724 | final InputStream inShadow = this.in; |
---|
| 725 | final Data dataShadow = this.data; |
---|
| 726 | final int zt = dataShadow.selector[groupNo] & 0xff; |
---|
| 727 | final int[] limit_zt = dataShadow.limit[zt]; |
---|
| 728 | int zn = dataShadow.minLens[zt]; |
---|
| 729 | int zvec = bsR(zn); |
---|
| 730 | int bsLiveShadow = this.bsLive; |
---|
| 731 | int bsBuffShadow = this.bsBuff; |
---|
| 732 | |
---|
| 733 | while (zvec > limit_zt[zn]) { |
---|
| 734 | zn++; |
---|
| 735 | while (bsLiveShadow < 1) { |
---|
| 736 | final int thech = inShadow.read(); |
---|
| 737 | |
---|
| 738 | if (thech >= 0) { |
---|
| 739 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
| 740 | bsLiveShadow += 8; |
---|
| 741 | continue; |
---|
| 742 | } else { |
---|
| 743 | throw new IOException("unexpected end of stream"); |
---|
| 744 | } |
---|
| 745 | } |
---|
| 746 | bsLiveShadow--; |
---|
| 747 | zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); |
---|
| 748 | } |
---|
| 749 | |
---|
| 750 | this.bsLive = bsLiveShadow; |
---|
| 751 | this.bsBuff = bsBuffShadow; |
---|
| 752 | |
---|
| 753 | return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; |
---|
| 754 | } |
---|
| 755 | |
---|
| 756 | private void setupBlock() throws IOException { |
---|
| 757 | if (this.data == null) { |
---|
| 758 | return; |
---|
| 759 | } |
---|
| 760 | |
---|
| 761 | final int[] cftab = this.data.cftab; |
---|
| 762 | final int[] tt = this.data.initTT(this.last + 1); |
---|
| 763 | final byte[] ll8 = this.data.ll8; |
---|
| 764 | cftab[0] = 0; |
---|
| 765 | System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); |
---|
| 766 | |
---|
| 767 | for (int i = 1, c = cftab[0]; i <= 256; i++) { |
---|
| 768 | c += cftab[i]; |
---|
| 769 | cftab[i] = c; |
---|
| 770 | } |
---|
| 771 | |
---|
| 772 | for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { |
---|
| 773 | tt[cftab[ll8[i] & 0xff]++] = i; |
---|
| 774 | } |
---|
| 775 | |
---|
| 776 | if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { |
---|
| 777 | throw new IOException("stream corrupted"); |
---|
| 778 | } |
---|
| 779 | |
---|
| 780 | this.su_tPos = tt[this.origPtr]; |
---|
| 781 | this.su_count = 0; |
---|
| 782 | this.su_i2 = 0; |
---|
| 783 | this.su_ch2 = 256; /* not a char and not EOF */ |
---|
| 784 | |
---|
| 785 | if (this.blockRandomised) { |
---|
| 786 | this.su_rNToGo = 0; |
---|
| 787 | this.su_rTPos = 0; |
---|
| 788 | setupRandPartA(); |
---|
| 789 | } else { |
---|
| 790 | setupNoRandPartA(); |
---|
| 791 | } |
---|
| 792 | } |
---|
| 793 | |
---|
| 794 | private void setupRandPartA() throws IOException { |
---|
| 795 | if (this.su_i2 <= this.last) { |
---|
| 796 | this.su_chPrev = this.su_ch2; |
---|
| 797 | int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; |
---|
| 798 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
| 799 | if (this.su_rNToGo == 0) { |
---|
| 800 | this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; |
---|
| 801 | if (++this.su_rTPos == 512) { |
---|
| 802 | this.su_rTPos = 0; |
---|
| 803 | } |
---|
| 804 | } else { |
---|
| 805 | this.su_rNToGo--; |
---|
| 806 | } |
---|
| 807 | this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; |
---|
| 808 | this.su_i2++; |
---|
| 809 | this.currentChar = su_ch2Shadow; |
---|
| 810 | this.currentState = RAND_PART_B_STATE; |
---|
| 811 | this.crc.updateCRC(su_ch2Shadow); |
---|
| 812 | } else { |
---|
| 813 | endBlock(); |
---|
| 814 | initBlock(); |
---|
| 815 | setupBlock(); |
---|
| 816 | } |
---|
| 817 | } |
---|
| 818 | |
---|
| 819 | private void setupNoRandPartA() throws IOException { |
---|
| 820 | if (this.su_i2 <= this.last) { |
---|
| 821 | this.su_chPrev = this.su_ch2; |
---|
| 822 | int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; |
---|
| 823 | this.su_ch2 = su_ch2Shadow; |
---|
| 824 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
| 825 | this.su_i2++; |
---|
| 826 | this.currentChar = su_ch2Shadow; |
---|
| 827 | this.currentState = NO_RAND_PART_B_STATE; |
---|
| 828 | this.crc.updateCRC(su_ch2Shadow); |
---|
| 829 | } else { |
---|
| 830 | this.currentState = NO_RAND_PART_A_STATE; |
---|
| 831 | endBlock(); |
---|
| 832 | initBlock(); |
---|
| 833 | setupBlock(); |
---|
| 834 | } |
---|
| 835 | } |
---|
| 836 | |
---|
| 837 | private void setupRandPartB() throws IOException { |
---|
| 838 | if (this.su_ch2 != this.su_chPrev) { |
---|
| 839 | this.currentState = RAND_PART_A_STATE; |
---|
| 840 | this.su_count = 1; |
---|
| 841 | setupRandPartA(); |
---|
| 842 | } else if (++this.su_count >= 4) { |
---|
| 843 | this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); |
---|
| 844 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
| 845 | if (this.su_rNToGo == 0) { |
---|
| 846 | this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; |
---|
| 847 | if (++this.su_rTPos == 512) { |
---|
| 848 | this.su_rTPos = 0; |
---|
| 849 | } |
---|
| 850 | } else { |
---|
| 851 | this.su_rNToGo--; |
---|
| 852 | } |
---|
| 853 | this.su_j2 = 0; |
---|
| 854 | this.currentState = RAND_PART_C_STATE; |
---|
| 855 | if (this.su_rNToGo == 1) { |
---|
| 856 | this.su_z ^= 1; |
---|
| 857 | } |
---|
| 858 | setupRandPartC(); |
---|
| 859 | } else { |
---|
| 860 | this.currentState = RAND_PART_A_STATE; |
---|
| 861 | setupRandPartA(); |
---|
| 862 | } |
---|
| 863 | } |
---|
| 864 | |
---|
| 865 | private void setupRandPartC() throws IOException { |
---|
| 866 | if (this.su_j2 < this.su_z) { |
---|
| 867 | this.currentChar = this.su_ch2; |
---|
| 868 | this.crc.updateCRC(this.su_ch2); |
---|
| 869 | this.su_j2++; |
---|
| 870 | } else { |
---|
| 871 | this.currentState = RAND_PART_A_STATE; |
---|
| 872 | this.su_i2++; |
---|
| 873 | this.su_count = 0; |
---|
| 874 | setupRandPartA(); |
---|
| 875 | } |
---|
| 876 | } |
---|
| 877 | |
---|
| 878 | private void setupNoRandPartB() throws IOException { |
---|
| 879 | if (this.su_ch2 != this.su_chPrev) { |
---|
| 880 | this.su_count = 1; |
---|
| 881 | setupNoRandPartA(); |
---|
| 882 | } else if (++this.su_count >= 4) { |
---|
| 883 | this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); |
---|
| 884 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
| 885 | this.su_j2 = 0; |
---|
| 886 | setupNoRandPartC(); |
---|
| 887 | } else { |
---|
| 888 | setupNoRandPartA(); |
---|
| 889 | } |
---|
| 890 | } |
---|
| 891 | |
---|
| 892 | private void setupNoRandPartC() throws IOException { |
---|
| 893 | if (this.su_j2 < this.su_z) { |
---|
| 894 | int su_ch2Shadow = this.su_ch2; |
---|
| 895 | this.currentChar = su_ch2Shadow; |
---|
| 896 | this.crc.updateCRC(su_ch2Shadow); |
---|
| 897 | this.su_j2++; |
---|
| 898 | this.currentState = NO_RAND_PART_C_STATE; |
---|
| 899 | } else { |
---|
| 900 | this.su_i2++; |
---|
| 901 | this.su_count = 0; |
---|
| 902 | setupNoRandPartA(); |
---|
| 903 | } |
---|
| 904 | } |
---|
| 905 | |
---|
| 906 | private static final class Data extends Object { |
---|
| 907 | |
---|
| 908 | // (with blockSize 900k) |
---|
| 909 | final boolean[] inUse = new boolean[256]; // 256 byte |
---|
| 910 | |
---|
| 911 | final byte[] seqToUnseq = new byte[256]; // 256 byte |
---|
| 912 | final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte |
---|
| 913 | final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte |
---|
| 914 | |
---|
| 915 | /** |
---|
| 916 | * Freq table collected to save a pass over the data during |
---|
| 917 | * decompression. |
---|
| 918 | */ |
---|
| 919 | final int[] unzftab = new int[256]; // 1024 byte |
---|
| 920 | |
---|
| 921 | final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
---|
| 922 | final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
---|
| 923 | final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
---|
| 924 | final int[] minLens = new int[N_GROUPS]; // 24 byte |
---|
| 925 | |
---|
| 926 | final int[] cftab = new int[257]; // 1028 byte |
---|
| 927 | final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte |
---|
| 928 | final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 |
---|
| 929 | // byte |
---|
| 930 | final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte |
---|
| 931 | // --------------- |
---|
| 932 | // 60798 byte |
---|
| 933 | |
---|
| 934 | int[] tt; // 3600000 byte |
---|
| 935 | byte[] ll8; // 900000 byte |
---|
| 936 | |
---|
| 937 | // --------------- |
---|
| 938 | // 4560782 byte |
---|
| 939 | // =============== |
---|
| 940 | |
---|
| 941 | Data(int blockSize100k) { |
---|
| 942 | super(); |
---|
| 943 | |
---|
| 944 | this.ll8 = new byte[blockSize100k * BZip2Constants.baseBlockSize]; |
---|
| 945 | } |
---|
| 946 | |
---|
| 947 | /** |
---|
| 948 | * Initializes the {@link #tt} array. |
---|
| 949 | * |
---|
| 950 | * This method is called when the required length of the array is known. |
---|
| 951 | * I don't initialize it at construction time to avoid unneccessary |
---|
| 952 | * memory allocation when compressing small files. |
---|
| 953 | */ |
---|
| 954 | final int[] initTT(int length) { |
---|
| 955 | int[] ttShadow = this.tt; |
---|
| 956 | |
---|
| 957 | // tt.length should always be >= length, but theoretically |
---|
| 958 | // it can happen, if the compressor mixed small and large |
---|
| 959 | // blocks. Normally only the last block will be smaller |
---|
| 960 | // than others. |
---|
| 961 | if ((ttShadow == null) || (ttShadow.length < length)) { |
---|
| 962 | this.tt = ttShadow = new int[length]; |
---|
| 963 | } |
---|
| 964 | |
---|
| 965 | return ttShadow; |
---|
| 966 | } |
---|
| 967 | |
---|
| 968 | } |
---|
| 969 | } |
---|