1 | /* |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
---|
3 | * contributor license agreements. See the NOTICE file distributed with |
---|
4 | * this work for additional information regarding copyright ownership. |
---|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
---|
6 | * (the "License"); you may not use this file except in compliance with |
---|
7 | * the License. You may obtain a copy of the License at |
---|
8 | * |
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
10 | * |
---|
11 | * Unless required by applicable law or agreed to in writing, software |
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
14 | * See the License for the specific language governing permissions and |
---|
15 | * limitations under the License. |
---|
16 | * |
---|
17 | */ |
---|
18 | |
---|
19 | /* |
---|
20 | * This package is based on the work done by Keiron Liddle, Aftex Software |
---|
21 | * <keiron@aftexsw.com> to whom the Ant project is very grateful for his |
---|
22 | * great code. |
---|
23 | */ |
---|
24 | package org.apache.hadoop.io.compress.bzip2; |
---|
25 | |
---|
26 | import java.io.InputStream; |
---|
27 | import java.io.IOException; |
---|
28 | |
---|
29 | /** |
---|
30 | * An input stream that decompresses from the BZip2 format (without the file |
---|
31 | * header chars) to be read as any other stream. |
---|
32 | * |
---|
33 | * <p> |
---|
34 | * The decompression requires large amounts of memory. Thus you should call the |
---|
35 | * {@link #close() close()} method as soon as possible, to force |
---|
36 | * <tt>CBZip2InputStream</tt> to release the allocated memory. See |
---|
37 | * {@link CBZip2OutputStream CBZip2OutputStream} for information about memory |
---|
38 | * usage. |
---|
39 | * </p> |
---|
40 | * |
---|
41 | * <p> |
---|
42 | * <tt>CBZip2InputStream</tt> reads bytes from the compressed source stream via |
---|
43 | * the single byte {@link java.io.InputStream#read() read()} method exclusively. |
---|
44 | * Thus you should consider to use a buffered source stream. |
---|
45 | * </p> |
---|
46 | * |
---|
47 | * <p> |
---|
48 | * Instances of this class are not threadsafe. |
---|
49 | * </p> |
---|
50 | */ |
---|
51 | public class CBZip2InputStream extends InputStream implements BZip2Constants { |
---|
52 | |
---|
53 | private static void reportCRCError() throws IOException { |
---|
54 | |
---|
55 | throw new IOException("BZip2 CRC error"); |
---|
56 | |
---|
57 | } |
---|
58 | |
---|
59 | private void makeMaps() { |
---|
60 | final boolean[] inUse = this.data.inUse; |
---|
61 | final byte[] seqToUnseq = this.data.seqToUnseq; |
---|
62 | |
---|
63 | int nInUseShadow = 0; |
---|
64 | |
---|
65 | for (int i = 0; i < 256; i++) { |
---|
66 | if (inUse[i]) |
---|
67 | seqToUnseq[nInUseShadow++] = (byte) i; |
---|
68 | } |
---|
69 | |
---|
70 | this.nInUse = nInUseShadow; |
---|
71 | } |
---|
72 | |
---|
73 | /** |
---|
74 | * Index of the last char in the block, so the block size == last + 1. |
---|
75 | */ |
---|
76 | private int last; |
---|
77 | |
---|
78 | /** |
---|
79 | * Index in zptr[] of original string after sorting. |
---|
80 | */ |
---|
81 | private int origPtr; |
---|
82 | |
---|
83 | /** |
---|
84 | * always: in the range 0 .. 9. The current block size is 100000 * this |
---|
85 | * number. |
---|
86 | */ |
---|
87 | private int blockSize100k; |
---|
88 | |
---|
89 | private boolean blockRandomised; |
---|
90 | |
---|
91 | private int bsBuff; |
---|
92 | private int bsLive; |
---|
93 | private final CRC crc = new CRC(); |
---|
94 | |
---|
95 | private int nInUse; |
---|
96 | |
---|
97 | private InputStream in; |
---|
98 | |
---|
99 | private int currentChar = -1; |
---|
100 | |
---|
101 | private static final int EOF = 0; |
---|
102 | private static final int START_BLOCK_STATE = 1; |
---|
103 | private static final int RAND_PART_A_STATE = 2; |
---|
104 | private static final int RAND_PART_B_STATE = 3; |
---|
105 | private static final int RAND_PART_C_STATE = 4; |
---|
106 | private static final int NO_RAND_PART_A_STATE = 5; |
---|
107 | private static final int NO_RAND_PART_B_STATE = 6; |
---|
108 | private static final int NO_RAND_PART_C_STATE = 7; |
---|
109 | |
---|
110 | private int currentState = START_BLOCK_STATE; |
---|
111 | |
---|
112 | private int storedBlockCRC, storedCombinedCRC; |
---|
113 | private int computedBlockCRC, computedCombinedCRC; |
---|
114 | |
---|
115 | // Variables used by setup* methods exclusively |
---|
116 | |
---|
117 | private int su_count; |
---|
118 | private int su_ch2; |
---|
119 | private int su_chPrev; |
---|
120 | private int su_i2; |
---|
121 | private int su_j2; |
---|
122 | private int su_rNToGo; |
---|
123 | private int su_rTPos; |
---|
124 | private int su_tPos; |
---|
125 | private char su_z; |
---|
126 | |
---|
127 | /** |
---|
128 | * All memory intensive stuff. This field is initialized by initBlock(). |
---|
129 | */ |
---|
130 | private CBZip2InputStream.Data data; |
---|
131 | |
---|
132 | /** |
---|
133 | * Constructs a new CBZip2InputStream which decompresses bytes read from the |
---|
134 | * specified stream. |
---|
135 | * |
---|
136 | * <p> |
---|
137 | * Although BZip2 headers are marked with the magic <tt>"Bz"</tt> this |
---|
138 | * constructor expects the next byte in the stream to be the first one after |
---|
139 | * the magic. Thus callers have to skip the first two bytes. Otherwise this |
---|
140 | * constructor will throw an exception. |
---|
141 | * </p> |
---|
142 | * |
---|
143 | * @throws IOException |
---|
144 | * if the stream content is malformed or an I/O error occurs. |
---|
145 | * @throws NullPointerException |
---|
146 | * if <tt>in == null</tt> |
---|
147 | */ |
---|
148 | public CBZip2InputStream(final InputStream in) throws IOException { |
---|
149 | super(); |
---|
150 | |
---|
151 | this.in = in; |
---|
152 | init(); |
---|
153 | } |
---|
154 | |
---|
155 | public int read() throws IOException { |
---|
156 | if (this.in != null) { |
---|
157 | return read0(); |
---|
158 | } else { |
---|
159 | throw new IOException("stream closed"); |
---|
160 | } |
---|
161 | } |
---|
162 | |
---|
163 | public int read(final byte[] dest, final int offs, final int len) |
---|
164 | throws IOException { |
---|
165 | if (offs < 0) { |
---|
166 | throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); |
---|
167 | } |
---|
168 | if (len < 0) { |
---|
169 | throw new IndexOutOfBoundsException("len(" + len + ") < 0."); |
---|
170 | } |
---|
171 | if (offs + len > dest.length) { |
---|
172 | throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" |
---|
173 | + len + ") > dest.length(" + dest.length + ")."); |
---|
174 | } |
---|
175 | if (this.in == null) { |
---|
176 | throw new IOException("stream closed"); |
---|
177 | } |
---|
178 | |
---|
179 | final int hi = offs + len; |
---|
180 | int destOffs = offs; |
---|
181 | for (int b; (destOffs < hi) && ((b = read0()) >= 0);) { |
---|
182 | dest[destOffs++] = (byte) b; |
---|
183 | } |
---|
184 | |
---|
185 | return (destOffs == offs) ? -1 : (destOffs - offs); |
---|
186 | } |
---|
187 | |
---|
188 | private int read0() throws IOException { |
---|
189 | final int retChar = this.currentChar; |
---|
190 | |
---|
191 | switch (this.currentState) { |
---|
192 | case EOF: |
---|
193 | return -1; |
---|
194 | |
---|
195 | case START_BLOCK_STATE: |
---|
196 | throw new IllegalStateException(); |
---|
197 | |
---|
198 | case RAND_PART_A_STATE: |
---|
199 | throw new IllegalStateException(); |
---|
200 | |
---|
201 | case RAND_PART_B_STATE: |
---|
202 | setupRandPartB(); |
---|
203 | break; |
---|
204 | |
---|
205 | case RAND_PART_C_STATE: |
---|
206 | setupRandPartC(); |
---|
207 | break; |
---|
208 | |
---|
209 | case NO_RAND_PART_A_STATE: |
---|
210 | throw new IllegalStateException(); |
---|
211 | |
---|
212 | case NO_RAND_PART_B_STATE: |
---|
213 | setupNoRandPartB(); |
---|
214 | break; |
---|
215 | |
---|
216 | case NO_RAND_PART_C_STATE: |
---|
217 | setupNoRandPartC(); |
---|
218 | break; |
---|
219 | |
---|
220 | default: |
---|
221 | throw new IllegalStateException(); |
---|
222 | } |
---|
223 | |
---|
224 | return retChar; |
---|
225 | } |
---|
226 | |
---|
227 | private void init() throws IOException { |
---|
228 | int magic2 = this.in.read(); |
---|
229 | if (magic2 != 'h') { |
---|
230 | throw new IOException("Stream is not BZip2 formatted: expected 'h'" |
---|
231 | + " as first byte but got '" + (char) magic2 + "'"); |
---|
232 | } |
---|
233 | |
---|
234 | int blockSize = this.in.read(); |
---|
235 | if ((blockSize < '1') || (blockSize > '9')) { |
---|
236 | throw new IOException("Stream is not BZip2 formatted: illegal " |
---|
237 | + "blocksize " + (char) blockSize); |
---|
238 | } |
---|
239 | |
---|
240 | this.blockSize100k = blockSize - '0'; |
---|
241 | |
---|
242 | initBlock(); |
---|
243 | setupBlock(); |
---|
244 | } |
---|
245 | |
---|
246 | private void initBlock() throws IOException { |
---|
247 | char magic0 = bsGetUByte(); |
---|
248 | char magic1 = bsGetUByte(); |
---|
249 | char magic2 = bsGetUByte(); |
---|
250 | char magic3 = bsGetUByte(); |
---|
251 | char magic4 = bsGetUByte(); |
---|
252 | char magic5 = bsGetUByte(); |
---|
253 | |
---|
254 | if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45 |
---|
255 | && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) { |
---|
256 | complete(); // end of file |
---|
257 | } else if (magic0 != 0x31 || // '1' |
---|
258 | magic1 != 0x41 || // ')' |
---|
259 | magic2 != 0x59 || // 'Y' |
---|
260 | magic3 != 0x26 || // '&' |
---|
261 | magic4 != 0x53 || // 'S' |
---|
262 | magic5 != 0x59 // 'Y' |
---|
263 | ) { |
---|
264 | this.currentState = EOF; |
---|
265 | throw new IOException("bad block header"); |
---|
266 | } else { |
---|
267 | this.storedBlockCRC = bsGetInt(); |
---|
268 | this.blockRandomised = bsR(1) == 1; |
---|
269 | |
---|
270 | /** |
---|
271 | * Allocate data here instead in constructor, so we do not allocate |
---|
272 | * it if the input file is empty. |
---|
273 | */ |
---|
274 | if (this.data == null) { |
---|
275 | this.data = new Data(this.blockSize100k); |
---|
276 | } |
---|
277 | |
---|
278 | // currBlockNo++; |
---|
279 | getAndMoveToFrontDecode(); |
---|
280 | |
---|
281 | this.crc.initialiseCRC(); |
---|
282 | this.currentState = START_BLOCK_STATE; |
---|
283 | } |
---|
284 | } |
---|
285 | |
---|
286 | private void endBlock() throws IOException { |
---|
287 | this.computedBlockCRC = this.crc.getFinalCRC(); |
---|
288 | |
---|
289 | // A bad CRC is considered a fatal error. |
---|
290 | if (this.storedBlockCRC != this.computedBlockCRC) { |
---|
291 | // make next blocks readable without error |
---|
292 | // (repair feature, not yet documented, not tested) |
---|
293 | this.computedCombinedCRC = (this.storedCombinedCRC << 1) |
---|
294 | | (this.storedCombinedCRC >>> 31); |
---|
295 | this.computedCombinedCRC ^= this.storedBlockCRC; |
---|
296 | |
---|
297 | reportCRCError(); |
---|
298 | } |
---|
299 | |
---|
300 | this.computedCombinedCRC = (this.computedCombinedCRC << 1) |
---|
301 | | (this.computedCombinedCRC >>> 31); |
---|
302 | this.computedCombinedCRC ^= this.computedBlockCRC; |
---|
303 | } |
---|
304 | |
---|
305 | private void complete() throws IOException { |
---|
306 | this.storedCombinedCRC = bsGetInt(); |
---|
307 | this.currentState = EOF; |
---|
308 | this.data = null; |
---|
309 | |
---|
310 | if (this.storedCombinedCRC != this.computedCombinedCRC) { |
---|
311 | reportCRCError(); |
---|
312 | } |
---|
313 | } |
---|
314 | |
---|
315 | public void close() throws IOException { |
---|
316 | InputStream inShadow = this.in; |
---|
317 | if (inShadow != null) { |
---|
318 | try { |
---|
319 | if (inShadow != System.in) { |
---|
320 | inShadow.close(); |
---|
321 | } |
---|
322 | } finally { |
---|
323 | this.data = null; |
---|
324 | this.in = null; |
---|
325 | } |
---|
326 | } |
---|
327 | } |
---|
328 | |
---|
329 | private int bsR(final int n) throws IOException { |
---|
330 | int bsLiveShadow = this.bsLive; |
---|
331 | int bsBuffShadow = this.bsBuff; |
---|
332 | |
---|
333 | if (bsLiveShadow < n) { |
---|
334 | final InputStream inShadow = this.in; |
---|
335 | do { |
---|
336 | int thech = inShadow.read(); |
---|
337 | |
---|
338 | if (thech < 0) { |
---|
339 | throw new IOException("unexpected end of stream"); |
---|
340 | } |
---|
341 | |
---|
342 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
343 | bsLiveShadow += 8; |
---|
344 | } while (bsLiveShadow < n); |
---|
345 | |
---|
346 | this.bsBuff = bsBuffShadow; |
---|
347 | } |
---|
348 | |
---|
349 | this.bsLive = bsLiveShadow - n; |
---|
350 | return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1); |
---|
351 | } |
---|
352 | |
---|
353 | private boolean bsGetBit() throws IOException { |
---|
354 | int bsLiveShadow = this.bsLive; |
---|
355 | int bsBuffShadow = this.bsBuff; |
---|
356 | |
---|
357 | if (bsLiveShadow < 1) { |
---|
358 | int thech = this.in.read(); |
---|
359 | |
---|
360 | if (thech < 0) { |
---|
361 | throw new IOException("unexpected end of stream"); |
---|
362 | } |
---|
363 | |
---|
364 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
365 | bsLiveShadow += 8; |
---|
366 | this.bsBuff = bsBuffShadow; |
---|
367 | } |
---|
368 | |
---|
369 | this.bsLive = bsLiveShadow - 1; |
---|
370 | return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; |
---|
371 | } |
---|
372 | |
---|
373 | private char bsGetUByte() throws IOException { |
---|
374 | return (char) bsR(8); |
---|
375 | } |
---|
376 | |
---|
377 | private int bsGetInt() throws IOException { |
---|
378 | return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); |
---|
379 | } |
---|
380 | |
---|
381 | /** |
---|
382 | * Called by createHuffmanDecodingTables() exclusively. |
---|
383 | */ |
---|
384 | private static void hbCreateDecodeTables(final int[] limit, |
---|
385 | final int[] base, final int[] perm, final char[] length, |
---|
386 | final int minLen, final int maxLen, final int alphaSize) { |
---|
387 | for (int i = minLen, pp = 0; i <= maxLen; i++) { |
---|
388 | for (int j = 0; j < alphaSize; j++) { |
---|
389 | if (length[j] == i) { |
---|
390 | perm[pp++] = j; |
---|
391 | } |
---|
392 | } |
---|
393 | } |
---|
394 | |
---|
395 | for (int i = MAX_CODE_LEN; --i > 0;) { |
---|
396 | base[i] = 0; |
---|
397 | limit[i] = 0; |
---|
398 | } |
---|
399 | |
---|
400 | for (int i = 0; i < alphaSize; i++) { |
---|
401 | base[length[i] + 1]++; |
---|
402 | } |
---|
403 | |
---|
404 | for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { |
---|
405 | b += base[i]; |
---|
406 | base[i] = b; |
---|
407 | } |
---|
408 | |
---|
409 | for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { |
---|
410 | final int nb = base[i + 1]; |
---|
411 | vec += nb - b; |
---|
412 | b = nb; |
---|
413 | limit[i] = vec - 1; |
---|
414 | vec <<= 1; |
---|
415 | } |
---|
416 | |
---|
417 | for (int i = minLen + 1; i <= maxLen; i++) { |
---|
418 | base[i] = ((limit[i - 1] + 1) << 1) - base[i]; |
---|
419 | } |
---|
420 | } |
---|
421 | |
---|
422 | private void recvDecodingTables() throws IOException { |
---|
423 | final Data dataShadow = this.data; |
---|
424 | final boolean[] inUse = dataShadow.inUse; |
---|
425 | final byte[] pos = dataShadow.recvDecodingTables_pos; |
---|
426 | final byte[] selector = dataShadow.selector; |
---|
427 | final byte[] selectorMtf = dataShadow.selectorMtf; |
---|
428 | |
---|
429 | int inUse16 = 0; |
---|
430 | |
---|
431 | /* Receive the mapping table */ |
---|
432 | for (int i = 0; i < 16; i++) { |
---|
433 | if (bsGetBit()) { |
---|
434 | inUse16 |= 1 << i; |
---|
435 | } |
---|
436 | } |
---|
437 | |
---|
438 | for (int i = 256; --i >= 0;) { |
---|
439 | inUse[i] = false; |
---|
440 | } |
---|
441 | |
---|
442 | for (int i = 0; i < 16; i++) { |
---|
443 | if ((inUse16 & (1 << i)) != 0) { |
---|
444 | final int i16 = i << 4; |
---|
445 | for (int j = 0; j < 16; j++) { |
---|
446 | if (bsGetBit()) { |
---|
447 | inUse[i16 + j] = true; |
---|
448 | } |
---|
449 | } |
---|
450 | } |
---|
451 | } |
---|
452 | |
---|
453 | makeMaps(); |
---|
454 | final int alphaSize = this.nInUse + 2; |
---|
455 | |
---|
456 | /* Now the selectors */ |
---|
457 | final int nGroups = bsR(3); |
---|
458 | final int nSelectors = bsR(15); |
---|
459 | |
---|
460 | for (int i = 0; i < nSelectors; i++) { |
---|
461 | int j = 0; |
---|
462 | while (bsGetBit()) { |
---|
463 | j++; |
---|
464 | } |
---|
465 | selectorMtf[i] = (byte) j; |
---|
466 | } |
---|
467 | |
---|
468 | /* Undo the MTF values for the selectors. */ |
---|
469 | for (int v = nGroups; --v >= 0;) { |
---|
470 | pos[v] = (byte) v; |
---|
471 | } |
---|
472 | |
---|
473 | for (int i = 0; i < nSelectors; i++) { |
---|
474 | int v = selectorMtf[i] & 0xff; |
---|
475 | final byte tmp = pos[v]; |
---|
476 | while (v > 0) { |
---|
477 | // nearly all times v is zero, 4 in most other cases |
---|
478 | pos[v] = pos[v - 1]; |
---|
479 | v--; |
---|
480 | } |
---|
481 | pos[0] = tmp; |
---|
482 | selector[i] = tmp; |
---|
483 | } |
---|
484 | |
---|
485 | final char[][] len = dataShadow.temp_charArray2d; |
---|
486 | |
---|
487 | /* Now the coding tables */ |
---|
488 | for (int t = 0; t < nGroups; t++) { |
---|
489 | int curr = bsR(5); |
---|
490 | final char[] len_t = len[t]; |
---|
491 | for (int i = 0; i < alphaSize; i++) { |
---|
492 | while (bsGetBit()) { |
---|
493 | curr += bsGetBit() ? -1 : 1; |
---|
494 | } |
---|
495 | len_t[i] = (char) curr; |
---|
496 | } |
---|
497 | } |
---|
498 | |
---|
499 | // finally create the Huffman tables |
---|
500 | createHuffmanDecodingTables(alphaSize, nGroups); |
---|
501 | } |
---|
502 | |
---|
503 | /** |
---|
504 | * Called by recvDecodingTables() exclusively. |
---|
505 | */ |
---|
506 | private void createHuffmanDecodingTables(final int alphaSize, |
---|
507 | final int nGroups) { |
---|
508 | final Data dataShadow = this.data; |
---|
509 | final char[][] len = dataShadow.temp_charArray2d; |
---|
510 | final int[] minLens = dataShadow.minLens; |
---|
511 | final int[][] limit = dataShadow.limit; |
---|
512 | final int[][] base = dataShadow.base; |
---|
513 | final int[][] perm = dataShadow.perm; |
---|
514 | |
---|
515 | for (int t = 0; t < nGroups; t++) { |
---|
516 | int minLen = 32; |
---|
517 | int maxLen = 0; |
---|
518 | final char[] len_t = len[t]; |
---|
519 | for (int i = alphaSize; --i >= 0;) { |
---|
520 | final char lent = len_t[i]; |
---|
521 | if (lent > maxLen) { |
---|
522 | maxLen = lent; |
---|
523 | } |
---|
524 | if (lent < minLen) { |
---|
525 | minLen = lent; |
---|
526 | } |
---|
527 | } |
---|
528 | hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, |
---|
529 | maxLen, alphaSize); |
---|
530 | minLens[t] = minLen; |
---|
531 | } |
---|
532 | } |
---|
533 | |
---|
534 | private void getAndMoveToFrontDecode() throws IOException { |
---|
535 | this.origPtr = bsR(24); |
---|
536 | recvDecodingTables(); |
---|
537 | |
---|
538 | final InputStream inShadow = this.in; |
---|
539 | final Data dataShadow = this.data; |
---|
540 | final byte[] ll8 = dataShadow.ll8; |
---|
541 | final int[] unzftab = dataShadow.unzftab; |
---|
542 | final byte[] selector = dataShadow.selector; |
---|
543 | final byte[] seqToUnseq = dataShadow.seqToUnseq; |
---|
544 | final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; |
---|
545 | final int[] minLens = dataShadow.minLens; |
---|
546 | final int[][] limit = dataShadow.limit; |
---|
547 | final int[][] base = dataShadow.base; |
---|
548 | final int[][] perm = dataShadow.perm; |
---|
549 | final int limitLast = this.blockSize100k * 100000; |
---|
550 | |
---|
551 | /* |
---|
552 | * Setting up the unzftab entries here is not strictly necessary, but it |
---|
553 | * does save having to do it later in a separate pass, and so saves a |
---|
554 | * block's worth of cache misses. |
---|
555 | */ |
---|
556 | for (int i = 256; --i >= 0;) { |
---|
557 | yy[i] = (char) i; |
---|
558 | unzftab[i] = 0; |
---|
559 | } |
---|
560 | |
---|
561 | int groupNo = 0; |
---|
562 | int groupPos = G_SIZE - 1; |
---|
563 | final int eob = this.nInUse + 1; |
---|
564 | int nextSym = getAndMoveToFrontDecode0(0); |
---|
565 | int bsBuffShadow = this.bsBuff; |
---|
566 | int bsLiveShadow = this.bsLive; |
---|
567 | int lastShadow = -1; |
---|
568 | int zt = selector[groupNo] & 0xff; |
---|
569 | int[] base_zt = base[zt]; |
---|
570 | int[] limit_zt = limit[zt]; |
---|
571 | int[] perm_zt = perm[zt]; |
---|
572 | int minLens_zt = minLens[zt]; |
---|
573 | |
---|
574 | while (nextSym != eob) { |
---|
575 | if ((nextSym == RUNA) || (nextSym == RUNB)) { |
---|
576 | int s = -1; |
---|
577 | |
---|
578 | for (int n = 1; true; n <<= 1) { |
---|
579 | if (nextSym == RUNA) { |
---|
580 | s += n; |
---|
581 | } else if (nextSym == RUNB) { |
---|
582 | s += n << 1; |
---|
583 | } else { |
---|
584 | break; |
---|
585 | } |
---|
586 | |
---|
587 | if (groupPos == 0) { |
---|
588 | groupPos = G_SIZE - 1; |
---|
589 | zt = selector[++groupNo] & 0xff; |
---|
590 | base_zt = base[zt]; |
---|
591 | limit_zt = limit[zt]; |
---|
592 | perm_zt = perm[zt]; |
---|
593 | minLens_zt = minLens[zt]; |
---|
594 | } else { |
---|
595 | groupPos--; |
---|
596 | } |
---|
597 | |
---|
598 | int zn = minLens_zt; |
---|
599 | |
---|
600 | // Inlined: |
---|
601 | // int zvec = bsR(zn); |
---|
602 | while (bsLiveShadow < zn) { |
---|
603 | final int thech = inShadow.read(); |
---|
604 | if (thech >= 0) { |
---|
605 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
606 | bsLiveShadow += 8; |
---|
607 | continue; |
---|
608 | } else { |
---|
609 | throw new IOException("unexpected end of stream"); |
---|
610 | } |
---|
611 | } |
---|
612 | int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) |
---|
613 | & ((1 << zn) - 1); |
---|
614 | bsLiveShadow -= zn; |
---|
615 | |
---|
616 | while (zvec > limit_zt[zn]) { |
---|
617 | zn++; |
---|
618 | while (bsLiveShadow < 1) { |
---|
619 | final int thech = inShadow.read(); |
---|
620 | if (thech >= 0) { |
---|
621 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
622 | bsLiveShadow += 8; |
---|
623 | continue; |
---|
624 | } else { |
---|
625 | throw new IOException( |
---|
626 | "unexpected end of stream"); |
---|
627 | } |
---|
628 | } |
---|
629 | bsLiveShadow--; |
---|
630 | zvec = (zvec << 1) |
---|
631 | | ((bsBuffShadow >> bsLiveShadow) & 1); |
---|
632 | } |
---|
633 | nextSym = perm_zt[zvec - base_zt[zn]]; |
---|
634 | } |
---|
635 | |
---|
636 | final byte ch = seqToUnseq[yy[0]]; |
---|
637 | unzftab[ch & 0xff] += s + 1; |
---|
638 | |
---|
639 | while (s-- >= 0) { |
---|
640 | ll8[++lastShadow] = ch; |
---|
641 | } |
---|
642 | |
---|
643 | if (lastShadow >= limitLast) { |
---|
644 | throw new IOException("block overrun"); |
---|
645 | } |
---|
646 | } else { |
---|
647 | if (++lastShadow >= limitLast) { |
---|
648 | throw new IOException("block overrun"); |
---|
649 | } |
---|
650 | |
---|
651 | final char tmp = yy[nextSym - 1]; |
---|
652 | unzftab[seqToUnseq[tmp] & 0xff]++; |
---|
653 | ll8[lastShadow] = seqToUnseq[tmp]; |
---|
654 | |
---|
655 | /* |
---|
656 | * This loop is hammered during decompression, hence avoid |
---|
657 | * native method call overhead of System.arraycopy for very |
---|
658 | * small ranges to copy. |
---|
659 | */ |
---|
660 | if (nextSym <= 16) { |
---|
661 | for (int j = nextSym - 1; j > 0;) { |
---|
662 | yy[j] = yy[--j]; |
---|
663 | } |
---|
664 | } else { |
---|
665 | System.arraycopy(yy, 0, yy, 1, nextSym - 1); |
---|
666 | } |
---|
667 | |
---|
668 | yy[0] = tmp; |
---|
669 | |
---|
670 | if (groupPos == 0) { |
---|
671 | groupPos = G_SIZE - 1; |
---|
672 | zt = selector[++groupNo] & 0xff; |
---|
673 | base_zt = base[zt]; |
---|
674 | limit_zt = limit[zt]; |
---|
675 | perm_zt = perm[zt]; |
---|
676 | minLens_zt = minLens[zt]; |
---|
677 | } else { |
---|
678 | groupPos--; |
---|
679 | } |
---|
680 | |
---|
681 | int zn = minLens_zt; |
---|
682 | |
---|
683 | // Inlined: |
---|
684 | // int zvec = bsR(zn); |
---|
685 | while (bsLiveShadow < zn) { |
---|
686 | final int thech = inShadow.read(); |
---|
687 | if (thech >= 0) { |
---|
688 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
689 | bsLiveShadow += 8; |
---|
690 | continue; |
---|
691 | } else { |
---|
692 | throw new IOException("unexpected end of stream"); |
---|
693 | } |
---|
694 | } |
---|
695 | int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) |
---|
696 | & ((1 << zn) - 1); |
---|
697 | bsLiveShadow -= zn; |
---|
698 | |
---|
699 | while (zvec > limit_zt[zn]) { |
---|
700 | zn++; |
---|
701 | while (bsLiveShadow < 1) { |
---|
702 | final int thech = inShadow.read(); |
---|
703 | if (thech >= 0) { |
---|
704 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
705 | bsLiveShadow += 8; |
---|
706 | continue; |
---|
707 | } else { |
---|
708 | throw new IOException("unexpected end of stream"); |
---|
709 | } |
---|
710 | } |
---|
711 | bsLiveShadow--; |
---|
712 | zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); |
---|
713 | } |
---|
714 | nextSym = perm_zt[zvec - base_zt[zn]]; |
---|
715 | } |
---|
716 | } |
---|
717 | |
---|
718 | this.last = lastShadow; |
---|
719 | this.bsLive = bsLiveShadow; |
---|
720 | this.bsBuff = bsBuffShadow; |
---|
721 | } |
---|
722 | |
---|
723 | private int getAndMoveToFrontDecode0(final int groupNo) throws IOException { |
---|
724 | final InputStream inShadow = this.in; |
---|
725 | final Data dataShadow = this.data; |
---|
726 | final int zt = dataShadow.selector[groupNo] & 0xff; |
---|
727 | final int[] limit_zt = dataShadow.limit[zt]; |
---|
728 | int zn = dataShadow.minLens[zt]; |
---|
729 | int zvec = bsR(zn); |
---|
730 | int bsLiveShadow = this.bsLive; |
---|
731 | int bsBuffShadow = this.bsBuff; |
---|
732 | |
---|
733 | while (zvec > limit_zt[zn]) { |
---|
734 | zn++; |
---|
735 | while (bsLiveShadow < 1) { |
---|
736 | final int thech = inShadow.read(); |
---|
737 | |
---|
738 | if (thech >= 0) { |
---|
739 | bsBuffShadow = (bsBuffShadow << 8) | thech; |
---|
740 | bsLiveShadow += 8; |
---|
741 | continue; |
---|
742 | } else { |
---|
743 | throw new IOException("unexpected end of stream"); |
---|
744 | } |
---|
745 | } |
---|
746 | bsLiveShadow--; |
---|
747 | zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); |
---|
748 | } |
---|
749 | |
---|
750 | this.bsLive = bsLiveShadow; |
---|
751 | this.bsBuff = bsBuffShadow; |
---|
752 | |
---|
753 | return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; |
---|
754 | } |
---|
755 | |
---|
756 | private void setupBlock() throws IOException { |
---|
757 | if (this.data == null) { |
---|
758 | return; |
---|
759 | } |
---|
760 | |
---|
761 | final int[] cftab = this.data.cftab; |
---|
762 | final int[] tt = this.data.initTT(this.last + 1); |
---|
763 | final byte[] ll8 = this.data.ll8; |
---|
764 | cftab[0] = 0; |
---|
765 | System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); |
---|
766 | |
---|
767 | for (int i = 1, c = cftab[0]; i <= 256; i++) { |
---|
768 | c += cftab[i]; |
---|
769 | cftab[i] = c; |
---|
770 | } |
---|
771 | |
---|
772 | for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { |
---|
773 | tt[cftab[ll8[i] & 0xff]++] = i; |
---|
774 | } |
---|
775 | |
---|
776 | if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { |
---|
777 | throw new IOException("stream corrupted"); |
---|
778 | } |
---|
779 | |
---|
780 | this.su_tPos = tt[this.origPtr]; |
---|
781 | this.su_count = 0; |
---|
782 | this.su_i2 = 0; |
---|
783 | this.su_ch2 = 256; /* not a char and not EOF */ |
---|
784 | |
---|
785 | if (this.blockRandomised) { |
---|
786 | this.su_rNToGo = 0; |
---|
787 | this.su_rTPos = 0; |
---|
788 | setupRandPartA(); |
---|
789 | } else { |
---|
790 | setupNoRandPartA(); |
---|
791 | } |
---|
792 | } |
---|
793 | |
---|
794 | private void setupRandPartA() throws IOException { |
---|
795 | if (this.su_i2 <= this.last) { |
---|
796 | this.su_chPrev = this.su_ch2; |
---|
797 | int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; |
---|
798 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
799 | if (this.su_rNToGo == 0) { |
---|
800 | this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; |
---|
801 | if (++this.su_rTPos == 512) { |
---|
802 | this.su_rTPos = 0; |
---|
803 | } |
---|
804 | } else { |
---|
805 | this.su_rNToGo--; |
---|
806 | } |
---|
807 | this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; |
---|
808 | this.su_i2++; |
---|
809 | this.currentChar = su_ch2Shadow; |
---|
810 | this.currentState = RAND_PART_B_STATE; |
---|
811 | this.crc.updateCRC(su_ch2Shadow); |
---|
812 | } else { |
---|
813 | endBlock(); |
---|
814 | initBlock(); |
---|
815 | setupBlock(); |
---|
816 | } |
---|
817 | } |
---|
818 | |
---|
819 | private void setupNoRandPartA() throws IOException { |
---|
820 | if (this.su_i2 <= this.last) { |
---|
821 | this.su_chPrev = this.su_ch2; |
---|
822 | int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; |
---|
823 | this.su_ch2 = su_ch2Shadow; |
---|
824 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
825 | this.su_i2++; |
---|
826 | this.currentChar = su_ch2Shadow; |
---|
827 | this.currentState = NO_RAND_PART_B_STATE; |
---|
828 | this.crc.updateCRC(su_ch2Shadow); |
---|
829 | } else { |
---|
830 | this.currentState = NO_RAND_PART_A_STATE; |
---|
831 | endBlock(); |
---|
832 | initBlock(); |
---|
833 | setupBlock(); |
---|
834 | } |
---|
835 | } |
---|
836 | |
---|
837 | private void setupRandPartB() throws IOException { |
---|
838 | if (this.su_ch2 != this.su_chPrev) { |
---|
839 | this.currentState = RAND_PART_A_STATE; |
---|
840 | this.su_count = 1; |
---|
841 | setupRandPartA(); |
---|
842 | } else if (++this.su_count >= 4) { |
---|
843 | this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); |
---|
844 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
845 | if (this.su_rNToGo == 0) { |
---|
846 | this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; |
---|
847 | if (++this.su_rTPos == 512) { |
---|
848 | this.su_rTPos = 0; |
---|
849 | } |
---|
850 | } else { |
---|
851 | this.su_rNToGo--; |
---|
852 | } |
---|
853 | this.su_j2 = 0; |
---|
854 | this.currentState = RAND_PART_C_STATE; |
---|
855 | if (this.su_rNToGo == 1) { |
---|
856 | this.su_z ^= 1; |
---|
857 | } |
---|
858 | setupRandPartC(); |
---|
859 | } else { |
---|
860 | this.currentState = RAND_PART_A_STATE; |
---|
861 | setupRandPartA(); |
---|
862 | } |
---|
863 | } |
---|
864 | |
---|
865 | private void setupRandPartC() throws IOException { |
---|
866 | if (this.su_j2 < this.su_z) { |
---|
867 | this.currentChar = this.su_ch2; |
---|
868 | this.crc.updateCRC(this.su_ch2); |
---|
869 | this.su_j2++; |
---|
870 | } else { |
---|
871 | this.currentState = RAND_PART_A_STATE; |
---|
872 | this.su_i2++; |
---|
873 | this.su_count = 0; |
---|
874 | setupRandPartA(); |
---|
875 | } |
---|
876 | } |
---|
877 | |
---|
878 | private void setupNoRandPartB() throws IOException { |
---|
879 | if (this.su_ch2 != this.su_chPrev) { |
---|
880 | this.su_count = 1; |
---|
881 | setupNoRandPartA(); |
---|
882 | } else if (++this.su_count >= 4) { |
---|
883 | this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); |
---|
884 | this.su_tPos = this.data.tt[this.su_tPos]; |
---|
885 | this.su_j2 = 0; |
---|
886 | setupNoRandPartC(); |
---|
887 | } else { |
---|
888 | setupNoRandPartA(); |
---|
889 | } |
---|
890 | } |
---|
891 | |
---|
892 | private void setupNoRandPartC() throws IOException { |
---|
893 | if (this.su_j2 < this.su_z) { |
---|
894 | int su_ch2Shadow = this.su_ch2; |
---|
895 | this.currentChar = su_ch2Shadow; |
---|
896 | this.crc.updateCRC(su_ch2Shadow); |
---|
897 | this.su_j2++; |
---|
898 | this.currentState = NO_RAND_PART_C_STATE; |
---|
899 | } else { |
---|
900 | this.su_i2++; |
---|
901 | this.su_count = 0; |
---|
902 | setupNoRandPartA(); |
---|
903 | } |
---|
904 | } |
---|
905 | |
---|
906 | private static final class Data extends Object { |
---|
907 | |
---|
908 | // (with blockSize 900k) |
---|
909 | final boolean[] inUse = new boolean[256]; // 256 byte |
---|
910 | |
---|
911 | final byte[] seqToUnseq = new byte[256]; // 256 byte |
---|
912 | final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte |
---|
913 | final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte |
---|
914 | |
---|
915 | /** |
---|
916 | * Freq table collected to save a pass over the data during |
---|
917 | * decompression. |
---|
918 | */ |
---|
919 | final int[] unzftab = new int[256]; // 1024 byte |
---|
920 | |
---|
921 | final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
---|
922 | final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
---|
923 | final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte |
---|
924 | final int[] minLens = new int[N_GROUPS]; // 24 byte |
---|
925 | |
---|
926 | final int[] cftab = new int[257]; // 1028 byte |
---|
927 | final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte |
---|
928 | final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 |
---|
929 | // byte |
---|
930 | final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte |
---|
931 | // --------------- |
---|
932 | // 60798 byte |
---|
933 | |
---|
934 | int[] tt; // 3600000 byte |
---|
935 | byte[] ll8; // 900000 byte |
---|
936 | |
---|
937 | // --------------- |
---|
938 | // 4560782 byte |
---|
939 | // =============== |
---|
940 | |
---|
941 | Data(int blockSize100k) { |
---|
942 | super(); |
---|
943 | |
---|
944 | this.ll8 = new byte[blockSize100k * BZip2Constants.baseBlockSize]; |
---|
945 | } |
---|
946 | |
---|
947 | /** |
---|
948 | * Initializes the {@link #tt} array. |
---|
949 | * |
---|
950 | * This method is called when the required length of the array is known. |
---|
951 | * I don't initialize it at construction time to avoid unneccessary |
---|
952 | * memory allocation when compressing small files. |
---|
953 | */ |
---|
954 | final int[] initTT(int length) { |
---|
955 | int[] ttShadow = this.tt; |
---|
956 | |
---|
957 | // tt.length should always be >= length, but theoretically |
---|
958 | // it can happen, if the compressor mixed small and large |
---|
959 | // blocks. Normally only the last block will be smaller |
---|
960 | // than others. |
---|
961 | if ((ttShadow == null) || (ttShadow.length < length)) { |
---|
962 | this.tt = ttShadow = new int[length]; |
---|
963 | } |
---|
964 | |
---|
965 | return ttShadow; |
---|
966 | } |
---|
967 | |
---|
968 | } |
---|
969 | } |
---|