Login
Artifact [fd3cd2bace]
Login

Artifact fd3cd2bace98546f2d9c81ff484102fb2caafef22b25f74a498aada808caa116:


#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jaime Olivares:
#### http://github.com/jaime-olivares/bzip2

# Shared constants and the base exception type for the BZip2 implementation.
module RemiLib::Compression::BZip2
  # The default compression level.
  DEFAULT_COMPRESSION_LEVEL = 9

  # The maximum compression level.
  MAX_COMPRESSION_LEVEL = 9

  # The minimum compression level.
  MIN_COMPRESSION_LEVEL = 1

  # :nodoc:
  # First three bytes of the block header marker.
  BLOCK_HEADER_MARKER_1 = 0x314159

  # :nodoc:
  # Last three bytes of the block header marker.
  BLOCK_HEADER_MARKER_2 = 0x265359

  # :nodoc:
  # Maximum possible Huffman alphabet size.
  MAX_ALPHABET_SIZE = 258

  # :nodoc:
  # The longest Huffman code length accepted by the decoder.
  MAX_CODE_LENGTH = 23

  # :nodoc:
  # The longest Huffman code length created by the encoder.
  ENCODE_MAX_CODE_LENGTH = 20

  # :nodoc:
  # Used in initial Huffman table generation.
  HIGH_SYMBOL_COST = 15

  # :nodoc:
  # The longest Huffman code length created by the encoder.
  #
  # This is a second name for `ENCODE_MAX_CODE_LENGTH`.  It is defined in terms
  # of that constant so the two names can never drift apart.
  ENCODE_MAXIMUM_CODE_LENGTH = ENCODE_MAX_CODE_LENGTH

  # :nodoc:
  # Number of symbols decoded after which a new Huffman table is selected.
  GROUP_RUN_LENGTH = 50

  # :nodoc:
  # The first 2 bytes of a Bzip2 marker.
  STREAM_START_MARKER_1 = 0x425a_u32

  # :nodoc:
  # The 'h' that distinguishes BZip from BZip2.
  STREAM_START_MARKER_2 = 0x68_u32

  # :nodoc:
  # First three bytes of the end of stream marker.
  STREAM_END_MARKER_1 = 0x177245_u32

  # :nodoc:
  # Last three bytes of the end of stream marker.
  STREAM_END_MARKER_2 = 0x385090_u32

  # :nodoc:
  # Huffman symbol used for run-length encoding.
  RLE_SYMBOL_RUNA = 0

  # :nodoc:
  # Huffman symbol used for run-length encoding.
  RLE_SYMBOL_RUNB = 1

  # :nodoc:
  # Minimum number of alternative Huffman tables.
  MINIMUM_TABLES = 2

  # :nodoc:
  # Maximum number of alternative Huffman tables.
  MAXIMUM_TABLES = 6

  # :nodoc:
  # The BZip2 specification originally included the optional addition of a
  # slight pseudo-random perturbation to the input data, in order to work
  # around the block sorting algorithm's non-optimal performance on some
  # types of input. The current mainline bzip2 does not require this and will
  # not create randomised blocks, but compatibility is still required for old
  # data (and third party compressors that haven't caught up). When
  # decompressing a randomised block, for each value N in this array, a 1 will
  # be XOR'd onto the output of the Burrows-Wheeler transform stage after N
  # bytes, then the next N taken from the following entry.
  #
  # The table holds 512 entries.
  RNUMS = [
    619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863,
    491, 741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
    419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 878, 465, 811, 169, 869,
    675, 611, 697, 867, 561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
    150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727,
    476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
    909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641, 801, 220, 162, 819,
    984, 589, 513, 495, 799, 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
    382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 98, 553, 163, 354, 666,
    933, 424, 341, 533, 870, 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
    469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383,
    461, 404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
    951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970, 447, 318,
    353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
    609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 653, 282, 762, 623, 680,
    81, 927, 626, 789, 125, 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
    170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 857, 956, 358, 619, 580,
    124, 737, 594, 701, 612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
    944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511,
    655, 814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
    433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754, 806, 760, 493,
    403, 415, 394, 687, 700, 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
    978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 680, 879, 194, 572, 640,
    724, 926, 56, 204, 700, 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
    297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 134, 108, 571, 364, 631,
    212, 174, 643, 304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
    140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692, 829,
    82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
    804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 896, 831, 547, 261, 524,
    462, 293, 465, 502, 56, 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
    768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 61, 688, 793, 644, 986,
    403, 106, 366, 905, 644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
    780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857,
    265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
    936, 638
  ]

  # :nodoc:
  # Maximum possible number of Huffman table selectors.
  #
  # One selector is needed per group of `GROUP_RUN_LENGTH` symbols, plus one
  # for a trailing partial group.  The 900_000 figure is presumably the largest
  # bzip2 block size in bytes (level 9 x 100_000) -- confirm against the format
  # specification.
  MAXIMUM_SELECTORS = 900_000.tdiv(GROUP_RUN_LENGTH) + 1

  # Base class for errors raised by the BZip2 implementation.
  class Error < Exception
  end
end