1
2
3
4
5
6
7
8
9
|
/* trees.c -- output deflated data using Huffman coding
* Copyright (C) 1995-2017 Jean-loup Gailly
* detect_data_type() function provided freely by Cosmin Truta, 2006
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/*
* ALGORITHM
*
|
|
|
1
2
3
4
5
6
7
8
9
|
/* trees.c -- output deflated data using Huffman coding
* Copyright (C) 1995-2021 Jean-loup Gailly
* detect_data_type() function provided freely by Cosmin Truta, 2006
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/*
* ALGORITHM
*
|
| ︙ | | | ︙ | |
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
local int build_bl_tree OF((deflate_state *s));
local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
int blcodes));
local void compress_block OF((deflate_state *s, const ct_data *ltree,
const ct_data *dtree));
local int detect_data_type OF((deflate_state *s));
local unsigned bi_reverse OF((unsigned value, int length));
local void bi_windup OF((deflate_state *s));
local void bi_flush OF((deflate_state *s));
#ifdef GEN_TREES_H
local void gen_trees_header OF((void));
#endif
|
|
|
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
local int build_bl_tree OF((deflate_state *s));
local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
int blcodes));
local void compress_block OF((deflate_state *s, const ct_data *ltree,
const ct_data *dtree));
local int detect_data_type OF((deflate_state *s));
local unsigned bi_reverse OF((unsigned code, int len));
local void bi_windup OF((deflate_state *s));
local void bi_flush OF((deflate_state *s));
#ifdef GEN_TREES_H
local void gen_trees_header OF((void));
#endif
|
| ︙ | | | ︙ | |
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
|
/* Initialize the trees. */
for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
s->dyn_ltree[END_BLOCK].Freq = 1;
s->opt_len = s->static_len = 0L;
s->last_lit = s->matches = 0;
}
#define SMALLEST 1
/* Index within the heap array of least frequent node in the Huffman tree */
/* ===========================================================================
|
|
|
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
|
/* Initialize the trees. */
for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
s->dyn_ltree[END_BLOCK].Freq = 1;
s->opt_len = s->static_len = 0L;
s->sym_next = s->matches = 0;
}
#define SMALLEST 1
/* Index within the heap array of least frequent node in the Huffman tree */
/* ===========================================================================
|
| ︙ | | | ︙ | |
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
|
ulg stored_len; /* length of input block */
int last; /* one if this is the last block for a file */
{
send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
bi_windup(s); /* align on byte boundary */
put_short(s, (ush)stored_len);
put_short(s, (ush)~stored_len);
zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
s->pending += stored_len;
#ifdef ZLIB_DEBUG
s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
s->compressed_len += (stored_len + 4) << 3;
s->bits_sent += 2*16;
s->bits_sent += stored_len<<3;
#endif
|
>
|
|
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
|
ulg stored_len; /* length of input block */
int last; /* one if this is the last block for a file */
{
send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
bi_windup(s); /* align on byte boundary */
put_short(s, (ush)stored_len);
put_short(s, (ush)~stored_len);
if (stored_len)
zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
s->pending += stored_len;
#ifdef ZLIB_DEBUG
s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
s->compressed_len += (stored_len + 4) << 3;
s->bits_sent += 2*16;
s->bits_sent += stored_len<<3;
#endif
|
| ︙ | | | ︙ | |
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
|
/* Determine the best encoding. Compute the block lengths in bytes. */
opt_lenb = (s->opt_len+3+7)>>3;
static_lenb = (s->static_len+3+7)>>3;
Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
s->last_lit));
if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
} else {
Assert(buf != (char*)0, "lost buf");
opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
}
|
|
|
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
|
/* Determine the best encoding. Compute the block lengths in bytes. */
opt_lenb = (s->opt_len+3+7)>>3;
static_lenb = (s->static_len+3+7)>>3;
Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
s->sym_next / 3));
if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
} else {
Assert(buf != (char*)0, "lost buf");
opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
}
|
| ︙ | | | ︙ | |
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
|
* the current block must be flushed.
*/
int ZLIB_INTERNAL _tr_tally (s, dist, lc)
deflate_state *s;
unsigned dist; /* distance of matched string */
unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
{
s->d_buf[s->last_lit] = (ush)dist;
s->l_buf[s->last_lit++] = (uch)lc;
if (dist == 0) {
/* lc is the unmatched char */
s->dyn_ltree[lc].Freq++;
} else {
s->matches++;
/* Here, lc is the match length - MIN_MATCH */
dist--; /* dist = match distance - 1 */
Assert((ush)dist < (ush)MAX_DIST(s) &&
(ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
(ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
s->dyn_dtree[d_code(dist)].Freq++;
}
#ifdef TRUNCATE_BLOCK
/* Try to guess if it is profitable to stop the current block here */
if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
/* Compute an upper bound for the compressed length */
ulg out_length = (ulg)s->last_lit*8L;
ulg in_length = (ulg)((long)s->strstart - s->block_start);
int dcode;
for (dcode = 0; dcode < D_CODES; dcode++) {
out_length += (ulg)s->dyn_dtree[dcode].Freq *
(5L+extra_dbits[dcode]);
}
out_length >>= 3;
Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
s->last_lit, in_length, out_length,
100L - out_length*100L/in_length));
if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
}
#endif
return (s->last_lit == s->lit_bufsize-1);
/* We avoid equality with lit_bufsize because of wraparound at 64K
* on 16 bit machines and because stored blocks are restricted to
* 64K-1 bytes.
*/
}
/* ===========================================================================
* Send the block data compressed using the given Huffman trees
*/
local void compress_block(s, ltree, dtree)
deflate_state *s;
const ct_data *ltree; /* literal tree */
const ct_data *dtree; /* distance tree */
{
unsigned dist; /* distance of matched string */
int lc; /* match length or unmatched char (if dist == 0) */
unsigned lx = 0; /* running index in l_buf */
unsigned code; /* the code to send */
int extra; /* number of extra bits to send */
if (s->last_lit != 0) do {
dist = s->d_buf[lx];
lc = s->l_buf[lx++];
if (dist == 0) {
send_code(s, lc, ltree); /* send a literal byte */
Tracecv(isgraph(lc), (stderr," '%c' ", lc));
} else {
/* Here, lc is the match length - MIN_MATCH */
code = _length_code[lc];
send_code(s, code+LITERALS+1, ltree); /* send the length code */
|
|
>
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
|
>
|
|
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
|
* the current block must be flushed.
*/
int ZLIB_INTERNAL _tr_tally (s, dist, lc)
deflate_state *s;
unsigned dist; /* distance of matched string */
unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
{
s->sym_buf[s->sym_next++] = dist;
s->sym_buf[s->sym_next++] = dist >> 8;
s->sym_buf[s->sym_next++] = lc;
if (dist == 0) {
/* lc is the unmatched char */
s->dyn_ltree[lc].Freq++;
} else {
s->matches++;
/* Here, lc is the match length - MIN_MATCH */
dist--; /* dist = match distance - 1 */
Assert((ush)dist < (ush)MAX_DIST(s) &&
(ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
(ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
s->dyn_dtree[d_code(dist)].Freq++;
}
return (s->sym_next == s->sym_end);
}
/* ===========================================================================
* Send the block data compressed using the given Huffman trees
*/
local void compress_block(s, ltree, dtree)
deflate_state *s;
const ct_data *ltree; /* literal tree */
const ct_data *dtree; /* distance tree */
{
unsigned dist; /* distance of matched string */
int lc; /* match length or unmatched char (if dist == 0) */
unsigned sx = 0; /* running index in sym_buf */
unsigned code; /* the code to send */
int extra; /* number of extra bits to send */
if (s->sym_next != 0) do {
dist = s->sym_buf[sx++] & 0xff;
dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
lc = s->sym_buf[sx++];
if (dist == 0) {
send_code(s, lc, ltree); /* send a literal byte */
Tracecv(isgraph(lc), (stderr," '%c' ", lc));
} else {
/* Here, lc is the match length - MIN_MATCH */
code = _length_code[lc];
send_code(s, code+LITERALS+1, ltree); /* send the length code */
|
| ︙ | | | ︙ | |
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
|
extra = extra_dbits[code];
if (extra != 0) {
dist -= (unsigned)base_dist[code];
send_bits(s, dist, extra); /* send the extra distance bits */
}
} /* literal or match pair ? */
/* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
"pendingBuf overflow");
} while (lx < s->last_lit);
send_code(s, END_BLOCK, ltree);
}
/* ===========================================================================
* Check if the data type is TEXT or BINARY, using the following algorithm:
* - TEXT if the two conditions below are satisfied:
* a) There are no non-portable control characters belonging to the
* "black list" (0..6, 14..25, 28..31).
* b) There is at least one printable character belonging to the
* "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
* - BINARY otherwise.
* - The following partially-portable control characters form a
* "gray list" that is ignored in this detection algorithm:
* (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
* IN assertion: the fields Freq of dyn_ltree are set.
*/
local int detect_data_type(s)
deflate_state *s;
{
/* black_mask is the bit mask of black-listed bytes
* set bits 0..6, 14..25, and 28..31
* 0xf3ffc07f = binary 11110011111111111100000001111111
*/
unsigned long black_mask = 0xf3ffc07fUL;
int n;
/* Check for non-textual ("black-listed") bytes. */
for (n = 0; n <= 31; n++, black_mask >>= 1)
if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
return Z_BINARY;
/* Check for textual ("white-listed") bytes. */
if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
|| s->dyn_ltree[13].Freq != 0)
return Z_TEXT;
for (n = 32; n < LITERALS; n++)
if (s->dyn_ltree[n].Freq != 0)
return Z_TEXT;
/* There are no "black-listed" or "white-listed" bytes:
* this stream either is empty or has tolerated ("gray-listed") bytes only.
*/
return Z_BINARY;
}
/* ===========================================================================
* Reverse the first len bits of a code, using straightforward code (a faster
|
|
<
|
|
|
|
|
|
|
|
|
|
|
|
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
|
extra = extra_dbits[code];
if (extra != 0) {
dist -= (unsigned)base_dist[code];
send_bits(s, dist, extra); /* send the extra distance bits */
}
} /* literal or match pair ? */
/* Check that the overlay between pending_buf and sym_buf is ok: */
Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
} while (sx < s->sym_next);
send_code(s, END_BLOCK, ltree);
}
/* ===========================================================================
* Check if the data type is TEXT or BINARY, using the following algorithm:
* - TEXT if the two conditions below are satisfied:
* a) There are no non-portable control characters belonging to the
* "block list" (0..6, 14..25, 28..31).
* b) There is at least one printable character belonging to the
* "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
* - BINARY otherwise.
* - The following partially-portable control characters form a
* "gray list" that is ignored in this detection algorithm:
* (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
* IN assertion: the fields Freq of dyn_ltree are set.
*/
local int detect_data_type(s)
deflate_state *s;
{
/* block_mask is the bit mask of block-listed bytes
* set bits 0..6, 14..25, and 28..31
* 0xf3ffc07f = binary 11110011111111111100000001111111
*/
unsigned long block_mask = 0xf3ffc07fUL;
int n;
/* Check for non-textual ("block-listed") bytes. */
for (n = 0; n <= 31; n++, block_mask >>= 1)
if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
return Z_BINARY;
/* Check for textual ("allow-listed") bytes. */
if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
|| s->dyn_ltree[13].Freq != 0)
return Z_TEXT;
for (n = 32; n < LITERALS; n++)
if (s->dyn_ltree[n].Freq != 0)
return Z_TEXT;
/* There are no "block-listed" or "allow-listed" bytes:
* this stream either is empty or has tolerated ("gray-listed") bytes only.
*/
return Z_BINARY;
}
/* ===========================================================================
* Reverse the first len bits of a code, using straightforward code (a faster
|
| ︙ | | | ︙ | |