1 |
--- gzip-1.3.13/deflate.c.rsync 2009-09-26 20:43:28.000000000 +0200 |
2 |
+++ gzip-1.3.13/deflate.c 2009-12-27 09:40:58.000000000 +0100 |
3 |
@@ -131,6 +131,14 @@ |
4 |
#endif |
5 |
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ |
6 |
|
7 |
+#ifndef RSYNC_WIN |
8 |
+# define RSYNC_WIN 4096 |
9 |
+#endif |
10 |
+/* Size of rsync window, must be < MAX_DIST */ |
11 |
+ |
12 |
+#define RSYNC_SUM_MATCH(sum) ((sum) % RSYNC_WIN == 0) |
13 |
+/* Whether window sum matches magic value */ |
14 |
+ |
15 |
/* =========================================================================== |
16 |
* Local data used by the "longest match" routines. |
17 |
*/ |
18 |
@@ -212,6 +220,8 @@ |
19 |
unsigned near good_match; |
20 |
/* Use a faster search when the previous match is longer than this */ |
21 |
|
22 |
+local ulg rsync_sum; /* rolling sum of rsync window */ |
23 |
+local ulg rsync_chunk_end; /* next rsync sequence point */ |
24 |
|
25 |
/* Values for max_lazy_match, good_match and max_chain_length, depending on |
26 |
* the desired pack level (0..9). The values given below have been tuned to |
27 |
@@ -310,6 +320,10 @@ |
28 |
#endif |
29 |
/* prev will be initialized on the fly */ |
30 |
|
31 |
+ /* rsync params */ |
32 |
+ rsync_chunk_end = 0xFFFFFFFFUL; |
33 |
+ rsync_sum = 0; |
34 |
+ |
35 |
/* Set the default configuration parameters: |
36 |
*/ |
37 |
max_lazy_match = configuration_table[pack_level].max_lazy; |
38 |
@@ -546,6 +560,8 @@ |
39 |
memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE); |
40 |
match_start -= WSIZE; |
41 |
strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ |
42 |
+ if (rsync_chunk_end != 0xFFFFFFFFUL) |
43 |
+ rsync_chunk_end -= WSIZE; |
44 |
|
45 |
block_start -= (long) WSIZE; |
46 |
|
47 |
@@ -573,13 +589,46 @@ |
48 |
} |
49 |
} |
50 |
|
51 |
+local void rsync_roll(start, num) |
52 |
+ unsigned start; |
53 |
+ unsigned num; |
54 |
+{ |
55 |
+ unsigned i; |
56 |
+ |
57 |
+ if (start < RSYNC_WIN) { |
58 |
+ /* before window fills. */ |
59 |
+ for (i = start; i < RSYNC_WIN; i++) { |
60 |
+ if (i == start + num) return; |
61 |
+ rsync_sum += (ulg)window[i]; |
62 |
+ } |
63 |
+ num -= (RSYNC_WIN - start); |
64 |
+ start = RSYNC_WIN; |
65 |
+ } |
66 |
+ |
67 |
+ /* buffer after window full */ |
68 |
+ for (i = start; i < start+num; i++) { |
69 |
+ /* New character in */ |
70 |
+ rsync_sum += (ulg)window[i]; |
71 |
+ /* Old character out */ |
72 |
+ rsync_sum -= (ulg)window[i - RSYNC_WIN]; |
73 |
+ if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum)) |
74 |
+ rsync_chunk_end = i; |
75 |
+ } |
76 |
+} |
77 |
+ |
78 |
+/* =========================================================================== |
79 |
+ * Set rsync_chunk_end if window sum matches magic value. |
80 |
+ */ |
81 |
+#define RSYNC_ROLL(s, n) \ |
82 |
+ do { if (rsync) rsync_roll((s), (n)); } while(0) |
83 |
+ |
84 |
/* =========================================================================== |
85 |
* Flush the current block, with given end-of-file flag. |
86 |
* IN assertion: strstart is set to the end of the current match. |
87 |
*/ |
88 |
#define FLUSH_BLOCK(eof) \ |
89 |
flush_block(block_start >= 0L ? (char*)&window[(unsigned)block_start] : \ |
90 |
- (char*)NULL, (long)strstart - block_start, (eof)) |
91 |
+ (char*)NULL, (long)strstart - block_start, flush-1, (eof)) |
92 |
|
93 |
/* =========================================================================== |
94 |
* Processes a new input file and return its compressed length. This |
95 |
@@ -590,7 +639,7 @@ |
96 |
local off_t deflate_fast() |
97 |
{ |
98 |
IPos hash_head; /* head of the hash chain */ |
99 |
- int flush; /* set if current block must be flushed */ |
100 |
+ int flush; /* set if current block must be flushed, 2=>and padded */ |
101 |
unsigned match_length = 0; /* length of best match */ |
102 |
|
103 |
prev_length = MIN_MATCH-1; |
104 |
@@ -620,6 +669,7 @@ |
105 |
|
106 |
lookahead -= match_length; |
107 |
|
108 |
+ RSYNC_ROLL(strstart, match_length); |
109 |
/* Insert new strings in the hash table only if the match length |
110 |
* is not too large. This saves time but degrades compression. |
111 |
*/ |
112 |
@@ -648,9 +698,14 @@ |
113 |
/* No match, output a literal byte */ |
114 |
Tracevv((stderr,"%c",window[strstart])); |
115 |
flush = ct_tally (0, window[strstart]); |
116 |
+ RSYNC_ROLL(strstart, 1); |
117 |
lookahead--; |
118 |
strstart++; |
119 |
} |
120 |
+ if (rsync && strstart > rsync_chunk_end) { |
121 |
+ rsync_chunk_end = 0xFFFFFFFFUL; |
122 |
+ flush = 2; |
123 |
+ } |
124 |
if (flush) FLUSH_BLOCK(0), block_start = strstart; |
125 |
|
126 |
/* Make sure that we always have enough lookahead, except |
127 |
@@ -724,6 +779,7 @@ |
128 |
*/ |
129 |
lookahead -= prev_length-1; |
130 |
prev_length -= 2; |
131 |
+ RSYNC_ROLL(strstart, prev_length+1); |
132 |
do { |
133 |
strstart++; |
134 |
INSERT_STRING(strstart, hash_head); |
135 |
@@ -736,24 +792,40 @@ |
136 |
match_available = 0; |
137 |
match_length = MIN_MATCH-1; |
138 |
strstart++; |
139 |
- if (flush) FLUSH_BLOCK(0), block_start = strstart; |
140 |
|
141 |
+ if (rsync && strstart > rsync_chunk_end) { |
142 |
+ rsync_chunk_end = 0xFFFFFFFFUL; |
143 |
+ flush = 2; |
144 |
+ } |
145 |
+ if (flush) FLUSH_BLOCK(0), block_start = strstart; |
146 |
} else if (match_available) { |
147 |
/* If there was no match at the previous position, output a |
148 |
* single literal. If there was a match but the current match |
149 |
* is longer, truncate the previous match to a single literal. |
150 |
*/ |
151 |
Tracevv((stderr,"%c",window[strstart-1])); |
152 |
- if (ct_tally (0, window[strstart-1])) { |
153 |
- FLUSH_BLOCK(0), block_start = strstart; |
154 |
+ flush = ct_tally (0, window[strstart-1]); |
155 |
+ if (rsync && strstart > rsync_chunk_end) { |
156 |
+ rsync_chunk_end = 0xFFFFFFFFUL; |
157 |
+ flush = 2; |
158 |
} |
159 |
+ if (flush) FLUSH_BLOCK(0), block_start = strstart; |
160 |
+ RSYNC_ROLL(strstart, 1); |
161 |
strstart++; |
162 |
lookahead--; |
163 |
} else { |
164 |
/* There is no previous match to compare with, wait for |
165 |
* the next step to decide. |
166 |
*/ |
167 |
+ if (rsync && strstart > rsync_chunk_end) { |
168 |
+ /* Reset huffman tree */ |
169 |
+ rsync_chunk_end = 0xFFFFFFFFUL; |
170 |
+ flush = 2; |
171 |
+ FLUSH_BLOCK(0), block_start = strstart; |
172 |
+ } |
173 |
+ |
174 |
match_available = 1; |
175 |
+ RSYNC_ROLL(strstart, 1); |
176 |
strstart++; |
177 |
lookahead--; |
178 |
} |
179 |
--- gzip-1.3.13/doc/gzip.texi.rsync 2009-09-28 11:08:16.000000000 +0200 |
180 |
+++ gzip-1.3.13/doc/gzip.texi 2009-12-27 09:40:58.000000000 +0100 |
181 |
@@ -353,6 +353,14 @@ |
182 |
into the directory and compress all the files it finds there (or |
183 |
decompress them in the case of @command{gunzip}). |
184 |
|
185 |
+@item --rsyncable |
186 |
+While compressing, synchronize the output occasionally based on the |
187 |
+input. This reduces compression by about 1 percent in most cases, but |
188 |
+means that the @code{rsync} program can take advantage of similarities |
189 |
+in the uncompressed input when synchronizing two files compressed with |
190 |
+this flag. @code{gunzip} cannot tell the difference between a |
191 |
+compressed file created with this option, and one created without it. |
192 |
+ |
193 |
@item --suffix @var{suf} |
194 |
@itemx -S @var{suf} |
195 |
Use suffix @var{suf} instead of @samp{.gz}. Any suffix can be |
196 |
--- gzip-1.3.13/gzip.c.rsync 2009-09-26 20:56:02.000000000 +0200 |
197 |
+++ gzip-1.3.13/gzip.c 2009-12-27 09:40:58.000000000 +0100 |
198 |
@@ -229,6 +229,7 @@ |
199 |
unsigned insize; /* valid bytes in inbuf */ |
200 |
unsigned inptr; /* index of next byte to be processed in inbuf */ |
201 |
unsigned outcnt; /* bytes in output buffer */ |
202 |
+int rsync = 0; /* make rsyncable chunks */ |
203 |
|
204 |
static int handled_sig[] = |
205 |
{ |
206 |
@@ -282,6 +283,7 @@ |
207 |
{"best", 0, 0, '9'}, /* compress better */ |
208 |
{"lzw", 0, 0, 'Z'}, /* make output compatible with old compress */ |
209 |
{"bits", 1, 0, 'b'}, /* max number of bits per code (implies -Z) */ |
210 |
+ {"rsyncable", 0, 0, 'R'}, /* make rsync-friendly archive */ |
211 |
{ 0, 0, 0, 0 } |
212 |
}; |
213 |
|
214 |
@@ -363,6 +365,7 @@ |
215 |
" -Z, --lzw produce output compatible with old compress", |
216 |
" -b, --bits=BITS max number of bits per code (implies -Z)", |
217 |
#endif |
218 |
+ " --rsyncable Make rsync-friendly archive", |
219 |
"", |
220 |
"With no FILE, or when FILE is -, read standard input.", |
221 |
"", |
222 |
@@ -493,6 +496,9 @@ |
223 |
recursive = 1; |
224 |
#endif |
225 |
break; |
226 |
+ |
227 |
+ case 'R': |
228 |
+ rsync = 1; break; |
229 |
case 'S': |
230 |
#ifdef NO_MULTIPLE_DOTS |
231 |
if (*optarg == '.') optarg++; |
232 |
--- gzip-1.3.13/gzip.h.rsync 2009-09-26 20:43:28.000000000 +0200 |
233 |
+++ gzip-1.3.13/gzip.h 2009-12-27 09:40:58.000000000 +0100 |
234 |
@@ -158,6 +158,7 @@ |
235 |
extern unsigned insize; /* valid bytes in inbuf */ |
236 |
extern unsigned inptr; /* index of next byte to be processed in inbuf */ |
237 |
extern unsigned outcnt; /* bytes in output buffer */ |
238 |
+extern int rsync; /* deflate into rsyncable chunks */ |
239 |
|
240 |
extern off_t bytes_in; /* number of input bytes */ |
241 |
extern off_t bytes_out; /* number of output bytes */ |
242 |
@@ -306,7 +307,7 @@ |
243 |
/* in trees.c */ |
244 |
void ct_init OF((ush *attr, int *method)); |
245 |
int ct_tally OF((int dist, int lc)); |
246 |
-off_t flush_block OF((char *buf, ulg stored_len, int eof)); |
247 |
+off_t flush_block OF((char *buf, ulg stored_len, int pad, int eof)); |
248 |
|
249 |
/* in bits.c */ |
250 |
void bi_init OF((file_t zipfile)); |
251 |
--- gzip-1.3.13/trees.c.rsync 2009-09-26 20:43:28.000000000 +0200 |
252 |
+++ gzip-1.3.13/trees.c 2009-12-27 09:40:58.000000000 +0100 |
253 |
@@ -856,9 +856,10 @@ |
254 |
* trees or store, and output the encoded block to the zip file. This function |
255 |
* returns the total compressed length for the file so far. |
256 |
*/ |
257 |
-off_t flush_block(buf, stored_len, eof) |
258 |
+off_t flush_block(buf, stored_len, pad, eof) |
259 |
char *buf; /* input block, or NULL if too old */ |
260 |
ulg stored_len; /* length of input block */ |
261 |
+ int pad; /* pad output to byte boundary */ |
262 |
int eof; /* true if this is the last block for a file */ |
263 |
{ |
264 |
ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ |
265 |
@@ -951,6 +952,10 @@ |
266 |
Assert (input_len == bytes_in, "bad input size"); |
267 |
bi_windup(); |
268 |
compressed_len += 7; /* align on byte boundary */ |
269 |
+ } else if (pad && (compressed_len % 8) != 0) { |
270 |
+ send_bits((STORED_BLOCK<<1)+eof, 3); /* send block type */ |
271 |
+ compressed_len = (compressed_len + 3 + 7) & ~7L; |
272 |
+ copy_block(buf, 0, 1); /* with header */ |
273 |
} |
274 |
|
275 |
return compressed_len >> 3; |