Go to the source code of this file.
Data Structures | |
struct | target |
Typedefs | |
typedef unsigned short | tag |
Functions | |
int | compare_targets (struct target *t1, struct target *t2) |
void | build_hash_table (struct sum_struct *s) |
void | matched (int f, struct sum_struct *s, struct map_struct *buf, OFF_T offset, int i) |
Transmit a literal and/or match token. More... | |
void | hash_search (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
void | match_sums (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data. More... | |
void | match_report (void) |
Variables | |
int | csum_length |
int | verbose |
int | am_server |
int | remote_version |
int | false_alarms |
int | tag_hits |
int | matches |
int64 | data_transfer |
int | total_false_alarms |
int | total_tag_hits |
int | total_matches |
stats | stats |
target * | targets |
int * | tag_table |
OFF_T | last_match |
|
Definition at line 29 of file match.c. Referenced by hash_search(), read_error_fd(), and read_unbuffered(). |
|
Definition at line 57 of file match.c. References target::t. Referenced by build_hash_table().
|
|
Definition at line 63 of file match.c. References compare_targets(), target::i, out_of_memory(), target::t, and tag_table. Referenced by match_sums().
00064 { 00065 int i; 00066 00067 if (!tag_table) 00068 tag_table = (int *)malloc(sizeof(tag_table[0])*TABLESIZE); 00069 00070 targets = (struct target *)malloc(sizeof(targets[0])*s->count); 00071 if (!tag_table || !targets) 00072 out_of_memory("build_hash_table"); 00073 00074 for (i=0;i<(int) s->count;i++) { 00075 targets[i].i = i; 00076 targets[i].t = gettag(s->sums[i].sum1); 00077 } 00078 00079 qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets); 00080 00081 for (i=0;i<TABLESIZE;i++) 00082 tag_table[i] = NULL_TAG; 00083 00084 for (i=s->count-1;i>=0;i--) { 00085 tag_table[targets[i].t] = i; 00086 } 00087 } |
|
Transmit a literal and/or match token. This delightfully-named function is called either when we find a match and need to transmit all the unmatched data leading up to it, or when we get bored of accumulating literal data and just need to transmit it. As a result of this second case, it is called even if we have not matched at all!
Definition at line 105 of file match.c. References data_transfer, end_progress(), map_struct::file_size, FINFO, last_match, sum_buf::len, map_ptr(), stats::matched_data, rprintf(), send_token(), show_progress(), sum_update(), sum_struct::sums, and verbose. Referenced by hash_search(), and match_sums().
00107 { 00108 OFF_T n = offset - last_match; 00109 OFF_T j; 00110 00111 if (verbose > 2 && i >= 0) 00112 rprintf(FINFO,"match at %.0f last_match=%.0f j=%d len=%d n=%.0f\n", 00113 (double)offset,(double)last_match,i,s->sums[i].len,(double)n); 00114 00115 send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len); 00116 data_transfer += n; 00117 00118 if (i >= 0) { 00119 stats.matched_data += s->sums[i].len; 00120 n += s->sums[i].len; 00121 } 00122 00123 for (j=0;j<n;j+=CHUNK_SIZE) { 00124 int n1 = MIN(CHUNK_SIZE,n-j); 00125 sum_update(map_ptr(buf,last_match+j,n1),n1); 00126 } 00127 00128 00129 if (i >= 0) 00130 last_match = offset + s->sums[i].len; 00131 else 00132 last_match = offset; 00133 00134 if (buf) { 00135 show_progress(last_match, buf->file_size); 00136 00137 if (i == -1) end_progress(buf->file_size); 00138 } 00139 } |
|
Definition at line 142 of file match.c. References sum_struct::count, csum_length, false_alarms, FINFO, get_checksum1(), get_checksum2(), target::i, last_match, sum_buf::len, map_ptr(), matched(), matches, sum_struct::n, rprintf(), sum_buf::sum1, sum_buf::sum2, sum_struct::sums, target::t, tag, tag_hits, tag_table, and verbose. Referenced by match_sums().
00144 { 00145 OFF_T offset, end; 00146 int j,k, last_i; 00147 char sum2[SUM_LENGTH]; 00148 uint32 s1, s2, sum; 00149 schar *map; 00150 00151 /* last_i is used to encourage adjacent matches, allowing the RLL coding of the 00152 output to work more efficiently */ 00153 last_i = -1; 00154 00155 if (verbose > 2) 00156 rprintf(FINFO,"hash search b=%ld len=%.0f\n", 00157 (long) s->n, (double)len); 00158 00159 /* cast is to make s->n signed; it should always be reasonably 00160 * small */ 00161 k = MIN(len, (OFF_T) s->n); 00162 00163 map = (schar *)map_ptr(buf,0,k); 00164 00165 sum = get_checksum1((char *)map, k); 00166 s1 = sum & 0xFFFF; 00167 s2 = sum >> 16; 00168 if (verbose > 3) 00169 rprintf(FINFO, "sum=%.8x k=%d\n", sum, k); 00170 00171 offset = 0; 00172 00173 end = len + 1 - s->sums[s->count-1].len; 00174 00175 if (verbose > 3) 00176 rprintf(FINFO, "hash search s->n=%ld len=%.0f count=%ld\n", 00177 (long) s->n, (double) len, (long) s->count); 00178 00179 do { 00180 tag t = gettag2(s1,s2); 00181 int done_csum2 = 0; 00182 00183 j = tag_table[t]; 00184 if (verbose > 4) 00185 rprintf(FINFO,"offset=%.0f sum=%08x\n",(double)offset,sum); 00186 00187 if (j == NULL_TAG) { 00188 goto null_tag; 00189 } 00190 00191 sum = (s1 & 0xffff) | (s2 << 16); 00192 tag_hits++; 00193 for (; j < (int) s->count && targets[j].t == t; j++) { 00194 int l, i = targets[j].i; 00195 00196 if (sum != s->sums[i].sum1) continue; 00197 00198 /* also make sure the two blocks are the same length */ 00199 l = MIN(s->n,len-offset); 00200 if (l != s->sums[i].len) continue; 00201 00202 if (verbose > 3) 00203 rprintf(FINFO,"potential match at %.0f target=%d %d sum=%08x\n", 00204 (double)offset,j,i,sum); 00205 00206 if (!done_csum2) { 00207 map = (schar *)map_ptr(buf,offset,l); 00208 get_checksum2((char *)map,l,sum2); 00209 done_csum2 = 1; 00210 } 00211 00212 if (memcmp(sum2,s->sums[i].sum2,csum_length) != 0) { 00213 false_alarms++; 00214 continue; 00215 } 00216 00217 /* we've found a match, but now check to see 00218 if last_i can hint at a better match */ 00219 for (j++; j < (int) s->count && targets[j].t == t; j++) { 00220 int i2 = targets[j].i; 00221 if (i2 == last_i + 1) { 00222 if (sum != s->sums[i2].sum1) break; 00223 if (memcmp(sum2,s->sums[i2].sum2,csum_length) != 0) break; 00224 /* we've found an adjacent match - the RLL coder 00225 will be happy */ 00226 i = i2; 00227 break; 00228 } 00229 } 00230 00231 last_i = i; 00232 00233 matched(f,s,buf,offset,i); 00234 offset += s->sums[i].len - 1; 00235 k = MIN((len-offset), s->n); 00236 map = (schar *)map_ptr(buf,offset,k); 00237 sum = get_checksum1((char *)map, k); 00238 s1 = sum & 0xFFFF; 00239 s2 = sum >> 16; 00240 matches++; 00241 break; 00242 } 00243 00244 null_tag: 00245 /* Trim off the first byte from the checksum */ 00246 map = (schar *)map_ptr(buf,offset,k+1); 00247 s1 -= map[0] + CHAR_OFFSET; 00248 s2 -= k * (map[0]+CHAR_OFFSET); 00249 00250 /* Add on the next byte (if there is one) to the checksum */ 00251 if (k < (len-offset)) { 00252 s1 += (map[k]+CHAR_OFFSET); 00253 s2 += s1; 00254 } else { 00255 --k; 00256 } 00257 00258 /* By matching early we avoid re-reading the 00259 data 3 times in the case where a token 00260 match comes a long way after last 00261 match. The 3 reads are caused by the 00262 running match, the checksum update and the 00263 literal send. */ 00264 if (offset > last_match && 00265 offset-last_match >= CHUNK_SIZE+s->n && 00266 (end-offset > CHUNK_SIZE)) { 00267 matched(f,s,buf,offset - s->n, -2); 00268 } 00269 } while (++offset < end); 00270 00271 matched(f,s,buf,len,-1); 00272 map_ptr(buf,len-1,1); 00273 } |
|
Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data. Also calculates the MD4 checksum of the whole file, using the md accumulator. This is transmitted with the file as protection against corruption on the wire.
Definition at line 290 of file match.c. References build_hash_table(), sum_struct::count, data_transfer, false_alarms, FINFO, hash_search(), last_match, stats::literal_data, matched(), matches, remote_version, rprintf(), sum_end(), sum_init(), tag_hits, total_false_alarms, total_matches, total_tag_hits, verbose, write_batch_delta_file(), and write_buf(). Referenced by send_files().
00291 { 00292 char file_sum[MD4_SUM_LENGTH]; 00293 extern int write_batch; /* dw */ 00294 00295 last_match = 0; 00296 false_alarms = 0; 00297 tag_hits = 0; 00298 matches=0; 00299 data_transfer=0; 00300 00301 sum_init(); 00302 00303 if (len > 0 && s->count>0) { 00304 build_hash_table(s); 00305 00306 if (verbose > 2) 00307 rprintf(FINFO,"built hash table\n"); 00308 00309 hash_search(f,s,buf,len); 00310 00311 if (verbose > 2) 00312 rprintf(FINFO,"done hash search\n"); 00313 } else { 00314 OFF_T j; 00315 /* by doing this in pieces we avoid too many seeks */ 00316 for (j=0;j<(len-CHUNK_SIZE);j+=CHUNK_SIZE) { 00317 int n1 = MIN(CHUNK_SIZE,(len-CHUNK_SIZE)-j); 00318 matched(f,s,buf,j+n1,-2); 00319 } 00320 matched(f,s,buf,len,-1); 00321 } 00322 00323 sum_end(file_sum); 00324 00325 if (remote_version >= 14) { 00326 if (verbose > 2) 00327 rprintf(FINFO,"sending file_sum\n"); 00328 write_buf(f,file_sum,MD4_SUM_LENGTH); 00329 if (write_batch) /* dw */ 00330 write_batch_delta_file(file_sum, MD4_SUM_LENGTH); 00331 } 00332 00333 if (targets) { 00334 free(targets); 00335 targets=NULL; 00336 } 00337 00338 if (verbose > 2) 00339 rprintf(FINFO, "false_alarms=%d tag_hits=%d matches=%d\n", 00340 false_alarms, tag_hits, matches); 00341 00342 total_tag_hits += tag_hits; 00343 total_false_alarms += false_alarms; 00344 total_matches += matches; 00345 stats.literal_data += data_transfer; 00346 } |
|
Definition at line 348 of file match.c. References FINFO, stats::literal_data, rprintf(), total_false_alarms, total_matches, total_tag_hits, and verbose. Referenced by send_files().
00349 { 00350 if (verbose <= 1) 00351 return; 00352 00353 rprintf(FINFO, 00354 "total: matches=%d tag_hits=%d false_alarms=%d data=%.0f\n", 00355 total_matches,total_tag_hits, 00356 total_false_alarms, 00357 (double)stats.literal_data); 00358 } |
|
Definition at line 22 of file match.c. Referenced by hash_search(). |
|
Definition at line 24 of file match.c. Referenced by hash_search(), match_report(), match_sums(), and matched(). |
|
|
|
Definition at line 27 of file match.c. Referenced by match_sums(). |
|
Definition at line 34 of file match.c. Referenced by hash_search(), and match_sums(). |
|
Definition at line 35 of file match.c. Referenced by hash_search(), and match_sums(). |
|
Definition at line 36 of file match.c. Referenced by hash_search(), and match_sums(). |
|
Definition at line 37 of file match.c. Referenced by match_sums(), and matched(). |
|
Definition at line 39 of file match.c. Referenced by match_report(), and match_sums(). |
|
Definition at line 40 of file match.c. Referenced by match_report(), and match_sums(). |
|
Definition at line 41 of file match.c. Referenced by match_report(), and match_sums(). |
|
|
|
|
|
Definition at line 52 of file match.c. Referenced by build_hash_table(), and hash_search(). |
|
Definition at line 90 of file match.c. Referenced by hash_search(), match_sums(), and matched(). |