00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "rsync.h"
00021
00022 extern int csum_length;
00023
00024 extern int verbose;
00025 extern int am_server;
00026
00027 extern int remote_version;
00028
00029 typedef unsigned short tag;
00030
00031 #define TABLESIZE (1<<16)
00032 #define NULL_TAG (-1)
00033
00034 static int false_alarms;
00035 static int tag_hits;
00036 static int matches;
00037 static int64 data_transfer;
00038
00039 static int total_false_alarms;
00040 static int total_tag_hits;
00041 static int total_matches;
00042
00043 extern struct stats stats;
00044
00045 struct target {
00046 tag t;
00047 int i;
00048 };
00049
00050 static struct target *targets;
00051
00052 static int *tag_table;
00053
00054 #define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
00055 #define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
00056
00057 static int compare_targets(struct target *t1,struct target *t2)
00058 {
00059 return((int)t1->t - (int)t2->t);
00060 }
00061
00062
00063 static void build_hash_table(struct sum_struct *s)
00064 {
00065 int i;
00066
00067 if (!tag_table)
00068 tag_table = (int *)malloc(sizeof(tag_table[0])*TABLESIZE);
00069
00070 targets = (struct target *)malloc(sizeof(targets[0])*s->count);
00071 if (!tag_table || !targets)
00072 out_of_memory("build_hash_table");
00073
00074 for (i=0;i<(int) s->count;i++) {
00075 targets[i].i = i;
00076 targets[i].t = gettag(s->sums[i].sum1);
00077 }
00078
00079 qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets);
00080
00081 for (i=0;i<TABLESIZE;i++)
00082 tag_table[i] = NULL_TAG;
00083
00084 for (i=s->count-1;i>=0;i--) {
00085 tag_table[targets[i].t] = i;
00086 }
00087 }
00088
00089
00090 static OFF_T last_match;
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105 static void matched(int f,struct sum_struct *s,struct map_struct *buf,
00106 OFF_T offset,int i)
00107 {
00108 OFF_T n = offset - last_match;
00109 OFF_T j;
00110
00111 if (verbose > 2 && i >= 0)
00112 rprintf(FINFO,"match at %.0f last_match=%.0f j=%d len=%d n=%.0f\n",
00113 (double)offset,(double)last_match,i,s->sums[i].len,(double)n);
00114
00115 send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
00116 data_transfer += n;
00117
00118 if (i >= 0) {
00119 stats.matched_data += s->sums[i].len;
00120 n += s->sums[i].len;
00121 }
00122
00123 for (j=0;j<n;j+=CHUNK_SIZE) {
00124 int n1 = MIN(CHUNK_SIZE,n-j);
00125 sum_update(map_ptr(buf,last_match+j,n1),n1);
00126 }
00127
00128
00129 if (i >= 0)
00130 last_match = offset + s->sums[i].len;
00131 else
00132 last_match = offset;
00133
00134 if (buf) {
00135 show_progress(last_match, buf->file_size);
00136
00137 if (i == -1) end_progress(buf->file_size);
00138 }
00139 }
00140
00141
00142 static void hash_search(int f,struct sum_struct *s,
00143 struct map_struct *buf,OFF_T len)
00144 {
00145 OFF_T offset, end;
00146 int j,k, last_i;
00147 char sum2[SUM_LENGTH];
00148 uint32 s1, s2, sum;
00149 schar *map;
00150
00151
00152
00153 last_i = -1;
00154
00155 if (verbose > 2)
00156 rprintf(FINFO,"hash search b=%ld len=%.0f\n",
00157 (long) s->n, (double)len);
00158
00159
00160
00161 k = MIN(len, (OFF_T) s->n);
00162
00163 map = (schar *)map_ptr(buf,0,k);
00164
00165 sum = get_checksum1((char *)map, k);
00166 s1 = sum & 0xFFFF;
00167 s2 = sum >> 16;
00168 if (verbose > 3)
00169 rprintf(FINFO, "sum=%.8x k=%d\n", sum, k);
00170
00171 offset = 0;
00172
00173 end = len + 1 - s->sums[s->count-1].len;
00174
00175 if (verbose > 3)
00176 rprintf(FINFO, "hash search s->n=%ld len=%.0f count=%ld\n",
00177 (long) s->n, (double) len, (long) s->count);
00178
00179 do {
00180 tag t = gettag2(s1,s2);
00181 int done_csum2 = 0;
00182
00183 j = tag_table[t];
00184 if (verbose > 4)
00185 rprintf(FINFO,"offset=%.0f sum=%08x\n",(double)offset,sum);
00186
00187 if (j == NULL_TAG) {
00188 goto null_tag;
00189 }
00190
00191 sum = (s1 & 0xffff) | (s2 << 16);
00192 tag_hits++;
00193 for (; j < (int) s->count && targets[j].t == t; j++) {
00194 int l, i = targets[j].i;
00195
00196 if (sum != s->sums[i].sum1) continue;
00197
00198
00199 l = MIN(s->n,len-offset);
00200 if (l != s->sums[i].len) continue;
00201
00202 if (verbose > 3)
00203 rprintf(FINFO,"potential match at %.0f target=%d %d sum=%08x\n",
00204 (double)offset,j,i,sum);
00205
00206 if (!done_csum2) {
00207 map = (schar *)map_ptr(buf,offset,l);
00208 get_checksum2((char *)map,l,sum2);
00209 done_csum2 = 1;
00210 }
00211
00212 if (memcmp(sum2,s->sums[i].sum2,csum_length) != 0) {
00213 false_alarms++;
00214 continue;
00215 }
00216
00217
00218
00219 for (j++; j < (int) s->count && targets[j].t == t; j++) {
00220 int i2 = targets[j].i;
00221 if (i2 == last_i + 1) {
00222 if (sum != s->sums[i2].sum1) break;
00223 if (memcmp(sum2,s->sums[i2].sum2,csum_length) != 0) break;
00224
00225
00226 i = i2;
00227 break;
00228 }
00229 }
00230
00231 last_i = i;
00232
00233 matched(f,s,buf,offset,i);
00234 offset += s->sums[i].len - 1;
00235 k = MIN((len-offset), s->n);
00236 map = (schar *)map_ptr(buf,offset,k);
00237 sum = get_checksum1((char *)map, k);
00238 s1 = sum & 0xFFFF;
00239 s2 = sum >> 16;
00240 matches++;
00241 break;
00242 }
00243
00244 null_tag:
00245
00246 map = (schar *)map_ptr(buf,offset,k+1);
00247 s1 -= map[0] + CHAR_OFFSET;
00248 s2 -= k * (map[0]+CHAR_OFFSET);
00249
00250
00251 if (k < (len-offset)) {
00252 s1 += (map[k]+CHAR_OFFSET);
00253 s2 += s1;
00254 } else {
00255 --k;
00256 }
00257
00258
00259
00260
00261
00262
00263
00264 if (offset > last_match &&
00265 offset-last_match >= CHUNK_SIZE+s->n &&
00266 (end-offset > CHUNK_SIZE)) {
00267 matched(f,s,buf,offset - s->n, -2);
00268 }
00269 } while (++offset < end);
00270
00271 matched(f,s,buf,len,-1);
00272 map_ptr(buf,len-1,1);
00273 }
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290 void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
00291 {
00292 char file_sum[MD4_SUM_LENGTH];
00293 extern int write_batch;
00294
00295 last_match = 0;
00296 false_alarms = 0;
00297 tag_hits = 0;
00298 matches=0;
00299 data_transfer=0;
00300
00301 sum_init();
00302
00303 if (len > 0 && s->count>0) {
00304 build_hash_table(s);
00305
00306 if (verbose > 2)
00307 rprintf(FINFO,"built hash table\n");
00308
00309 hash_search(f,s,buf,len);
00310
00311 if (verbose > 2)
00312 rprintf(FINFO,"done hash search\n");
00313 } else {
00314 OFF_T j;
00315
00316 for (j=0;j<(len-CHUNK_SIZE);j+=CHUNK_SIZE) {
00317 int n1 = MIN(CHUNK_SIZE,(len-CHUNK_SIZE)-j);
00318 matched(f,s,buf,j+n1,-2);
00319 }
00320 matched(f,s,buf,len,-1);
00321 }
00322
00323 sum_end(file_sum);
00324
00325 if (remote_version >= 14) {
00326 if (verbose > 2)
00327 rprintf(FINFO,"sending file_sum\n");
00328 write_buf(f,file_sum,MD4_SUM_LENGTH);
00329 if (write_batch)
00330 write_batch_delta_file(file_sum, MD4_SUM_LENGTH);
00331 }
00332
00333 if (targets) {
00334 free(targets);
00335 targets=NULL;
00336 }
00337
00338 if (verbose > 2)
00339 rprintf(FINFO, "false_alarms=%d tag_hits=%d matches=%d\n",
00340 false_alarms, tag_hits, matches);
00341
00342 total_tag_hits += tag_hits;
00343 total_false_alarms += false_alarms;
00344 total_matches += matches;
00345 stats.literal_data += data_transfer;
00346 }
00347
00348 void match_report(void)
00349 {
00350 if (verbose <= 1)
00351 return;
00352
00353 rprintf(FINFO,
00354 "total: matches=%d tag_hits=%d false_alarms=%d data=%.0f\n",
00355 total_matches,total_tag_hits,
00356 total_false_alarms,
00357 (double)stats.literal_data);
00358 }