Main Page   Alphabetical List   Data Structures   File List   Data Fields   Globals   Related Pages  

match.c File Reference

Go to the source code of this file.

Data Structures

struct  target

Typedefs

typedef unsigned short tag

Functions

int compare_targets (struct target *t1, struct target *t2)
void build_hash_table (struct sum_struct *s)
void matched (int f, struct sum_struct *s, struct map_struct *buf, OFF_T offset, int i)
 Transmit a literal and/or match token. More...

void hash_search (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
void match_sums (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
 Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data. More...

void match_report (void)

Variables

int csum_length
int verbose
int am_server
int remote_version
int false_alarms
int tag_hits
int matches
int64 data_transfer
int total_false_alarms
int total_tag_hits
int total_matches
stats stats
targettargets
int * tag_table
OFF_T last_match


Typedef Documentation

typedef unsigned short tag
 

Definition at line 29 of file match.c.

Referenced by hash_search(), read_error_fd(), and read_unbuffered().


Function Documentation

int compare_targets struct target   t1,
struct target   t2
[static]
 

Definition at line 57 of file match.c.

References target::t.

Referenced by build_hash_table().

00058 {
00059   return((int)t1->t - (int)t2->t);
00060 }

void build_hash_table struct sum_struct   s [static]
 

Definition at line 63 of file match.c.

References compare_targets(), target::i, out_of_memory(), target::t, and tag_table.

Referenced by match_sums().

00064 {
00065   int i;
00066 
00067   if (!tag_table)
00068     tag_table = (int *)malloc(sizeof(tag_table[0])*TABLESIZE);
00069 
00070   targets = (struct target *)malloc(sizeof(targets[0])*s->count);
00071   if (!tag_table || !targets) 
00072     out_of_memory("build_hash_table");
00073 
00074   for (i=0;i<(int) s->count;i++) {
00075     targets[i].i = i;
00076     targets[i].t = gettag(s->sums[i].sum1);
00077   }
00078 
00079   qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets);
00080 
00081   for (i=0;i<TABLESIZE;i++)
00082     tag_table[i] = NULL_TAG;
00083 
00084   for (i=s->count-1;i>=0;i--) {    
00085     tag_table[targets[i].t] = i;
00086   }
00087 }

void matched int    f,
struct sum_struct   s,
struct map_struct   buf,
OFF_T    offset,
int    i
[static]
 

Transmit a literal and/or match token.

This delightfully-named function is called either when we find a match and need to transmit all the unmatched data leading up to it, or when we get bored of accumulating literal data and just need to transmit it. As a result of this second case, it is called even if we have not matched at all!

Parameters:
i  If >0, the number of a matched token. If 0, indicates we have only literal data.

Definition at line 105 of file match.c.

References data_transfer, end_progress(), map_struct::file_size, FINFO, last_match, sum_buf::len, map_ptr(), stats::matched_data, rprintf(), send_token(), show_progress(), sum_update(), sum_struct::sums, and verbose.

Referenced by hash_search(), and match_sums().

00107 {
00108         OFF_T n = offset - last_match;
00109         OFF_T j;
00110 
00111         if (verbose > 2 && i >= 0)
00112                 rprintf(FINFO,"match at %.0f last_match=%.0f j=%d len=%d n=%.0f\n",
00113                         (double)offset,(double)last_match,i,s->sums[i].len,(double)n);
00114 
00115         send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
00116         data_transfer += n;
00117 
00118         if (i >= 0) {
00119                 stats.matched_data += s->sums[i].len;
00120                 n += s->sums[i].len;
00121         }
00122   
00123         for (j=0;j<n;j+=CHUNK_SIZE) {
00124                 int n1 = MIN(CHUNK_SIZE,n-j);
00125                 sum_update(map_ptr(buf,last_match+j,n1),n1);
00126         }
00127 
00128 
00129         if (i >= 0)
00130                 last_match = offset + s->sums[i].len;
00131         else
00132                 last_match = offset;
00133 
00134         if (buf) {
00135                 show_progress(last_match, buf->file_size);
00136 
00137                 if (i == -1) end_progress(buf->file_size);
00138         }
00139 }

void hash_search int    f,
struct sum_struct   s,
struct map_struct   buf,
OFF_T    len
[static]
 

Definition at line 142 of file match.c.

References sum_struct::count, csum_length, false_alarms, FINFO, get_checksum1(), get_checksum2(), target::i, last_match, sum_buf::len, map_ptr(), matched(), matches, sum_struct::n, rprintf(), sum_buf::sum1, sum_buf::sum2, sum_struct::sums, target::t, tag, tag_hits, tag_table, and verbose.

Referenced by match_sums().

00144 {
00145         OFF_T offset, end;
00146         int j,k, last_i;
00147         char sum2[SUM_LENGTH];
00148         uint32 s1, s2, sum; 
00149         schar *map;
00150 
00151         /* last_i is used to encourage adjacent matches, allowing the RLL coding of the
00152            output to work more efficiently */
00153         last_i = -1;
00154 
00155         if (verbose > 2)
00156                 rprintf(FINFO,"hash search b=%ld len=%.0f\n",
00157                         (long) s->n, (double)len);
00158 
00159         /* cast is to make s->n signed; it should always be reasonably
00160          * small */
00161         k = MIN(len, (OFF_T) s->n);
00162         
00163         map = (schar *)map_ptr(buf,0,k);
00164         
00165         sum = get_checksum1((char *)map, k);
00166         s1 = sum & 0xFFFF;
00167         s2 = sum >> 16;
00168         if (verbose > 3)
00169                 rprintf(FINFO, "sum=%.8x k=%d\n", sum, k);
00170         
00171         offset = 0;
00172         
00173         end = len + 1 - s->sums[s->count-1].len;
00174         
00175         if (verbose > 3)
00176                 rprintf(FINFO, "hash search s->n=%ld len=%.0f count=%ld\n",
00177                         (long) s->n, (double) len, (long) s->count);
00178         
00179         do {
00180                 tag t = gettag2(s1,s2);
00181                 int done_csum2 = 0;
00182                         
00183                 j = tag_table[t];
00184                 if (verbose > 4)
00185                         rprintf(FINFO,"offset=%.0f sum=%08x\n",(double)offset,sum);
00186                 
00187                 if (j == NULL_TAG) {
00188                         goto null_tag;
00189                 }
00190 
00191                 sum = (s1 & 0xffff) | (s2 << 16);
00192                 tag_hits++;
00193                 for (; j < (int) s->count && targets[j].t == t; j++) {
00194                         int l, i = targets[j].i;
00195                         
00196                         if (sum != s->sums[i].sum1) continue;
00197                         
00198                         /* also make sure the two blocks are the same length */
00199                         l = MIN(s->n,len-offset);
00200                         if (l != s->sums[i].len) continue;                      
00201 
00202                         if (verbose > 3)
00203                                 rprintf(FINFO,"potential match at %.0f target=%d %d sum=%08x\n",
00204                                         (double)offset,j,i,sum);
00205                         
00206                         if (!done_csum2) {
00207                                 map = (schar *)map_ptr(buf,offset,l);
00208                                 get_checksum2((char *)map,l,sum2);
00209                                 done_csum2 = 1;
00210                         }
00211                         
00212                         if (memcmp(sum2,s->sums[i].sum2,csum_length) != 0) {
00213                                 false_alarms++;
00214                                 continue;
00215                         }
00216 
00217                         /* we've found a match, but now check to see
00218                            if last_i can hint at a better match */
00219                         for (j++; j < (int) s->count && targets[j].t == t; j++) {
00220                                 int i2 = targets[j].i;
00221                                 if (i2 == last_i + 1) {
00222                                         if (sum != s->sums[i2].sum1) break;
00223                                         if (memcmp(sum2,s->sums[i2].sum2,csum_length) != 0) break;
00224                                         /* we've found an adjacent match - the RLL coder 
00225                                            will be happy */
00226                                         i = i2;
00227                                         break;
00228                                 }
00229                         }
00230 
00231                         last_i = i;
00232                         
00233                         matched(f,s,buf,offset,i);
00234                         offset += s->sums[i].len - 1;
00235                         k = MIN((len-offset), s->n);
00236                         map = (schar *)map_ptr(buf,offset,k);
00237                         sum = get_checksum1((char *)map, k);
00238                         s1 = sum & 0xFFFF;
00239                         s2 = sum >> 16;
00240                         matches++;
00241                         break;
00242                 }
00243                 
00244         null_tag:
00245                 /* Trim off the first byte from the checksum */
00246                 map = (schar *)map_ptr(buf,offset,k+1);
00247                 s1 -= map[0] + CHAR_OFFSET;
00248                 s2 -= k * (map[0]+CHAR_OFFSET);
00249                 
00250                 /* Add on the next byte (if there is one) to the checksum */
00251                 if (k < (len-offset)) {
00252                         s1 += (map[k]+CHAR_OFFSET);
00253                         s2 += s1;
00254                 } else {
00255                         --k;
00256                 }
00257 
00258                 /* By matching early we avoid re-reading the
00259                    data 3 times in the case where a token
00260                    match comes a long way after last
00261                    match. The 3 reads are caused by the
00262                    running match, the checksum update and the
00263                    literal send. */
00264                 if (offset > last_match &&
00265                     offset-last_match >= CHUNK_SIZE+s->n && 
00266                     (end-offset > CHUNK_SIZE)) {
00267                         matched(f,s,buf,offset - s->n, -2);
00268                 }
00269         } while (++offset < end);
00270         
00271         matched(f,s,buf,len,-1);
00272         map_ptr(buf,len-1,1);
00273 }

void match_sums int    f,
struct sum_struct   s,
struct map_struct   buf,
OFF_T    len
 

Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data.

Also calculates the MD4 checksum of the whole file, using the md accumulator. This is transmitted with the file as protection against corruption on the wire.

Parameters:
s  Checksums received from the generator. If s->count == 0, then there are actually no checksums for this file.
len  Length of the file to send.

Definition at line 290 of file match.c.

References build_hash_table(), sum_struct::count, data_transfer, false_alarms, FINFO, hash_search(), last_match, stats::literal_data, matched(), matches, remote_version, rprintf(), sum_end(), sum_init(), tag_hits, total_false_alarms, total_matches, total_tag_hits, verbose, write_batch_delta_file(), and write_buf().

Referenced by send_files().

00291 {
00292         char file_sum[MD4_SUM_LENGTH];
00293         extern int write_batch;  /*  dw */
00294 
00295         last_match = 0;
00296         false_alarms = 0;
00297         tag_hits = 0;
00298         matches=0;
00299         data_transfer=0;
00300 
00301         sum_init();
00302 
00303         if (len > 0 && s->count>0) {
00304                 build_hash_table(s);
00305                 
00306                 if (verbose > 2) 
00307                         rprintf(FINFO,"built hash table\n");
00308                 
00309                 hash_search(f,s,buf,len);
00310                 
00311                 if (verbose > 2) 
00312                         rprintf(FINFO,"done hash search\n");
00313         } else {
00314                 OFF_T j;
00315                 /* by doing this in pieces we avoid too many seeks */
00316                 for (j=0;j<(len-CHUNK_SIZE);j+=CHUNK_SIZE) {
00317                         int n1 = MIN(CHUNK_SIZE,(len-CHUNK_SIZE)-j);
00318                         matched(f,s,buf,j+n1,-2);
00319                 }
00320                 matched(f,s,buf,len,-1);
00321         }
00322 
00323         sum_end(file_sum);
00324 
00325         if (remote_version >= 14) {
00326                 if (verbose > 2)
00327                         rprintf(FINFO,"sending file_sum\n");
00328                 write_buf(f,file_sum,MD4_SUM_LENGTH);
00329                 if (write_batch) /* dw */
00330                     write_batch_delta_file(file_sum, MD4_SUM_LENGTH);
00331         }
00332 
00333         if (targets) {
00334                 free(targets);
00335                 targets=NULL;
00336         }
00337         
00338         if (verbose > 2)
00339                 rprintf(FINFO, "false_alarms=%d tag_hits=%d matches=%d\n",
00340                         false_alarms, tag_hits, matches);
00341         
00342         total_tag_hits += tag_hits;
00343         total_false_alarms += false_alarms;
00344         total_matches += matches;
00345         stats.literal_data += data_transfer;
00346 }

void match_report void   
 

Definition at line 348 of file match.c.

References FINFO, stats::literal_data, rprintf(), total_false_alarms, total_matches, total_tag_hits, and verbose.

Referenced by send_files().

00349 {
00350         if (verbose <= 1)
00351                 return;
00352 
00353         rprintf(FINFO,
00354                 "total: matches=%d  tag_hits=%d  false_alarms=%d data=%.0f\n",
00355                 total_matches,total_tag_hits,
00356                 total_false_alarms,
00357                 (double)stats.literal_data);
00358 }


Variable Documentation

int csum_length
 

Definition at line 22 of file match.c.

Referenced by hash_search().

int verbose
 

Definition at line 24 of file match.c.

Referenced by hash_search(), match_report(), match_sums(), and matched().

int am_server
 

Definition at line 25 of file match.c.

int remote_version
 

Definition at line 27 of file match.c.

Referenced by match_sums().

int false_alarms [static]
 

Definition at line 34 of file match.c.

Referenced by hash_search(), and match_sums().

int tag_hits [static]
 

Definition at line 35 of file match.c.

Referenced by hash_search(), and match_sums().

int matches [static]
 

Definition at line 36 of file match.c.

Referenced by hash_search(), and match_sums().

int64 data_transfer [static]
 

Definition at line 37 of file match.c.

Referenced by match_sums(), and matched().

int total_false_alarms [static]
 

Definition at line 39 of file match.c.

Referenced by match_report(), and match_sums().

int total_tag_hits [static]
 

Definition at line 40 of file match.c.

Referenced by match_report(), and match_sums().

int total_matches [static]
 

Definition at line 41 of file match.c.

Referenced by match_report(), and match_sums().

struct stats stats
 

Definition at line 43 of file match.c.

struct target* targets [static]
 

Definition at line 50 of file match.c.

int* tag_table [static]
 

Definition at line 52 of file match.c.

Referenced by build_hash_table(), and hash_search().

OFF_T last_match [static]
 

Definition at line 90 of file match.c.

Referenced by hash_search(), match_sums(), and matched().


Generated on Tue Apr 16 12:37:38 2002 for rsync by doxygen1.2.15