Browse Source

use a hash table for message => sync record lookup

this removes the pathological O(<number of sync records> * <number of
new messages>) case at the cost of being a bit more cpu-intensive (but
O(<number of all messages>)) for old messages.
wip/maildir-uid-dupes-test
Oswald Buddenhagen 13 years ago
parent
commit
6d49c343fc
  1. 2
      src/isync.h
  2. 57
      src/sync.c
  3. 18
      src/util.c

2
src/isync.h

@ -442,6 +442,8 @@ void sort_ints( int *arr, int len );
void arc4_init( void ); void arc4_init( void );
unsigned char arc4_getbyte( void ); unsigned char arc4_getbyte( void );
int bucketsForSize( int size );
#ifdef HAVE_SYS_POLL_H #ifdef HAVE_SYS_POLL_H
# include <sys/poll.h> # include <sys/poll.h>
#else #else

57
src/sync.c

@ -146,7 +146,7 @@ typedef struct {
channel_conf_t *chan; channel_conf_t *chan;
store_t *ctx[2]; store_t *ctx[2];
driver_t *drv[2]; driver_t *drv[2];
int state[2], ref_count, ret, lfd; int state[2], ref_count, nsrecs, ret, lfd;
int new_total[2], new_done[2]; int new_total[2], new_done[2];
int flags_total[2], flags_done[2]; int flags_total[2], flags_done[2];
int trash_total[2], trash_done[2]; int trash_total[2], trash_done[2];
@ -754,6 +754,7 @@ box_selected( int sts, void *aux )
srec->next = 0; srec->next = 0;
*svars->srecadd = srec; *svars->srecadd = srec;
svars->srecadd = &srec->next; svars->srecadd = &srec->next;
svars->nsrecs++;
} }
fclose( jfp ); fclose( jfp );
} else { } else {
@ -817,6 +818,7 @@ box_selected( int sts, void *aux )
srec->next = 0; srec->next = 0;
*svars->srecadd = srec; *svars->srecadd = srec;
svars->srecadd = &srec->next; svars->srecadd = &srec->next;
svars->nsrecs++;
} else { } else {
for (nsrec = srec; srec; srec = srec->next) for (nsrec = srec; srec; srec = srec->next)
if (srec->uid[M] == t1 && srec->uid[S] == t2) if (srec->uid[M] == t1 && srec->uid[S] == t2)
@ -1001,6 +1003,11 @@ typedef struct {
int aflags, dflags; int aflags, dflags;
} flag_vars_t; } flag_vars_t;
/*
 * One slot of the temporary hash table that box_loaded() builds to map a
 * message UID to its sync record. The table is probed linearly with
 * wrap-around; a slot whose uid is 0 is treated as unused by both the
 * insertion and the lookup loop.
 */
typedef struct {
int uid; /* key: srec->uid[t] of the stored record; 0 means the slot is empty */
sync_rec_t *srec; /* value: the sync record carrying that UID */
} sync_rec_map_t;
static void flags_set_del( int sts, void *aux ); static void flags_set_del( int sts, void *aux );
static void flags_set_sync( int sts, void *aux ); static void flags_set_sync( int sts, void *aux );
static void flags_set_sync_p2( sync_vars_t *svars, sync_rec_t *srec, int t ); static void flags_set_sync_p2( sync_vars_t *svars, sync_rec_t *srec, int t );
@ -1013,13 +1020,14 @@ static void
box_loaded( int sts, void *aux ) box_loaded( int sts, void *aux )
{ {
DECL_SVARS; DECL_SVARS;
sync_rec_t *srec, *nsrec = 0; sync_rec_t *srec;
sync_rec_map_t *srecmap;
message_t *tmsg; message_t *tmsg;
copy_vars_t *cv; copy_vars_t *cv;
flag_vars_t *fv; flag_vars_t *fv;
const char *diag;
int uid, minwuid, *mexcs, nmexcs, rmexcs, no[2], del[2], todel, t1, t2; int uid, minwuid, *mexcs, nmexcs, rmexcs, no[2], del[2], todel, t1, t2;
int sflags, nflags, aflags, dflags, nex; int sflags, nflags, aflags, dflags, nex;
unsigned hashsz, idx;
char fbuf[16]; /* enlarge when support for keywords is added */ char fbuf[16]; /* enlarge when support for keywords is added */
if (check_ret( sts, aux )) if (check_ret( sts, aux ))
@ -1035,13 +1043,20 @@ box_loaded( int sts, void *aux )
} }
Fprintf( svars->jfp, "%c %d\n", "{}"[t], svars->ctx[t]->uidnext ); Fprintf( svars->jfp, "%c %d\n", "{}"[t], svars->ctx[t]->uidnext );
/*
* Mapping tmsg -> srec (this variant) is dog slow for new messages.
* Mapping srec -> tmsg is dog slow for deleted messages.
* One solution would be using binary search on an index array.
* msgs are already sorted by UID, srecs would have to be sorted by uid[t].
*/
debug( "matching messages on %s against sync records\n", str_ms[t] ); debug( "matching messages on %s against sync records\n", str_ms[t] );
hashsz = bucketsForSize( svars->nsrecs * 3 );
srecmap = nfcalloc( hashsz * sizeof(*srecmap) );
for (srec = svars->srecs; srec; srec = srec->next) {
if (srec->status & S_DEAD)
continue;
uid = srec->uid[t];
idx = (unsigned)((unsigned)uid * 1103515245U) % hashsz;
while (srecmap[idx].uid)
if (++idx == hashsz)
idx = 0;
srecmap[idx].uid = uid;
srecmap[idx].srec = srec;
}
for (tmsg = svars->ctx[t]->msgs; tmsg; tmsg = tmsg->next) { for (tmsg = svars->ctx[t]->msgs; tmsg; tmsg = tmsg->next) {
if (tmsg->srec) /* found by TUID */ if (tmsg->srec) /* found by TUID */
continue; continue;
@ -1050,21 +1065,14 @@ box_loaded( int sts, void *aux )
make_flags( tmsg->flags, fbuf ); make_flags( tmsg->flags, fbuf );
printf( svars->ctx[t]->opts & OPEN_SIZE ? " message %5d, %-4s, %6lu: " : " message %5d, %-4s: ", uid, fbuf, tmsg->size ); printf( svars->ctx[t]->opts & OPEN_SIZE ? " message %5d, %-4s, %6lu: " : " message %5d, %-4s: ", uid, fbuf, tmsg->size );
} }
for (srec = nsrec; srec; srec = srec->next) { idx = (unsigned)((unsigned)uid * 1103515245U) % hashsz;
if (srec->status & S_DEAD) while (srecmap[idx].uid) {
continue; if (srecmap[idx].uid == uid) {
if (srec->uid[t] == uid) { srec = srecmap[idx].srec;
diag = srec == nsrec ? "adjacently" : "after gap";
goto found;
}
}
for (srec = svars->srecs; srec != nsrec; srec = srec->next) {
if (srec->status & S_DEAD)
continue;
if (srec->uid[t] == uid) {
diag = "after reset";
goto found; goto found;
} }
if (++idx == hashsz)
idx = 0;
} }
tmsg->srec = 0; tmsg->srec = 0;
debug( "new\n" ); debug( "new\n" );
@ -1072,9 +1080,9 @@ box_loaded( int sts, void *aux )
found: found:
tmsg->srec = srec; tmsg->srec = srec;
srec->msg[t] = tmsg; srec->msg[t] = tmsg;
nsrec = srec->next; debug( "pairs %5d\n", srec->uid[1-t] );
debug( "pairs %5d %s\n", srec->uid[1-t], diag );
} }
free( srecmap );
if ((t == S) && svars->smaxxuid) { if ((t == S) && svars->smaxxuid) {
debug( "preparing master selection - max expired slave uid is %d\n", svars->smaxxuid ); debug( "preparing master selection - max expired slave uid is %d\n", svars->smaxxuid );
@ -1164,6 +1172,7 @@ box_loaded( int sts, void *aux )
srec->next = 0; srec->next = 0;
*svars->srecadd = srec; *svars->srecadd = srec;
svars->srecadd = &srec->next; svars->srecadd = &srec->next;
svars->nsrecs++;
srec->status = S_DONE; srec->status = S_DONE;
srec->flags = 0; srec->flags = 0;
srec->tuid[0] = 0; srec->tuid[0] = 0;

18
src/util.c

@ -475,6 +475,24 @@ arc4_getbyte( void )
return rs.s[(si + sj) & 0xff]; return rs.s[(si + sj) & 0xff];
} }
/*
 * Offsets added to 2^i (i being the array index) to obtain the bucket
 * counts handed out by bucketsForSize(). The sums are meant to be primes
 * just above a power of two, though not every entry really is one
 * (e.g. 8192 + 9 = 8201 = 59 * 139); the values are kept unchanged.
 * Indices 0 and 1 are never consulted, and the trailing zero entries
 * simply yield the plain power of two.
 */
static const unsigned char prime_deltas[] = {
	0,  0,  1,  3,  1,  5,  3,  3,  1,  9,  7,  5,  3,  9, 25,  3,
	1, 21,  3, 21,  7, 15,  9,  5,  3, 29, 15,  0,  0,  0,  0,  0
};

/*
 * Return a hash table bucket count that is at least `size`.
 *
 * The result is the smallest value 2^i + prime_deltas[i] (i >= 2) that is
 * not below `size`; anything up to 5 (including zero and negative values)
 * therefore yields 5. If `size` exceeds every value the table can provide,
 * `size` itself is returned, so the guarantee "result >= size" always
 * holds and callers relying on a free slot in the table stay safe.
 */
int
bucketsForSize( int size )
{
	/* The final table entry is never read: 2^31 would not fit a 32-bit int. */
	const unsigned last = sizeof(prime_deltas) / sizeof(prime_deltas[0]) - 1;
	/* Unsigned on purpose: doubling must not trigger signed overflow. */
	unsigned base = 4, bits = 2;

	for (;;) {
		unsigned cand;

		if (bits >= last)
			return size; /* table exhausted - fall back to the request itself */
		cand = base + prime_deltas[bits];
		/* The size <= 0 guard keeps the unsigned comparison meaningful. */
		if (size <= 0 || cand >= (unsigned)size)
			return (int)cand; /* fits: cand < 2^31 here (assumes int has >= 32 bits) */
		base <<= 1;
		bits++;
	}
}
#ifdef HAVE_SYS_POLL_H #ifdef HAVE_SYS_POLL_H
static struct pollfd *pollfds; static struct pollfd *pollfds;

Loading…
Cancel
Save