From: David van Moolenbroek Date: Mon, 21 Dec 2009 23:30:01 +0000 (+0000) Subject: Filter driver updates: X-Git-Tag: v3.1.6~140 X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/doxygen.log?a=commitdiff_plain;h=92ae5c81aeffab98d05103078808884cdf0fb2c6;p=minix.git Filter driver updates: - optionally vectorize I/O requests to work around hardware bugs - extend default buffer size to cover MFS's default maximum request size - use mmap directly, rather than alloc_contig - add 'nil' checksum type for comparison with layout - minor style corrections --- diff --git a/drivers/filter/driver.c b/drivers/filter/driver.c index e840308f2..86c577a69 100644 --- a/drivers/filter/driver.c +++ b/drivers/filter/driver.c @@ -793,48 +793,122 @@ static int paired_sendrec(message *m1, message *m2, int both) return r; } +/*===========================================================================* + * single_grant * + *===========================================================================*/ +static int single_grant(endpoint_t endpt, vir_bytes buf, int access, + cp_grant_id_t *gid, iovec_s_t vector[NR_IOREQS], size_t *sizep) +{ + /* Create grants for a vectored request to a single driver. + * Return the number of vector elements that were filled in. + */ + cp_grant_id_t grant; + size_t size, chunk; + int count; + + size = *sizep; + + /* Split up the request into chunks, if requested. This makes no + * difference at all, except that this works around a weird performance + * bug with large DMA PRDs on some machines. + */ + if (CHUNK_SIZE > 0) chunk = CHUNK_SIZE; + else chunk = size; + + /* Fill in the vector, creating a grant for each item. */ + for (count = 0; size > 0 && count < NR_IOREQS; count++) { + /* The last chunk will contain all the remaining data. 
*/ + if (chunk > size || count == NR_IOREQS - 1) + chunk = size; + + grant = cpf_grant_direct(endpt, buf, chunk, access); + if (!GRANT_VALID(grant)) + panic(__FILE__, "invalid grant", grant); + + vector[count].iov_grant = grant; + vector[count].iov_size = chunk; + + buf += chunk; + size -= chunk; + } + + /* Then create a grant for the vector itself. */ + *gid = cpf_grant_direct(endpt, (vir_bytes) vector, + sizeof(vector[0]) * count, CPF_READ | CPF_WRITE); + + if (!GRANT_VALID(*gid)) + panic(__FILE__, "invalid grant", *gid); + + return count; +} + /*===========================================================================* * paired_grant * *===========================================================================*/ -static void paired_grant(char *buf1, char *buf2, size_t size, int request, - cp_grant_id_t *gids, int both) +static int paired_grant(char *buf1, char *buf2, int request, + cp_grant_id_t *gids, iovec_s_t vectors[2][NR_IOREQS], size_t *sizes, + int both) { - /* Create memory grants. If USE_MIRROR, grant to both drivers, - * otherwise only to the main one. + /* Create memory grants, either to one or to both drivers. */ cp_grant_id_t gid; - int access; + int count, access; + count = 0; access = (request == FLT_WRITE) ? 
CPF_READ : CPF_WRITE; if(driver[DRIVER_MAIN].endpt > 0) { - gid = cpf_grant_direct(driver[DRIVER_MAIN].endpt, - (vir_bytes) buf1, size, access); - if(!GRANT_VALID(gid)) - panic(__FILE__, "invalid grant", gid); - gids[0] = gid; + count = single_grant(driver[DRIVER_MAIN].endpt, + (vir_bytes) buf1, access, &gids[0], vectors[0], + &sizes[0]); } if (both) { if(driver[DRIVER_BACKUP].endpt > 0) { - gid = cpf_grant_direct(driver[DRIVER_BACKUP].endpt, - (vir_bytes) buf2, size, access); - if(!GRANT_VALID(gid)) - panic(__FILE__, "invalid grant", gid); - gids[1] = gid; + count = single_grant(driver[DRIVER_BACKUP].endpt, + (vir_bytes) buf2, access, &gids[1], + vectors[1], &sizes[1]); } } + + /* Report the element count of the last single_grant() call, or 0 if none. */ + return count; } +/*===========================================================================* + * single_revoke * + *===========================================================================*/ +static void single_revoke(cp_grant_id_t gid, iovec_s_t vector[NR_IOREQS], + size_t *sizep, int count) +{ + /* Revoke all grants associated with a request to a single driver. + * Modify the given size to reflect the actual I/O performed. + */ + int i; + + /* Revoke the grants for all the elements of the vector. */ + for (i = 0; i < count; i++) { + cpf_revoke(vector[i].iov_grant); + *sizep -= vector[i].iov_size; + } + + /* Then revoke the grant for the vector itself. */ + cpf_revoke(gid); +} + /*===========================================================================* * paired_revoke * *===========================================================================*/ -static void paired_revoke(cp_grant_id_t gid1, cp_grant_id_t gid2, int both) +static void paired_revoke(cp_grant_id_t *gids, iovec_s_t vectors[2][NR_IOREQS], + size_t *sizes, int count, int both) { - cpf_revoke(gid1); + /* Revoke grants to drivers for a single request. 
+ */ + + single_revoke(gids[0], vectors[0], &sizes[0], count); if (both) - cpf_revoke(gid2); + single_revoke(gids[1], vectors[1], &sizes[1], count); } /*===========================================================================* @@ -842,30 +916,35 @@ static void paired_revoke(cp_grant_id_t gid1, cp_grant_id_t gid2, int both) *===========================================================================*/ int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request) { + iovec_s_t vectors[2][NR_IOREQS]; message m1, m2; cp_grant_id_t gids[2]; - int r, both; + size_t sizes[2]; + int r, both, count; gids[0] = gids[1] = GRANT_INVALID; + sizes[0] = sizes[1] = *sizep; /* Send two requests only if mirroring is enabled and the given request * is either FLT_READ2 or FLT_WRITE. */ both = (USE_MIRROR && request != FLT_READ); - m1.m_type = (request == FLT_WRITE) ? DEV_WRITE_S : DEV_READ_S; - m1.COUNT = *sizep; + count = paired_grant(bufa, bufb, request, gids, vectors, sizes, both); + + m1.m_type = (request == FLT_WRITE) ? DEV_SCATTER_S : DEV_GATHER_S; + m1.COUNT = count; m1.POSITION = ex64lo(pos); m1.HIGHPOS = ex64hi(pos); + m2 = m1; - paired_grant(bufa, bufb, *sizep, request, gids, both); m1.IO_GRANT = (char *) gids[0]; m2.IO_GRANT = (char *) gids[1]; r = paired_sendrec(&m1, &m2, both); - paired_revoke(gids[0], gids[1], both); + paired_revoke(gids, vectors, sizes, count, both); if(r != OK) { #if DEBUG @@ -875,7 +954,7 @@ int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request) return r; } - if (m1.m_type != TASK_REPLY || m1.REP_STATUS < 0) { + if (m1.m_type != TASK_REPLY || m1.REP_STATUS != OK) { printf("Filter: unexpected/invalid reply from main driver: " "(%x, %d)\n", m1.m_type, m1.REP_STATUS); @@ -883,26 +962,23 @@ int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request) (m1.m_type == TASK_REPLY) ? 
m1.REP_STATUS : EFAULT); } - if (m1.REP_STATUS != *sizep) { - printf("Filter: truncated reply %u to I/O request of size " - "0x%x at 0x%s; size 0x%s\n", - m1.REP_STATUS, *sizep, - print64(pos), print64(disk_size)); + if (sizes[0] != *sizep) { + printf("Filter: truncated reply from main driver\n"); /* If the driver returned a value *larger* than we requested, * OR if we did NOT exceed the disk size, then we should * report the driver for acting strangely! */ - if (m1.REP_STATUS > *sizep || - cmp64(add64u(pos, *sizep), disk_size) < 0) + if (sizes[0] > *sizep || + cmp64(add64u(pos, sizes[0]), disk_size) < 0) return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT); /* Return the actual size. */ - *sizep = m1.REP_STATUS; + *sizep = sizes[0]; } if (both) { - if (m2.m_type != TASK_REPLY || m2.REP_STATUS < 0) { + if (m2.m_type != TASK_REPLY || m2.REP_STATUS != OK) { printf("Filter: unexpected/invalid reply from " "backup driver (%x, %d)\n", m2.m_type, m2.REP_STATUS); @@ -911,18 +987,18 @@ int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request) m2.m_type == TASK_REPLY ? m2.REP_STATUS : EFAULT); } - if (m2.REP_STATUS != *sizep) { + if (sizes[1] != *sizep) { printf("Filter: truncated reply from backup driver\n"); /* As above */ - if (m2.REP_STATUS > *sizep || - cmp64(add64u(pos, *sizep), disk_size) < 0) + if (sizes[1] > *sizep || + cmp64(add64u(pos, sizes[1]), disk_size) < 0) return bad_driver(DRIVER_BACKUP, BD_PROTO, EFAULT); /* Return the actual size. 
*/ - if (*sizep >= m2.REP_STATUS) - *sizep = m2.REP_STATUS; + if (*sizep >= sizes[1]) + *sizep = sizes[1]; } } diff --git a/drivers/filter/inc.h b/drivers/filter/inc.h index 05e515616..f3b95fdad 100644 --- a/drivers/filter/inc.h +++ b/drivers/filter/inc.h @@ -21,6 +21,7 @@ #define SECTOR_SIZE 512 enum { + ST_NIL, /* Zero checksums */ ST_XOR, /* XOR-based checksums */ ST_CRC, /* CRC32-based checksums */ ST_MD5 /* MD5-based checksums */ @@ -53,8 +54,8 @@ enum { #define DRIVER_MAIN 0 #define DRIVER_BACKUP 1 -/* Requests for more than this many bytes need to go through malloc(). */ -#define BUF_SIZE (128 * 1024) +/* Requests for more than this many bytes will be allocated dynamically. */ +#define BUF_SIZE (256 * 1024) #define SBUF_SIZE (BUF_SIZE * 2) #define LABEL_SIZE 32 @@ -72,6 +73,7 @@ extern int NR_SUM_SEC; extern int NR_RETRIES; extern int NR_RESTARTS; extern int DRIVER_TIMEOUT; +extern int CHUNK_SIZE; extern char MAIN_LABEL[LABEL_SIZE]; extern char BACKUP_LABEL[LABEL_SIZE]; diff --git a/drivers/filter/main.c b/drivers/filter/main.c index a973f1a3b..834f4e212 100644 --- a/drivers/filter/main.c +++ b/drivers/filter/main.c @@ -22,13 +22,15 @@ int BAD_SUM_ERROR = 1; /* bad checksums are considered a driver error */ int USE_SUM_LAYOUT = 0; /* use checksumming layout on disk */ int NR_SUM_SEC = 8; /* number of checksums per checksum sector */ -int SUM_TYPE = 0; /* use XOR, CRC or MD5 */ +int SUM_TYPE = ST_CRC; /* use NIL, XOR, CRC, or MD5 */ int SUM_SIZE = 0; /* size of the stored checksum */ int NR_RETRIES = 3; /* number of times the request will be retried (N) */ int NR_RESTARTS = 3; /* number of times a driver will be restarted (M) */ int DRIVER_TIMEOUT = 5; /* timeout in seconds to declare a driver dead (T) */ +int CHUNK_SIZE = 0; /* driver requests will be vectorized at this size (0: no splitting) */ + char MAIN_LABEL[LABEL_SIZE] = ""; /* main disk driver label */ char BACKUP_LABEL[LABEL_SIZE] = ""; /* backup disk driver label */ int MAIN_MINOR = -1; /* main partition minor nr */ 
@@ -46,6 +48,7 @@ struct optset optset_table[] = { { "nosum", OPT_BOOL, &USE_CHECKSUM, 0 }, { "mirror", OPT_BOOL, &USE_MIRROR, 1 }, { "nomirror", OPT_BOOL, &USE_MIRROR, 0 }, + { "nil", OPT_BOOL, &SUM_TYPE, ST_NIL }, { "xor", OPT_BOOL, &SUM_TYPE, ST_XOR }, { "crc", OPT_BOOL, &SUM_TYPE, ST_CRC }, { "md5", OPT_BOOL, &SUM_TYPE, ST_MD5 }, @@ -57,6 +60,7 @@ struct optset optset_table[] = { { "M", OPT_INT, &NR_RESTARTS, 10 }, { "timeout", OPT_INT, &DRIVER_TIMEOUT, 10 }, { "T", OPT_INT, &DRIVER_TIMEOUT, 10 }, + { "chunk", OPT_INT, &CHUNK_SIZE, 10 }, { NULL } }; @@ -298,6 +302,9 @@ static int parse_arguments(int argc, char *argv[]) /* Determine the checksum size for the chosen checksum type. */ switch (SUM_TYPE) { + case ST_NIL: + SUM_SIZE = 4; /* for the sector number */ + break; case ST_XOR: SUM_SIZE = 16; /* compatibility */ break; @@ -327,6 +334,7 @@ static int parse_arguments(int argc, char *argv[]) printf(" SUM_TYPE : "); switch (SUM_TYPE) { + case ST_NIL: printf("nil"); break; case ST_XOR: printf("xor"); break; case ST_CRC: printf("crc"); break; case ST_MD5: printf("md5"); break; @@ -376,8 +384,16 @@ static void got_signal(void) exit(0); } -/* SEF functions and variables. */ -FORWARD _PROTOTYPE( void sef_local_startup, (void) ); +/*===========================================================================* + * sef_local_startup * + *===========================================================================*/ +static void sef_local_startup(void) +{ + /* No live update support for now. */ + + /* Let SEF perform startup. 
*/ + sef_startup(); +} /*===========================================================================* * main * @@ -396,7 +412,7 @@ int main(int argc, char *argv[]) return 1; } - if ((buf_array = alloc_contig(BUF_SIZE, 0, NULL)) == NULL) + if ((buf_array = flt_malloc(BUF_SIZE, NULL, 0)) == NULL) panic(__FILE__, "no memory available", NO_NUM); sum_init(); @@ -450,15 +466,3 @@ int main(int argc, char *argv[]) return 0; } - -/*===========================================================================* - * sef_local_startup * - *===========================================================================*/ -PRIVATE void sef_local_startup() -{ - /* No live update support for now. */ - - /* Let SEF perform startup. */ - sef_startup(); -} - diff --git a/drivers/filter/sum.c b/drivers/filter/sum.c index 1dca77765..86686da1c 100644 --- a/drivers/filter/sum.c +++ b/drivers/filter/sum.c @@ -21,14 +21,14 @@ static char *rb1_array; /* write readback buffer for disk 1 */ *===========================================================================*/ void sum_init(void) { - /* Initialize buffers. */ + /* Initialize buffers. 
*/ - ext_array = alloc_contig(SBUF_SIZE, 0, NULL); - rb0_array = alloc_contig(SBUF_SIZE, 0, NULL); - rb1_array = alloc_contig(SBUF_SIZE, 0, NULL); + ext_array = flt_malloc(SBUF_SIZE, NULL, 0); + rb0_array = flt_malloc(SBUF_SIZE, NULL, 0); + rb1_array = flt_malloc(SBUF_SIZE, NULL, 0); - if (ext_array == NULL || rb0_array == NULL || rb1_array == NULL) - panic(__FILE__, "no memory available", NO_NUM); + if (ext_array == NULL || rb0_array == NULL || rb1_array == NULL) + panic(__FILE__, "no memory available", NO_NUM); } /*===========================================================================* @@ -44,6 +44,14 @@ static void calc_sum(unsigned sector, char *data, char *sum) struct MD5Context ctx; switch(SUM_TYPE) { + case ST_NIL: + /* No checksum at all: only the sector number is stored */ + + q = (unsigned long *) sum; + *q = sector; + + break; + case ST_XOR: /* Basic XOR checksum */ p = (unsigned long *) data; diff --git a/drivers/filter/util.c b/drivers/filter/util.c index 678379961..7f476de39 100644 --- a/drivers/filter/util.c +++ b/drivers/filter/util.c @@ -20,8 +20,9 @@ char *flt_malloc(size_t size, char *sbuf, size_t ssize) if (size <= ssize) return sbuf; - p = alloc_contig(size, 0, NULL); - if (p == NULL) + p = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PREALLOC | MAP_CONTIG | MAP_ANON, -1, 0); /* anonymous mapping, no backing file */ + if (p == MAP_FAILED) panic(__FILE__, "out of memory", size); return p;