From be2087ecf90fc43a72c90e6279cbd3d26ba51739 Mon Sep 17 00:00:00 2001 From: David van Moolenbroek Date: Wed, 2 Dec 2009 10:08:58 +0000 Subject: [PATCH] Filter driver by Wu Bingzheng et al --- commands/scripts/MAKEDEV.sh | 8 +- drivers/Makefile | 1 + drivers/filter/Makefile | 31 ++ drivers/filter/crc.c | 88 ++++ drivers/filter/crc.h | 6 + drivers/filter/driver.c | 930 ++++++++++++++++++++++++++++++++++++ drivers/filter/inc.h | 101 ++++ drivers/filter/main.c | 446 +++++++++++++++++ drivers/filter/md5.c | 315 ++++++++++++ drivers/filter/md5.h | 26 + drivers/filter/optset.c | 128 +++++ drivers/filter/optset.h | 30 ++ drivers/filter/sum.c | 613 ++++++++++++++++++++++++ drivers/filter/util.c | 109 +++++ etc/drivers.conf | 22 + include/minix/dmap.h | 2 + servers/vfs/dmap.c | 2 +- 17 files changed, 2856 insertions(+), 2 deletions(-) create mode 100644 drivers/filter/Makefile create mode 100644 drivers/filter/crc.c create mode 100644 drivers/filter/crc.h create mode 100644 drivers/filter/driver.c create mode 100644 drivers/filter/inc.h create mode 100644 drivers/filter/main.c create mode 100644 drivers/filter/md5.c create mode 100644 drivers/filter/md5.h create mode 100644 drivers/filter/optset.c create mode 100644 drivers/filter/optset.h create mode 100644 drivers/filter/sum.c create mode 100644 drivers/filter/util.c diff --git a/commands/scripts/MAKEDEV.sh b/commands/scripts/MAKEDEV.sh index 66b2c785b..32f518264 100755 --- a/commands/scripts/MAKEDEV.sh +++ b/commands/scripts/MAKEDEV.sh @@ -23,7 +23,7 @@ case $#:$1 in ttypa ttypb ttypc ttypd ttype ttypf \ ttyq0 ttyq1 ttyq2 ttyq3 ttyq4 ttyq5 ttyq6 ttyq7 ttyq8 ttyq9 \ ttyqa ttyqb ttyqc ttyqd ttyqe ttyqf \ - eth klog random rescue + eth klog random rescue filter ;; 0:|1:-\?) 
cat >&2 <&2 ex=1 diff --git a/drivers/Makefile b/drivers/Makefile index 03f6391e4..c121e6b0e 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -28,6 +28,7 @@ all install depend clean: cd ./dpeth && $(MAKE) $@ cd ./log && $(MAKE) $@ cd ./bios_wini && $(MAKE) $@ + cd ./filter && $(MAKE) $@ cd ./random && $(MAKE) $@ cd ./readclock && $(MAKE) $@ cd ./dp8390 && $(MAKE) $@ diff --git a/drivers/filter/Makefile b/drivers/filter/Makefile new file mode 100644 index 000000000..1346aee4d --- /dev/null +++ b/drivers/filter/Makefile @@ -0,0 +1,31 @@ +# Makefile for filter driver +DRIVER = filter + +# programs, flags, etc. +CC = cc +CFLAGS = -DDEBUG=1 -DDEBUG2=0 +LDFLAGS = +LIBS = -lsys + +OBJ = main.o sum.o driver.o util.o optset.o crc.o md5.o + +# build local binary +all build: $(DRIVER) +$(DRIVER): $(OBJ) + $(CC) -o $@ $(LDFLAGS) $(OBJ) $(LIBS) + +# install with other drivers +install: /usr/sbin/$(DRIVER) +/usr/sbin/$(DRIVER): $(DRIVER) + install -o root -c $? $@ + +# clean up local files +clean: + rm -f *.o *.bak $(DRIVER) + +depend: + mkdep "$(CC) -E $(CPPFLAGS)" *.c > .depend + +# Include generated dependencies. +include .depend + diff --git a/drivers/filter/crc.c b/drivers/filter/crc.c new file mode 100644 index 000000000..20c91c61f --- /dev/null +++ b/drivers/filter/crc.c @@ -0,0 +1,88 @@ +/* CRC32 implementation taken from cksum.c */ + +/* Copyright 1991 by Vincent Archer + * You may freely redistribute this software, in source or binary + * form, provided that you do not alter this copyright mention in any + * way. 
+ */ + +#include + +unsigned long crctab[] = { + 0x7fffffff, + 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, + 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, + 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, 0x646ba8c0, + 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, + 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, + 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, + 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, 0xc8d75180, + 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, + 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, + 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 0x6b6b51f4, + 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, + 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, + 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, + 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, + 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, + 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 0x5edef90e, + 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, + 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, + 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, 0x8708a3d2, + 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, + 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 
0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, + 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, + 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, + 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, + 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, + 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 0x95bf4a82, + 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, + 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, + 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, + 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, + 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, + 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 0xbdbdf21c, + 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, + 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +unsigned long compute_crc( unsigned char *b, size_t n) +{ + int i; + unsigned long s = 0; + int aux = 0; + + while (n-- > 0) { + /* Compute the index to the crc table */ + i = (s >> 24) ^ ((unsigned int) (*b++)); + + if (i == 0) { + /* Replace an intermediate zero with the next value + * from the sequence */ + i = aux++; + if (aux >= sizeof(crctab) / sizeof(crctab[0])) aux = 0; + } + + /* New checksum value */ + s = (s << 8) ^ crctab[i]; + } + return(s); +} + diff --git a/drivers/filter/crc.h b/drivers/filter/crc.h new file mode 100644 index 000000000..ed38d9793 --- /dev/null +++ b/drivers/filter/crc.h @@ -0,0 +1,6 @@ +#ifndef _CRC_H +#define _CRC_H + +extern unsigned long compute_crc(unsigned char *b, size_t n); + +#endif /* _CRC_H */ diff --git
a/drivers/filter/driver.c b/drivers/filter/driver.c new file mode 100644 index 000000000..97e095850 --- /dev/null +++ b/drivers/filter/driver.c @@ -0,0 +1,930 @@ +/* Filter driver - lowest layer - disk driver management */ + +#include "inc.h" + +/* Drivers. */ +static struct { + char *label; + int minor; + endpoint_t endpt; + + int problem; /* one of BD_* */ + int error; /* one of E*, only relevant if problem>0 */ + int retries; + int kills; +} driver[2]; + +/* State variables. */ +static endpoint_t self_ep; +static asynmsg_t amsgtable[2]; + +static int size_known = 0; +static u64_t disk_size; + +static int problem_stats[BD_LAST] = { 0 }; + +/*===========================================================================* + * driver_open * + *===========================================================================*/ +static int driver_open(int which) +{ + /* Perform an open or close operation on the driver. This is + * unfinished code: we should never be doing a blocking sendrec() to + * the driver. + */ + message msg; + cp_grant_id_t gid; + struct partition part; + sector_t sectors; + int r; + + msg.m_type = DEV_OPEN; + msg.DEVICE = driver[which].minor; + msg.IO_ENDPT = self_ep; + r = sendrec(driver[which].endpt, &msg); + + if (r != OK) { + /* Should we restart the driver now? */ + printf("Filter: driver_open: sendrec returned %d\n", r); + + return RET_REDO; + } + + if(msg.m_type != TASK_REPLY || msg.REP_STATUS != OK) { + printf("Filter: driver_open: sendrec returned %d, %d\n", + msg.m_type, msg.REP_STATUS); + + return RET_REDO; + } + + /* Take the opportunity to retrieve the hard disk size. 
*/ + gid = cpf_grant_direct(driver[which].endpt, + (vir_bytes) &part, sizeof(part), CPF_WRITE); + if(!GRANT_VALID(gid)) + panic(__FILE__, "invalid grant", gid); + + msg.m_type = DEV_IOCTL_S; + msg.REQUEST = DIOCGETP; + msg.DEVICE = driver[which].minor; + msg.IO_ENDPT = self_ep; + msg.IO_GRANT = (char *) gid; + + r = sendrec(driver[which].endpt, &msg); + + cpf_revoke(gid); + + if (r != OK || msg.m_type != TASK_REPLY || msg.REP_STATUS != OK) { + /* Not sure what to do here, either. */ + printf("Filter: ioctl(DIOCGETP) returned (%d, %d)\n", + r, msg.m_type); + + return RET_REDO; + } + + if(!size_known) { + disk_size = part.size; + size_known = 1; + sectors = div64u(disk_size, SECTOR_SIZE); + if(cmp64(mul64u(sectors, SECTOR_SIZE), disk_size)) { + printf("Filter: partition too large\n"); + + return RET_REDO; + } +#if DEBUG + printf("Filter: partition size: 0x%s / %lu sectors\n", + print64(disk_size), sectors); +#endif + } else { + if(cmp64(disk_size, part.size)) { + printf("Filter: partition size mismatch (%s != %s)\n", + print64(part.size), print64(disk_size)); + + return RET_REDO; + } + } + + return OK; +} + +/*===========================================================================* + * driver_close * + *===========================================================================*/ +static int driver_close(int which) +{ + message msg; + int r; + + msg.m_type = DEV_CLOSE; + msg.DEVICE = driver[which].minor; + msg.IO_ENDPT = self_ep; + r = sendrec(driver[which].endpt, &msg); + + if (r != OK) { + /* Should we restart the driver now? 
*/ + printf("Filter: driver_close: sendrec returned %d\n", r); + + return RET_REDO; + } + + if(msg.m_type != TASK_REPLY || msg.REP_STATUS != OK) { + printf("Filter: driver_close: sendrec returned %d, %d\n", + msg.m_type, msg.REP_STATUS); + + return RET_REDO; + } + + return OK; +} + +/*===========================================================================* + * driver_init * + *===========================================================================*/ +void driver_init(void) +{ + /* Initialize the driver layer. */ + int r; + + self_ep = getprocnr(); + + memset(driver, 0, sizeof(driver)); + + /* Endpoints unknown. */ + driver[DRIVER_MAIN].endpt = NONE; + driver[DRIVER_BACKUP].endpt = NONE; + + /* Get disk driver's and this proc's endpoint. */ + driver[DRIVER_MAIN].label = MAIN_LABEL; + driver[DRIVER_MAIN].minor = MAIN_MINOR; + + r = ds_retrieve_u32(driver[DRIVER_MAIN].label, + (u32_t *) &driver[DRIVER_MAIN].endpt); + if (r != OK) { + printf("Filter: failed to get main disk driver's endpoint: " + "%d\n", r); + bad_driver(DRIVER_MAIN, BD_DEAD, EFAULT); + check_driver(DRIVER_MAIN); + } + else if (driver_open(DRIVER_MAIN) != OK) { + panic(__FILE__, "unhandled driver_open failure", NO_NUM); + } + + if(USE_MIRROR) { + driver[DRIVER_BACKUP].label = BACKUP_LABEL; + driver[DRIVER_BACKUP].minor = BACKUP_MINOR; + + if(!strcmp(driver[DRIVER_MAIN].label, + driver[DRIVER_BACKUP].label)) { + panic(__FILE__, "same driver: not tested", NO_NUM); + } + + r = ds_retrieve_u32(driver[DRIVER_BACKUP].label, + (u32_t *) &driver[DRIVER_BACKUP].endpt); + if (r != OK) { + printf("Filter: failed to get backup disk driver's " + "endpoint: %d\n", r); + bad_driver(DRIVER_BACKUP, BD_DEAD, EFAULT); + check_driver(DRIVER_BACKUP); + } + else if (driver_open(DRIVER_BACKUP) != OK) { + panic(__FILE__, "unhandled driver_open failure", + NO_NUM); + } + } +} + +/*===========================================================================* + * driver_shutdown * + 
*===========================================================================*/ +void driver_shutdown(void) +{ + /* Clean up. */ + +#if DEBUG + printf("Filter: %u driver deaths, %u protocol errors, " + "%u data errors\n", problem_stats[BD_DEAD], + problem_stats[BD_PROTO], problem_stats[BD_DATA]); +#endif + + if(driver_close(DRIVER_MAIN) != OK) + printf("Filter: DEV_CLOSE failed on shutdown (1)\n"); + + if(USE_MIRROR) + if(driver_close(DRIVER_BACKUP) != OK) + printf("Filter: DEV_CLOSE failed on shutdown (2)\n"); +} + +/*===========================================================================* + * get_raw_size * + *===========================================================================*/ +u64_t get_raw_size(void) +{ + /* Return the size of the raw disks as used by the filter driver. + */ + + return disk_size; +} + +/*===========================================================================* + * reset_kills * + *===========================================================================*/ +void reset_kills(void) +{ + /* Reset kill and retry statistics. */ + driver[DRIVER_MAIN].kills = 0; + driver[DRIVER_MAIN].retries = 0; + driver[DRIVER_BACKUP].kills = 0; + driver[DRIVER_BACKUP].retries = 0; +} + +/*===========================================================================* + * bad_driver * + *===========================================================================*/ +int bad_driver(int which, int type, int error) +{ + /* A disk driver has died or produced an error. Mark it so that we can + * deal with it later, and return RET_REDO to indicate that the + * current operation is to be retried. Also store an error code to + * return to the user if the situation is unrecoverable. 
+ */ + driver[which].problem = type; + driver[which].error = error; + + return RET_REDO; +} + +/*===========================================================================* + * new_driver_ep * + *===========================================================================*/ +static int new_driver_ep(int which) +{ + /* See if a new driver instance has already been started for the given + * driver, by retrieving its entry from DS. + */ + int r; + endpoint_t endpt; + + r = ds_retrieve_u32(driver[which].label, (u32_t *) &endpt); + + if (r != OK) { + printf("Filter: DS query for %s failed\n", + driver[which].label); + + return 0; + } + + if (endpt == driver[which].endpt) { +#if DEBUG + printf("Filter: same endpoint for %s\n", driver[which].label); +#endif + return 0; + } + +#if DEBUG + printf("Filter: new enpdoint for %s: %d -> %d\n", driver[which].label, + driver[which].endpt, endpt); +#endif + + driver[which].endpt = endpt; + + return 1; +} + +/*===========================================================================* + * check_problem * + *===========================================================================*/ +static int check_problem(int which, int problem, int retries, int *tell_rs) +{ + /* A problem has occurred with a driver. Update statistics, and decide + * what to do. If EAGAIN is returned, the driver should be restarted; + * any other result will be passed up. + */ + +#if DEBUG + printf("Filter: check_driver processing driver %d, problem %d\n", + which, problem); +#endif + + problem_stats[problem]++; + + if(new_driver_ep(which)) { +#if DEBUG + printf("Filter: check_problem: noticed a new driver\n"); +#endif + + if(driver_open(which) == OK) { +#if DEBUG2 + printf("Filter: open OK -> no recovery\n"); +#endif + return OK; + } else { +#if DEBUG2 + printf("Filter: open not OK -> recovery\n"); +#endif + problem = BD_PROTO; + problem_stats[problem]++; + } + } + + /* If the driver has died, we always need to restart it. 
If it has + * been giving problems, we first retry the request, up to N times, + * after which we kill and restart the driver. We restart the driver + * up to M times, after which we remove the driver from the mirror + * configuration. If we are not set up to do mirroring, we can only + * do one thing, and that is continue to limp along with the bad + * driver.. + */ + switch(problem) { + case BD_PROTO: + case BD_DATA: + driver[which].retries++; + +#if DEBUG + printf("Filter: disk driver %d has had " + "%d/%d retry attempts, %d/%d kills\n", which, + driver[which].retries, NR_RETRIES, + driver[which].kills, NR_RESTARTS); +#endif + + if (driver[which].retries < NR_RETRIES) { + if(retries == 1) { +#if DEBUG + printf("Filter: not restarting; retrying " + "(retries %d/%d, kills %d/%d)\n", + driver[which].retries, NR_RETRIES, + driver[which].kills, NR_RESTARTS); +#endif + return OK; + } +#if DEBUG + printf("Filter: restarting (retries %d/%d, " + "kills %d/%d, internal retry %d)\n", + driver[which].retries, NR_RETRIES, + driver[which].kills, NR_RESTARTS, retries); +#endif + } + +#if DEBUG + printf("Filter: disk driver %d has reached error " + "threshold, restarting driver\n", which); +#endif + + *tell_rs = 1; + break; + + case BD_DEAD: + /* Can't kill that which is already dead.. */ + *tell_rs = 0; + break; + + default: + panic(__FILE__, "invalid problem", problem); + } + + /* At this point, the driver will be restarted. */ + driver[which].retries = 0; + driver[which].kills++; + + if (driver[which].kills < NR_RESTARTS) + return EAGAIN; + + /* We've reached the maximum number of restarts for this driver. */ + if (USE_MIRROR) { + printf("Filter: kill threshold reached, disabling mirroring\n"); + + USE_MIRROR = 0; + + if (which == DRIVER_MAIN) { + driver[DRIVER_MAIN] = driver[DRIVER_BACKUP]; + + /* This is not necessary. 
*/ + strcpy(MAIN_LABEL, BACKUP_LABEL); + MAIN_MINOR = BACKUP_MINOR; + } + + driver[DRIVER_BACKUP].endpt = NONE; + + return OK; + } + else { + /* We tried, we really did. But now we give up. Tell the user. + */ + printf("Filter: kill threshold reached, returning error\n"); + + if (driver[which].error == EAGAIN) return EIO; + + return driver[which].error; + } +} + +/*===========================================================================* + * restart_driver * + *===========================================================================*/ +static void restart_driver(int which, int tell_rs) +{ + /* Restart the given driver. Block until the new instance is up. + */ + message msg; + endpoint_t endpt; + int r, w = 0; + + if (tell_rs) { + /* Tell RS to refresh or restart the driver */ + msg.m_type = RS_REFRESH; + msg.RS_CMD_ADDR = driver[which].label; + msg.RS_CMD_LEN = strlen(driver[which].label); + +#if DEBUG + printf("Filter: asking RS to refresh %s..\n", + driver[which].label); +#endif + + r = sendrec(RS_PROC_NR, &msg); + + if (r != OK || msg.m_type != OK) + panic(__FILE__, "RS request failed", r); + +#if DEBUG + printf("Filter: RS call succeeded\n"); +#endif + } + + /* Wait until the new driver instance is up, and get its endpoint. 
*/ +#if DEBUG + printf("Filter: endpoint update driver %d; old endpoint %d\n", + which, driver[which].endpt); +#endif + + do { + if(w) flt_sleep(1); + w = 1; + + r = ds_retrieve_u32(driver[which].label, (u32_t *) &endpt); + +#if DEBUG2 + if (r != OK) + printf("Filter: DS request failed (%d)\n", r); + else if (endpt == driver[which].endpt) + printf("Filter: DS returned same endpoint\n"); + else + printf("Filter: DS request OK, new endpoint\n"); +#endif + } while (r != OK || endpt == driver[which].endpt); + + driver[which].endpt = endpt; +} + +/*===========================================================================* + * check_driver * + *===========================================================================*/ +int check_driver(int which) +{ + /* See if the given driver has been troublesome, and if so, deal with + * it. + */ + int problem, tell_rs; + int r, retries = 0; + + problem = driver[which].problem; + + if (problem == BD_NONE) + return OK; + + do { + if(retries) { +#if DEBUG + printf("Filter: check_driver: retry number %d\n", + retries); +#endif + problem = BD_PROTO; + } + retries++; + driver[which].problem = BD_NONE; + + /* Decide what to do: continue operation, restart the driver, + * or return an error. + */ + r = check_problem(which, problem, retries, &tell_rs); + if (r != EAGAIN) + return r; + + /* Restarting the driver it is. First tell RS (if necessary), + * then wait for the new driver instance to come up. + */ + restart_driver(which, tell_rs); + + /* Finally, open the device on the new driver */ + } while (driver_open(which) != OK); + +#if DEBUG + printf("Filter: check_driver restarted driver %d, endpoint %d\n", + which, driver[which].endpt); +#endif + + return OK; +} + +/*===========================================================================* + * flt_senda * + *===========================================================================*/ +static int flt_senda(message *mess, int which) +{ + /* Send a message to one driver. 
Can only return OK at the moment. */ + int r; + asynmsg_t *amp; + + /* Fill in the last bits of the message. */ + mess->DEVICE = driver[which].minor; + mess->IO_ENDPT = self_ep; + + /* Send the message asynchronously. */ + amp = &amsgtable[which]; + amp->dst = driver[which].endpt; + amp->msg = *mess; + amp->flags = AMF_VALID; + r = senda(amsgtable, 2); + + if(r != OK) + panic(__FILE__, "senda returned error", r); + + return r; +} + +/*===========================================================================* + * check_senda * + *===========================================================================*/ +static int check_senda(int which) +{ + /* Check whether an earlier senda resulted in an error indicating the + * message never got delivered. Only in that case can we reliably say + * that the driver died. Return BD_DEAD in this case, and BD_PROTO + * otherwise. + */ + asynmsg_t *amp; + + amp = &amsgtable[which]; + + if ((amp->flags & AMF_DONE) && + (amp->result == EDEADSRCDST || amp->result == EDSTDIED)) { + + return BD_DEAD; + } + + return BD_PROTO; +} + +/*===========================================================================* + * flt_receive * + *===========================================================================*/ +static int flt_receive(message *mess, int which) +{ + /* Receive a message from one or either driver, unless a timeout + * occurs. Can only return OK or RET_REDO. + */ + int r; + + for (;;) { + r = receive(ANY, mess); + if(r != OK) + panic(__FILE__, "receive returned error", r); + + if(mess->m_source == CLOCK && is_notify(mess->m_type)) { + if (mess->NOTIFY_TIMESTAMP < flt_alarm(-1)) { +#if DEBUG + printf("Filter: SKIPPING old alarm " + "notification\n"); +#endif + continue; + } + +#if DEBUG + printf("Filter: timeout waiting for disk driver %d " + "reply!\n", which); +#endif + + /* If we're waiting for either driver, + * both are at fault. 
+ */ + if (which < 0) { + bad_driver(DRIVER_MAIN, + check_senda(DRIVER_MAIN), EFAULT); + + return bad_driver(DRIVER_BACKUP, + check_senda(DRIVER_BACKUP), EFAULT); + } + + /* Otherwise, just report the one not replying as dead. + */ + return bad_driver(which, check_senda(which), EFAULT); + } + + if (mess->m_source != driver[DRIVER_MAIN].endpt && + mess->m_source != driver[DRIVER_BACKUP].endpt) { +#if DEBUG + printf("Filter: got STRAY message %d from %d\n", + mess->m_type, mess->m_source); +#endif + + continue; + } + + /* We are waiting for a reply from one specific driver. */ + if (which >= 0) { + /* If the message source is that driver, good. */ + if (mess->m_source == driver[which].endpt) + break; + + /* This should probably be treated as a real protocol + * error. We do not abort any receives (not even paired + * receives) except because of timeouts. Getting here + * means a driver replied at least the timeout period + * later than expected, which should be enough reason + * to kill it really. The other explanation is that it + * is actually violating the protocol and sending bogus + * messages... + */ +#if DEBUG + printf("Filter: got UNEXPECTED reply from %d\n", + mess->m_source); +#endif + + continue; + } + + /* We got a message from one of the drivers, and we didn't + * care which one we wanted to receive from. A-OK. + */ + break; + } + + return OK; +} + +/*===========================================================================* + * flt_sendrec * + *===========================================================================*/ +static int flt_sendrec(message *mess, int which) +{ + int r; + + r = flt_senda(mess, which); + if(r != OK) + return r; + + if(check_senda(which) == BD_DEAD) { + return bad_driver(which, BD_DEAD, EFAULT); + } + + /* Set alarm. */ + flt_alarm(DRIVER_TIMEOUT); + + r = flt_receive(mess, which); + + /* Clear the alarm. 
*/ + flt_alarm(0); + return r; +} + +/*===========================================================================* + * do_sendrec_both * + *===========================================================================*/ +static int do_sendrec_both(message *m1, message *m2) +{ + /* If USE_MIRROR is set, call flt_sendrec() to both drivers. + * Otherwise, only call flt_sendrec() to the main driver. + * This function will only return either OK or RET_REDO. + */ + int r, which = -1; + message ma, mb; + + /* If the two disks use the same driver, call flt_sendrec() twice + * sequentially. Such a setup is not very useful though. + */ + if (!strcmp(driver[DRIVER_MAIN].label, driver[DRIVER_BACKUP].label)) { + if ((r = flt_sendrec(m1, DRIVER_MAIN)) != OK) return r; + return flt_sendrec(m2, DRIVER_BACKUP); + } + + /* If the two disks use different drivers, call flt_senda() + * twice, and then flt_receive(), and distinguish the return + * messages by means of m_source. + */ + if ((r = flt_senda(m1, DRIVER_MAIN)) != OK) return r; + if ((r = flt_senda(m2, DRIVER_BACKUP)) != OK) return r; + + /* Set alarm. */ + flt_alarm(DRIVER_TIMEOUT); + + /* The message received by the 1st flt_receive() may not be + * from DRIVER_MAIN. + */ + if ((r = flt_receive(&ma, -1)) != OK) { + flt_alarm(0); + return r; + } + + if (ma.m_source == driver[DRIVER_MAIN].endpt) { + which = DRIVER_BACKUP; + } else if (ma.m_source == driver[DRIVER_BACKUP].endpt) { + which = DRIVER_MAIN; + } else { + panic(__FILE__, "message from unexpected source", + ma.m_source); + } + + r = flt_receive(&mb, which); + + /* Clear the alarm.
*/ + flt_alarm(0); + + if(r != OK) + return r; + + if (ma.m_source == driver[DRIVER_MAIN].endpt) { + *m1 = ma; + *m2 = mb; + } else { + *m1 = mb; + *m2 = ma; + } + + return OK; +} + +/*===========================================================================* + * do_sendrec_one * + *===========================================================================*/ +static int do_sendrec_one(message *m1, message *m2) +{ + /* Only talk to the main driver. If something goes wrong, it will + * be fixed elsewhere. + * This function will only return either OK or RET_REDO. + */ + + return flt_sendrec(m1, DRIVER_MAIN); +} + +/*===========================================================================* + * paired_sendrec * + *===========================================================================*/ +static int paired_sendrec(message *m1, message *m2, int both) +{ + /* Sendrec with the disk driver. If the disk driver is down, and was + * restarted, redo the request, until the driver works fine, or can't + * be restarted again. + */ + int r; + +#if DEBUG2 + printf("paired_sendrec(%d) - <%d,%x:%x,%d> - %x,%x\n", + both, m1->m_type, m1->HIGHPOS, m1->POSITION, m1->COUNT, + m1->IO_GRANT, m2->IO_GRANT); +#endif + + if (both) + r = do_sendrec_both(m1, m2); + else + r = do_sendrec_one(m1, m2); + +#if DEBUG2 + if (r != OK) + printf("paired_sendrec about to return %d\n", r); +#endif + + return r; +} + +/*===========================================================================* + * paired_grant * + *===========================================================================*/ +static void paired_grant(char *buf1, char *buf2, size_t size, int request, + cp_grant_id_t *gids, int both) +{ + /* Create memory grants. If USE_MIRROR, grant to both drivers, + * otherwise only to the main one. + */ + cp_grant_id_t gid; + int access; + + access = (request == FLT_WRITE) ? 
CPF_READ : CPF_WRITE; + + if(driver[DRIVER_MAIN].endpt > 0) { + gid = cpf_grant_direct(driver[DRIVER_MAIN].endpt, + (vir_bytes) buf1, size, access); + if(!GRANT_VALID(gid)) + panic(__FILE__, "invalid grant", gid); + gids[0] = gid; + } + + if (both) { + if(driver[DRIVER_BACKUP].endpt > 0) { + gid = cpf_grant_direct(driver[DRIVER_BACKUP].endpt, + (vir_bytes) buf2, size, access); + if(!GRANT_VALID(gid)) + panic(__FILE__, "invalid grant", gid); + gids[1] = gid; + } + } +} + +/*===========================================================================* + * paired_revoke * + *===========================================================================*/ +static void paired_revoke(cp_grant_id_t gid1, cp_grant_id_t gid2, int both) +{ + cpf_revoke(gid1); + + if (both) + cpf_revoke(gid2); +} + +/*===========================================================================* + * read_write * + *===========================================================================*/ +int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, int request) +{ + message m1, m2; + cp_grant_id_t gids[2]; + int r, both; + + gids[0] = gids[1] = GRANT_INVALID; + + /* Send two requests only if mirroring is enabled and the given request + * is either FLT_READ2 or FLT_WRITE. + */ + both = (USE_MIRROR && request != FLT_READ); + + m1.m_type = (request == FLT_WRITE) ? 
DEV_WRITE_S : DEV_READ_S; + m1.COUNT = *sizep; + m1.POSITION = ex64lo(pos); + m1.HIGHPOS = ex64hi(pos); + m2 = m1; + + paired_grant(bufa, bufb, *sizep, request, gids, both); + m1.IO_GRANT = (char *) gids[0]; + m2.IO_GRANT = (char *) gids[1]; + + r = paired_sendrec(&m1, &m2, both); + + paired_revoke(gids[0], gids[1], both); + + if(r != OK) { +#if DEBUG + if (r != RET_REDO) + printf("Filter: paired_sendrec returned %d\n", r); +#endif + return r; + } + + if (m1.m_type != TASK_REPLY || m1.REP_STATUS < 0) { + printf("Filter: unexpected/invalid reply from main driver: " + "(%x, %d)\n", m1.m_type, m1.REP_STATUS); + + return bad_driver(DRIVER_MAIN, BD_PROTO, + (m1.m_type == TASK_REPLY) ? m1.REP_STATUS : EFAULT); + } + + if (m1.REP_STATUS != *sizep) { + printf("Filter: truncated reply %u to I/O request of size " + "0x%x at 0x%s; size 0x%s\n", + m1.REP_STATUS, *sizep, + print64(pos), print64(disk_size)); + + /* If the driver returned a value *larger* than we requested, + * OR if we did NOT exceed the disk size, then we should + * report the driver for acting strangely! + */ + if (m1.REP_STATUS > *sizep || + cmp64(add64u(pos, *sizep), disk_size) < 0) + return bad_driver(DRIVER_MAIN, BD_PROTO, EFAULT); + + /* Return the actual size. */ + *sizep = m1.REP_STATUS; + } + + if (both) { + if (m2.m_type != TASK_REPLY || m2.REP_STATUS < 0) { + printf("Filter: unexpected/invalid reply from " + "backup driver (%x, %d)\n", + m2.m_type, m2.REP_STATUS); + + return bad_driver(DRIVER_BACKUP, BD_PROTO, + m2.m_type == TASK_REPLY ? m2.REP_STATUS : + EFAULT); + } + if (m2.REP_STATUS != *sizep) { + printf("Filter: truncated reply from backup driver\n"); + + /* As above */ + if (m2.REP_STATUS > *sizep || + cmp64(add64u(pos, *sizep), disk_size) < 0) + return bad_driver(DRIVER_BACKUP, BD_PROTO, + EFAULT); + + /* Return the actual size. 
*/ + if (*sizep >= m2.REP_STATUS) + *sizep = m2.REP_STATUS; + } + } + + return OK; +} diff --git a/drivers/filter/inc.h b/drivers/filter/inc.h new file mode 100644 index 000000000..05e515616 --- /dev/null +++ b/drivers/filter/inc.h @@ -0,0 +1,101 @@ +/* Filter driver - general include file */ +#define _MINIX 1 +#define _SYSTEM 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SECTOR_SIZE 512 + +enum { + ST_XOR, /* XOR-based checksums */ + ST_CRC, /* CRC32-based checksums */ + ST_MD5 /* MD5-based checksums */ +}; + +enum { + FLT_WRITE, /* write to up to two disks */ + FLT_READ, /* read from one disk */ + FLT_READ2 /* read from both disks */ +}; + +/* Something was wrong and the disk driver has been restarted/refreshed, + * so the request needs to be redone. + */ +#define RET_REDO 1 + +/* The cases where the disk driver need to be restarted/refreshed by RS. + * BD_DEAD: the disk driver has died. Restart it. + * BD_PROTO: a protocol error has occurred. Refresh it. + * BD_DATA: a data error has occurred. Refresh it. + */ +enum { + BD_NONE, + BD_DEAD, + BD_PROTO, + BD_DATA, + BD_LAST +}; + +#define DRIVER_MAIN 0 +#define DRIVER_BACKUP 1 + +/* Requests for more than this many bytes need to go through malloc(). 
*/ +#define BUF_SIZE (128 * 1024) +#define SBUF_SIZE (BUF_SIZE * 2) + +#define LABEL_SIZE 32 + +typedef unsigned long sector_t; + +/* main.c */ +extern int USE_CHECKSUM; +extern int USE_MIRROR; +extern int BAD_SUM_ERROR; +extern int USE_SUM_LAYOUT; +extern int SUM_TYPE; +extern int SUM_SIZE; +extern int NR_SUM_SEC; +extern int NR_RETRIES; +extern int NR_RESTARTS; +extern int DRIVER_TIMEOUT; + +extern char MAIN_LABEL[LABEL_SIZE]; +extern char BACKUP_LABEL[LABEL_SIZE]; +extern int MAIN_MINOR; +extern int BACKUP_MINOR; + +/* sum.c */ +extern void sum_init(void); +extern int transfer(u64_t pos, char *buffer, size_t *sizep, int flag_rw); +extern u64_t convert(u64_t size); + +/* driver.c */ +extern void driver_init(void); +extern void driver_shutdown(void); +extern u64_t get_raw_size(void); +extern void reset_kills(void); +extern int check_driver(int which); +extern int bad_driver(int which, int type, int error); +extern int read_write(u64_t pos, char *bufa, char *bufb, size_t *sizep, + int flag_rw); + +/* util.c */ +extern char *flt_malloc(size_t size, char *sbuf, size_t ssize); +extern void flt_free(char *buf, size_t size, char *sbuf); +extern char *print64(u64_t p); +extern clock_t flt_alarm(clock_t dt); +extern void flt_sleep(int secs); diff --git a/drivers/filter/main.c b/drivers/filter/main.c new file mode 100644 index 000000000..0813ecd84 --- /dev/null +++ b/drivers/filter/main.c @@ -0,0 +1,446 @@ +/* Filter driver - top layer - block interface */ + +/* This is a filter driver, which lays above disk driver, and forwards + * messages between disk driver and its callers. The filter can detect + * corrupted data (toggled by USE_CHECKSUM) and recover it (toggled + * by USE_MIRROR). These two functions are independent from each other. + * The mirroring function requires two disks, on separate disk drivers. + */ + +#include "inc.h" +#include "optset.h" + +#define _POSIX_SOURCE 1 +#include + +/* Global settings. 
*/ +int USE_CHECKSUM = 0; /* enable checksumming */ +int USE_MIRROR = 0; /* enable mirroring */ + +int BAD_SUM_ERROR = 1; /* bad checksums are considered a driver error */ + +int USE_SUM_LAYOUT = 0; /* use checksumming layout on disk */ +int NR_SUM_SEC = 8; /* number of checksums per checksum sector */ + +int SUM_TYPE = 0; /* use XOR, CRC or MD5 */ +int SUM_SIZE = 0; /* size of the stored checksum */ + +int NR_RETRIES = 3; /* number of times the request will be retried (N) */ +int NR_RESTARTS = 3; /* number of times a driver will be restarted (M) */ +int DRIVER_TIMEOUT = 5; /* timeout in seconds to declare a driver dead (T) */ + +char MAIN_LABEL[LABEL_SIZE] = ""; /* main disk driver label */ +char BACKUP_LABEL[LABEL_SIZE] = ""; /* backup disk driver label */ +int MAIN_MINOR = -1; /* main partition minor nr */ +int BACKUP_MINOR = -1; /* backup partition minor nr */ + +struct optset optset_table[] = { + { "label0", OPT_STRING, MAIN_LABEL, LABEL_SIZE }, + { "label1", OPT_STRING, BACKUP_LABEL, LABEL_SIZE }, + { "minor0", OPT_INT, &MAIN_MINOR, 10 }, + { "minor1", OPT_INT, &BACKUP_MINOR, 10 }, + { "sum_sec", OPT_INT, &NR_SUM_SEC, 10 }, + { "layout", OPT_BOOL, &USE_SUM_LAYOUT, 1 }, + { "nolayout", OPT_BOOL, &USE_SUM_LAYOUT, 0 }, + { "sum", OPT_BOOL, &USE_CHECKSUM, 1 }, + { "nosum", OPT_BOOL, &USE_CHECKSUM, 0 }, + { "mirror", OPT_BOOL, &USE_MIRROR, 1 }, + { "nomirror", OPT_BOOL, &USE_MIRROR, 0 }, + { "xor", OPT_BOOL, &SUM_TYPE, ST_XOR }, + { "crc", OPT_BOOL, &SUM_TYPE, ST_CRC }, + { "md5", OPT_BOOL, &SUM_TYPE, ST_MD5 }, + { "sumerr", OPT_BOOL, &BAD_SUM_ERROR, 1 }, + { "nosumerr", OPT_BOOL, &BAD_SUM_ERROR, 0 }, + { "retries", OPT_INT, &NR_RETRIES, 10 }, + { "N", OPT_INT, &NR_RETRIES, 10 }, + { "restarts", OPT_INT, &NR_RESTARTS, 10 }, + { "M", OPT_INT, &NR_RESTARTS, 10 }, + { "timeout", OPT_INT, &DRIVER_TIMEOUT, 10 }, + { "T", OPT_INT, &DRIVER_TIMEOUT, 10 }, + { NULL } +}; + +/* Request message. 
*/ +static message m_in; +static endpoint_t who_e; /* m_source */ +static endpoint_t proc_e; /* IO_ENDPT */ +static cp_grant_id_t grant_id; /* IO_GRANT */ + +/* Data buffers. */ +static char *buf_array, *buffer; /* contiguous buffer */ + +/*===========================================================================* + * carry * + *===========================================================================*/ +static int carry(size_t size, int flag_rw) +{ + /* Carry data between caller proc and filter. + */ + + if (flag_rw == FLT_WRITE) + return sys_safecopyfrom(proc_e, grant_id, 0, + (vir_bytes) buffer, size, D); + else + return sys_safecopyto(proc_e, grant_id, 0, + (vir_bytes) buffer, size, D); +} + +/*===========================================================================* + * vcarry * + *===========================================================================*/ +static int vcarry(int grants, iovec_t *iov, int flag_rw, size_t size) +{ + /* Carry data between caller proc and filter, through grant-vector. 
+ */ + char *bufp; + int i, r; + size_t bytes; + + bufp = buffer; + for(i = 0; i < grants && size > 0; i++) { + bytes = MIN(size, iov[i].iov_size); + + if (flag_rw == FLT_WRITE) + r = sys_safecopyfrom(proc_e, + (vir_bytes) iov[i].iov_addr, 0, + (vir_bytes) bufp, bytes, D); + else + r = sys_safecopyto(proc_e, + (vir_bytes) iov[i].iov_addr, 0, + (vir_bytes) bufp, bytes, D); + + if(r != OK) + return r; + + bufp += bytes; + size -= bytes; + } + + return OK; +} + +/*===========================================================================* + * do_rdwt * + *===========================================================================*/ +static int do_rdwt(int flag_rw) +{ + size_t size, size_ret; + u64_t pos; + int r; + + pos = make64(m_in.POSITION, m_in.HIGHPOS); + size = m_in.COUNT; + + if (rem64u(pos, SECTOR_SIZE) != 0 || size % SECTOR_SIZE != 0) { + printf("Filter: unaligned request from caller!\n"); + + return EINVAL; + } + + buffer = flt_malloc(size, buf_array, BUF_SIZE); + + if(flag_rw == FLT_WRITE) + carry(size, flag_rw); + + reset_kills(); + + for (;;) { + size_ret = size; + r = transfer(pos, buffer, &size_ret, flag_rw); + if(r != RET_REDO) + break; + +#if DEBUG + printf("Filter: transfer yielded RET_REDO, checking drivers\n"); +#endif + if((r = check_driver(DRIVER_MAIN)) != OK) break; + if((r = check_driver(DRIVER_BACKUP)) != OK) break; + } + + if(r == OK && flag_rw == FLT_READ) + carry(size_ret, flag_rw); + + flt_free(buffer, size, buf_array); + return r != OK ? r : size_ret; +} + +/*===========================================================================* + * do_vrdwt * + *===========================================================================*/ +static int do_vrdwt(int flag_rw) +{ + size_t size, size_ret, bytes; + int grants; + int r, i; + u64_t pos; + iovec_t iov_proc[NR_IOREQS]; + + /* Extract informations. 
*/ + grants = m_in.COUNT; + if((r = sys_safecopyfrom(who_e, grant_id, 0, (vir_bytes) iov_proc, + grants * sizeof(iovec_t), D)) != OK) { + panic(__FILE__, "copying in grant vector failed", r); + } + + pos = make64(m_in.POSITION, m_in.HIGHPOS); + for(size = 0, i = 0; i < grants; i++) + size += iov_proc[i].iov_size; + + if (rem64u(pos, SECTOR_SIZE) != 0 || size % SECTOR_SIZE != 0) { + printf("Filter: unaligned request from caller!\n"); + return EINVAL; + } + + buffer = flt_malloc(size, buf_array, BUF_SIZE); + + if(flag_rw == FLT_WRITE) + vcarry(grants, iov_proc, flag_rw, size); + + reset_kills(); + + for (;;) { + size_ret = size; + r = transfer(pos, buffer, &size_ret, flag_rw); + if(r != RET_REDO) + break; + +#if DEBUG + printf("Filter: transfer yielded RET_REDO, checking drivers\n"); +#endif + if((r = check_driver(DRIVER_MAIN)) != OK) break; + if((r = check_driver(DRIVER_BACKUP)) != OK) break; + } + + if(r != OK) { + flt_free(buffer, size, buf_array); + return r; + } + + if(flag_rw == FLT_READ) + vcarry(grants, iov_proc, flag_rw, size_ret); + + /* Set the result-iovec. */ + for(i = 0; i < grants && size_ret > 0; i++) { + bytes = MIN(size_ret, iov_proc[i].iov_size); + + iov_proc[i].iov_size -= bytes; + size_ret -= bytes; + } + + /* Copy the caller's grant-table back. */ + if((r = sys_safecopyto(who_e, grant_id, 0, (vir_bytes) iov_proc, + grants * sizeof(iovec_t), D)) != OK) { + panic(__FILE__, "copying out grant vector failed", r); + } + + flt_free(buffer, size, buf_array); + return OK; +} + +/*===========================================================================* + * do_ioctl * + *===========================================================================*/ +static int do_ioctl(message *m) +{ + struct partition sizepart; + + switch(m->REQUEST) { + case DIOCSETP: + case DIOCTIMEOUT: + case DIOCOPENCT: + /* These do not make sense for us. 
*/ + return EINVAL; + + case DIOCGETP: + memset(&sizepart, 0, sizeof(sizepart)); + + /* The presented disk size is the raw partition size, + * corrected for space needed for checksums. + */ + sizepart.size = convert(get_raw_size()); + + if(sys_safecopyto(proc_e, (vir_bytes) grant_id, 0, + (vir_bytes) &sizepart, + sizeof(struct partition), D) != OK) { + printf("Filter: DIOCGETP safecopyto failed\n"); + return EIO; + } + break; + + default: + printf("Filter: unknown ioctl request: %d!\n", m->REQUEST); + return EINVAL; + } + + return OK; +} + +/*===========================================================================* + * parse_arguments * + *===========================================================================*/ +static int parse_arguments(int argc, char *argv[]) +{ + + if(argc != 2) + return EINVAL; + + optset_parse(optset_table, argv[1]); + + if (MAIN_LABEL[0] == 0 || MAIN_MINOR < 0 || MAIN_MINOR > 255) + return EINVAL; + if (USE_MIRROR && (BACKUP_LABEL[0] == 0 || + BACKUP_MINOR < 0 || BACKUP_MINOR > 255)) + return EINVAL; + + /* Checksumming implies a checksum layout. */ + if (USE_CHECKSUM) + USE_SUM_LAYOUT = 1; + + /* Determine the checksum size for the chosen checksum type. */ + switch (SUM_TYPE) { + case ST_XOR: + SUM_SIZE = 16; /* compatibility */ + break; + case ST_CRC: + SUM_SIZE = 4; + break; + case ST_MD5: + SUM_SIZE = 16; + break; + default: + return EINVAL; + } + + if (NR_SUM_SEC <= 0 || SUM_SIZE * NR_SUM_SEC > SECTOR_SIZE) + return EINVAL; + +#if DEBUG + printf("Filter starting. Configuration:\n"); + printf(" USE_CHECKSUM : %3s ", USE_CHECKSUM ? "yes" : "no"); + printf(" USE_MIRROR : %3s\n", USE_MIRROR ? "yes" : "no"); + + if (USE_CHECKSUM) { + printf(" BAD_SUM_ERROR : %3s ", + BAD_SUM_ERROR ? 
"yes" : "no"); + printf(" NR_SUM_SEC : %3d\n", NR_SUM_SEC); + + printf(" SUM_TYPE : "); + + switch (SUM_TYPE) { + case ST_XOR: printf("xor"); break; + case ST_CRC: printf("crc"); break; + case ST_MD5: printf("md5"); break; + } + + printf(" SUM_SIZE : %3d\n", SUM_SIZE); + } + else printf(" USE_SUM_LAYOUT : %3s\n", USE_SUM_LAYOUT ? "yes" : "no"); + + printf(" N : %3dx M : %3dx T : %3ds\n", + NR_RETRIES, NR_RESTARTS, DRIVER_TIMEOUT); + + printf(" MAIN_LABEL / MAIN_MINOR : %19s / %d\n", + MAIN_LABEL, MAIN_MINOR); + if (USE_MIRROR) { + printf(" BACKUP_LABEL / BACKUP_MINOR : %15s / %d\n", + BACKUP_LABEL, BACKUP_MINOR); + } + +#endif + + /* Convert timeout seconds to ticks. */ + DRIVER_TIMEOUT *= sys_hz(); + + return OK; +} + +/*===========================================================================* + * got_signal * + *===========================================================================*/ +static void got_signal(void) +{ + sigset_t set; + + /* See if PM sent us a SIGTERM. */ + if (getsigset(&set) != 0) return; + + if (!sigismember(&set, SIGTERM)) return; + + /* If so, shut down this driver. */ +#if DEBUG + printf("Filter: shutdown...\n"); +#endif + + driver_shutdown(); + + exit(0); +} + +/*===========================================================================* + * main * + *===========================================================================*/ +int main(int argc, char *argv[]) +{ + message m_out; + int r; + + r = parse_arguments(argc, argv); + if(r != OK) { + printf("Filter: wrong argument!\n"); + return 1; + } + + if ((buf_array = alloc_contig(BUF_SIZE, 0, NULL)) == NULL) + panic(__FILE__, "no memory available", NO_NUM); + + sum_init(); + + driver_init(); + + for (;;) { + /* Wait for request. 
*/ + if(receive(ANY, &m_in) != OK) { + panic(__FILE__, "receive failed", NO_NUM); + } + +#if DEBUG2 + printf("Filter: got request %d from %d\n", + m_in.m_type, m_in.m_source); +#endif + + if (is_notify(m_in.m_type) && m_in.m_source == PM_PROC_NR) + got_signal(); + + who_e = m_in.m_source; + proc_e = m_in.IO_ENDPT; + grant_id = (cp_grant_id_t) m_in.IO_GRANT; + + /* Forword the request message to the drivers. */ + switch(m_in.m_type) { + case DEV_OPEN: /* open/close is a noop for filter. */ + case DEV_CLOSE: r = OK; break; + case DEV_READ_S: r = do_rdwt(FLT_READ); break; + case DEV_WRITE_S: r = do_rdwt(FLT_WRITE); break; + case DEV_GATHER_S: r = do_vrdwt(FLT_READ); break; + case DEV_SCATTER_S: r = do_vrdwt(FLT_WRITE); break; + case DEV_IOCTL_S: r = do_ioctl(&m_in); break; + + default: + printf("Filter: ignoring unknown request %d from %d\n", + m_in.m_type, m_in.m_source); + continue; + } + +#if DEBUG2 + printf("Filter: replying with code %d\n", r); +#endif + + /* Send back reply message. */ + m_out.m_type = TASK_REPLY; + m_out.REP_ENDPT = proc_e; + m_out.REP_STATUS = r; + send(who_e, &m_out); + } + + return 0; +} diff --git a/drivers/filter/md5.c b/drivers/filter/md5.c new file mode 100644 index 000000000..f2962242a --- /dev/null +++ b/drivers/filter/md5.c @@ -0,0 +1,315 @@ +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. 
+ * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ + +/* This code was modified in 1997 by Jim Kingdon of Cyclic Software to + not require an integer type which is exactly 32 bits. This work + draws on the changes for the same purpose by Tatu Ylonen + as part of SSH, but since I didn't actually use + that code, there is no copyright issue. I hereby disclaim + copyright in any changes I have made; this code remains in the + public domain. */ + +#ifdef TEST +#include +#endif + +#include /* for memcpy() and memset() */ + +#include "md5.h" + +/* Little-endian byte-swapping routines. Note that these do not + depend on the size of datatypes such as uint32, nor do they require + us to detect the endianness of the machine we are running on. It + is possible they should be macros for speed, but I would be + surprised if they were a performance bottleneck for MD5. */ + +static uint32 +getu32 (const unsigned char *addr) +{ + return (((((unsigned long)addr[3] << 8) | addr[2]) << 8) + | addr[1]) << 8 | addr[0]; +} + +static void +putu32 (uint32 data, unsigned char *addr) +{ + addr[0] = (unsigned char)data; + addr[1] = (unsigned char)(data >> 8); + addr[2] = (unsigned char)(data >> 16); + addr[3] = (unsigned char)(data >> 24); +} + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +void +MD5Init (ctx) + struct MD5Context *ctx; +{ + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; + + ctx->bits[0] = 0; + ctx->bits[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. 
+ */
+void
+MD5Update (ctx, buf, len)
+	struct MD5Context *ctx;
+	unsigned char const *buf;
+	unsigned len;
+{
+	uint32 t;
+
+	/* Update bitcount: bits[0] is kept to 32 bits and bits[1] takes
+	 * the carry plus the part of len << 3 that does not fit.
+	 */
+
+	t = ctx->bits[0];
+	if ((ctx->bits[0] = (t + ((uint32)len << 3)) & 0xffffffff) < t)
+		ctx->bits[1]++;	/* Carry from low to high */
+	ctx->bits[1] += len >> 29;
+
+	t = (t >> 3) & 0x3f;	/* Bytes already buffered in ctx->in */
+
+	/* Handle any leading odd-sized chunks: top up the partially
+	 * filled block first, and stop if it still is not full.
+	 */
+
+	if ( t ) {
+		unsigned char *p = ctx->in + t;
+
+		t = 64-t;
+		if (len < t) {
+			memcpy(p, buf, len);
+			return;
+		}
+		memcpy(p, buf, t);
+		MD5Transform (ctx->buf, ctx->in);
+		buf += t;
+		len -= t;
+	}
+
+	/* Process data in 64-byte chunks */
+
+	while (len >= 64) {
+		memcpy(ctx->in, buf, 64);
+		MD5Transform (ctx->buf, ctx->in);
+		buf += 64;
+		len -= 64;
+	}
+
+	/* Handle any remaining bytes of data: fewer than 64 are left, and
+	 * they are kept in ctx->in for the next call.
+	 */
+
+	memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern
+ * 1 0* (64-bit count of bits processed, stored by putu32 with the
+ * least significant byte first)
+ */
+void
+MD5Final (digest, ctx)
+	unsigned char digest[16];
+	struct MD5Context *ctx;
+{
+	unsigned count;
+	unsigned char *p;
+
+	/* Compute number of bytes mod 64 */
+	count = (ctx->bits[0] >> 3) & 0x3F;
+
+	/* Set the first char of padding to 0x80.
This is safe since there is + always at least one byte free */ + p = ctx->in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + MD5Transform (ctx->buf, ctx->in); + + /* Now fill the next block with 56 bytes */ + memset(ctx->in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count-8); + } + + /* Append length in bits and transform */ + putu32(ctx->bits[0], ctx->in + 56); + putu32(ctx->bits[1], ctx->in + 60); + + MD5Transform (ctx->buf, ctx->in); + putu32(ctx->buf[0], digest); + putu32(ctx->buf[1], digest + 4); + putu32(ctx->buf[2], digest + 8); + putu32(ctx->buf[3], digest + 12); + memset(ctx, 0, sizeof(ctx)); /* In case it's sensitive */ +} + +#ifndef ASM_MD5 + +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w &= 0xffffffff, w = w<>(32-s), w += x ) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. 
+ */ +void +MD5Transform (buf, inraw) + uint32 buf[4]; + const unsigned char inraw[64]; +{ + register uint32 a, b, c, d; + uint32 in[16]; + int i; + + for (i = 0; i < 16; ++i) + in[i] = getu32 (inraw + 4 * i); + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12]+0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10]+0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11); + MD5STEP(F3, c, d, a, b, 
in[11]+0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} +#endif + +#ifdef TEST +/* Simple test program. Can use it to manually run the tests from + RFC1321 for example. 
*/ +#include + +int +main (int argc, char **argv) +{ + struct MD5Context context; + unsigned char checksum[16]; + int i; + int j; + + if (argc < 2) + { + fprintf (stderr, "usage: %s string-to-hash\n", argv[0]); + exit (1); + } + for (j = 1; j < argc; ++j) + { + printf ("MD5 (\"%s\") = ", argv[j]); + MD5Init (&context); + MD5Update (&context, (unsigned char *)argv[j], strlen (argv[j])); + MD5Final (checksum, &context); + for (i = 0; i < 16; i++) + { + printf ("%02x", (unsigned int) checksum[i]); + } + printf ("\n"); + } + return 0; +} +#endif /* TEST */ diff --git a/drivers/filter/md5.h b/drivers/filter/md5.h new file mode 100644 index 000000000..fb2e57a4a --- /dev/null +++ b/drivers/filter/md5.h @@ -0,0 +1,26 @@ +/* See md5.c for explanation and copyright information. */ + +#ifndef MD5_H +#define MD5_H + +/* Unlike previous versions of this code, uint32 need not be exactly + 32 bits, merely 32 bits or more. Choosing a data type which is 32 + bits instead of 64 is not important; speed is considerably more + important. ANSI guarantees that "unsigned long" will be big enough, + and always using it seems to have few disadvantages. */ +typedef unsigned long uint32; + +struct MD5Context { + uint32 buf[4]; + uint32 bits[2]; + unsigned char in[64]; +}; + +void MD5Init(struct MD5Context *context); +void MD5Update(struct MD5Context *context, + unsigned char const *buf, unsigned len); +void MD5Final(unsigned char digest[16], + struct MD5Context *context); +void MD5Transform(uint32 buf[4], const unsigned char in[64]); + +#endif /* !MD5_H */ diff --git a/drivers/filter/optset.c b/drivers/filter/optset.c new file mode 100644 index 000000000..a338a4717 --- /dev/null +++ b/drivers/filter/optset.c @@ -0,0 +1,128 @@ +/* This file provides functionality to parse strings of comma-separated + * options, each being either a single key name or a key=value pair, where the + * value may be enclosed in quotes. 
A table of optset entries is provided to + * determine which options are recognized, how to parse their values, and where + * to store those. Unrecognized options are silently ignored; improperly + * formatted options are silently set to reasonably acceptable values. + * + * The entry points into this file are: + * optset_parse parse the given options string using the given table + * + * Created: + * May 2009 (D.C. van Moolenbroek) + */ + +#define _MINIX 1 +#include +#include +#include +#include + +#include "optset.h" + +FORWARD _PROTOTYPE( void optset_parse_entry, (struct optset *entry, + char *ptr, int len) ); + +/*===========================================================================* + * optset_parse_entry * + *===========================================================================*/ +PRIVATE void optset_parse_entry(entry, ptr, len) +struct optset *entry; +char *ptr; +int len; +{ +/* Parse and store the value of a single option. + */ + char *dst; + int val; + + switch (entry->os_type) { + case OPT_BOOL: + *((int *) entry->os_ptr) = entry->os_val; + + break; + + case OPT_STRING: + if (len >= entry->os_val) + len = entry->os_val - 1; + + dst = (char *) entry->os_ptr; + + if (len > 0) + memcpy(dst, ptr, len); + dst[len] = 0; + + break; + + case OPT_INT: + if (len > 0) + val = strtol(ptr, NULL, entry->os_val); + else + val = 0; + + *((int *) entry->os_ptr) = val; + + break; + } +} + +/*===========================================================================* + * optset_parse * + *===========================================================================*/ +PUBLIC void optset_parse(table, string) +struct optset *table; +char *string; +{ +/* Parse a string of options, using the provided table of optset entries. + */ + char *p, *kptr, *vptr; + int i, klen, vlen; + + for (p = string; *p; ) { + /* Get the key name for the field. 
*/ + for (kptr = p, klen = 0; *p && *p != '=' && *p != ','; p++, klen++); + + if (*p == '=') { + /* The field has an associated value. */ + vptr = ++p; + + /* If the first character after the '=' is a quote character, + * find a matching quote character followed by either a comma + * or the terminating null character, and use the string in + * between. Otherwise, use the string up to the next comma or + * the terminating null character. + */ + if (*p == '\'' || *p == '"') { + p++; + + for (vlen = 0; *p && (*p != *vptr || + (p[1] && p[1] != ',')); p++, vlen++); + + if (*p) p++; + vptr++; + } + else + for (vlen = 0; *p && *p != ','; p++, vlen++); + } + else { + vptr = NULL; + vlen = 0; + } + + if (*p == ',') p++; + + /* Find a matching entry for this key in the given table. If found, + * call optset_parse_entry() on it. Silently ignore the option + * otherwise. + */ + for (i = 0; table[i].os_name != NULL; i++) { + if (strlen(table[i].os_name) == klen && + !strncasecmp(table[i].os_name, kptr, klen)) { + + optset_parse_entry(&table[i], vptr, vlen); + + break; + } + } + } +} diff --git a/drivers/filter/optset.h b/drivers/filter/optset.h new file mode 100644 index 000000000..87ea4ce9f --- /dev/null +++ b/drivers/filter/optset.h @@ -0,0 +1,30 @@ +#ifndef _OPTSET_H +#define _OPTSET_H + +enum { + OPT_BOOL, + OPT_STRING, + OPT_INT +}; + +/* An entry for the parser of an options set. The 'os_name' field must point + * to a string, which is treated case-insensitively; the last entry of a table + * must have NULL name. The 'os_type' field must be set to one of the OPT_ + * values defined above. The 'os_ptr' field must point to the field that is to + * receive the value of a recognized option. For OPT_STRING, it must point to a + * string of a size set in 'os_val'; the resulting string may be truncated, but + * will always be null-terminated. For OPT_BOOL, it must point to an int which + * will be set to the value in 'os_val' if the option is present. 
For OPT_INT, + * it must point to an int which will be set to the provided option value; + * 'os_val' is then a base passed to strtol(). + */ +struct optset { + char *os_name; + int os_type; + void *os_ptr; + int os_val; +}; + +_PROTOTYPE( void optset_parse, (struct optset *table, char *string) ); + +#endif /* _OPTSET_H */ diff --git a/drivers/filter/sum.c b/drivers/filter/sum.c new file mode 100644 index 000000000..1dca77765 --- /dev/null +++ b/drivers/filter/sum.c @@ -0,0 +1,613 @@ +/* Filter driver - middle layer - checksumming */ + +#include "inc.h" +#include "crc.h" +#include "md5.h" + +#define GROUP_SIZE (SECTOR_SIZE * NR_SUM_SEC) +#define SEC2SUM_NR(nr) ((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + NR_SUM_SEC) +#define LOG2PHYS(nr) ((nr)/NR_SUM_SEC*(NR_SUM_SEC+1) + (nr)%NR_SUM_SEC) + +#define POS2SEC(nr) div64u((nr), SECTOR_SIZE) +#define SEC2POS(nr) mul64u((nr), SECTOR_SIZE) + +/* Data buffers. */ +static char *ext_array, *ext_buffer; /* interspersed buffer */ +static char *rb0_array; /* write readback buffer for disk 0 */ +static char *rb1_array; /* write readback buffer for disk 1 */ + +/*===========================================================================* + * sum_init * + *===========================================================================*/ +void sum_init(void) +{ + /* Initialize buffers. */ + + ext_array = alloc_contig(SBUF_SIZE, 0, NULL); + rb0_array = alloc_contig(SBUF_SIZE, 0, NULL); + rb1_array = alloc_contig(SBUF_SIZE, 0, NULL); + + if (ext_array == NULL || rb0_array == NULL || rb1_array == NULL) + panic(__FILE__, "no memory available", NO_NUM); +} + +/*===========================================================================* + * calc_sum * + *===========================================================================*/ +static void calc_sum(unsigned sector, char *data, char *sum) +{ + /* Compute the checksum for a sector. The sector number must be part + * of the checksum in some way. 
+ */ + unsigned long crc, *p, *q; + int i, j; + struct MD5Context ctx; + + switch(SUM_TYPE) { + case ST_XOR: + /* Basic XOR checksum */ + p = (unsigned long *) data; + + memset(sum, 0, SUM_SIZE); + for(i = 0; i < SECTOR_SIZE / SUM_SIZE; i++) { + q = (unsigned long *) sum; + for(j = 0; j < SUM_SIZE / sizeof(*p); j++) { + *q ^= *p; + q++; + p++; + } + } + q = (unsigned long *) sum; + *q ^= sector; + + break; + + case ST_CRC: + /* CRC32 checksum */ + + crc = compute_crc((unsigned char *) data, SECTOR_SIZE); + + q = (unsigned long *) sum; + + *q = crc ^ sector; + + break; + + case ST_MD5: + /* MD5 checksum */ + + MD5Init(&ctx); + MD5Update(&ctx, (unsigned char *) data, SECTOR_SIZE); + MD5Update(&ctx, (unsigned char *) §or, sizeof(sector)); + MD5Final((unsigned char *) sum, &ctx); + + break; + + default: + panic(__FILE__, "invalid checksum type", SUM_TYPE); + } +} + +/*===========================================================================* + * read_sectors * + *===========================================================================*/ +static int read_sectors(char *buf, sector_t phys_sector, int count) +{ + /* Read 'count' sectors starting at 'phys_sector' into 'buf'. If an + * EOF occurs, zero-fill the remaining part of the buffer. 
+ */ + size_t size, wsize; + int r; + + size = wsize = count * SECTOR_SIZE; + + r = read_write(SEC2POS(phys_sector), buf, buf, &size, FLT_READ); + + if (r != OK) + return r; + + if (size != wsize) { +#if DEBUG + printf("Filter: EOF reading sector %lu\n", phys_sector); +#endif + + memset(buf + size, 0, wsize - size); + } + + return OK; +} + +/*===========================================================================* + * make_group_sum * + *===========================================================================*/ +static void make_group_sum(char *bufp, char *sump, sector_t sector, int index, + int count) +{ + /* Compute checksums for 'count' sectors within a group, starting at + * sector 'index' into the group, which has logical sector number + * 'sector'. The 'bufp' pointer points to the same first sector to + * start checksumming; 'sump' is a pointer to the checksum sector. + */ + + sump += index * SUM_SIZE; + + while (count--) { + calc_sum(sector, bufp, sump); + + bufp += SECTOR_SIZE; + + sump += SUM_SIZE; + sector++; + } +} + +/*===========================================================================* + * check_group_sum * + *===========================================================================*/ +static int check_group_sum(char *bufp, char *sump, sector_t sector, int index, + int count) +{ + /* Check checksums in a group. Parameters are the same as in + * make_group_sum(). Return OK if all checksums check out, or RET_REDO + * upon failure. 
+ */ + /* Scratch space for the computed checksum; only its first SUM_SIZE + * bytes are compared below. + */ + char sum_buffer[SECTOR_SIZE]; + + /* Skip to the stored checksum of the first sector to check. */ + sump += index * SUM_SIZE; + + while (count--) { + calc_sum(sector, bufp, sum_buffer); + + if (memcmp(sum_buffer, sump, SUM_SIZE)) { + printf("Filter: BAD CHECKSUM at sector %lu\n", sector); + + /* Without BAD_SUM_ERROR, a mismatch is only logged. */ + if (BAD_SUM_ERROR) + return bad_driver(DRIVER_MAIN, BD_DATA, EIO); + } + + bufp += SECTOR_SIZE; + sump += SUM_SIZE; + sector++; + } + + return OK; +} + +/*===========================================================================* + * make_sum * + *===========================================================================*/ +static int make_sum(sector_t current_sector, sector_t sectors_left) +{ + /* Compute checksums over all data in the buffer with expanded data. + * As side effect, possibly read in first and last checksum sectors + * and data to fill the gap between the last data sector and the last + * checksum sector. Return OK on success, or the error code from + * read_sectors() if reading fails. + */ + sector_t sector_in_group, group_left; + size_t size, gap; + char *extp; + int r; + + /* See the description of the extended buffer in transfer(). A number + * of points are relevant for this function in particular: + * + * 1) If the "xx" head of the buffer does not cover an entire group, + * we need to copy in the first checksum sector so that we can + * modify it. + * 2) We can generate checksums for the full "yyyyy" groups without + * copying in the corresponding checksum sectors first, because + * those sectors will be overwritten entirely anyway. + * 3) We copy in not only the checksum sector for the group containing + * the "zzz" tail data, but also all the data between "zzz" and the + * last checksum sector. This allows us to write all the data in + * the buffer in one operation. In theory, we could verify the + * checksum of the data in this gap for extra early failure + * detection, but we currently do not do this. + * + * If points 1 and 3 cover the same group (implying a small, unaligned + * write operation), the read operation is done only once. 
Whether + * point 1 or 3 is skipped depends on whether there is a gap before + * the checksum sector. + */ + + sector_in_group = current_sector % NR_SUM_SEC; + group_left = NR_SUM_SEC - sector_in_group; + + extp = ext_buffer; + + /* This loop covers points 1 and 2. */ + while (sectors_left >= group_left) { + size = group_left * SECTOR_SIZE; + + /* A partially covered group needs its existing checksum sector + * read in first; a fully covered one starts from a zeroed sector. + */ + if (sector_in_group > 0) { + if ((r = read_sectors(extp + size, + LOG2PHYS(current_sector) + group_left, + 1)) != OK) + return r; + } + else memset(extp + size, 0, SECTOR_SIZE); + + make_group_sum(extp, extp + size, current_sector, + sector_in_group, group_left); + + /* Skip the data of this group and its checksum sector. */ + extp += size + SECTOR_SIZE; + + sectors_left -= group_left; + current_sector += group_left; + + sector_in_group = 0; + group_left = NR_SUM_SEC; + } + + /* The remaining code covers point 3. */ + if (sectors_left > 0) { + size = sectors_left * SECTOR_SIZE; + + if (group_left != NR_SUM_SEC - sector_in_group) + panic(__FILE__, "group_left assertion", 0); + + /* 'gap' is unsigned, so check before subtracting: a wrapped-around + * difference would never be caught by a 'gap <= 0' test. + */ + if (sectors_left >= group_left) + panic(__FILE__, "gap assertion", 0); + + gap = group_left - sectors_left; + + /* Read the gap sectors plus the checksum sector that follows. */ + if ((r = read_sectors(extp + size, + LOG2PHYS(current_sector) + sectors_left, + gap + 1)) != OK) + return r; + + make_group_sum(extp, extp + size + gap * SECTOR_SIZE, + current_sector, sector_in_group, sectors_left); + } + + return OK; +} + +/*===========================================================================* + * check_sum * + *===========================================================================*/ +static int check_sum(sector_t current_sector, size_t bytes_left) +{ + /* Check checksums of all data in the buffer with expanded data. + * Return OK if all checksums are okay, or RET_REDO upon failure. 
+ */ + sector_t sector_in_group; + size_t size, groupbytes_left; + int count; + char *extp; + + extp = ext_buffer; + + sector_in_group = current_sector % NR_SUM_SEC; + groupbytes_left = (NR_SUM_SEC - sector_in_group) * SECTOR_SIZE; + + while (bytes_left > 0) { + size = MIN(bytes_left, groupbytes_left); + count = size / SECTOR_SIZE; + + /* The checksum sector directly follows the remaining data sectors + * of this group in the extended buffer. + */ + if (check_group_sum(extp, extp + groupbytes_left, + current_sector, sector_in_group, count)) + return RET_REDO; + + extp += size + SECTOR_SIZE; + + /* Also account for the checksum sector, without letting the + * unsigned 'bytes_left' wrap around. + */ + bytes_left -= MIN(size + SECTOR_SIZE, bytes_left); + current_sector += count; + + sector_in_group = 0; + groupbytes_left = GROUP_SIZE; + } + + return OK; +} + +/*===========================================================================* + * check_write * + *===========================================================================*/ +static int check_write(u64_t pos, size_t size) +{ + /* Read back the data just written, from both disks if mirroring is + * enabled, and check the result against the original. Return OK on + * success; report the malfunctioning driver and return RET_REDO + * otherwise. + */ + char *rb0_buffer, *rb1_buffer; + size_t orig_size; + int r; + + if (size == 0) + return OK; + + /* Unless mirroring is enabled, both pointers share one buffer. */ + rb0_buffer = rb1_buffer = + flt_malloc(size, rb0_array, SBUF_SIZE); + if (USE_MIRROR) + rb1_buffer = flt_malloc(size, rb1_array, SBUF_SIZE); + + /* 'size' is passed by reference below; remember the allocated size + * for flt_free(). + */ + orig_size = size; + + r = read_write(pos, rb0_buffer, rb1_buffer, &size, FLT_READ2); + + if (r != OK) { + if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array); + flt_free(rb0_buffer, orig_size, rb0_array); + + return r; + } + + /* If we get a size smaller than what we requested, then we somehow + * succeeded in writing past the disk end, and now fail to read it all + * back. This is not an error, and we just compare the part that we + * did manage to read back in. 
+ */ + + if (memcmp(ext_buffer, rb0_buffer, size)) { +#if DEBUG + printf("Filter: readback from disk 0 failed (size %d)\n", + size); +#endif + + /* Release the readback buffers before reporting the driver, + * so that they are not leaked on this failure path. + */ + if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array); + flt_free(rb0_buffer, orig_size, rb0_array); + + return bad_driver(DRIVER_MAIN, BD_DATA, EFAULT); + } + + if (USE_MIRROR && memcmp(ext_buffer, rb1_buffer, size)) { +#if DEBUG + printf("Filter: readback from disk 1 failed (size %d)\n", + size); +#endif + + /* USE_MIRROR is known to be set here, so both buffers exist. */ + flt_free(rb1_buffer, orig_size, rb1_array); + flt_free(rb0_buffer, orig_size, rb0_array); + + return bad_driver(DRIVER_BACKUP, BD_DATA, EFAULT); + } + + if (USE_MIRROR) flt_free(rb1_buffer, orig_size, rb1_array); + flt_free(rb0_buffer, orig_size, rb0_array); + + return OK; +} + +/*===========================================================================* + * expand * + *===========================================================================*/ +static void expand(sector_t first_sector, char *buffer, sector_t sectors_left) +{ + /* Expand the contiguous data in 'buffer' to interspersed format in + * 'ext_buffer'. The checksum areas are not touched. 'first_sector' is + * the logical sector number of the first sector in 'buffer', and + * 'sectors_left' the number of sectors to copy. + */ + char *srcp, *dstp; + sector_t group_left; + size_t size; + int count; + + srcp = buffer; + dstp = ext_buffer; + + /* The first group may be covered only partially. */ + group_left = NR_SUM_SEC - first_sector % NR_SUM_SEC; + + while (sectors_left > 0) { + count = MIN(sectors_left, group_left); + size = count * SECTOR_SIZE; + + memcpy(dstp, srcp, size); + + /* Leave room for the checksum sector after each group. */ + srcp += size; + dstp += size + SECTOR_SIZE; + + sectors_left -= count; + group_left = NR_SUM_SEC; + } +} + +/*===========================================================================* + * collapse * + *===========================================================================*/ +static void collapse(sector_t first_sector, char *buffer, size_t *sizep) +{ + /* Collapse the interspersed data in 'ext_buffer' to contiguous format + * in 'buffer'. As side effect, adjust the given size to reflect the + * resulting contiguous data size. 
+ */ + char *srcp, *dstp; + size_t size, bytes_left, groupbytes_left; + + srcp = ext_buffer; + dstp = buffer; + + bytes_left = *sizep; + /* The first group may be covered only partially. */ + groupbytes_left = + (NR_SUM_SEC - first_sector % NR_SUM_SEC) * SECTOR_SIZE; + + while (bytes_left > 0) { + size = MIN(bytes_left, groupbytes_left); + + memcpy(dstp, srcp, size); + + /* Skip the checksum sector that follows this group's data. */ + srcp += size + SECTOR_SIZE; + dstp += size; + + /* Never subtract more than what is left, as 'bytes_left' is + * unsigned. + */ + bytes_left -= MIN(size + SECTOR_SIZE, bytes_left); + groupbytes_left = GROUP_SIZE; + } + + /* Report the number of data bytes actually copied out. */ + *sizep = dstp - buffer; +} + +/*===========================================================================* + * expand_sizes * + *===========================================================================*/ +static size_t expand_sizes(sector_t first_sector, sector_t nr_sectors, + size_t *req_size) +{ + /* Compute the size of the data area including interspersed checksum + * sectors (req_size) and the size of the data area including + * interspersed and trailing checksum sectors (the return value). + * + * NOTE(review): 'nr_sectors - 1' below assumes nr_sectors > 0 -- + * confirm that callers never pass an empty request. + */ + sector_t last_sector, sum_sector, phys_sector; + + /* LOG2PHYS presumably maps a logical data sector to its on-disk + * sector number, and SEC2SUM_NR to the on-disk number of the checksum + * sector of its group -- verify against the macro definitions. + */ + last_sector = LOG2PHYS(first_sector + nr_sectors - 1); + + sum_sector = SEC2SUM_NR(first_sector + nr_sectors - 1); + + phys_sector = LOG2PHYS(first_sector); + + *req_size = (last_sector - phys_sector + 1) * SECTOR_SIZE; + + return (sum_sector - phys_sector + 1) * SECTOR_SIZE; +} + +/*===========================================================================* + * collapse_size * + *===========================================================================*/ +static void collapse_size(sector_t first_sector, size_t *sizep) +{ + /* Compute the size of the contiguous user data written to disk, given + * the result size of the write operation with interspersed checksums. 
+ */ + sector_t sector_in_group; + size_t sectors_from_group_base, nr_sum_secs, nr_data_secs; + + sector_in_group = first_sector % NR_SUM_SEC; + + /* Count sectors relative to the start of the first group, so that + * every run of (NR_SUM_SEC+1) sectors holds exactly one checksum + * sector. + */ + sectors_from_group_base = *sizep / SECTOR_SIZE + sector_in_group; + + nr_sum_secs = sectors_from_group_base / (NR_SUM_SEC+1); + + /* Drop the sectors before the start position and the checksum + * sectors; what remains is user data. + */ + nr_data_secs = sectors_from_group_base - sector_in_group - nr_sum_secs; + + *sizep = nr_data_secs * SECTOR_SIZE; +} + +/*===========================================================================* + * transfer * + *===========================================================================*/ +int transfer(u64_t pos, char *buffer, size_t *sizep, int flag_rw) +{ + /* Transfer data in interspersed-checksum format. When writing, first + * compute checksums, and read back the written data afterwards. When + * reading, check the stored checksums afterwards. On entry, '*sizep' + * is the requested size in bytes; on successful return, it is the + * size of the user data actually transferred. + */ + sector_t first_sector, nr_sectors; + size_t ext_size, req_size, res_size; + u64_t phys_pos; + int r; + + /* If we don't use checksums or even checksum layout, simply pass on + * the request to the drivers as is. + */ + if (!USE_SUM_LAYOUT) + return read_write(pos, buffer, buffer, sizep, flag_rw); + + /* The extended buffer (for checksumming) essentially looks like this: + * + * ------------------------------ + * |xx|C|yyyyy|C|yyyyy|C|zzz |C| + * ------------------------------ + * + * In this example, "xxyyyyyyyyyyzzz" is our actual data. The data is + * split up into groups, so that each group is followed by a checksum + * sector C containing the checksums for all data sectors in that + * group. The head and tail of the actual data may cover parts of + * groups; neither the remaining data nor their checksums are to be + * modified. + * + * The entire buffer is written or read in one operation: the + * read_write() call below. In order to write, we may first have to + * read some data; see the description in make_sum(). 
+ * + * Some points of interest here: + * - We need a buffer large enough to hold all the user and non-user + * data, from the first "xx" to the last checksum sector. This size + * is ext_size. + * - For writing, we need to expand the user-provided data from + * contiguous layout to interspersed format. The size of the user + * data after expansion is req_size. + * - For reading, we need to collapse the user-requested data from + * interspersed to contiguous format. For writing, we still need to + * compute the contiguous result size to return to the user. + * - In both cases, the result size may be different from the + * requested write size, because an EOF (as in, disk end) may occur + * and the resulting size is less than the requested size. + * - If we only follow the checksum layout, and do not do any + * checksumming, ext_size is reduced to req_size. + */ + + first_sector = POS2SEC(pos); + nr_sectors = *sizep / SECTOR_SIZE; + phys_pos = SEC2POS(LOG2PHYS(first_sector)); + +#if DEBUG2 + printf("Filter: transfer: pos 0x%lx:0x%lx -> phys_pos 0x%lx:0x%lx\n", + ex64hi(pos), ex64lo(pos), ex64hi(phys_pos), ex64lo(phys_pos)); +#endif + + /* Compute the size for the buffer and for the user data after + * expansion. + */ + ext_size = expand_sizes(first_sector, nr_sectors, &req_size); + + if (!USE_CHECKSUM) + ext_size = req_size; + + ext_buffer = flt_malloc(ext_size, ext_array, SBUF_SIZE); + + if (flag_rw == FLT_WRITE) { + expand(first_sector, buffer, nr_sectors); + + if (USE_CHECKSUM && make_sum(first_sector, nr_sectors)) { + /* Do not leak the extended buffer when bailing out. */ + flt_free(ext_buffer, ext_size, ext_array); + + return RET_REDO; + } + } + + /* Perform the actual I/O. 
*/ + res_size = ext_size; + r = read_write(phys_pos, ext_buffer, ext_buffer, &res_size, flag_rw); + +#if DEBUG2 + printf("Filter: transfer: read_write(%x:%x, %u, %d) = %d, %u\n", + ex64hi(phys_pos), ex64lo(phys_pos), ext_size, flag_rw, r, + res_size); +#endif + + if (r != OK) { + flt_free(ext_buffer, ext_size, ext_array); + + return r; + } + + /* Limit the resulting size to the user data part of the buffer. + * The resulting size may already be less, due to an EOF. + */ + *sizep = MIN(req_size, res_size); + + if (flag_rw == FLT_WRITE) { + if (USE_CHECKSUM && check_write(phys_pos, res_size)) { + /* Do not leak the extended buffer when bailing out. */ + flt_free(ext_buffer, ext_size, ext_array); + + return RET_REDO; + } + + collapse_size(first_sector, sizep); + } + else { /* FLT_READ */ + if (USE_CHECKSUM && check_sum(first_sector, *sizep)) { + /* Do not leak the extended buffer when bailing out. */ + flt_free(ext_buffer, ext_size, ext_array); + + return RET_REDO; + } + + collapse(first_sector, buffer, sizep); + } + + flt_free(ext_buffer, ext_size, ext_array); + + return OK; +} + +/*===========================================================================* + * convert * + *===========================================================================*/ +u64_t convert(u64_t size) +{ + /* Given a raw disk size, subtract the amount of disk space used for + * checksums, resulting in the user-visible disk size. + */ + sector_t sectors; + + if (!USE_SUM_LAYOUT) + return size; + + sectors = POS2SEC(size); + + /* Only complete groups of NR_SUM_SEC data sectors plus one checksum + * sector count; each contributes NR_SUM_SEC sectors of user data. + */ + return SEC2POS(sectors / (NR_SUM_SEC + 1) * NR_SUM_SEC); +} diff --git a/drivers/filter/util.c b/drivers/filter/util.c new file mode 100644 index 000000000..678379961 --- /dev/null +++ b/drivers/filter/util.c @@ -0,0 +1,109 @@ +/* Filter driver - utility functions */ + +#include "inc.h" +#include <sys/mman.h> +#include <signal.h> + +static clock_t next_alarm; + +/*===========================================================================* + * flt_malloc * + *===========================================================================*/ +char *flt_malloc(size_t size, char *sbuf, size_t ssize) +{ + /* Allocate a buffer for 'size' bytes. 
If 'size' is equal to or less + * than 'ssize', return the static buffer 'sbuf', otherwise, use + * alloc_contig() to allocate memory dynamically. Panic if that + * allocation fails. + */ + char *p; + + if (size <= ssize) + return sbuf; + + p = alloc_contig(size, 0, NULL); + if (p == NULL) + panic(__FILE__, "out of memory", size); + + return p; +} + +/*===========================================================================* + * flt_free * + *===========================================================================*/ +void flt_free(char *buf, size_t size, char *sbuf) +{ + /* Free a buffer previously allocated with flt_malloc(). 'size' and + * 'sbuf' are the size and static buffer given to flt_malloc(); the + * static buffer itself is never unmapped. + */ + + if(buf != sbuf) + munmap(buf, size); +} + +/*===========================================================================* + * print64 * + *===========================================================================*/ +char *print64(u64_t p) +{ + /* Format 'p' in hexadecimal and return the resulting string. The + * string lives in one of NB static buffers that are used in rotation, + * so it is overwritten once NB further calls have been made. + */ +#define NB 10 + static int n = 0; + static char buf[NB][100]; + u32_t lo = ex64lo(p), hi = ex64hi(p); + n = (n+1) % NB; + /* Omit the high word altogether if it is zero. */ + if(!hi) sprintf(buf[n], "%lx", lo); + else sprintf(buf[n], "%lx%08lx", hi, lo); + return buf[n]; +} + +/*===========================================================================* + * flt_alarm * + *===========================================================================*/ +clock_t flt_alarm(clock_t dt) +{ + /* Manage the single alarm of this driver and return the recorded + * expiry time ('next_alarm', zero if no alarm is set). A negative + * 'dt' only queries that time. A 'dt' of zero clears the alarm, and + * panics if none was set. A positive 'dt' sets an alarm 'dt' ticks + * after the current uptime, and panics if one was already set. + */ + int r; + + if(dt < 0) + return next_alarm; + + /* NOTE(review): the second argument presumably selects a relative + * rather than absolute time -- confirm against sys_setalarm(). + */ + r = sys_setalarm(dt, 0); + + if(r != OK) + panic(__FILE__, "sys_setalarm failed", r); + + if(dt == 0) { + if(!next_alarm) + panic(__FILE__, "clearing unset alarm", r); + next_alarm = 0; + } else { + if(next_alarm) + panic(__FILE__, "overwriting alarm", r); + if ((r = getuptime(&next_alarm)) != OK) + panic(__FILE__, "getuptime failed", r); + next_alarm += dt; + } + + return next_alarm; +} + +/*===========================================================================* + * got_alarm * + *===========================================================================*/ +static void got_alarm(int sig) +{ + /* Do nothing. 
*/ +} + +/*===========================================================================* + * flt_sleep * + *===========================================================================*/ +void flt_sleep(int secs) +{ + /* Sleep for the given number of seconds. Don't use sleep(), as that + * will end up calling select() to VFS. This implementation could be + * improved. + */ + + /* alarm(0) only cancels a pending alarm, so pause() would not be + * woken up by it; there is nothing to wait for in that case. + */ + if (secs <= 0) + return; + + /* Install a do-nothing handler so that SIGALRM interrupts pause(). */ + signal(SIGALRM, got_alarm); + alarm(secs); + + pause(); +} diff --git a/etc/drivers.conf b/etc/drivers.conf index ef62bca24..cd4e83c87 100644 --- a/etc/drivers.conf +++ b/etc/drivers.conf @@ -464,3 +464,25 @@ driver osscore ; uid 0; }; + +driver filter +{ + system + SETALARM # 24 + TIMES # 25 + GETINFO # 26 + SAFECOPYFROM # 31 + SAFECOPYTO # 32 + SETGRANT # 34 + SYSCTL # 44 + ; + ipc + SYSTEM PM VFS RS DS VM + at_wini + bios_wini + ; + control + at_wini + bios_wini + ; +}; diff --git a/include/minix/dmap.h b/include/minix/dmap.h index 890050fa6..b61e53cfc 100644 --- a/include/minix/dmap.h +++ b/include/minix/dmap.h @@ -38,6 +38,8 @@ enum dev_style { STYLE_DEV, STYLE_NDEV, STYLE_TTY, STYLE_CLONE }; #define RESCUE_MAJOR 9 /* major device for rescue */ +#define FILTER_MAJOR 11 /* major device for filter driver */ + #define LOG_MAJOR 15 /* major device for log driver */ # define IS_KLOG_DEV 0 /* minor device for /dev/klog */ diff --git a/servers/vfs/dmap.c b/servers/vfs/dmap.c index ac5b0aa46..fc9eae77a 100644 --- a/servers/vfs/dmap.c +++ b/servers/vfs/dmap.c @@ -46,7 +46,7 @@ PRIVATE struct dmap init_dmap[] = { DT(0, no_dev, 0, NONE, DMAP_MUTABLE, "") /* 8 = /dev/c1 */ DT(0, 0, 0, 0, DMAP_MUTABLE, "") /* 9 = not used */ DT(0, no_dev, 0, 0, DMAP_MUTABLE, "") /*10 = /dev/c2 */ - DT(0, 0, 0, 0, DMAP_MUTABLE, "") /*11 = not used */ + DT(0, no_dev, 0, 0, DMAP_MUTABLE, "") /*11 = /dev/filter*/ DT(0, no_dev, 0, NONE, DMAP_MUTABLE, "") /*12 = /dev/c3 */ DT(0, no_dev, 0, NONE, DMAP_MUTABLE, "") /*13 = /dev/audio */ DT(0, 0, 0, 0, DMAP_MUTABLE, "") /*14 = not used */ -- 2.44.0