From: David van Moolenbroek Date: Sun, 24 Aug 2014 09:40:52 +0000 (+0000) Subject: libminixfs: add block I/O routines X-Git-Url: http://zhaoyanbai.com/repos/%22http:/www.isc.org/icons/addsub.png?a=commitdiff_plain;h=refs%2Fchanges%2F45%2F2745%2F3;p=minix.git libminixfs: add block I/O routines The new functionality aims to save each file system server from having to implement its own block I/O routines just so that it can serve as a root file system. The new source file (bio.c) lists the requirements that file system servers have to fulfill in order to use the routines. Change-Id: Ia0190fd5c30e8c2097ed8f4b0e3ccde1827e0b92 --- diff --git a/minix/include/minix/libminixfs.h b/minix/include/minix/libminixfs.h index 8293fa307..a0369904b 100644 --- a/minix/include/minix/libminixfs.h +++ b/minix/include/minix/libminixfs.h @@ -6,6 +6,7 @@ #include #include #include +#include struct buf { /* Data portion of the buffer. */ @@ -40,6 +41,7 @@ int lmfs_bytes(struct buf *bp); int lmfs_bufs_in_use(void); int lmfs_nr_bufs(void); void lmfs_flushall(void); +void lmfs_flushdev(dev_t dev); int lmfs_fs_block_size(void); void lmfs_may_use_vmcache(int); void lmfs_set_blocksize(int blocksize, int major); @@ -77,5 +79,11 @@ void fs_blockstats(u64_t *blocks, u64_t *free, u64_t *used); #define END_OF_FILE (-104) /* eof detected */ +/* Block I/O helper functions. 
*/ +void lmfs_driver(dev_t dev, char *label); +ssize_t lmfs_bio(dev_t dev, struct fsdriver_data *data, size_t bytes, + off_t pos, int call); +void lmfs_bflush(dev_t dev); + #endif /* _MINIX_FSLIB_H */ diff --git a/minix/lib/libminixfs/Makefile b/minix/lib/libminixfs/Makefile index 53a5cf1e3..5a3578eb2 100644 --- a/minix/lib/libminixfs/Makefile +++ b/minix/lib/libminixfs/Makefile @@ -6,6 +6,6 @@ CPPFLAGS+= -D_MINIX_SYSTEM .include LIB= minixfs -SRCS= fetch_credentials.c cache.c +SRCS= fetch_credentials.c cache.c bio.c .include diff --git a/minix/lib/libminixfs/bio.c b/minix/lib/libminixfs/bio.c new file mode 100644 index 000000000..5b47e8f8f --- /dev/null +++ b/minix/lib/libminixfs/bio.c @@ -0,0 +1,211 @@ +/* + * This file provides an implementation for block I/O functions as expected by + * libfsdriver for root file systems. In particular, the lmfs_driver function + * can be used to implement fdr_driver, the lmfs_bio function can be used to + * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the + * lmfs_bflush function can be used to implement the fdr_bflush hook. At the + * very least, a file system that makes use of the provided functionality + * must adhere to the following rules: + * + * o it must initialize this library in order to set up a buffer pool for + * use by these functions, using the lmfs_buf_pool function; the + * recommended number of blocks for *non*-disk-backed file systems is + * NR_IOREQS buffers (disk-backed file systems typically use many more); + * o it must enable VM caching in order to support memory mapping of block + * devices, using the lmfs_may_use_vmcache function; + * o it must either use lmfs_flushall as implementation for the fdr_sync + * hook, or call lmfs_flushall as part of its own fdr_sync implementation. + * + * In addition, a disk-backed file system (as opposed to e.g. 
a networked file + * system that intends to be able to serve as a root file system) should + * consider the following points: + * + * o it may restrict calls to fdr_bwrite on the mounted partition, for + * example to the partition's first 1024 bytes; it should generally not + * prevent that area from being written even if the file system is mounted + * read-only; + * o it is free to set its own block size, although the default block size + * works fine for raw block I/O as well. + */ + +#include +#include +#include +#include +#include + +/* + * Set the driver label of the device identified by 'dev' to 'label'. While + * 'dev' is a full device number, only its major device number is to be used. + * This is a very thin wrapper right now, but eventually we will want to hide + * all of libbdev from file systems that use this library, so it is a start. + */ +void +lmfs_driver(dev_t dev, char *label) +{ + + bdev_driver(dev, label); +} + +/* + * Prefetch up to "nblocks" blocks on "dev" starting from block number "block". + * Stop early when either the I/O request fills up or when a block is already + * found to be in the cache. The latter is likely to happen often, since this + * function is called before getting each block for reading. Prefetching is a + * strictly best-effort operation, and may fail silently. + * TODO: limit according to the number of available buffers. + */ +static void +block_prefetch(dev_t dev, block_t block, block_t nblocks) +{ + struct buf *bp, *bufs[NR_IOREQS]; + unsigned int count; + + for (count = 0; count < nblocks && count < NR_IOREQS; count++) { + bp = lmfs_get_block(dev, block + count, PREFETCH); + assert(bp != NULL); + + if (lmfs_dev(bp) != NO_DEV) { + lmfs_put_block(bp, FULL_DATA_BLOCK); + + break; + } + + bufs[count] = bp; + } + + if (count > 0) + lmfs_rw_scattered(dev, bufs, count, READING); +} + +/* + * Perform block I/O, on "dev", starting from offset "pos", for a total of + * "bytes" bytes. 
Reading, writing, and peeking are highly similar, and thus, + * this function implements all of them. The "call" parameter indicates the + * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK). For read and write calls, + * "data" will identify the user buffer to use; for peek calls, "data" is set + * to NULL. In all cases, this function returns the number of bytes + * successfully transferred, 0 on end-of-file conditions, and a negative error + * code if no bytes could be transferred due to an error. Dirty data is not + * flushed immediately, and thus, a successful write only indicates that the + * data have been taken in by the cache (for immediate I/O, a character device + * would have to be used, but MINIX3 no longer supports this), which may be + * followed later by silent failures, including undetected end-of-file cases. + * In particular, write requests may or may not return 0 (EOF) immediately when + * writing at or beyond the block device's size. Since block I/O takes place + * at block granularity, block-unaligned writes have to read a block from disk + * before updating it, and that is the only possible source of actual I/O + * errors for write calls. + * TODO: reconsider the buffering-only approach, or see if we can at least + * somehow throw accurate EOF errors without reading in each block first. + */ +ssize_t +lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos, + int call) +{ + block_t block, blocks_left; + size_t block_size, off, block_off, chunk; + struct buf *bp; + int r, write, how; + + if (dev == NO_DEV) + return EINVAL; + + block_size = lmfs_fs_block_size(); + write = (call == FSC_WRITE); + + assert(block_size > 0); + + /* FIXME: block_t is 32-bit, so we have to impose a limit here. 
*/ + if (pos < 0 || pos / block_size > UINT32_MAX || bytes > SSIZE_MAX) + return EINVAL; + + off = 0; + block = pos / block_size; + block_off = (size_t)(pos % block_size); + blocks_left = howmany(block_off + bytes, block_size); + + lmfs_reset_rdwt_err(); + r = OK; + + for (off = 0; off < bytes; off += chunk) { + chunk = block_size - block_off; + if (chunk > bytes - off) + chunk = bytes - off; + + /* + * For read requests, help the block driver form larger I/O + * requests. + */ + if (!write) + block_prefetch(dev, block, blocks_left); + + /* + * Do not read the block from disk if we will end up + * overwriting all of its contents. + */ + how = (write && chunk == block_size) ? NO_READ : NORMAL; + + bp = lmfs_get_block(dev, block, how); + assert(bp); + + r = lmfs_rdwt_err(); + + if (r == OK && data != NULL) { + assert(lmfs_dev(bp) != NO_DEV); + + if (write) { + r = fsdriver_copyin(data, off, + (char *)bp->data + block_off, chunk); + + /* + * Mark the block as dirty even if the copy + * failed, since the copy may in fact have + * succeeded partially. This is an interface + * issue that should be resolved at some point, + * but for now we do not want the cache to be + * desynchronized from the disk contents. + */ + lmfs_markdirty(bp); + } else + r = fsdriver_copyout(data, off, + (char *)bp->data + block_off, chunk); + } + + lmfs_put_block(bp, FULL_DATA_BLOCK); + + if (r != OK) + break; + + block++; + block_off = 0; + blocks_left--; + } + + /* + * If we were not able to do any I/O, return the error (or EOF, even + * for writes). Otherwise, return how many bytes we did manage to + * transfer. + */ + if (r != OK && off == 0) + return (r == END_OF_FILE) ? 0 : r; + + return off; +} + +/* + * Perform a flush request on a block device, flushing and invalidating all + * blocks associated with this device, both in the local cache and in VM. + * This operation is called after a block device is closed and must ensure + * that no stale copies of blocks remain in any cache. 
+ */ +void +lmfs_bflush(dev_t dev) +{ + + /* First flush any dirty blocks on this device to disk. */ + lmfs_flushdev(dev); + + /* Then purge any blocks associated with the device. */ + lmfs_invalidate(dev); +} diff --git a/minix/lib/libminixfs/cache.c b/minix/lib/libminixfs/cache.c index 787759fc1..aba2a9bb9 100644 --- a/minix/lib/libminixfs/cache.c +++ b/minix/lib/libminixfs/cache.c @@ -29,7 +29,6 @@ static unsigned int bufs_in_use;/* # bufs currently in use (not on free list)*/ static void rm_lru(struct buf *bp); static void read_block(struct buf *); -static void flushall(dev_t dev); static void freeblock(struct buf *bp); static void cache_heuristic_check(int major); @@ -226,7 +225,7 @@ static void freeblock(struct buf *bp) * Avoid hysteresis by flushing all other dirty blocks for the same device. */ if (bp->lmfs_dev != NO_DEV) { - if (!lmfs_isclean(bp)) flushall(bp->lmfs_dev); + if (!lmfs_isclean(bp)) lmfs_flushdev(bp->lmfs_dev); assert(bp->lmfs_bytes == fs_block_size); bp->lmfs_dev = NO_DEV; } @@ -579,9 +578,9 @@ void lmfs_invalidate( } /*===========================================================================* - * flushall * + * lmfs_flushdev * *===========================================================================*/ -static void flushall(dev_t dev) +void lmfs_flushdev(dev_t dev) { /* Flush all dirty blocks for one device. 
*/ @@ -910,7 +909,7 @@ void lmfs_flushall(void) struct buf *bp; for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) if(bp->lmfs_dev != NO_DEV && !lmfs_isclean(bp)) - flushall(bp->lmfs_dev); + lmfs_flushdev(bp->lmfs_dev); } int lmfs_fs_block_size(void) diff --git a/minix/tests/test72.c b/minix/tests/test72.c index 9aa105dc2..5a9e6289a 100644 --- a/minix/tests/test72.c +++ b/minix/tests/test72.c @@ -5,7 +5,6 @@ #define _MINIX_SYSTEM -#include #include #include #include @@ -21,6 +20,7 @@ #include #include #include +#include int max_error = 0; diff --git a/minix/tests/test73.c b/minix/tests/test73.c index c16e29c41..84394a68d 100644 --- a/minix/tests/test73.c +++ b/minix/tests/test73.c @@ -6,7 +6,6 @@ #define _MINIX_SYSTEM 1 -#include #include #include #include @@ -23,6 +22,7 @@ #include #include #include +#include #include "testvm.h"