From: Thomas Veerman Date: Mon, 2 Aug 2010 11:44:45 +0000 (+0000) Subject: Add support for the ext2 file system. Contributed by Evgeniy Ivanov. X-Git-Tag: v3.1.8~122 X-Git-Url: http://zhaoyanbai.com/repos/?a=commitdiff_plain;h=dff0434eaf0ad0ae9c6dbc699efe8062b4098a2c;p=minix.git Add support for the ext2 file system. Contributed by Evgeniy Ivanov. --- diff --git a/docs/UPDATING b/docs/UPDATING index 08637c920..bb6805f0c 100644 --- a/docs/UPDATING +++ b/docs/UPDATING @@ -1,3 +1,7 @@ +20100802: + /usr/src/etc/system.conf updated to include ext2 file server: copy it + (or merge it) to /etc/system.conf. + 20100719: If you installed using a 3.1.6 image (or earlier) and haven't updated the boot monitor since r6246 you need to do so now: diff --git a/etc/system.conf b/etc/system.conf index df9b1193a..474b8cafc 100644 --- a/etc/system.conf +++ b/etc/system.conf @@ -119,6 +119,19 @@ service mfs quantum 500; # default server quantum }; +service ext2 +{ + ipc ALL; # ALL ipc targets allowed + system BASIC; # Only basic kernel calls allowed + vm BASIC; # Only basic VM calls allowed + io NONE; # No I/O range allowed + irq NONE; # No IRQ allowed + sigmgr rs; # Signal manager is RS + scheduler sched; # Scheduler is sched + priority 5; # priority queue 5 + quantum 500; # default server quantum +}; + service pfs { uid 0; diff --git a/servers/Makefile b/servers/Makefile index 4152e2a1f..a1faf82b5 100644 --- a/servers/Makefile +++ b/servers/Makefile @@ -3,7 +3,7 @@ .include -SUBDIR= ds hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm +SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm IMAGE_SUBDIR= ds init mfs pfs pm rs sched vfs vm diff --git a/servers/ext2/Makefile b/servers/ext2/Makefile new file mode 100644 index 000000000..b5af34d2a --- /dev/null +++ b/servers/ext2/Makefile @@ -0,0 +1,19 @@ +# Makefile for ext2 filesystem +PROG= ext2 +SRCS= balloc.c cache.c device.c link.c \ + mount.c misc.c open.c protect.c read.c \ + stadir.c table.c time.c 
utility.c \ + write.c ialloc.c inode.c main.c path.c \ + super.c optset.c +DPADD+= ${LIBSYS} +LDADD+= -lsys + +MAN= + +BINDIR?= /sbin +INSTALLFLAGS+= -S 128k + +DEFAULT_NR_BUFS= 1024 +CPPFLAGS+= -DDEFAULT_NR_BUFS=${DEFAULT_NR_BUFS} + +.include diff --git a/servers/ext2/balloc.c b/servers/ext2/balloc.c new file mode 100644 index 000000000..68945fce1 --- /dev/null +++ b/servers/ext2/balloc.c @@ -0,0 +1,354 @@ +/* This files manages blocks allocation and deallocation. + * + * The entry points into this file are: + * discard_preallocated_blocks: Discard preallocated blocks. + * alloc_block: somebody wants to allocate a block; find one. + * free_block: indicate that a block is available for new allocation. + * + * Created: + * June 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include "const.h" + + +FORWARD _PROTOTYPE( block_t alloc_block_bit, (struct super_block *sp, + block_t origin, + struct inode *rip)); + +/*===========================================================================* + * discard_preallocated_blocks * + *===========================================================================*/ +PUBLIC void discard_preallocated_blocks(struct inode *rip) +{ +/* When called for rip, discard (free) blocks preallocated for rip, + * otherwise discard all preallocated blocks. + * Normally it should be called in following situations: + * 1. File is closed. + * 2. File is truncated. + * 3. Non-sequential write. + * 4. inode is "unloaded" from the memory. + * 5. No free blocks left (discard all preallocated blocks). + */ + int i; + + if (rip) { + rip->i_prealloc_count = rip->i_prealloc_index = 0; + for (i = 0; i < EXT2_PREALLOC_BLOCKS; i++) { + if (rip->i_prealloc_blocks[i] != NO_BLOCK) { + free_block(rip->i_sp, rip->i_prealloc_blocks[i]); + rip->i_prealloc_blocks[i] = NO_BLOCK; + } + } + return; + } + + /* Discard all allocated blocks. 
+ * Probably there are just few blocks on the disc, so forbid preallocation.*/ + for(rip = &inode[0]; rip < &inode[NR_INODES]; rip++) { + rip->i_prealloc_count = rip->i_prealloc_index = 0; + rip->i_preallocation = 0; /* forbid preallocation */ + for (i = 0; i < EXT2_PREALLOC_BLOCKS; i++) { + if (rip->i_prealloc_blocks[i] != NO_BLOCK) { + free_block(rip->i_sp, rip->i_prealloc_blocks[i]); + rip->i_prealloc_blocks[i] = NO_BLOCK; + } + } + } +} + + +/*===========================================================================* + * alloc_block * + *===========================================================================*/ +PUBLIC block_t alloc_block(struct inode *rip, block_t block) +{ +/* Allocate a block for inode. If block is provided, then use it as a goal: + * try to allocate this block or his neghbors. + * If block is not provided then goal is group, where inode lives. + */ + block_t goal; + block_t b; + struct super_block *sp = rip->i_sp; + + if (sp->s_rd_only) + panic("can't alloc block on read-only filesys."); + + /* Check for free blocks. 
First time discard preallocation, + * next time return NO_BLOCK + */ + if (!opt.use_reserved_blocks && + sp->s_free_blocks_count <= sp->s_r_blocks_count) { + discard_preallocated_blocks(NULL); + } else if (sp->s_free_blocks_count <= EXT2_PREALLOC_BLOCKS) { + discard_preallocated_blocks(NULL); + } + + if (!opt.use_reserved_blocks && + sp->s_free_blocks_count <= sp->s_r_blocks_count) { + return(NO_BLOCK); + } else if (sp->s_free_blocks_count == 0) { + return(NO_BLOCK); + } + + if (block != NO_BLOCK) { + goal = block; + if (rip->i_preallocation && rip->i_prealloc_count > 0) { + /* check if goal is preallocated */ + b = rip->i_prealloc_blocks[rip->i_prealloc_index]; + if (block == b || (block + 1) == b) { + /* use preallocated block */ + rip->i_prealloc_blocks[rip->i_prealloc_index] = NO_BLOCK; + rip->i_prealloc_count--; + rip->i_prealloc_index++; + if (rip->i_prealloc_index >= EXT2_PREALLOC_BLOCKS) { + rip->i_prealloc_index = 0; + ASSERT(rip->i_prealloc_count == 0); + } + rip->i_bsearch = b; + return b; + } else { + /* probably non-sequential write operation, + * disable preallocation for this inode. 
+ */ + rip->i_preallocation = 0; + discard_preallocated_blocks(rip); + } + } + } else { + int group = (rip->i_num - 1) / sp->s_inodes_per_group; + goal = sp->s_blocks_per_group*group + sp->s_first_data_block; + } + + if (rip->i_preallocation && rip->i_prealloc_count) { + ext2_debug("There're preallocated blocks, but they're\ + neither used or freed!"); + } + + b = alloc_block_bit(sp, goal, rip); + + if (b != NO_BLOCK) + rip->i_bsearch = b; + + return b; +} + + +FORWARD _PROTOTYPE( void check_block_number, (block_t block, + struct super_block *sp, + struct group_desc *gd) ); + +/*===========================================================================* + * alloc_block_bit * + *===========================================================================*/ +PRIVATE block_t alloc_block_bit(sp, goal, rip) +struct super_block *sp; /* the filesystem to allocate from */ +block_t goal; /* try to allocate near this block */ +struct inode *rip; /* used for preallocation */ +{ + block_t block = NO_BLOCK; /* allocated block */ + int word; /* word in block bitmap */ + bit_t bit = -1; + int group; + char update_bsearch = FALSE; + int i; + + if (goal >= sp->s_blocks_count || + (goal < sp->s_first_data_block && goal != 0)) { + goal = sp->s_bsearch; + } + + if (goal <= sp->s_bsearch) { + /* No reason to search in a place with no free blocks */ + goal = sp->s_bsearch; + update_bsearch = TRUE; + } + + /* Figure out where to start the bit search. */ + word = ((goal - sp->s_first_data_block) % sp->s_blocks_per_group) + / FS_BITCHUNK_BITS; + + /* Try to allocate block at any group starting from the goal's group. + * First time goal's group is checked from the word=goal, after all + * groups checked, it's checked again from word=0, that's why "i <=". 
+ */ + group = (goal - sp->s_first_data_block) / sp->s_blocks_per_group; + for (i = 0; i <= sp->s_groups_count; i++, group++) { + struct buf *bp; + struct group_desc *gd; + + if (group >= sp->s_groups_count) + group = 0; + + gd = get_group_desc(group); + if (gd == NULL) + panic("can't get group_desc to alloc block"); + + if (gd->free_blocks_count == 0) { + word = 0; + continue; + } + + bp = get_block(sp->s_dev, gd->block_bitmap, NORMAL); + + if (rip->i_preallocation && + gd->free_blocks_count >= (EXT2_PREALLOC_BLOCKS * 4) ) { + /* Try to preallocate blocks */ + if (rip->i_prealloc_count != 0) { + /* kind of glitch... */ + discard_preallocated_blocks(rip); + ext2_debug("warning, discarding previously preallocated\ + blocks! It had to be done by another code."); + } + ASSERT(rip->i_prealloc_count == 0); + /* we preallocate bytes only */ + ASSERT(EXT2_PREALLOC_BLOCKS == sizeof(char)*CHAR_BIT); + + bit = setbyte(bp->b_bitmap, sp->s_blocks_per_group, word); + if (bit != -1) { + block = bit + sp->s_first_data_block + + group * sp->s_blocks_per_group; + check_block_number(block, sp, gd); + + /* We preallocate a byte starting from block. + * First preallocated block will be returned as + * normally allocated block. 
+ */ + for (i = 1; i < EXT2_PREALLOC_BLOCKS; i++) { + check_block_number(block + i, sp, gd); + rip->i_prealloc_blocks[i-1] = block + i; + } + rip->i_prealloc_index = 0; + rip->i_prealloc_count = EXT2_PREALLOC_BLOCKS - 1; + + bp->b_dirt = DIRTY; /* by setbyte */ + put_block(bp, MAP_BLOCK); + + gd->free_blocks_count -= EXT2_PREALLOC_BLOCKS; + sp->s_free_blocks_count -= EXT2_PREALLOC_BLOCKS; + group_descriptors_dirty = DIRTY; + return block; + } + } + + bit = setbit(bp->b_bitmap, sp->s_blocks_per_group, word); + if (bit == -1) { + if (word == 0) { + panic("ext2: allocator failed to allocate a bit in bitmap\ + with free bits."); + } else { + word = 0; + continue; + } + } + + block = sp->s_first_data_block + group * sp->s_blocks_per_group + bit; + check_block_number(block, sp, gd); + + bp->b_dirt = DIRTY; /* Now it's safe to mark it as dirty */ + put_block(bp, MAP_BLOCK); + + gd->free_blocks_count--; + sp->s_free_blocks_count--; + group_descriptors_dirty = DIRTY; + + if (update_bsearch && block != -1 && block != NO_BLOCK) { + /* We searched from the beginning, update bsearch. */ + sp->s_bsearch = block; + } + + return block; + } + + return block; +} + + +/*===========================================================================* + * free_block * + *===========================================================================*/ +PUBLIC void free_block(struct super_block *sp, bit_t bit_returned) +{ +/* Return a block by turning off its bitmap bit. */ + int group; /* group number of bit_returned */ + int bit; /* bit_returned number within its group */ + struct buf *bp; + struct group_desc *gd; + + if (sp->s_rd_only) + panic("can't free bit on read-only filesys."); + + if (bit_returned >= sp->s_blocks_count || + bit_returned < sp->s_first_data_block) + panic("trying to free block %d beyond blocks scope.", + bit_returned); + + /* At first search group, to which bit_returned belongs to + * and figure out in what word bit is stored. 
+ */ + group = (bit_returned - sp->s_first_data_block) / sp->s_blocks_per_group; + bit = (bit_returned - sp->s_first_data_block) % sp->s_blocks_per_group; + + gd = get_group_desc(group); + if (gd == NULL) + panic("can't get group_desc to alloc block"); + + /* We might be buggy (No way! :P), so check if we deallocate + * data block, but not control (system) block. + * This should never happen. + */ + if (bit_returned == gd->inode_bitmap || bit_returned == gd->block_bitmap + || (bit_returned >= gd->inode_table + && bit_returned < (gd->inode_table + sp->s_itb_per_group))) { + ext2_debug("ext2: freeing non-data block %d\n", bit_returned); + panic("trying to deallocate \ + system/control block, hardly poke author."); + } + + bp = get_block(sp->s_dev, gd->block_bitmap, NORMAL); + + if (unsetbit(bp->b_bitmap, bit)) + panic("Tried to free unused block", bit_returned); + + bp->b_dirt = DIRTY; + put_block(bp, MAP_BLOCK); + + gd->free_blocks_count++; + sp->s_free_blocks_count++; + + group_descriptors_dirty = DIRTY; + + if (bit_returned < sp->s_bsearch) + sp->s_bsearch = bit_returned; +} + + +PRIVATE void check_block_number(block_t block, struct super_block *sp, + struct group_desc *gd) +{ + + /* Check if we allocated a data block, but not control (system) block. + * Only major bug can cause us to allocate wrong block. If it happens, + * we panic (and don't bloat filesystem's bitmap). 
+ */ + if (block == gd->inode_bitmap || block == gd->block_bitmap || + (block >= gd->inode_table + && block < (gd->inode_table + sp->s_itb_per_group))) { + ext2_debug("ext2: allocating non-data block %d\n", block); + panic("ext2: block allocator tryed to return \ + system/control block, poke author.\n"); + } + + if (block >= sp->s_blocks_count) { + panic("ext2: allocator returned blocknum greater, than \ + total number of blocks.\n"); + } +} diff --git a/servers/ext2/buf.h b/servers/ext2/buf.h new file mode 100644 index 000000000..cf7ce5bdd --- /dev/null +++ b/servers/ext2/buf.h @@ -0,0 +1,53 @@ +/* Buffer (block) cache. To acquire a block, a routine calls get_block(), + * telling which block it wants. The block is then regarded as "in use" + * and has its 'b_count' field incremented. All the blocks that are not + * in use are chained together in an LRU list, with 'front' pointing + * to the least recently used block, and 'rear' to the most recently used + * block. A reverse chain, using the field b_prev is also maintained. + * Usage for LRU is measured by the time the put_block() is done. The second + * parameter to put_block() can violate the LRU order and put a block on the + * front of the list, if it will probably not be needed soon. If a block + * is modified, the modifying routine must set b_dirt to DIRTY, so the block + * will eventually be rewritten to the disk. + */ + +#ifndef EXT2_BUF_H +#define EXT2_BUF_H + +#include /* need struct direct */ +#include + +union fsdata_u { + char b__data[_MAX_BLOCK_SIZE]; /* ordinary user data */ +/* indirect block */ + block_t b__ind[_MAX_BLOCK_SIZE/sizeof(block_t)]; +/* bit map block */ + bitchunk_t b__bitmap[FS_BITMAP_CHUNKS(_MAX_BLOCK_SIZE)]; +}; + +/* A block is free if b_dev == NO_DEV. 
*/ + +/* These defs make it possible to use to bp->b_data instead of bp->b.b__data */ +#define b_data bp->b__data +#define b_ind bp->b__ind +#define b_ino bp->b__ino +#define b_bitmap bp->b__bitmap + +#define BUFHASH(b) ((b) % nr_bufs) + +EXTERN struct buf *front; /* points to least recently used free block */ +EXTERN struct buf *rear; /* points to most recently used free block */ +EXTERN unsigned int bufs_in_use; /* # bufs currently in use (not on free list)*/ + +/* When a block is released, the type of usage is passed to put_block(). */ +#define WRITE_IMMED 0100 /* block should be written to disk now */ +#define ONE_SHOT 0200 /* set if block not likely to be needed soon */ + +#define INODE_BLOCK 0 /* inode block */ +#define DIRECTORY_BLOCK 1 /* directory block */ +#define INDIRECT_BLOCK 2 /* pointer block */ +#define MAP_BLOCK 3 /* bit map */ +#define FULL_DATA_BLOCK 5 /* data, fully used */ +#define PARTIAL_DATA_BLOCK 6 /* data, partly used*/ + +#endif /* EXT2_BUF_H */ diff --git a/servers/ext2/cache.c b/servers/ext2/cache.c new file mode 100644 index 000000000..776ea6c64 --- /dev/null +++ b/servers/ext2/cache.c @@ -0,0 +1,562 @@ +/* The file system maintains a buffer cache to reduce the number of disk + * accesses needed. Whenever a read or write to the disk is done, a check is + * first made to see if the block is in the cache. This file manages the + * cache. 
+ * + * The entry points into this file are: + * get_block: request to fetch a block for reading or writing from cache + * put_block: return a block previously requested with get_block + * invalidate: remove all the cache blocks on some device + * + * Private functions: + * rw_block: read or write a block from the disk itself + * + * Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include "buf.h" +#include "super.h" +#include "inode.h" + +FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) ); +FORWARD _PROTOTYPE( void rw_block, (struct buf *, int) ); + +PRIVATE int vmcache_avail = -1; /* 0 if not available, >0 if available. */ + +/*===========================================================================* + * get_block * + *===========================================================================*/ +PUBLIC struct buf *get_block( + register dev_t dev, /* on which device is the block? */ + register block_t block, /* which block is wanted? */ + int only_search /* if NO_READ, don't read, else act normal */ +) +{ +/* Check to see if the requested block is in the block cache. If so, return + * a pointer to it. If not, evict some other block and fetch it (unless + * 'only_search' is 1). All the blocks in the cache that are not in use + * are linked together in a chain, with 'front' pointing to the least recently + * used block and 'rear' to the most recently used block. If 'only_search' is + * 1, the block being requested will be overwritten in its entirety, so it is + * only necessary to see if it is in the cache; if it is not, any free buffer + * will do. It is not necessary to actually read the block in from disk. + * If 'only_search' is PREFETCH, the block need not be read from the disk, + * and the device is not to be marked on the block, so callers can tell if + * the block returned is valid. 
+ * In addition to the LRU chain, there is also a hash chain to link together + * blocks whose block numbers end with the same bit strings, for fast lookup. + */ + + int b; + static struct buf *bp, *prev_ptr; + u64_t yieldid = VM_BLOCKID_NONE, getid = make64(dev, block); + int vmcache = 0; + + assert(buf_hash); + assert(buf); + assert(nr_bufs > 0); + + if(vmcache_avail < 0) { + /* Test once for the availability of the vm yield block feature. */ + if(vm_forgetblock(VM_BLOCKID_NONE) == ENOSYS) { + vmcache_avail = 0; + } else { + vmcache_avail = 1; + } + } + + /* use vmcache if it's available, and allowed, and we're not doing + * i/o on a ram disk device. + */ + if(vmcache_avail && may_use_vmcache && major(dev) != MEMORY_MAJOR) + vmcache = 1; + + ASSERT(fs_block_size > 0); + + /* Search the hash chain for (dev, block). Do_read() can use + * get_block(NO_DEV ...) to get an unnamed block to fill with zeros when + * someone wants to read from a hole in a file, in which case this search + * is skipped + */ + if (dev != NO_DEV) { + b = BUFHASH(block); + bp = buf_hash[b]; + while (bp != NULL) { + if (bp->b_blocknr == block && bp->b_dev == dev) { + /* Block needed has been found. */ + if (bp->b_count == 0) rm_lru(bp); + bp->b_count++; /* record that block is in use */ + ASSERT(bp->b_bytes == fs_block_size); + ASSERT(bp->b_dev == dev); + ASSERT(bp->b_dev != NO_DEV); + ASSERT(bp->bp); + return(bp); + } else { + /* This block is not the one sought. */ + bp = bp->b_hash; /* move to next block on hash chain */ + } + } + } + + /* Desired block is not on available chain. Take oldest block ('front'). 
*/ + if ((bp = front) == NULL) panic("all buffers in use", nr_bufs); + + if(bp->b_bytes < fs_block_size) { + ASSERT(!bp->bp); + ASSERT(bp->b_bytes == 0); + if(!(bp->bp = alloc_contig( (size_t) fs_block_size, 0, NULL))) { + ext2_debug("ext2: couldn't allocate a new block.\n"); + for(bp = front; + bp && bp->b_bytes < fs_block_size; bp = bp->b_next) + ; + if(!bp) { + panic("no buffer available"); + } + } else { + bp->b_bytes = fs_block_size; + } + } + + ASSERT(bp); + ASSERT(bp->bp); + ASSERT(bp->b_bytes == fs_block_size); + ASSERT(bp->b_count == 0); + + rm_lru(bp); + + /* Remove the block that was just taken from its hash chain. */ + b = BUFHASH(bp->b_blocknr); + prev_ptr = buf_hash[b]; + if (prev_ptr == bp) { + buf_hash[b] = bp->b_hash; + } else { + /* The block just taken is not on the front of its hash chain. */ + while (prev_ptr->b_hash != NULL) + if (prev_ptr->b_hash == bp) { + prev_ptr->b_hash = bp->b_hash; /* found it */ + break; + } else { + prev_ptr = prev_ptr->b_hash; /* keep looking */ + } + } + + /* If the block taken is dirty, make it clean by writing it to the disk. + * Avoid hysteresis by flushing all other dirty blocks for the same device. + */ + if (bp->b_dev != NO_DEV) { + if (bp->b_dirt == DIRTY) flushall(bp->b_dev); + + /* Are we throwing out a block that contained something? + * Give it to VM for the second-layer cache. + */ + yieldid = make64(bp->b_dev, bp->b_blocknr); + assert(bp->b_bytes == fs_block_size); + bp->b_dev = NO_DEV; + } + + /* Fill in block's parameters and add it to the hash chain where it goes. 
*/ + bp->b_dev = dev; /* fill in device number */ + bp->b_blocknr = block; /* fill in block number */ + bp->b_count++; /* record that block is being used */ + b = BUFHASH(bp->b_blocknr); + bp->b_hash = buf_hash[b]; + + buf_hash[b] = bp; /* add to hash list */ + + if(dev == NO_DEV) { + if(vmcache && cmp64(yieldid, VM_BLOCKID_NONE) != 0) { + vm_yield_block_get_block(yieldid, VM_BLOCKID_NONE, + bp->bp, fs_block_size); + } + return(bp); /* If the caller wanted a NO_DEV block, work is done. */ + } + + /* Go get the requested block unless searching or prefetching. */ + if(only_search == PREFETCH || only_search == NORMAL) { + /* Block is not found in our cache, but we do want it + * if it's in the vm cache. + */ + if(vmcache) { + /* If we can satisfy the PREFETCH or NORMAL request + * from the vm cache, work is done. + */ + if(vm_yield_block_get_block(yieldid, getid, + bp->bp, fs_block_size) == OK) { + return bp; + } + } + } + + if(only_search == PREFETCH) { + /* PREFETCH: don't do i/o. */ + bp->b_dev = NO_DEV; + } else if (only_search == NORMAL) { + rw_block(bp, READING); + } else if(only_search == NO_READ) { + /* we want this block, but its contents + * will be overwritten. VM has to forget + * about it. + */ + if(vmcache) { + vm_forgetblock(getid); + } + } else + panic("unexpected only_search value: %d", only_search); + + assert(bp->bp); + + return(bp); /* return the newly acquired block */ +} + +/*===========================================================================* + * put_block * + *===========================================================================*/ +PUBLIC void put_block( + register struct buf *bp, /* pointer to the buffer to be released */ + int block_type /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */ +) +{ +/* Return a block to the list of available blocks. Depending on 'block_type' + * it may be put on the front or rear of the LRU chain. 
Blocks that are + * expected to be needed again shortly (e.g., partially full data blocks) + * go on the rear; blocks that are unlikely to be needed again shortly + * (e.g., full data blocks) go on the front. Blocks whose loss can hurt + * the integrity of the file system (e.g., inode blocks) are written to + * disk immediately if they are dirty. + */ + if (bp == NULL) return; /* it is easier to check here than in caller */ + + bp->b_count--; /* there is one use fewer now */ + if (bp->b_count != 0) return; /* block is still in use */ + + bufs_in_use--; /* one fewer block buffers in use */ + + /* Put this block back on the LRU chain. If the ONE_SHOT bit is set in + * 'block_type', the block is not likely to be needed again shortly, so put + * it on the front of the LRU chain where it will be the first one to be + * taken when a free buffer is needed later. + */ + if (bp->b_dev == DEV_RAM || (block_type & ONE_SHOT)) { + /* Block probably won't be needed quickly. Put it on front of chain. + * It will be the next block to be evicted from the cache. + */ + bp->b_prev = NULL; + bp->b_next = front; + if (front == NULL) + rear = bp; /* LRU chain was empty */ + else + front->b_prev = bp; + front = bp; + } + else { + /* Block probably will be needed quickly. Put it on rear of chain. + * It will not be evicted from the cache for a long time. + */ + bp->b_prev = rear; + bp->b_next = NULL; + if (rear == NULL) + front = bp; + else + rear->b_next = bp; + rear = bp; + } + + /* Some blocks are so important (e.g., inodes, indirect blocks) that they + * should be written to the disk immediately to avoid messing up the file + * system in the event of a crash. 
+ */ + if ((block_type & WRITE_IMMED) && bp->b_dirt==DIRTY && bp->b_dev != NO_DEV) { + rw_block(bp, WRITING); + } +} + + +/*===========================================================================* + * rw_block * + *===========================================================================*/ +PRIVATE void rw_block( + register struct buf *bp, /* buffer pointer */ + int rw_flag /* READING or WRITING */ +) +{ +/* Read or write a disk block. This is the only routine in which actual disk + * I/O is invoked. If an error occurs, a message is printed here, but the error + * is not reported to the caller. If the error occurred while purging a block + * from the cache, it is not clear what the caller could do about it anyway. + */ + int r, op, op_failed = 0; + u64_t pos; + dev_t dev; + + if ( (dev = bp->b_dev) != NO_DEV) { + pos = mul64u(bp->b_blocknr, fs_block_size); + op = (rw_flag == READING ? MFS_DEV_READ : MFS_DEV_WRITE); + r = block_dev_io(op, dev, SELF_E, bp->b_data, pos, fs_block_size); + if (r < 0) { + printf("Ext2(%d) I/O error on device %d/%d, block %lu\n", + SELF_E, major(dev), minor(dev), bp->b_blocknr); + op_failed = 1; + } else if( (unsigned) r != fs_block_size) { + r = END_OF_FILE; + op_failed = 1; + } + + if (op_failed) { + bp->b_dev = NO_DEV; /* invalidate block */ + + /* Report read errors to interested parties. */ + if (rw_flag == READING) rdwt_err = r; + + } + } + + bp->b_dirt = CLEAN; +} + +/*===========================================================================* + * invalidate * + *===========================================================================*/ +PUBLIC void invalidate( + dev_t device /* device whose blocks are to be purged */ +) +{ +/* Remove all the blocks belonging to some device from the cache. 
*/ + + register struct buf *bp; + + for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) + if (bp->b_dev == device) bp->b_dev = NO_DEV; + + vm_forgetblocks(); +} + +/*===========================================================================* + * flushall * + *===========================================================================*/ +PUBLIC void flushall( + dev_t dev /* device to flush */ +) +{ +/* Flush all dirty blocks for one device. */ + + register struct buf *bp; + static struct buf **dirty; /* static so it isn't on stack */ + static int unsigned dirtylistsize = 0; + int ndirty; + + if(dirtylistsize != nr_bufs) { + if(dirtylistsize > 0) { + assert(dirty != NULL); + free(dirty); + } + if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs))) + panic("couldn't allocate dirty buf list"); + dirtylistsize = nr_bufs; + } + + for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) + if (bp->b_dirt == DIRTY && bp->b_dev == dev) dirty[ndirty++] = bp; + rw_scattered(dev, dirty, ndirty, WRITING); +} + +/*===========================================================================* + * rw_scattered * + *===========================================================================*/ +PUBLIC void rw_scattered( + dev_t dev, /* major-minor device number */ + struct buf **bufq, /* pointer to array of buffers */ + int bufqsize, /* number of buffers */ + int rw_flag /* READING or WRITING */ +) +{ +/* Read or write scattered data from a device. */ + + register struct buf *bp; + int gap; + register int i; + register iovec_t *iop; + static iovec_t *iovec = NULL; + int j, r; + + STATICINIT(iovec, NR_IOREQS); + + /* (Shell) sort buffers on b_blocknr. */ + gap = 1; + do + gap = 3 * gap + 1; + while (gap <= bufqsize); + while (gap != 1) { + gap /= 3; + for (j = gap; j < bufqsize; j++) { + for (i = j - gap; + i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr; + i -= gap) { + bp = bufq[i]; + bufq[i] = bufq[i + gap]; + bufq[i + gap] = bp; + } + } + } + + /* Set up I/O vector and do I/O. 
The result of dev_io is OK if everything + * went fine, otherwise the error code for the first failed transfer. + */ + while (bufqsize > 0) { + for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) { + bp = bufq[j]; + if (bp->b_blocknr != (block_t) bufq[0]->b_blocknr + j) break; + iop->iov_addr = (vir_bytes) bp->b_data; + iop->iov_size = (vir_bytes) fs_block_size; + } + r = block_dev_io(rw_flag == WRITING ? MFS_DEV_SCATTER : MFS_DEV_GATHER, + dev, SELF_E, iovec, + mul64u(bufq[0]->b_blocknr, fs_block_size), j); + + /* Harvest the results. Dev_io reports the first error it may have + * encountered, but we only care if it's the first block that failed. + */ + for (i = 0, iop = iovec; i < j; i++, iop++) { + bp = bufq[i]; + if (iop->iov_size != 0) { + /* Transfer failed. An error? Do we care? */ + if (r != OK && i == 0) { + printf( + "fs: I/O error on device %d/%d, block %lu\n", + major(dev), minor(dev), bp->b_blocknr); + bp->b_dev = NO_DEV; /* invalidate block */ + vm_forgetblocks(); + } + break; + } + if (rw_flag == READING) { + bp->b_dev = dev; /* validate block */ + put_block(bp, PARTIAL_DATA_BLOCK); + } else { + bp->b_dirt = CLEAN; + } + } + bufq += i; + bufqsize -= i; + if (rw_flag == READING) { + /* Don't bother reading more than the device is willing to + * give at this time. Don't forget to release those extras. + */ + while (bufqsize > 0) { + put_block(*bufq++, PARTIAL_DATA_BLOCK); + bufqsize--; + } + } + if (rw_flag == WRITING && i == 0) { + /* We're not making progress, this means we might keep + * looping. Buffers remain dirty if un-written. Buffers are + * lost if invalidate()d or LRU-removed while dirty. This + * is better than keeping unwritable blocks around forever.. 
+ */ + break; + } + } +} + +/*===========================================================================* + * rm_lru * + *===========================================================================*/ +PRIVATE void rm_lru( + struct buf *bp +) +{ +/* Remove a block from its LRU chain. */ + struct buf *next_ptr, *prev_ptr; + + bufs_in_use++; + next_ptr = bp->b_next; /* successor on LRU chain */ + prev_ptr = bp->b_prev; /* predecessor on LRU chain */ + if (prev_ptr != NULL) + prev_ptr->b_next = next_ptr; + else + front = next_ptr; /* this block was at front of chain */ + + if (next_ptr != NULL) + next_ptr->b_prev = prev_ptr; + else + rear = prev_ptr; /* this block was at rear of chain */ +} + +/*===========================================================================* + * set_blocksize * + *===========================================================================*/ +PUBLIC void set_blocksize(unsigned int blocksize) +{ + struct buf *bp; + struct inode *rip; + + ASSERT(blocksize > 0); + + for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) + if(bp->b_count != 0) panic("change blocksize with buffer in use"); + + for (rip = &inode[0]; rip < &inode[NR_INODES]; rip++) + if (rip->i_count > 0) panic("change blocksize with inode in use"); + + buf_pool(nr_bufs); + fs_block_size = blocksize; +} + +/*===========================================================================* + * buf_pool * + *===========================================================================*/ +PUBLIC void buf_pool(int new_nr_bufs) +{ +/* Initialize the buffer pool. 
*/ + register struct buf *bp; + + assert(new_nr_bufs > 0); + + if(nr_bufs > 0) { + assert(buf); + (void) fs_sync(); + for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { + if(bp->bp) { + assert(bp->b_bytes > 0); + free_contig(bp->bp, bp->b_bytes); + } + } + } + + if(buf) + free(buf); + + if(!(buf = calloc(sizeof(buf[0]), new_nr_bufs))) + panic("couldn't allocate buf list (%d)", new_nr_bufs); + + if(buf_hash) + free(buf_hash); + if(!(buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs))) + panic("couldn't allocate buf hash list (%d)", new_nr_bufs); + + nr_bufs = new_nr_bufs; + + bufs_in_use = 0; + front = &buf[0]; + rear = &buf[nr_bufs - 1]; + + for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) { + bp->b_blocknr = NO_BLOCK; + bp->b_dev = NO_DEV; + bp->b_next = bp + 1; + bp->b_prev = bp - 1; + bp->bp = NULL; + bp->b_bytes = 0; + } + buf[0].b_prev = NULL; + buf[nr_bufs - 1].b_next = NULL; + + for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) bp->b_hash = bp->b_next; + buf_hash[0] = front; + + vm_forgetblocks(); +} diff --git a/servers/ext2/const.h b/servers/ext2/const.h new file mode 100644 index 000000000..a939716f0 --- /dev/null +++ b/servers/ext2/const.h @@ -0,0 +1,185 @@ +#ifndef EXT2_CONST_H +#define EXT2_CONST_H + +/* Tables sizes */ + +#define NR_INODES 256 /* # slots in "in core" inode table */ +#define GETDENTS_BUFSIZ 257 + +#define INODE_HASH_LOG2 7 /* 2 based logarithm of the inode hash size */ +#define INODE_HASH_SIZE ((unsigned long)1<d_name_len) + +/* size with padding */ +#define DIR_ENTRY_ACTUAL_SIZE(d) (DIR_ENTRY_CONTENTS_SIZE(d) + \ + ((DIR_ENTRY_CONTENTS_SIZE(d) & 0x03) == 0 ? 
0 : \ + DIR_ENTRY_ALIGN - (DIR_ENTRY_CONTENTS_SIZE(d) & 0x03) )) + +/* How many bytes can be taken from the end of dentry */ +#define DIR_ENTRY_SHRINK(d) (conv2(le_CPU, (d)->d_rec_len) \ + - DIR_ENTRY_ACTUAL_SIZE(d)) + +/* Dentry can have padding, which can be used to enlarge namelen */ +#define DIR_ENTRY_MAX_NAME_LEN(d) (conv2(le_CPU, (d)->d_rec_len) \ + - MIN_DIR_ENTRY_SIZE) + +/* Constants relative to the data blocks */ +/* When change EXT2_NDIR_BLOCKS, modify ext2_max_size()!!!*/ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) + +#define FS_BITMAP_CHUNKS(b) ((b)/usizeof (bitchunk_t))/* # map chunks/blk */ +#define FS_BITCHUNK_BITS (usizeof(bitchunk_t) * CHAR_BIT) +#define FS_BITS_PER_BLOCK(b) (FS_BITMAP_CHUNKS(b) * FS_BITCHUNK_BITS) + +/* Inodes */ + +/* Next 4 following macroses were taken from linux' ext2_fs.h */ +#define EXT2_GOOD_OLD_INODE_SIZE 128 +#define EXT2_GOOD_OLD_FIRST_INO 11 + +#define EXT2_INODE_SIZE(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \ + EXT2_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT2_FIRST_INO(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? 
\ + EXT2_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) + +/* Maximum size of a fast symlink including trailing '\0' */ +#define MAX_FAST_SYMLINK_LENGTH \ + ( sizeof(((d_inode *)0)->i_block[0]) * EXT2_N_BLOCKS ) + +#define NUL(str,l,m) mfs_nul_f(__FILE__,__LINE__,(str), (l), (m)) + +/* Args to dev_bio/dev_io */ +#define MFS_DEV_READ 10001 +#define MFS_DEV_WRITE 10002 +#define MFS_DEV_SCATTER 10003 +#define MFS_DEV_GATHER 10004 + +/* FS states */ +#define EXT2_VALID_FS 0x0001 /* Cleanly unmounted */ +#define EXT2_ERROR_FS 0x0002 /* Errors detected */ + +#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +/* ext2 features, names shorted (cut EXT2_ prefix) */ +#define COMPAT_DIR_PREALLOC 0x0001 +#define COMPAT_IMAGIC_INODES 0x0002 +#define COMPAT_HAS_JOURNAL 0x0004 +#define COMPAT_EXT_ATTR 0x0008 +#define COMPAT_RESIZE_INO 0x0010 +#define COMPAT_DIR_INDEX 0x0020 +#define COMPAT_ANY 0xffffffff + +#define RO_COMPAT_SPARSE_SUPER 0x0001 +#define RO_COMPAT_LARGE_FILE 0x0002 +#define RO_COMPAT_BTREE_DIR 0x0004 +#define RO_COMPAT_ANY 0xffffffff + +#define INCOMPAT_COMPRESSION 0x0001 +#define INCOMPAT_FILETYPE 0x0002 +#define INCOMPAT_RECOVER 0x0004 +#define INCOMPAT_JOURNAL_DEV 0x0008 +#define INCOMPAT_META_BG 0x0010 +#define INCOMPAT_ANY 0xffffffff + +/* What do we support? */ +#define SUPPORTED_INCOMPAT_FEATURES (INCOMPAT_FILETYPE) +#define SUPPORTED_RO_COMPAT_FEATURES (RO_COMPAT_SPARSE_SUPER | \ + RO_COMPAT_LARGE_FILE) + +/* Ext2 directory file types. Only the low 3 bits are used. + * The other bits are reserved for now. 
+ */ +#define EXT2_FT_UNKNOWN 0 +#define EXT2_FT_REG_FILE 1 +#define EXT2_FT_DIR 2 +#define EXT2_FT_CHRDEV 3 +#define EXT2_FT_BLKDEV 4 +#define EXT2_FT_FIFO 5 +#define EXT2_FT_SOCK 6 +#define EXT2_FT_SYMLINK 7 + +#define EXT2_FT_MAX 8 + +#define HAS_COMPAT_FEATURE(sp, mask) \ + ( (sp)->s_feature_compat & (mask) ) +#define HAS_RO_COMPAT_FEATURE(sp, mask) \ + ( (sp)->s_feature_ro_compat & (mask) ) +#define HAS_INCOMPAT_FEATURE(sp, mask) \ + ( (sp)->s_feature_incompat & (mask) ) + + +/* hash-indexed directory */ +#define EXT2_INDEX_FL 0x00001000 +/* Top of directory hierarchies*/ +#define EXT2_TOPDIR_FL 0x00020000 + +#define EXT2_PREALLOC_BLOCKS 8 + + +#endif /* EXT2_CONST_H */ diff --git a/servers/ext2/device.c b/servers/ext2/device.c new file mode 100644 index 000000000..9bb601941 --- /dev/null +++ b/servers/ext2/device.c @@ -0,0 +1,359 @@ +#include "fs.h" +#include +#include +#include +#include +#include +#include "inode.h" +#include "super.h" +#include "const.h" +#include "drivers.h" + +#include + +FORWARD _PROTOTYPE( int safe_io_conversion, (endpoint_t driver, + cp_grant_id_t *gid, int *op, cp_grant_id_t *gids, endpoint_t *io_ept, + void **buffer, int *vec_grants, size_t bytes)); +FORWARD _PROTOTYPE( void safe_io_cleanup, (cp_grant_id_t, cp_grant_id_t *, + int)); +FORWARD _PROTOTYPE( int gen_opcl, (endpoint_t driver_e, int op, + dev_t dev, endpoint_t proc_e, int flags) ); +FORWARD _PROTOTYPE( int gen_io, (endpoint_t task_nr, message *mess_ptr) ); + + +/*===========================================================================* + * fs_new_driver * + *===========================================================================*/ +PUBLIC int fs_new_driver(void) +{ + /* New driver endpoint for this device */ + dev_t dev; + dev = (dev_t) fs_m_in.REQ_DEV; + driver_endpoints[major(dev)].driver_e = (endpoint_t) fs_m_in.REQ_DRIVER_E; + return(OK); +} + + +/*===========================================================================* + * safe_io_conversion * + 
*===========================================================================*/
+/* Turn an MFS_DEV_* request into its grant-based DEV_*_S counterpart.
+ *
+ *   driver:     endpoint of the driver the grants are created for
+ *   gid:        out - main grant: the data buffer (read/write) or the
+ *               replacement i/o vector (gather/scatter)
+ *   op:         in/out - MFS_DEV_* on entry, DEV_*_S on return
+ *   gids:       out - one grant per i/o vector element (gather/scatter only)
+ *   io_ept:     out - set to SELF_E when the main grant is valid
+ *   buffer:     in/out - for gather/scatter it is redirected to new_iovec
+ *   vec_grants: out - number of entries filled in in gids
+ *   bytes:      byte count for read/write; number of i/o vector elements
+ *               for gather/scatter
+ *
+ * Returns 1 if the main grant is valid, 0 otherwise.  A failed grant or an
+ * unknown operation ends in panic().
+ */
+PRIVATE int safe_io_conversion(driver, gid, op, gids, io_ept, buffer,
+                               vec_grants, bytes)
+endpoint_t driver;
+cp_grant_id_t *gid;
+int *op;
+cp_grant_id_t *gids;
+endpoint_t *io_ept;
+void **buffer;
+int *vec_grants;
+size_t bytes;
+{
+  unsigned int j;
+  int access;
+  iovec_t *v;
+  static iovec_t *new_iovec;
+
+  /* NOTE(review): presumably allocates NR_IOREQS elements on first use;
+   * STATICINIT is defined elsewhere - confirm.
+   */
+  STATICINIT(new_iovec, NR_IOREQS);
+
+  /* Number of grants allocated in vector I/O. */
+  *vec_grants = 0;
+
+  /* Driver can handle it - change request to a safe one. */
+
+  *gid = GRANT_INVALID;
+
+  switch(*op) {
+    case MFS_DEV_READ:
+    case MFS_DEV_WRITE:
+      /* Change to safe op. */
+      *op = *op == MFS_DEV_READ ? DEV_READ_S : DEV_WRITE_S;
+      /* A device read writes into our buffer, hence CPF_WRITE for reads. */
+      *gid = cpf_grant_direct(driver, (vir_bytes) *buffer, bytes,
+                              *op == DEV_READ_S ? CPF_WRITE : CPF_READ);
+      if(*gid == GRANT_INVALID) {
+          panic("cpf_grant_direct of buffer failed");
+      }
+
+      break;
+    case MFS_DEV_GATHER:
+    case MFS_DEV_SCATTER:
+      /* Change to safe op. */
+      *op = *op == MFS_DEV_GATHER ? DEV_GATHER_S : DEV_SCATTER_S;
+
+      /* Grant access to my new i/o vector. */
+      *gid = cpf_grant_direct(driver, (vir_bytes) new_iovec,
+                              bytes * sizeof(iovec_t), CPF_READ|CPF_WRITE);
+      if(*gid == GRANT_INVALID) {
+          panic("cpf_grant_direct of vector failed");
+      }
+
+      v = (iovec_t *) *buffer;
+
+      /* Grant access to i/o buffers.  In new_iovec the iov_addr field
+       * carries the grant id instead of the buffer address.
+       */
+      for(j = 0; j < bytes; j++) {
+          if(j >= NR_IOREQS)
+              panic("vec too big: %u", bytes);
+          access = (*op == DEV_GATHER_S) ? CPF_WRITE : CPF_READ;
+          new_iovec[j].iov_addr = gids[j] =
+              cpf_grant_direct(driver, (vir_bytes) v[j].iov_addr,
+                               (size_t) v[j].iov_size, access);
+
+          if(!GRANT_VALID(gids[j])) {
+              panic("ext2: grant to iovec buf failed");
+          }
+          new_iovec[j].iov_size = v[j].iov_size;
+          (*vec_grants)++;
+      }
+
+      /* Set user's vector to the new one. */
+      *buffer = new_iovec;
+      break;
+    default:
+      panic("Illegal operation %d\n", *op);
+      break;
+  }
+
+  /* If we have converted to a safe operation, I/O
+   * endpoint becomes FS if it wasn't already.
+   */
+  if(GRANT_VALID(*gid)) {
+      *io_ept = SELF_E;
+      return 1;
+  }
+
+  /* Not converted to a safe operation (because there is no
+   * copying involved in this operation).
+   */
+  return 0;
+}
+
+/*===========================================================================*
+ *                              safe_io_cleanup                              *
+ *===========================================================================*/
+PRIVATE void safe_io_cleanup(gid, gids, gids_size)
+cp_grant_id_t gid;
+cp_grant_id_t *gids;
+int gids_size;
+{
+/* Free resources (specifically, grants) allocated by safe_io_conversion():
+ * revoke the main grant 'gid' and the first 'gids_size' entries of 'gids'.
+ * Results of cpf_revoke() are deliberately ignored.
+ */
+  int j;
+
+  (void) cpf_revoke(gid);
+
+  for(j = 0; j < gids_size; j++)
+      (void) cpf_revoke(gids[j]);
+
+  return;
+}
+
+/*===========================================================================*
+ *                              block_dev_io                                 *
+ *===========================================================================*/
+PUBLIC int block_dev_io(
+  int op,                       /* MFS_DEV_READ, MFS_DEV_WRITE, etc. */
+  dev_t dev,                    /* major-minor device number */
+  endpoint_t proc_e,            /* in whose address space is buf? */
+  void *buffer,                 /* virtual address of the buffer */
+  u64_t pos,                    /* byte position */
+  size_t bytes                  /* how many bytes to transfer */
+)
+{
+/* Read or write from a device.  The parameter 'dev' tells which one.
+ *
+ * Returns EDEADEPT when no driver is registered for the major device, the
+ * sendrec() error for a dead driver or ELOCKED, and the driver's REP_STATUS
+ * otherwise.  Any other sendrec() failure, a SUSPEND reply, or a proc_e
+ * other than SELF_E ends in panic().
+ */
+  int r, safe;
+  message m;
+  cp_grant_id_t gid = GRANT_INVALID;
+  int vec_grants;
+  int op_used;
+  void *buf_used;
+  static cp_grant_id_t *gids;
+  endpoint_t driver_e;
+
+  STATICINIT(gids, NR_IOREQS);
+
+  /* Determine driver endpoint for this device */
+  driver_e = driver_endpoints[major(dev)].driver_e;
+
+  /* See if driver is roughly valid. */
+  if (driver_e == NONE) {
+      printf("ext2(%d) block_dev_io: no driver for dev %x\n", SELF_E, dev);
+      return(EDEADEPT);
+  }
+
+  /* The io vector copying relies on this I/O being for FS itself. */
+  if(proc_e != SELF_E) {
+      printf("ext2(%d) doing block_dev_io for non-self %d\n", SELF_E, proc_e);
+      panic("doing block_dev_io for non-self: %d", proc_e);
+  }
+
+  /* By default, these are right. */
+  m.IO_ENDPT = proc_e;
+  m.ADDRESS = buffer;
+  buf_used = buffer;
+
+  /* Convert parameters to 'safe mode'. */
+  op_used = op;
+  safe = safe_io_conversion(driver_e, &gid, &op_used, gids, &m.IO_ENDPT,
+                            &buf_used, &vec_grants, bytes);
+
+  /* Set up rest of the message. */
+  if (safe) m.IO_GRANT = (char *) gid;
+
+  m.m_type = op_used;
+  m.DEVICE = minor(dev);
+  m.POSITION = ex64lo(pos);
+  m.COUNT = bytes;
+  m.HIGHPOS = ex64hi(pos);
+
+  /* Call the task.  An ERESTART reply is treated like a dead driver. */
+  r = sendrec(driver_e, &m);
+  if(r == OK && m.REP_STATUS == ERESTART) r = EDEADEPT;
+
+  /* As block I/O never SUSPENDs, safe cleanup must be done whether
+   * the I/O succeeded or not. */
+  if (safe) safe_io_cleanup(gid, gids, vec_grants);
+
+  /* RECOVERY:
+   * - send back dead driver number
+   * - VFS unmaps it, waits for new driver
+   * - VFS sends the new driver endp for the FS proc and the request again
+   */
+  if (r != OK) {
+      if (r == EDEADSRCDST || r == EDEADEPT) {
+          printf("ext2(%d) dead driver %d\n", SELF_E, driver_e);
+          driver_endpoints[major(dev)].driver_e = NONE;
+          return(r);
+      } else if (r == ELOCKED) {
+          printf("ext2(%d) ELOCKED talking to %d\n", SELF_E, driver_e);
+          return(r);
+      } else
+          panic("call_task: can't send/receive: %d", r);
+  } else {
+      /* Did the process we did the sendrec() for get a result? */
+      if (m.REP_ENDPT != proc_e) {
+          printf("ext2(%d) strange device reply from %d, type = %d, proc "
+                 "= %d (not %d) (2) ignored\n", SELF_E, m.m_source,
+                 m.m_type, proc_e, m.REP_ENDPT);
+          /* NOTE(review): r only suppresses the copy-back below; the
+           * function still returns m.REP_STATUS, not EIO - confirm intent.
+           */
+          r = EIO;
+      }
+  }
+
+  /* Task has completed.  See if call completed. */
+  if (m.REP_STATUS == SUSPEND) {
+      panic("ext2 block_dev_io: driver returned SUSPEND");
+  }
+
+  /* Vector I/O went through the substitute vector; copy it back so the
+   * caller's vector sees what the driver left in it.
+   */
+  if(buffer != buf_used && r == OK) {
+      memcpy(buffer, buf_used, bytes * sizeof(iovec_t));
+  }
+
+  return(m.REP_STATUS);
+}
+
+/*===========================================================================*
+ *                              dev_open                                     *
+ *===========================================================================*/
+PUBLIC int dev_open(
+  endpoint_t driver_e,
+  dev_t dev,                    /* device to open */
+  endpoint_t proc_e,            /* process to open for */
+  int flags                     /* mode bits and flags */
+)
+{
+/* Send DEV_OPEN to the driver.  Returns EIO if the major number is out of
+ * range, otherwise the driver's reply status; SUSPEND ends in panic().
+ */
+  int major, r;
+
+  /* Determine the major device number call the device class specific
+   * open/close routine.  (This is the only routine that must check the
+   * device number for being in range.  All others can trust this check.)
+   */
+  major = major(dev);
+  if (major >= NR_DEVICES) {
+      printf("Major device number %d not in range\n", major(dev));
+      return(EIO);
+  }
+  r = gen_opcl(driver_e, DEV_OPEN, dev, proc_e, flags);
+  if (r == SUSPEND) panic("suspend on open from");
+  return(r);
+}
+
+
+/*===========================================================================*
+ *                              dev_close                                    *
+ *===========================================================================*/
+PUBLIC void dev_close(
+  endpoint_t driver_e,
+  dev_t dev                     /* device to close */
+)
+{
+/* Send DEV_CLOSE to the driver; the reply status is ignored. */
+  (void) gen_opcl(driver_e, DEV_CLOSE, dev, 0, 0);
+}
+
+
+/*===========================================================================*
+ *                              gen_opcl                                     *
+ *===========================================================================*/
+PRIVATE int gen_opcl(
+  endpoint_t driver_e,
+  int op,                       /* operation, DEV_OPEN or DEV_CLOSE */
+  dev_t dev,                    /* device to open or close */
+  endpoint_t proc_e,            /* process to open/close for */
+  int flags                     /* mode bits and flags */
+)
+{
+/* Called from the dmap struct in table.c on opens & closes of special files.
+ * Returns the REP_STATUS field of the message after the call; the return
+ * value of gen_io() itself is discarded.
+ */
+  message dev_mess;
+
+  dev_mess.m_type = op;
+  dev_mess.DEVICE = minor(dev);
+  dev_mess.IO_ENDPT = proc_e;
+  dev_mess.COUNT = flags;
+
+  /* Call the task. */
+  (void) gen_io(driver_e, &dev_mess);
+
+  return(dev_mess.REP_STATUS);
+}
+
+
+/*===========================================================================*
+ *                              gen_io                                       *
+ *===========================================================================*/
+PRIVATE int gen_io(
+  endpoint_t task_nr,           /* which task to call */
+  message *mess_ptr             /* pointer to message for task */
+)
+{
+/* All file system I/O ultimately comes down to I/O on major/minor device
+ * pairs.  These lead to calls on the following routines via the dmap table.
+ *
+ * Returns OK, ELOCKED, or EIO when the reply names a different endpoint than
+ * the request did.  A dead driver or any other sendrec() error ends in
+ * panic().
+ */
+
+  int r, proc_e;
+
+  /* Remember the requester; the reply overwrites the message. */
+  proc_e = mess_ptr->IO_ENDPT;
+
+  r = sendrec(task_nr, mess_ptr);
+  if(r == OK && mess_ptr->REP_STATUS == ERESTART)
+      r = EDEADEPT;
+
+  if (r != OK) {
+      if (r == EDEADSRCDST || r == EDEADEPT) {
+          printf("fs: dead driver %d\n", task_nr);
+          panic("should handle crashed drivers");
+          return(r);    /* presumably not reached: panic() comes first */
+      }
+      if (r == ELOCKED) {
+          printf("fs: ELOCKED talking to %d\n", task_nr);
+          return(r);
+      }
+      panic("call_task: can't send/receive: %d", r);
+  }
+
+  /* Did the process we did the sendrec() for get a result? */
+  if (mess_ptr->REP_ENDPT != proc_e) {
+      printf("fs: strange device reply from %d, type = %d, proc = %d (not "
+             "%d) (2) ignored\n", mess_ptr->m_source, mess_ptr->m_type,
+             proc_e,
+             mess_ptr->REP_ENDPT);
+      return(EIO);
+  }
+
+  return(OK);
+}
diff --git a/servers/ext2/drivers.h b/servers/ext2/drivers.h
new file mode 100644
index 000000000..24eaeec08
--- /dev/null
+++ b/servers/ext2/drivers.h
@@ -0,0 +1,11 @@
+#ifndef EXT2_DRIVERS_H
+#define EXT2_DRIVERS_H
+
+/* Driver endpoints for major devices.
Only the block devices
+ * are mapped here; it is a subset of the mapping in the VFS. */
+
+EXTERN struct driver_endpoints {
+    endpoint_t driver_e;
+} driver_endpoints[NR_DEVICES];
+
+#endif /* EXT2_DRIVERS_H */
diff --git a/servers/ext2/fs.h b/servers/ext2/fs.h
new file mode 100644
index 000000000..36c1d65c8
--- /dev/null
+++ b/servers/ext2/fs.h
@@ -0,0 +1,35 @@
+/* This is the master header for fs.  It includes some other files
+ * and defines the principal constants.
+ */
+
+#ifndef EXT2_FS_H
+#define EXT2_FS_H
+
+#define _POSIX_SOURCE      1    /* tell headers to include POSIX stuff */
+#define _MINIX             1    /* tell headers to include MINIX stuff */
+#define _SYSTEM            1    /* tell headers that this is the kernel */
+
+#define VERBOSE            0    /* show messages during initialization? */
+
+/* The following are so basic, all the *.c files get them automatically. */
+#include /* MUST be first */
+#include /* MUST be second */
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include "const.h"
+#include "type.h"
+#include "proto.h"
+#include "glo.h"
+
+#define ext2_debug printf
+
+#endif /* EXT2_FS_H */
diff --git a/servers/ext2/glo.h b/servers/ext2/glo.h
new file mode 100644
index 000000000..72819ace0
--- /dev/null
+++ b/servers/ext2/glo.h
@@ -0,0 +1,62 @@
+/* EXTERN should be extern except for the table file */
+
+#ifndef EXT2_GLO_H
+#define EXT2_GLO_H
+
+#ifdef _TABLE
+#undef EXTERN
+#define EXTERN
+#endif
+
+#include
+
+/* The following variables are used for returning results to the caller. */
+EXTERN int err_code;        /* temporary storage for error number */
+EXTERN int rdwt_err;        /* status of last disk i/o request */
+
+EXTERN int cch[NR_INODES];
+
+extern char dot1[2];   /* dot1 (&dot1[0]) and dot2 (&dot2[0]) have a special */
+extern char dot2[3];   /* meaning to search_dir: no access permission check.
*/
+
+extern _PROTOTYPE (int (*fs_call_vec[]), (void) ); /* fs call table */
+
+EXTERN message fs_m_in;
+EXTERN message fs_m_out;
+EXTERN vfs_ucred_t credentials;
+
+EXTERN uid_t caller_uid;
+EXTERN gid_t caller_gid;
+
+EXTERN int req_nr;
+
+EXTERN endpoint_t SELF_E;
+
+EXTERN char user_path[PATH_MAX+1];  /* pathname to be processed */
+
+EXTERN dev_t fs_dev;              /* The device that is handled by this FS proc
+                                   */
+EXTERN char fs_dev_label[16];    /* Name of the device driver that is handled
+                                  * by this FS proc.
+                                  */
+EXTERN int unmountdone;
+EXTERN int exitsignaled;
+
+/* Our block size. */
+EXTERN unsigned int fs_block_size;
+
+/* Buffer cache. */
+EXTERN struct buf *buf;
+EXTERN struct buf **buf_hash;   /* the buffer hash table */
+EXTERN unsigned int nr_bufs;
+EXTERN int may_use_vmcache;
+/* Little hack for syncing group descriptors. */
+EXTERN int group_descriptors_dirty;
+
+EXTERN struct opt opt;          /* global options */
+
+/* On ext2, metadata is stored in little-endian format, so bytes must be
+ * swapped when running on a big-endian CPU. */
+EXTERN int le_CPU;      /* little/big endian, if TRUE do not swap bytes */
+
+#endif /* EXT2_GLO_H */
diff --git a/servers/ext2/ialloc.c b/servers/ext2/ialloc.c
new file mode 100644
index 000000000..b4f491d54
--- /dev/null
+++ b/servers/ext2/ialloc.c
@@ -0,0 +1,476 @@
+/* This file manages inode allocation and deallocation.
+ *
+ * The entry points into this file are:
+ *   alloc_inode:  allocate a new, unused inode.
+ *   free_inode:   mark an inode as available for a new file.
+ *
+ * Created (alloc_inode/free_inode/wipe_inode are from MFS):
+ *   June 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include
+#include
+#include
+#include
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include "const.h"
+
+
+FORWARD _PROTOTYPE( bit_t alloc_inode_bit, (struct super_block *sp,
+                                             struct inode *parent,
+                                             int is_dir));
+FORWARD _PROTOTYPE( void free_inode_bit, (struct super_block *sp,
+                                           bit_t bit_returned,
+                                           int is_dir));
+FORWARD _PROTOTYPE( void wipe_inode, (struct inode *rip));
+
+
+/*===========================================================================*
+ *                              alloc_inode                                  *
+ *===========================================================================*/
+PUBLIC struct inode *alloc_inode(struct inode *parent, mode_t bits)
+{
+/* Allocate a free inode on parent's dev, and return a pointer to it.
+ *
+ * 'bits' becomes the new inode's i_mode; its I_TYPE part decides whether the
+ * inode is accounted as a directory.  Returns NULL on failure: err_code is
+ * EROFS on a read-only file system and ENFILE when no inode bit is free.
+ * If no in-core inode slot can be obtained the bit is given back and NULL is
+ * returned as well.
+ */
+
+  register struct inode *rip;
+  register struct super_block *sp;
+  int major, minor, inumb;
+  bit_t b;
+
+  sp = get_super(parent->i_dev);    /* get pointer to super_block */
+  if (sp->s_rd_only) {  /* can't allocate an inode on a read only device. */
+      err_code = EROFS;
+      return(NULL);
+  }
+
+  /* Acquire an inode from the bit map. */
+  b = alloc_inode_bit(sp, parent, (bits & I_TYPE) == I_DIRECTORY);
+  if (b == NO_BIT) {
+      err_code = ENFILE;
+      major = (int) (sp->s_dev >> MAJOR) & BYTE;
+      minor = (int) (sp->s_dev >> MINOR) & BYTE;
+      ext2_debug("Out of i-nodes on device %d/%d\n", major, minor);
+      return(NULL);
+  }
+
+  inumb = (int) b;              /* be careful not to pass unshort as param */
+
+  /* Try to acquire a slot in the inode table.  NO_DEV keeps get_inode()
+   * from reading the (not yet initialized) inode from disk.
+   */
+  if ((rip = get_inode(NO_DEV, inumb)) == NULL) {
+      /* No inode table slots available.  Free the inode just allocated. */
+      free_inode_bit(sp, b, (bits & I_TYPE) == I_DIRECTORY);
+  } else {
+      /* An inode slot is available.  Put the inode just allocated into it. */
+      rip->i_mode = bits;               /* set up RWX bits */
+      rip->i_links_count = NO_LINK;     /* initial no links */
+      rip->i_uid = caller_uid;          /* file's uid is owner's */
+      rip->i_gid = caller_gid;          /* ditto group id */
+      rip->i_dev = parent->i_dev;       /* mark which device it is on */
+      rip->i_sp = sp;                   /* pointer to super block */
+
+      /* Fields not cleared already are cleared in wipe_inode().  They have
+       * been put there because truncate() needs to clear the same fields if
+       * the file happens to be open while being truncated.  It saves space
+       * not to repeat the code twice.
+       */
+      wipe_inode(rip);
+  }
+
+  return(rip);
+}
+
+
+/*===========================================================================*
+ *                              free_inode                                   *
+ *===========================================================================*/
+PUBLIC void free_inode(
+  register struct inode *rip    /* inode to free */
+)
+{
+/* Return an inode to the pool of unallocated inodes.  Inode numbers outside
+ * (NO_ENTRY, s_inodes_count] are silently ignored; otherwise the bitmap bit
+ * is released and i_mode is set to I_NOT_ALLOC.
+ */
+  register struct super_block *sp;
+  dev_t dev = rip->i_dev;
+  bit_t b = rip->i_num;
+  u16_t mode = rip->i_mode;
+
+  /* Locate the appropriate super_block. */
+  sp = get_super(dev);
+
+  if (b <= NO_ENTRY || b > sp->s_inodes_count)
+      return;
+  free_inode_bit(sp, b, (mode & I_TYPE) == I_DIRECTORY);
+
+  rip->i_mode = I_NOT_ALLOC;    /* clear I_TYPE field */
+}
+
+
+FORWARD _PROTOTYPE( int find_group_dir, (struct super_block *sp,
+                                          struct inode *parent) );
+FORWARD _PROTOTYPE( int find_group_hashalloc, (struct super_block *sp,
+                                                struct inode *parent) );
+FORWARD _PROTOTYPE( int find_group_any, (struct super_block *sp,
+                                          struct inode *parent) );
+FORWARD _PROTOTYPE( int find_group_orlov, (struct super_block *sp,
+                                            struct inode *parent) );
+
+
+/*===========================================================================*
+ *                              alloc_inode_bit                              *
+ *===========================================================================*/
+PRIVATE bit_t alloc_inode_bit(sp, parent, is_dir)
+struct super_block *sp;         /* the filesystem to allocate from */
+struct inode *parent;           /* parent of newly allocated inode */
+int is_dir;                     /* inode will be a directory if it is TRUE */
+{
+/* Pick a block group according to the allocation options, set a free bit in
+ * that group's inode bitmap and update the free/used counters.  Returns the
+ * new inode number (numbering starts at 1), or NO_BIT when no group has a
+ * free inode.
+ */
+  int group;
+  ino_t inumber = NO_BIT;
+  bit_t bit;
+  struct buf *bp;
+  struct group_desc *gd;
+
+  if (sp->s_rd_only)
+      panic("can't alloc inode on read-only filesys.");
+
+  /* Group selection policy: MFS-like first fit, or for the ext2 policies
+   * Orlov/plain spreading for directories and hashing for everything else.
+   */
+  if (opt.mfsalloc) {
+      group = find_group_any(sp, parent);
+  } else {
+      if (is_dir) {
+          if (opt.use_orlov) {
+              group = find_group_orlov(sp, parent);
+          } else {
+              group = find_group_dir(sp, parent);
+          }
+      } else {
+          group = find_group_hashalloc(sp, parent);
+      }
+  }
+  /* Check if we have a group where to allocate an inode */
+  if (group == -1)
+      return(NO_BIT);   /* no bit could be allocated */
+
+  gd = get_group_desc(group);
+  if (gd == NULL)
+      panic("can't get group_desc to alloc inode");
+
+  /* find_group_* should always return either a group with
+   * a free inode slot or -1, which we checked earlier.
+   */
+  ASSERT(gd->free_inodes_count);
+
+  bp = get_block(sp->s_dev, gd->inode_bitmap, NORMAL);
+  bit = setbit(bp->b_bitmap, sp->s_inodes_per_group, 0);
+  ASSERT(bit != -1);    /* group definitely contains a free inode */
+
+  /* Bitmap bits are 0-based within the group, inode numbers are 1-based. */
+  inumber = group * sp->s_inodes_per_group + bit + 1;
+
+  /* Extra checks before real allocation.
+   * Only major bug can cause problems. Since setbit changed
+   * bp->b_bitmap there is no way to recover from this bug.
+   * Should never happen.
+   */
+  if (inumber > sp->s_inodes_count) {
+      panic("ext2: allocator returned inum greater, than "
+            "total number of inodes.\n");
+  }
+
+  if (inumber < EXT2_FIRST_INO(sp)) {
+      panic("ext2: allocator tryed to use reserved inode.\n");
+  }
+
+  bp->b_dirt = DIRTY;
+  put_block(bp, MAP_BLOCK);
+
+  gd->free_inodes_count--;
+  sp->s_free_inodes_count--;
+  if (is_dir) {
+      gd->used_dirs_count++;
+      sp->s_dirs_counter++;
+  }
+
+  group_descriptors_dirty = DIRTY;
+
+  /* Almost the same as previous 'group' ASSERT */
+  ASSERT(inumber != NO_BIT);
+  return inumber;
+}
+
+
+/*===========================================================================*
+ *                              free_inode_bit                               *
+ *===========================================================================*/
+PRIVATE void free_inode_bit(struct super_block *sp, bit_t bit_returned,
+                            int is_dir)
+{
+  /* Return an inode by turning off its bitmap bit.
+   *
+   * 'bit_returned' is the inode number; 'is_dir' tells whether the directory
+   * counters have to be decremented too.  Panics on a read-only file system,
+   * on an inode number outside [EXT2_FIRST_INO, s_inodes_count] and when
+   * unsetbit() reports failure.
+   */
+  int group;            /* group number of bit_returned */
+  int bit;              /* bit_returned number within its group */
+  struct buf *bp;
+  struct group_desc *gd;
+
+  if (sp->s_rd_only)
+      panic("can't free bit on read-only filesys.");
+
+  /* First find the group that bit_returned belongs to
+   * and the position of the bit inside that group's bitmap.
+   */
+  if (bit_returned > sp->s_inodes_count ||
+      bit_returned < EXT2_FIRST_INO(sp))
+      panic("trying to free inode %d beyond inodes scope.", bit_returned);
+
+  group = (bit_returned - 1) / sp->s_inodes_per_group;
+  bit = (bit_returned - 1) % sp->s_inodes_per_group; /* index in bitmap */
+
+  gd = get_group_desc(group);
+  if (gd == NULL)
+      panic("can't get group_desc to free inode");
+
+  bp = get_block(sp->s_dev, gd->inode_bitmap, NORMAL);
+
+  if (unsetbit(bp->b_bitmap, bit))
+      panic("Tried to free unused inode %d", bit_returned);
+
+  bp->b_dirt = DIRTY;
+  put_block(bp, MAP_BLOCK);
+
+  gd->free_inodes_count++;
+  sp->s_free_inodes_count++;
+
+  if (is_dir) {
+      gd->used_dirs_count--;
+      sp->s_dirs_counter--;
+  }
+
+  group_descriptors_dirty = DIRTY;
+
+  /* Let find_group_any() start its next search no later than this group. */
+  if (group < sp->s_igsearch)
+      sp->s_igsearch = group;
+}
+
+
+/* It is implemented very close to Linux's find_group_dir().
+ * Among the groups whose free inode count is non-zero and not below the
+ * per-group average, return the one with the most free blocks (the first
+ * such group on ties), or -1 if no group qualifies.  'parent' is not used.
+ */
+PRIVATE int find_group_dir(struct super_block *sp, struct inode *parent)
+{
+  int avefreei = sp->s_free_inodes_count / sp->s_groups_count;
+  struct group_desc *gd, *best_gd = NULL;
+  int group, best_group = -1;
+
+  for (group = 0; group < sp->s_groups_count; ++group) {
+      gd = get_group_desc(group);
+      if (gd == NULL)
+          panic("can't get group_desc to alloc inode");
+      if (gd->free_inodes_count == 0)
+          continue;
+      if (gd->free_inodes_count < avefreei)
+          continue;
+      if (!best_gd ||
+          gd->free_blocks_count > best_gd->free_blocks_count) {
+          best_gd = gd;
+          best_group = group;
+      }
+  }
+
+  return best_group;    /* group or -1 */
+}
+
+
+/* Analog of ffs_hashalloc() from *BSD.
+ * 1) Check the parent's group for free inodes and blocks.
+ * 2) Quadratically rehash on the group number.
+ * 3) Make a linear search for free inode.
+ */
+PRIVATE int find_group_hashalloc(struct super_block *sp, struct inode *parent)
+{
+/* Select a block group for a new inode, preferring the parent's group.
+ * Returns a group number, or -1 when no group has a free inode.
+ * A missing group descriptor ends in panic().
+ */
+  int group_count = sp->s_groups_count;
+  int home_group = (parent->i_num - 1) / sp->s_inodes_per_group;
+  struct group_desc *desc;
+  int candidate, step, visited;
+
+  /* First choice: the group the parent lives in, provided it still has
+   * both a free inode and free blocks.
+   */
+  desc = get_group_desc(home_group);
+  if (desc == NULL)
+      panic("can't get group_desc to alloc inode");
+  if (desc->free_inodes_count && desc->free_blocks_count)
+      return home_group;
+
+  /* The parent's group is unusable.  Hash on the parent's inode number so
+   * that files of one directory stay together, while directories sharing
+   * the parent's group get spread over different groups.
+   */
+  candidate = (home_group + parent->i_num) % group_count;
+
+  /* Probe with a step that doubles each round, wrapping around at the end,
+   * looking for a group with free inodes and free blocks.
+   */
+  step = 1;
+  while (step < group_count) {
+      candidate += step;
+      if (candidate >= group_count)
+          candidate -= group_count;
+
+      desc = get_group_desc(candidate);
+      if (desc == NULL)
+          panic("can't get group_desc to alloc inode");
+      if (desc->free_inodes_count && desc->free_blocks_count)
+          return candidate;
+
+      step <<= 1;
+  }
+
+  /* Last resort: walk every group once, starting at the parent's group,
+   * and settle for any group with a free inode (blocks are not checked).
+   */
+  candidate = home_group;
+  visited = 0;
+  while (visited < group_count) {
+      if (candidate >= group_count)
+          candidate = 0;
+
+      desc = get_group_desc(candidate);
+      if (desc == NULL)
+          panic("can't get group_desc to alloc inode");
+      if (desc->free_inodes_count)
+          return candidate;
+
+      visited++;
+      candidate++;
+  }
+
+  return -1;
+}
+
+
+/* Find first group which has free inode slot.
+ * This is similar to what MFS does.
+ */
+PRIVATE int find_group_any(struct super_block *sp, struct inode *parent)
+{
+/* Scan forward from sp->s_igsearch and return the first group with a free
+ * inode, remembering it in s_igsearch for the next call.  Returns -1 if the
+ * scan reaches the last group without success.  'parent' is not used.
+ */
+  int ngroups = sp->s_groups_count;
+  struct group_desc *gd;
+  int group = sp->s_igsearch;
+
+  for (; group < ngroups; group++) {
+      gd = get_group_desc(group);
+      if (gd == NULL)
+          panic("can't get group_desc to alloc inode");
+      if (gd->free_inodes_count) {
+          sp->s_igsearch = group;
+          return group;
+      }
+  }
+
+  return -1;
+}
+
+
+/* We try to spread first-level directories (i.e. directories in the root
+ * or in a directory marked as TOPDIR).
+ * For those, all groups are scanned once from a random starting group.
+ * Among the groups whose free inode and free block counts are not below
+ * average, the one with the lowest directory count is returned.  If no such
+ * group beats the initial directory-count bound, the last scanned group
+ * with at least average counts is returned; failing that, the last scanned
+ * group with a free inode.
+ *
+ * For other directories we try to find a 'good' group, starting at the
+ * parent's group: a group is 'good' if it has enough blocks and inodes (at
+ * least min_blocks and min_inodes).  Otherwise the last scanned group with a
+ * free inode is returned.
+ *
+ * Returns -1 when no group has a free inode.
+ */
+PRIVATE int find_group_orlov(struct super_block *sp, struct inode *parent)
+{
+  int avefreei = sp->s_free_inodes_count / sp->s_groups_count;
+  int avefreeb = sp->s_free_blocks_count / sp->s_groups_count;
+
+  int group = -1;
+  int fallback_group = -1;      /* Group with at least 1 free inode */
+  struct group_desc *gd;
+  int i;
+
+  if (parent->i_num == ROOT_INODE ||
+      parent->i_flags & EXT2_TOPDIR_FL) {
+      int best_group = -1;
+      int best_avefree_group = -1;  /* Best value of avefreei/avefreeb */
+      int best_ndir = sp->s_inodes_per_group;
+
+      /* Start the scan at a random group.  The raw random() value has to be
+       * reduced to a valid group number first; otherwise the range check
+       * in the loop resets it to group 0 nearly every time.
+       */
+      group = (int) ((unsigned long) random() % sp->s_groups_count);
+      for (i = 0; i < sp->s_groups_count; i++, group++) {
+          if (group >= sp->s_groups_count)
+              group = 0;
+          gd = get_group_desc(group);
+          if (gd == NULL)
+              panic("can't get group_desc to alloc inode");
+          if (gd->free_inodes_count == 0)
+              continue;
+
+          fallback_group = group;
+
+          if (gd->free_inodes_count < avefreei ||
+              gd->free_blocks_count < avefreeb)
+              continue;
+
+          best_avefree_group = group;
+
+          if (gd->used_dirs_count >= best_ndir)
+              continue;
+          best_ndir = gd->used_dirs_count;
+          best_group = group;
+      }
+      if (best_group >= 0)
+          return best_group;
+      if (best_avefree_group >= 0)
+          return best_avefree_group;
+      return fallback_group;
+  } else {
+      int parent_group = (parent->i_num - 1) / sp->s_inodes_per_group;
+      /* 2 is kind of random thing for now,
+       * but performance results are still good.
+       */
+      int min_blocks = avefreeb / 2;
+      int min_inodes = avefreei / 2;
+
+      group = parent_group;
+      for (i = 0; i < sp->s_groups_count; i++, group++) {
+          if (group >= sp->s_groups_count)
+              group = 0;
+          gd = get_group_desc(group);
+          if (gd == NULL)
+              panic("can't get group_desc to alloc inode");
+          if (gd->free_inodes_count == 0)
+              continue;
+
+          fallback_group = group;
+
+          if (gd->free_inodes_count >= min_inodes &&
+              gd->free_blocks_count >= min_blocks)
+              return group;
+      }
+      return fallback_group;
+  }
+
+  return -1;
+}
+
+
+/*===========================================================================*
+ *                              wipe_inode                                   *
+ *===========================================================================*/
+PRIVATE void wipe_inode(
+  register struct inode *rip    /* the inode to be erased */
+)
+{
+/* Erase some fields in the inode.  This function is called from alloc_inode()
+ * when a new inode is to be allocated, and from truncate(), when an existing
+ * inode is to be truncated.
+ *
+ * The size, block count, flags, generation, ACL and fragment fields are
+ * zeroed, every i_block slot is set to NO_BLOCK, all three times are
+ * scheduled for update and the inode is marked dirty.
+ */
+
+  register int i;
+
+  rip->i_size = 0;
+  rip->i_update = ATIME | CTIME | MTIME;    /* update all times later */
+  rip->i_blocks = 0;
+  rip->i_flags = 0;
+  rip->i_generation = 0;
+  rip->i_file_acl = 0;
+  rip->i_dir_acl = 0;
+  rip->i_faddr = 0;
+
+  /* The loop covers slot 0 as well, so no separate reset is needed. */
+  for (i = 0; i < EXT2_N_BLOCKS; i++)
+      rip->i_block[i] = NO_BLOCK;
+
+  rip->i_dirt = DIRTY;
+}
diff --git a/servers/ext2/inode.c b/servers/ext2/inode.c
new file mode 100644
index 000000000..eeaa755cb
--- /dev/null
+++ b/servers/ext2/inode.c
@@ -0,0 +1,426 @@
+/* This file manages the inode table.
There are procedures to allocate and + * deallocate inodes, acquire, erase, and release them, and read and write + * them from the disk. + * + * The entry points into this file are + * get_inode: search inode table for a given inode; if not there, + * read it + * put_inode: indicate that an inode is no longer needed in memory + * update_times: update atime, ctime, and mtime + * rw_inode: read a disk block and extract an inode, or corresp. write + * dup_inode: indicate that someone else is using an inode table entry + * find_inode: retrieve pointer to inode in inode cache + * + * Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include + +FORWARD _PROTOTYPE( void icopy, (struct inode *rip, d_inode *dip, + int direction, int norm)); +FORWARD _PROTOTYPE( void addhash_inode, (struct inode *node) ); +FORWARD _PROTOTYPE( void unhash_inode, (struct inode *node) ); + + +/*===========================================================================* + * fs_putnode * + *===========================================================================*/ +PUBLIC int fs_putnode(void) +{ +/* Find the inode specified by the request message and decrease its counter.*/ + + struct inode *rip; + int count; + + rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR); + + if (!rip) { + printf("%s:%d put_inode: inode #%d dev: %d not found\n", __FILE__, + __LINE__, (ino_t) fs_m_in.REQ_INODE_NR, fs_dev); + panic("fs_putnode failed"); + } + + count = fs_m_in.REQ_COUNT; + if (count <= 0) { + printf("%s:%d put_inode: bad value for count: %d\n", __FILE__, + __LINE__, count); + panic("fs_putnode failed"); + } else if (count > rip->i_count) { + printf("%s:%d put_inode: count too high: %d > %d\n", __FILE__, + __LINE__, count, rip->i_count); + panic("fs_putnode failed"); + } + + /* Decrease reference counter, but keep one reference; + * it will be consumed by put_inode(). 
+ */ + rip->i_count -= count - 1; + put_inode(rip); + + return(OK); +} + + +/*===========================================================================* + * init_inode_cache * + *===========================================================================*/ +PUBLIC void init_inode_cache() +{ + struct inode *rip; + struct inodelist *rlp; + + inode_cache_hit = 0; + inode_cache_miss = 0; + + /* init free/unused list */ + TAILQ_INIT(&unused_inodes); + + /* init hash lists */ + for (rlp = &hash_inodes[0]; rlp < &hash_inodes[INODE_HASH_SIZE]; ++rlp) + LIST_INIT(rlp); + + /* add free inodes to unused/free list */ + for (rip = &inode[0]; rip < &inode[NR_INODES]; ++rip) { + rip->i_num = NO_ENTRY; + TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused); + } +} + + +/*===========================================================================* + * addhash_inode * + *===========================================================================*/ +PRIVATE void addhash_inode(struct inode *node) +{ + int hashi = node->i_num & INODE_HASH_MASK; + + /* insert into hash table */ + LIST_INSERT_HEAD(&hash_inodes[hashi], node, i_hash); +} + + +/*===========================================================================* + * unhash_inode * + *===========================================================================*/ +PRIVATE void unhash_inode(struct inode *node) +{ + /* remove from hash table */ + LIST_REMOVE(node, i_hash); +} + + +/*===========================================================================* + * get_inode * + *===========================================================================*/ +PUBLIC struct inode *get_inode( + dev_t dev, /* device on which inode resides */ + ino_t numb /* inode number (ANSI: may not be unshort) */ +) +{ +/* Find the inode in the hash table. 
If it is not there, get a free inode + * load it from the disk if it's necessary and put on the hash list + */ + register struct inode *rip; + int hashi; + int i; + + hashi = (int) numb & INODE_HASH_MASK; + + /* Search inode in the hash table */ + LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) { + if (rip->i_num == numb && rip->i_dev == dev) { + /* If unused, remove it from the unused/free list */ + if (rip->i_count == 0) { + inode_cache_hit++; + TAILQ_REMOVE(&unused_inodes, rip, i_unused); + } + ++rip->i_count; + return(rip); + } + } + + inode_cache_miss++; + + /* Inode is not on the hash, get a free one */ + if (TAILQ_EMPTY(&unused_inodes)) { + err_code = ENFILE; + return(NULL); + } + rip = TAILQ_FIRST(&unused_inodes); + + /* If not free unhash it */ + if (rip->i_num != NO_ENTRY) + unhash_inode(rip); + + /* Inode is not unused any more */ + TAILQ_REMOVE(&unused_inodes, rip, i_unused); + + /* Load the inode. */ + rip->i_dev = dev; + rip->i_num = numb; + rip->i_count = 1; + if (dev != NO_DEV) + rw_inode(rip, READING); /* get inode from disk */ + rip->i_update = 0; /* all the times are initially up-to-date */ + rip->i_last_dpos = 0; /* no dentries searched for yet */ + rip->i_bsearch = NO_BLOCK; + rip->i_last_pos_bl_alloc = 0; + rip->i_last_dentry_size = 0; + rip->i_mountpoint= FALSE; + + rip->i_preallocation = opt.use_prealloc; + rip->i_prealloc_count = rip->i_prealloc_index = 0; + + for (i = 0; i < EXT2_PREALLOC_BLOCKS; i++) { + if (rip->i_prealloc_blocks[i] != NO_BLOCK) { + /* Actually this should never happen */ + free_block(rip->i_sp, rip->i_prealloc_blocks[i]); + rip->i_prealloc_blocks[i] = NO_BLOCK; + ext2_debug("Warning: Unexpected preallocated block."); + } + } + + /* Add to hash */ + addhash_inode(rip); + + return(rip); +} + + +/*===========================================================================* + * find_inode * + *===========================================================================*/ +PUBLIC struct inode *find_inode( + dev_t dev, /* 
device on which inode resides */ + ino_t numb /* inode number (ANSI: may not be unshort) */ +) +{ +/* Find the inode specified by the inode and device number. */ + struct inode *rip; + int hashi; + + hashi = (int) numb & INODE_HASH_MASK; + + /* Search inode in the hash table */ + LIST_FOREACH(rip, &hash_inodes[hashi], i_hash) { + if (rip->i_count > 0 && rip->i_num == numb && rip->i_dev == dev) { + return(rip); + } + } + + return(NULL); +} + + +/*===========================================================================* + * put_inode * + *===========================================================================*/ +PUBLIC void put_inode( + register struct inode *rip /* pointer to inode to be released */ +) +{ +/* The caller is no longer using this inode. If no one else is using it either + * write it back to the disk immediately. If it has no links, truncate it and + * return it to the pool of available inodes. + */ + + if (rip == NULL) + return; /* checking here is easier than in caller */ + + /* With i_count below 1 there is no reference left to release; include the + * offending count in the panic message. + */ + if (rip->i_count < 1) + panic("put_inode: i_count already below 1: %d", rip->i_count); + + if (--rip->i_count == 0) { /* i_count == 0 means no one is using it now */ + if (rip->i_links_count == NO_LINK) { + /* i_links_count == NO_LINK means free the inode. */ + /* return all the disk blocks */ + + /* Ignore errors by truncate_inode in case inode is a block + * special or character special file.
+ */ + (void) truncate_inode(rip, (off_t) 0); + /* free inode clears I_TYPE field, since it's used there */ + rip->i_dirt = DIRTY; + free_inode(rip); + } + + rip->i_mountpoint = FALSE; + if (rip->i_dirt == DIRTY) rw_inode(rip, WRITING); + + discard_preallocated_blocks(rip); /* Return blocks to the filesystem */ + + if (rip->i_links_count == NO_LINK) { + /* free, put at the front of the LRU list */ + unhash_inode(rip); + rip->i_num = NO_ENTRY; + TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused); + } else { + /* unused, put at the back of the LRU (cache it) */ + TAILQ_INSERT_TAIL(&unused_inodes, rip, i_unused); + } + } +} + + +/*===========================================================================* + * update_times * + *===========================================================================*/ +PUBLIC void update_times( + register struct inode *rip /* pointer to inode to be read/written */ +) +{ +/* Various system calls are required by the standard to update atime, ctime, + * or mtime. Since updating a time requires sending a message to the clock + * task--an expensive business--the times are marked for update by setting + * bits in i_update. When a stat, fstat, or sync is done, or an inode is + * released, update_times() may be called to actually fill in the times. + */ + + time_t cur_time; + struct super_block *sp; + + sp = rip->i_sp; /* get pointer to super block. 
*/ + if (sp->s_rd_only) + return; /* no updates for read-only file systems */ + + cur_time = clock_time(); + if (rip->i_update & ATIME) + rip->i_atime = cur_time; + if (rip->i_update & CTIME) + rip->i_ctime = cur_time; + if (rip->i_update & MTIME) + rip->i_mtime = cur_time; + rip->i_update = 0; /* they are all up-to-date now */ +} + +/*===========================================================================* + * rw_inode * + *===========================================================================*/ +PUBLIC void rw_inode( + register struct inode *rip, /* pointer to inode to be read/written */ + int rw_flag /* READING or WRITING */ +) +{ +/* An entry in the inode table is to be copied to or from the disk. */ + + register struct buf *bp; + register struct super_block *sp; + register struct group_desc *gd; + register d_inode *dip; + u32_t block_group_number; + block_t b, offset; + + /* Get the block where the inode resides. */ + sp = get_super(rip->i_dev); /* get pointer to super block */ + rip->i_sp = sp; /* inode must contain super block pointer */ + + block_group_number = (rip->i_num - 1) / sp->s_inodes_per_group; + + gd = get_group_desc(block_group_number); + + if (gd == NULL) + panic("can't get group_desc to read/write inode"); + + offset = ((rip->i_num - 1) % sp->s_inodes_per_group) * EXT2_INODE_SIZE(sp); + /* offset requires shifting, since each block contains several inodes, + * e.g. inode 2 is stored in bklock 0. + */ + b = (block_t) gd->inode_table + (offset >> sp->s_blocksize_bits); + bp = get_block(rip->i_dev, b, NORMAL); + + offset &= (sp->s_block_size - 1); + dip = (d_inode*) (bp->b_data + offset); + + /* Do the read or write. 
*/ + if (rw_flag == WRITING) { + if (rip->i_update) + update_times(rip); /* times need updating */ + if (sp->s_rd_only == FALSE) + bp->b_dirt = DIRTY; + } + + icopy(rip, dip, rw_flag, TRUE); + + put_block(bp, INODE_BLOCK); + rip->i_dirt = CLEAN; +} + + +/*===========================================================================* + * icopy * + *===========================================================================*/ +PRIVATE void icopy( + register struct inode *rip, /* pointer to the in-core inode struct */ + register d_inode *dip, /* pointer to the on-disk struct */ + int direction, /* READING (from disk) or WRITING (to disk) */ + int norm /* TRUE = do not swap bytes; FALSE = swap */ +) +{ + int i; + + if (direction == READING) { + /* Copy inode to the in-core table, swapping bytes if need be. */ + rip->i_mode = conv2(norm,dip->i_mode); + rip->i_uid = conv2(norm,dip->i_uid); + rip->i_size = conv4(norm,dip->i_size); + rip->i_atime = conv4(norm,dip->i_atime); + rip->i_ctime = conv4(norm,dip->i_ctime); + rip->i_mtime = conv4(norm,dip->i_mtime); + rip->i_dtime = conv4(norm,dip->i_dtime); + rip->i_gid = conv2(norm,dip->i_gid); + rip->i_links_count = conv2(norm,dip->i_links_count); + rip->i_blocks = conv4(norm,dip->i_blocks); + rip->i_flags = conv4(norm,dip->i_flags); + /* Minix doesn't touch osd1 and osd2 either, so just copy. */ + memcpy(&rip->osd1, &dip->osd1, sizeof(rip->osd1)); + for (i = 0; i < EXT2_N_BLOCKS; i++) + rip->i_block[i] = conv4(norm, dip->i_block[i]); + rip->i_generation = conv4(norm,dip->i_generation); + rip->i_file_acl = conv4(norm,dip->i_file_acl); + rip->i_dir_acl = conv4(norm,dip->i_dir_acl); + rip->i_faddr = conv4(norm,dip->i_faddr); + memcpy(&rip->osd2, &dip->osd2, sizeof(rip->osd2)); + } else { + /* Copying inode to disk from the in-core table. 
*/ + dip->i_mode = conv2(norm,rip->i_mode); + dip->i_uid = conv2(norm,rip->i_uid); + dip->i_size = conv4(norm,rip->i_size); + dip->i_atime = conv4(norm,rip->i_atime); + dip->i_ctime = conv4(norm,rip->i_ctime); + dip->i_mtime = conv4(norm,rip->i_mtime); + dip->i_dtime = conv4(norm,rip->i_dtime); + dip->i_gid = conv2(norm,rip->i_gid); + dip->i_links_count = conv2(norm,rip->i_links_count); + dip->i_blocks = conv4(norm,rip->i_blocks); + dip->i_flags = conv4(norm,rip->i_flags); + /* Minix doesn't touch osd1 and osd2 either, so just copy. */ + memcpy(&dip->osd1, &rip->osd1, sizeof(dip->osd1)); + for (i = 0; i < EXT2_N_BLOCKS; i++) + dip->i_block[i] = conv4(norm, rip->i_block[i]); + dip->i_generation = conv4(norm,rip->i_generation); + dip->i_file_acl = conv4(norm,rip->i_file_acl); + dip->i_dir_acl = conv4(norm,rip->i_dir_acl); + dip->i_faddr = conv4(norm,rip->i_faddr); + memcpy(&dip->osd2, &rip->osd2, sizeof(dip->osd2)); + } +} + + +/*===========================================================================* + * dup_inode * + *===========================================================================*/ +PUBLIC void dup_inode( + struct inode *ip /* The inode to be duplicated. */ +) +{ +/* This routine is a simplified form of get_inode() for the case where + * the inode pointer is already known. + */ + ip->i_count++; +} diff --git a/servers/ext2/inode.h b/servers/ext2/inode.h new file mode 100644 index 000000000..9e534a3ce --- /dev/null +++ b/servers/ext2/inode.h @@ -0,0 +1,120 @@ +/* Inode table. This table holds inodes that are currently in use. In some + * cases they have been opened by an open() or creat() system call, in other + * cases the file system itself needs the inode for one reason or another, + * such as to search a directory for a path name. + * The first part of the struct holds fields that are present on the + * disk; the second part holds fields not present on the disk. 
+ * The disk inode part is also declared in "type.h" as 'd_inode' + * + */ + +#ifndef EXT2_INODE_H +#define EXT2_INODE_H + +#include + +/* Disk part of inode structure was taken from + * linux/include/linux/ext2_fs.h. + */ +EXTERN struct inode { + u16_t i_mode; /* File mode */ + u16_t i_uid; /* Low 16 bits of Owner Uid */ + u32_t i_size; /* Size in bytes */ + u32_t i_atime; /* Access time */ + u32_t i_ctime; /* Creation time */ + u32_t i_mtime; /* Modification time */ + u32_t i_dtime; /* Deletion Time */ + u16_t i_gid; /* Low 16 bits of Group Id */ + u16_t i_links_count; /* Links count */ + u32_t i_blocks; /* 512-byte blocks count */ + u32_t i_flags; /* File flags */ + union { + struct { + u32_t l_i_reserved1; + } linux1; + struct { + u32_t h_i_translator; + } hurd1; + struct { + u32_t m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + u32_t i_block[EXT2_N_BLOCKS]; /* Pointers to blocks */ + u32_t i_generation; /* File version (for NFS) */ + u32_t i_file_acl; /* File ACL */ + u32_t i_dir_acl; /* Directory ACL */ + u32_t i_faddr; /* Fragment address */ + union { + struct { + u8_t l_i_frag; /* Fragment number */ + u8_t l_i_fsize; /* Fragment size */ + u16_t i_pad1; + u16_t l_i_uid_high; /* these 2 fields */ + u16_t l_i_gid_high; /* were reserved2[0] */ + u32_t l_i_reserved2; + } linux2; + struct { + u8_t h_i_frag; /* Fragment number */ + u8_t h_i_fsize; /* Fragment size */ + u16_t h_i_mode_high; + u16_t h_i_uid_high; + u16_t h_i_gid_high; + u32_t h_i_author; + } hurd2; + struct { + u8_t m_i_frag; /* Fragment number */ + u8_t m_i_fsize; /* Fragment size */ + u16_t m_pad1; + u32_t m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ + + /* The following items are not present on the disk. 
*/ + dev_t i_dev; /* which device is the inode on */ + ino_t i_num; /* inode number on its (minor) device */ + int i_count; /* # times inode used; 0 means slot is free */ + struct super_block *i_sp; /* pointer to super block for inode's device */ + char i_dirt; /* CLEAN or DIRTY */ + block_t i_bsearch; /* where to start search for new blocks, + * also this is last allocated block. + */ + off_t i_last_pos_bl_alloc; /* last write position for which we allocated + * a new block (should be block i_bsearch). + * used to check for sequential operation. + */ + off_t i_last_dpos; /* where to start dentry search */ + int i_last_dentry_size; /* size of last found dentry */ + + char i_mountpoint; /* true if mounted on */ + + char i_seek; /* set on LSEEK, cleared on READ/WRITE */ + char i_update; /* the ATIME, CTIME, and MTIME bits are here */ + + block_t i_prealloc_blocks[EXT2_PREALLOC_BLOCKS]; /* preallocated blocks */ + int i_prealloc_count; /* number of preallocated blocks */ + int i_prealloc_index; /* index into i_prealloc_blocks */ + int i_preallocation; /* use preallocation for this inode, normally + * it's reset only when non-sequential write + * happens. + */ + + LIST_ENTRY(inode) i_hash; /* hash list */ + TAILQ_ENTRY(inode) i_unused; /* free and unused list */ + +} inode[NR_INODES]; + + +/* list of unused/free inodes */ +EXTERN TAILQ_HEAD(unused_inodes_t, inode) unused_inodes; + +/* inode hashtable */ +EXTERN LIST_HEAD(inodelist, inode) hash_inodes[INODE_HASH_SIZE]; + +EXTERN unsigned int inode_cache_hit; +EXTERN unsigned int inode_cache_miss; + +/* Field values. 
Note that CLEAN and DIRTY are defined in "const.h" */ +#define NO_SEEK 0 /* i_seek = NO_SEEK if last op was not SEEK */ +#define ISEEK 1 /* i_seek = ISEEK if last op was SEEK */ + +#endif /* EXT2_INODE_H */ diff --git a/servers/ext2/link.c b/servers/ext2/link.c new file mode 100644 index 000000000..a3549422c --- /dev/null +++ b/servers/ext2/link.c @@ -0,0 +1,713 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include + +#define SAME 1000 + +FORWARD _PROTOTYPE( int freesp_inode, (struct inode *rip, off_t st, + off_t end) ); +FORWARD _PROTOTYPE( int remove_dir, (struct inode *rldirp, + struct inode *rip, char dir_name[NAME_MAX + 1]) ); +FORWARD _PROTOTYPE( int unlink_file, (struct inode *dirp, + struct inode *rip, char file_name[NAME_MAX + 1])); +FORWARD _PROTOTYPE( off_t nextblock, (off_t pos, int blocksize) ); +FORWARD _PROTOTYPE( void zeroblock_half, (struct inode *i, off_t p, int l)); +FORWARD _PROTOTYPE( void zeroblock_range, (struct inode *i, off_t p, off_t h)); + +/* Args to zeroblock_half() */ +#define FIRST_HALF 0 +#define LAST_HALF 1 + + +/*===========================================================================* + * fs_link * + *===========================================================================*/ +PUBLIC int fs_link() +{ +/* Perform the link(name1, name2) system call. */ + + struct inode *ip, *rip; + register int r; + char string[NAME_MAX + 1]; + struct inode *new_ip; + phys_bytes len; + + /* Copy the link name's last component */ + len = fs_m_in.REQ_PATH_LEN; /* including trailing '\0' */ + if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1) + return(ENAMETOOLONG); + + r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, 0, + (vir_bytes) string, (size_t) len, D); + if (r != OK) return r; + NUL(string, len, sizeof(string)); + + /* Temporarily open the file. 
*/ + if( (rip = get_inode(fs_dev, fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + /* Check to see if the file has maximum number of links already. */ + r = OK; + if (rip->i_links_count >= USHRT_MAX) + r = EMLINK; + if(rip->i_links_count >= LINK_MAX) + r = EMLINK; + + /* Only super_user may link to directories. */ + if(r == OK) + if( (rip->i_mode & I_TYPE) == I_DIRECTORY && caller_uid != SU_UID) + r = EPERM; + + /* If error with 'name', return the inode. */ + if (r != OK) { + put_inode(rip); + return(r); + } + + /* Temporarily open the last dir */ + if( (ip = get_inode(fs_dev, fs_m_in.REQ_DIR_INO)) == NULL) + return(EINVAL); + + /* If 'name2' exists in full (even if no space) set 'r' to error. */ + if ((new_ip = advance(ip, string, IGN_PERM)) == NULL) { + r = err_code; + if(r == ENOENT) + r = OK; + } else { + put_inode(new_ip); + r = EEXIST; + } + + /* Try to link. */ + if(r == OK) + r = search_dir(ip, string, &rip->i_num, ENTER, IGN_PERM, + rip->i_mode & I_TYPE); + + /* If success, register the linking. */ + if(r == OK) { + rip->i_links_count++; + rip->i_update |= CTIME; + rip->i_dirt = DIRTY; + } + + /* Done. Release both inodes. */ + put_inode(rip); + put_inode(ip); + return(r); +} + + +/*===========================================================================* + * fs_unlink * + *===========================================================================*/ +PUBLIC int fs_unlink() +{ +/* Perform the unlink(name) or rmdir(name) system call. The code for these two + * is almost the same. They differ only in some condition testing. Unlink() + * may be used by the superuser to do dangerous things; rmdir() may not. 
+ */ + register struct inode *rip; + struct inode *rldirp; + int r; + char string[NAME_MAX + 1]; + phys_bytes len; + + /* Copy the last component */ + len = fs_m_in.REQ_PATH_LEN; /* including trailing '\0' */ + if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1) + return(ENAMETOOLONG); + + r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, + (vir_bytes) 0, (vir_bytes) string, (size_t) len, D); + if (r != OK) return r; + NUL(string, len, sizeof(string)); + + /* Temporarily open the dir. */ + if( (rldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + /* The last directory exists. Does the file also exist? */ + rip = advance(rldirp, string, IGN_PERM); + r = err_code; + + /* If error, return inode. */ + if(r != OK) { + /* Mount point? */ + if (r == EENTERMOUNT || r == ELEAVEMOUNT) { + put_inode(rip); + r = EBUSY; + } + put_inode(rldirp); + return(r); + } + + /* Now test if the call is allowed, separately for unlink() and rmdir(). */ + if(fs_m_in.m_type == REQ_UNLINK) { + /* Only the su may unlink directories, but the su can unlink any + * dir.*/ + if( (rip->i_mode & I_TYPE) == I_DIRECTORY) r = EPERM; + + /* Actually try to unlink the file; fails if parent is mode 0 etc. */ + if (r == OK) r = unlink_file(rldirp, rip, string); + } else { + r = remove_dir(rldirp, rip, string); /* call is RMDIR */ + } + + /* If unlink was possible, it has been done, otherwise it has not. 
*/ + put_inode(rip); + put_inode(rldirp); + return(r); +} + + +/*===========================================================================* + * fs_rdlink * + *===========================================================================*/ +PUBLIC int fs_rdlink() +{ + block_t b; /* block containing link text */ + struct buf *bp; /* buffer containing link text */ + char* link_text; /* either bp->b_data or rip->i_block */ + register struct inode *rip; /* target inode */ + register int r; /* return value */ + size_t copylen; + + copylen = min( (size_t) fs_m_in.REQ_MEM_SIZE, UMAX_FILE_POS); + + /* Temporarily open the file. */ + if( (rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + if (!S_ISLNK(rip->i_mode)) + r = EACCES; + if (rip->i_size > MAX_FAST_SYMLINK_LENGTH) { + /* normal symlink */ + if ((b = read_map(rip, (off_t) 0)) == NO_BLOCK) { + r = EIO; + } else { + bp = get_block(rip->i_dev, b, NORMAL); + link_text = bp->b_data; + if (bp) + r = OK; + else + r = EIO; + } + } else { + /* fast symlink, stored in inode */ + link_text = (char*) rip->i_block; + r = OK; + } + if (r == OK) { + /* Passed all checks */ + /* We can safely cast to unsigned, because copylen is guaranteed to be + below max file size */ + copylen = min( copylen, (unsigned) rip->i_size); + bp = get_block(rip->i_dev, b, NORMAL); + r = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, + (vir_bytes) 0, (vir_bytes) link_text, + (size_t) copylen, D); + put_block(bp, DIRECTORY_BLOCK); + if (r == OK) + fs_m_out.RES_NBYTES = copylen; + } + + put_inode(rip); + return(r); +} + + +/*===========================================================================* + * remove_dir * + *===========================================================================*/ +PRIVATE int remove_dir(rldirp, rip, dir_name) +struct inode *rldirp; /* parent directory */ +struct inode *rip; /* directory to be removed */ +char dir_name[NAME_MAX + 1]; /* name of directory to be removed */ +{ 
+ /* A directory file has to be removed. Five conditions have to met: + * - The file must be a directory + * - The directory must be empty (except for . and ..) + * - The final component of the path must not be . or .. + * - The directory must not be the root of a mounted file system (VFS) + * - The directory must not be anybody's root/working directory (VFS) + */ + int r; + + /* search_dir checks that rip is a directory too. */ + if ((r = search_dir(rip, "", NULL, IS_EMPTY, IGN_PERM, 0)) != OK) + return r; + + if (strcmp(dir_name, ".") == 0 || strcmp(dir_name, "..") == 0)return(EINVAL); + if (rip->i_num == ROOT_INODE) return(EBUSY); /* can't remove 'root' */ + + /* Actually try to unlink the file; fails if parent is mode 0 etc. */ + if ((r = unlink_file(rldirp, rip, dir_name)) != OK) return r; + + /* Unlink . and .. from the dir. The super user can link and unlink any dir, + * so don't make too many assumptions about them. + */ + (void) unlink_file(rip, NULL, dot1); + (void) unlink_file(rip, NULL, dot2); + return(OK); +} + + +/*===========================================================================* + * unlink_file * + *===========================================================================*/ +PRIVATE int unlink_file(dirp, rip, file_name) +struct inode *dirp; /* parent directory of file */ +struct inode *rip; /* inode of file, may be NULL too. */ +char file_name[NAME_MAX + 1]; /* name of file to be removed */ +{ +/* Unlink 'file_name'; rip must be the inode of 'file_name' or NULL. */ + + ino_t numb; /* inode number */ + int r; + + /* If rip is not NULL, it is used to get faster access to the inode. */ + if (rip == NULL) { + /* Search for file in directory and try to get its inode. 
*/ + err_code = search_dir(dirp, file_name, &numb, LOOK_UP, IGN_PERM, 0); + if (err_code == OK) rip = get_inode(dirp->i_dev, (int) numb); + if (err_code != OK || rip == NULL) return(err_code); + } else { + dup_inode(rip); /* inode will be returned with put_inode */ + } + + r = search_dir(dirp, file_name, NULL, DELETE, IGN_PERM, 0); + + if (r == OK) { + rip->i_links_count--; /* entry deleted from parent's dir */ + rip->i_update |= CTIME; + rip->i_dirt = DIRTY; + } + + put_inode(rip); + return(r); +} + + +/*===========================================================================* + * fs_rename * + *===========================================================================*/ +PUBLIC int fs_rename() +{ +/* Perform the rename(name1, name2) system call. */ + struct inode *old_dirp, *old_ip; /* ptrs to old dir, file inodes */ + struct inode *new_dirp, *new_ip; /* ptrs to new dir, file inodes */ + struct inode *new_superdirp, *next_new_superdirp; + int r = OK; /* error flag; initially no error */ + int odir, ndir; /* TRUE iff {old|new} file is dir */ + int same_pdir; /* TRUE iff parent dirs are the same */ + char old_name[NAME_MAX + 1], new_name[NAME_MAX + 1]; + ino_t numb; + phys_bytes len; + + /* Copy the last component of the old name */ + len = fs_m_in.REQ_REN_LEN_OLD; /* including trailing '\0' */ + if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1) + return(ENAMETOOLONG); + + r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_REN_GRANT_OLD, + (vir_bytes) 0, (vir_bytes) old_name, (size_t) len, D); + if (r != OK) return r; + NUL(old_name, len, sizeof(old_name)); + + /* Copy the last component of the new name */ + len = fs_m_in.REQ_REN_LEN_NEW; /* including trailing '\0' */ + if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1) + return(ENAMETOOLONG); + + r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_REN_GRANT_NEW, + (vir_bytes) 0, (vir_bytes) new_name, (size_t) len, D); + if (r != OK) return r; + NUL(new_name, len, sizeof(new_name)); + + /* 
Get old dir inode */ + if( (old_dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_REN_OLD_DIR)) == NULL) + return(err_code); + + old_ip = advance(old_dirp, old_name, IGN_PERM); + r = err_code; + + if (r == EENTERMOUNT || r == ELEAVEMOUNT) { + put_inode(old_ip); + if (r == EENTERMOUNT) r = EXDEV; /* should this fail at all? */ + else if (r == ELEAVEMOUNT) r = EINVAL; /* rename on dot-dot */ + } + + /* Get new dir inode */ + if( (new_dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_REN_NEW_DIR)) == NULL) + r = err_code; + new_ip = advance(new_dirp, new_name, IGN_PERM); /* not required to exist */ + + /* However, if the check failed because the file does exist, don't continue. + * Note that ELEAVEMOUNT is covered by the dot-dot check later. */ + if(err_code == EENTERMOUNT) { + put_inode(new_ip); + r = EBUSY; + } + + if(old_ip != NULL) + odir = ((old_ip->i_mode & I_TYPE) == I_DIRECTORY); /* TRUE iff dir */ + else + odir = FALSE; + + /* If it is ok, check for a variety of possible errors. */ + if(r == OK) { + same_pdir = (old_dirp == new_dirp); + + /* The old inode must not be a superdirectory of the new last dir. */ + if (odir && !same_pdir) { + dup_inode(new_superdirp = new_dirp); + while (TRUE) { /* may hang in a file system loop */ + if (new_superdirp == old_ip) { + put_inode(new_superdirp); + r = EINVAL; + break; + } + next_new_superdirp = advance(new_superdirp, dot2, + IGN_PERM); + + put_inode(new_superdirp); + if(next_new_superdirp == new_superdirp) { + put_inode(new_superdirp); + break; + } + if(err_code == ELEAVEMOUNT) { + /* imitate that we are back at the root, + * cross device checked already on VFS */ + put_inode(next_new_superdirp); + err_code = OK; + break; + } + new_superdirp = next_new_superdirp; + if(new_superdirp == NULL) { + /* Missing ".." entry. Assume the worst. */ + r = EINVAL; + break; + } + } + } + + /* The old or new name must not be . or .. 
*/ + if(strcmp(old_name, ".") == 0 || strcmp(old_name, "..") == 0 || + strcmp(new_name, ".") == 0 || strcmp(new_name, "..") == 0) { + r = EINVAL; + } + /* Both parent directories must be on the same device. + if(old_dirp->i_dev != new_dirp->i_dev) r = EXDEV; */ + + /* Some tests apply only if the new path exists. */ + if(new_ip == NULL) { + /* don't rename a file with a file system mounted on it. + if (old_ip->i_dev != old_dirp->i_dev) r = EXDEV;*/ + if(odir && (new_dirp->i_links_count >= SHRT_MAX || + new_dirp->i_links_count >= LINK_MAX) && + !same_pdir && r == OK) { + r = EMLINK; + } + } else { + if(old_ip == new_ip) r = SAME; /* old=new */ + + ndir = ((new_ip->i_mode & I_TYPE) == I_DIRECTORY);/* dir ? */ + if(odir == TRUE && ndir == FALSE) r = ENOTDIR; + if(odir == FALSE && ndir == TRUE) r = EISDIR; + } + } + + /* If a process has another root directory than the system root, we might + * "accidently" be moving it's working directory to a place where it's + * root directory isn't a super directory of it anymore. This can make + * the function chroot useless. If chroot will be used often we should + * probably check for it here. */ + + /* The rename will probably work. Only two things can go wrong now: + * 1. being unable to remove the new file. (when new file already exists) + * 2. being unable to make the new directory entry. (new file doesn't exists) + * [directory has to grow by one block and cannot because the disk + * is completely full]. + */ + if(r == OK) { + if(new_ip != NULL) { + /* There is already an entry for 'new'. Try to remove it. */ + if(odir) + r = remove_dir(new_dirp, new_ip, new_name); + else + r = unlink_file(new_dirp, new_ip, new_name); + } + /* if r is OK, the rename will succeed, while there is now an + * unused entry in the new parent directory. 
*/ + } + + if(r == OK) { + /* If the new name will be in the same parent directory as the old + * one, first remove the old name to free an entry for the new name, + * otherwise first try to create the new name entry to make sure + * the rename will succeed. + */ + numb = old_ip->i_num; /* inode number of old file */ + + if(same_pdir) { + r = search_dir(old_dirp,old_name, NULL, DELETE,IGN_PERM, 0); + /* shouldn't go wrong. */ + if(r == OK) + (void) search_dir(old_dirp, new_name, &numb, ENTER, IGN_PERM, + old_ip->i_mode & I_TYPE); + } else { + r = search_dir(new_dirp, new_name, &numb, ENTER, IGN_PERM, + old_ip->i_mode & I_TYPE); + if(r == OK) + (void) search_dir(old_dirp, old_name, (ino_t *) 0, DELETE, + IGN_PERM, 0); + } + } + /* If r is OK, the ctime and mtime of old_dirp and new_dirp have been marked + * for update in search_dir. */ + + if(r == OK && odir && !same_pdir) { + /* Update the .. entry in the directory (still points to old_dirp).*/ + numb = new_dirp->i_num; + (void) unlink_file(old_ip, NULL, dot2); + if(search_dir(old_ip, dot2, &numb, ENTER, IGN_PERM, I_DIRECTORY) == OK) { + /* New link created. */ + new_dirp->i_links_count++; + new_dirp->i_dirt = DIRTY; + } + } + + /* Release the inodes. */ + put_inode(old_dirp); + put_inode(old_ip); + put_inode(new_dirp); + put_inode(new_ip); + return(r == SAME ? 
OK : r); +} + + +/*===========================================================================* + * fs_ftrunc * + *===========================================================================*/ +PUBLIC int fs_ftrunc(void) +{ + struct inode *rip; + off_t start, end; + int r; + + if( (rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + start = fs_m_in.REQ_TRC_START_LO; + end = fs_m_in.REQ_TRC_END_LO; + + if (end == 0) + r = truncate_inode(rip, start); + else + r = freesp_inode(rip, start, end); + + return(r); +} + + +/*===========================================================================* + * truncate_inode * + *===========================================================================*/ +PUBLIC int truncate_inode(rip, newsize) +register struct inode *rip; /* pointer to inode to be truncated */ +off_t newsize; /* inode must become this size */ +{ +/* Set inode to a certain size, freeing any blocks no longer referenced + * and updating the size in the inode. If the inode is extended, the + * extra space is a hole that reads as zeroes. + * + * Nothing special has to happen to file pointers if inode is opened in + * O_APPEND mode, as this is different per fd and is checked when + * writing is done. + */ + int r; + mode_t file_type; + + discard_preallocated_blocks(rip); + + file_type = rip->i_mode & I_TYPE; /* check to see if file is special */ + if (file_type == I_CHAR_SPECIAL || file_type == I_BLOCK_SPECIAL) + return(EINVAL); + if (newsize > rip->i_sp->s_max_size) /* don't let inode grow too big */ + return(EFBIG); + + /* Free the actual space if truncating. */ + if (newsize < rip->i_size) { + if ((r = freesp_inode(rip, newsize, rip->i_size)) != OK) + return(r); + } + + /* Clear the rest of the last block if expanding. */ + if (newsize > rip->i_size) zeroblock_half(rip, rip->i_size, LAST_HALF); + + /* Next correct the inode size. 
*/ + rip->i_size = newsize; + rip->i_update |= CTIME | MTIME; + rip->i_dirt = DIRTY; + + return(OK); +} + + +/*===========================================================================* + * freesp_inode * + *===========================================================================*/ +PRIVATE int freesp_inode(rip, start, end) +register struct inode *rip; /* pointer to inode to be partly freed */ +off_t start, end; /* range of bytes to free (end uninclusive) */ +{ +/* Cut an arbitrary hole in an inode. The caller is responsible for checking + * the reasonableness of the inode type of rip. The reason is this is that + * this function can be called for different reasons, for which different + * sets of inode types are reasonable. Adjusting the final size of the inode + * is to be done by the caller too, if wished. + * + * Consumers of this function currently are truncate_inode() (used to + * free indirect and data blocks for any type of inode, but also to + * implement the ftruncate() and truncate() system calls) and the F_FREESP + * fcntl(). + */ + off_t p, e; + int r; + unsigned short block_size = rip->i_sp->s_block_size; + int zero_last, zero_first; + + discard_preallocated_blocks(rip); + + if (rip->i_blocks == 0) { + /* Either hole or symlink. Freeing fast symlink using + * write_map() causes segfaults since it doesn't use any + * blocks, but uses i_block[] to store target. + */ + return(OK); + } + + if(end > rip->i_size) /* freeing beyond end makes no sense */ + end = rip->i_size; + if(end <= start) /* end is uninclusive, so starti_size; + if (start/block_size == (end-1)/block_size && (zero_last || zero_first)) { + zeroblock_range(rip, start, end-start); + } else { + /* First zero unused part of partly used blocks. */ + if (zero_last) + zeroblock_half(rip, start, LAST_HALF); + if (zero_first) + zeroblock_half(rip, end, FIRST_HALF); + + /* Now completely free the completely unused blocks. + * write_map() will free unused indirect + * blocks too. 
Converting the range to block numbers avoids + * overflow on p when doing e.g. 'p += block_size'. + */ + e = end / block_size; + if (end == rip->i_size && (end % block_size)) + e++; + for (p = nextblock(start, block_size)/block_size; p < e; p++) { + if ((r = write_map(rip, p*block_size, NO_BLOCK, WMAP_FREE)) != OK) + return(r); + } + } + + rip->i_update |= CTIME | MTIME; + rip->i_dirt = DIRTY; + + return(OK); +} + + +/*===========================================================================* + * nextblock * + *===========================================================================*/ +PRIVATE off_t nextblock(pos, block_size) +off_t pos; +unsigned short block_size; +{ +/* Return the first position in the next block after position 'pos' + * (unless this is the first position in the current block). + * This can be done in one expression, but that can overflow pos. + */ + off_t p; + p = (pos / block_size) * block_size; + if (pos % block_size) p += block_size; /* Round up. */ + return(p); +} + + +/*===========================================================================* + * zeroblock_half * + *===========================================================================*/ +PRIVATE void zeroblock_half(rip, pos, half) +struct inode *rip; +off_t pos; +int half; +{ +/* Zero the upper or lower 'half' of a block that holds position 'pos'. + * half can be FIRST_HALF or LAST_HALF. + * + * FIRST_HALF: 0..pos-1 will be zeroed + * LAST_HALF: pos..blocksize-1 will be zeroed + */ + off_t offset, len; + + /* Offset of zeroing boundary. 
*/ + offset = pos % rip->i_sp->s_block_size; + + if(half == LAST_HALF) { + len = rip->i_sp->s_block_size - offset; + } else { + len = offset; + pos -= offset; + offset = 0; + } + + zeroblock_range(rip, pos, len); +} + + +/*===========================================================================* + * zeroblock_range * + *===========================================================================*/ +PRIVATE void zeroblock_range(rip, pos, len) +struct inode *rip; +off_t pos; +off_t len; +{ +/* Zero a range in a block. + * This function is used to zero a segment of a block. + * + * Nothing is done when 'len' is zero or when read_map() finds no block at + * 'pos'. The range must fit inside the block holding 'pos'; the function + * panics otherwise. + */ + block_t b; + struct buf *bp; + off_t offset; + + if (!len) return; /* no zeroing to be done. */ + if ( (b = read_map(rip, pos)) == NO_BLOCK) return; + if ( (bp = get_block(rip->i_dev, b, NORMAL)) == NULL) + panic("zeroblock_range: no block"); + offset = pos % rip->i_sp->s_block_size; + if (offset + len > rip->i_sp->s_block_size) + panic("zeroblock_range: len too long: %ld", (long) len); + memset(bp->b_data + offset, 0, len); + bp->b_dirt = DIRTY; + put_block(bp, FULL_DATA_BLOCK); +} diff --git a/servers/ext2/main.c b/servers/ext2/main.c new file mode 100644 index 000000000..863a51d26 --- /dev/null +++ b/servers/ext2/main.c @@ -0,0 +1,236 @@ +#include "fs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "drivers.h" +#include "optset.h" + + +/* Declare some local functions. */ +FORWARD _PROTOTYPE(void get_work, (message *m_in) ); +FORWARD _PROTOTYPE(void cch_check, (void) ); +FORWARD _PROTOTYPE( void reply, (endpoint_t who, message *m_out) ); + +/* SEF functions and variables. 
*/ +FORWARD _PROTOTYPE( void sef_local_startup, (void) ); +FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) ); +FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) ); + +EXTERN int env_argc; +EXTERN char **env_argv; + +PRIVATE struct optset optset_table[] = { + { "sb", OPT_INT, &opt.block_with_super, 0 }, + { "orlov", OPT_BOOL, &opt.use_orlov, TRUE }, + { "oldalloc", OPT_BOOL, &opt.use_orlov, FALSE }, + { "mfsalloc", OPT_BOOL, &opt.mfsalloc, TRUE }, + { "reserved", OPT_BOOL, &opt.use_reserved_blocks, TRUE }, + { "prealloc", OPT_BOOL, &opt.use_prealloc, TRUE }, + { "noprealloc", OPT_BOOL, &opt.use_prealloc, FALSE }, + { NULL } +}; + +/*===========================================================================* + * main * + *===========================================================================*/ +PUBLIC int main(int argc, char *argv[]) +{ +/* This is the main routine of this service. The main loop consists of + * three major activities: getting new work, processing the work, and + * sending the reply. The loop never terminates, unless a panic occurs. + */ + int error, ind; + unsigned short test_endian = 1; + + /* SEF local startup. */ + env_setargs(argc, argv); + sef_local_startup(); + + le_CPU = (*(unsigned char *) &test_endian == 0 ? 0 : 1); + + /* Server isn't tested on big endian CPU */ + ASSERT(le_CPU == 1); + + while(!unmountdone || !exitsignaled) { + endpoint_t src; + + /* Wait for request message. 
*/ + get_work(&fs_m_in); + + src = fs_m_in.m_source; + error = OK; + caller_uid = INVAL_UID; /* To trap errors */ + caller_gid = INVAL_GID; + req_nr = fs_m_in.m_type; + + if (req_nr < VFS_BASE) { + fs_m_in.m_type += VFS_BASE; + req_nr = fs_m_in.m_type; + } + ind = req_nr - VFS_BASE; + + if (ind < 0 || ind >= NREQS) { + printf("mfs: bad request %d\n", req_nr); + printf("ind = %d\n", ind); + error = EINVAL; + } else { + error = (*fs_call_vec[ind])(); + /*cch_check();*/ + } + + fs_m_out.m_type = error; + reply(src, &fs_m_out); + + if (error == OK) + read_ahead(); /* do block read ahead */ + } +} + +/*===========================================================================* + * sef_local_startup * + *===========================================================================*/ +PRIVATE void sef_local_startup() +{ + /* Register init callbacks. */ + sef_setcb_init_fresh(sef_cb_init_fresh); + sef_setcb_init_restart(sef_cb_init_fail); + + /* No live update support for now. */ + + /* Register signal callbacks. */ + sef_setcb_signal_handler(sef_cb_signal_handler); + + /* Let SEF perform startup. */ + sef_startup(); +} + +/*===========================================================================* + * sef_cb_init_fresh * + *===========================================================================*/ +PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info) +{ +/* Initialize the Minix file server. */ + int i, r; + + /* Defaults */ + opt.use_orlov = TRUE; + opt.mfsalloc = FALSE; + opt.use_reserved_blocks = FALSE; + opt.block_with_super = 0; + opt.use_prealloc = FALSE; + + /* If we have been given an options string, parse options from there. 
*/ + for (i = 1; i < env_argc - 1; i++) + if (!strcmp(env_argv[i], "-o")) + optset_parse(optset_table, env_argv[++i]); + + may_use_vmcache = 1; + + /* Init inode table */ + for (i = 0; i < NR_INODES; ++i) { + inode[i].i_count = 0; + cch[i] = 0; + } + + init_inode_cache(); + + /* Init driver mapping */ + for (i = 0; i < NR_DEVICES; ++i) + driver_endpoints[i].driver_e = NONE; + + SELF_E = getprocnr(); + buf_pool(DEFAULT_NR_BUFS); + fs_block_size = _MIN_BLOCK_SIZE; + + fs_m_in.m_type = FS_READY; + + if ((r = send(VFS_PROC_NR, &fs_m_in)) != OK) { + panic("Error sending login to VFS: %d", r); + } + + return(OK); +} + +/*===========================================================================* + * sef_cb_signal_handler * + *===========================================================================*/ +PRIVATE void sef_cb_signal_handler(int signo) +{ + /* Only check for termination signal, ignore anything else. */ + if (signo != SIGTERM) return; + + exitsignaled = 1; + (void) fs_sync(); + + /* If unmounting has already been performed, exit immediately. + * We might not get another message. + */ + if (unmountdone) exit(0); +} + +/*===========================================================================* + * get_work * + *===========================================================================*/ +PRIVATE void get_work(m_in) +message *m_in; /* pointer to message */ +{ + int r, srcok = 0; + endpoint_t src; + + do { + if ((r = sef_receive(ANY, m_in)) != OK) /* wait for message */ + panic("sef_receive failed: %d", r); + src = m_in->m_source; + + if(src == VFS_PROC_NR) { + if(unmountdone) + printf("ext2: unmounted: unexpected message from FS\n"); + else + srcok = 1; /* Normal FS request. 
*/ + + } else + printf("ext2: unexpected source %d\n", src); + } while(!srcok); + + assert((src == VFS_PROC_NR && !unmountdone)); +} + + +/*===========================================================================* + * reply * + *===========================================================================*/ +PRIVATE void reply( + endpoint_t who, + message *m_out /* report result */ +) +{ + if (OK != send(who, m_out)) /* send the message */ + printf("ext2(%d) was unable to send reply\n", SELF_E); +} + + +/*===========================================================================* + * cch_check * + *===========================================================================*/ +PRIVATE void cch_check(void) +{ +/* Debugging aid: compare every inode's current reference count with the + * value recorded in cch[] by the previous call, print a line for each + * difference (unless the current request is one of those excluded in the + * condition below), and record the current counts. + */ + int i; + + for (i = 0; i < NR_INODES; ++i) { + if (inode[i].i_count != cch[i] && req_nr != REQ_GETNODE && + req_nr != REQ_PUTNODE && req_nr != REQ_READSUPER && + req_nr != REQ_MOUNTPOINT && req_nr != REQ_UNMOUNT && + req_nr != REQ_SYNC && req_nr != REQ_LOOKUP) { + printf("ext2(%d) inode(%lu) cc: %d req_nr: %d\n", SELF_E, + (unsigned long) inode[i].i_num, + inode[i].i_count - cch[i], req_nr); + } + cch[i] = inode[i].i_count; + } +} diff --git a/servers/ext2/misc.c b/servers/ext2/misc.c new file mode 100644 index 000000000..1bdba2080 --- /dev/null +++ b/servers/ext2/misc.c @@ -0,0 +1,65 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include "inode.h" +#include "super.h" + +/*===========================================================================* + * fs_sync * + *===========================================================================*/ +PUBLIC int fs_sync() +{ +/* Perform the sync() system call. Flush all the tables. + * The order in which the various tables are flushed is critical. The + * blocks must be flushed last, since rw_inode() leaves its results in + * the block cache. 
+ */ + struct inode *rip; + struct buf *bp; + int r; + + assert(nr_bufs > 0); + assert(buf); + + if (superblock->s_rd_only) + return(OK); /* nothing to sync */ + + /* Write all the dirty inodes to the disk. */ + for(rip = &inode[0]; rip < &inode[NR_INODES]; rip++) + if(rip->i_count > 0 && rip->i_dirt == DIRTY) rw_inode(rip, WRITING); + + /* Write all the dirty blocks to the disk, one drive at a time. */ + for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++) + if(bp->b_dev != NO_DEV && bp->b_dirt == DIRTY) + flushall(bp->b_dev); + + if (superblock->s_dev != NO_DEV) { + superblock->s_wtime = clock_time(); + write_super(superblock); + } + + return(OK); /* sync() can't fail */ +} + + +/*===========================================================================* + * fs_flush * + *===========================================================================*/ +PUBLIC int fs_flush() +{ +/* Flush the blocks of a device from the cache after writing any dirty blocks + * to disk. + */ + dev_t dev = (dev_t) fs_m_in.REQ_DEV; + + if(dev == fs_dev) return(EBUSY); + + flushall(dev); + invalidate(dev); + + return(OK); +} diff --git a/servers/ext2/mount.c b/servers/ext2/mount.c new file mode 100644 index 000000000..08341dce1 --- /dev/null +++ b/servers/ext2/mount.c @@ -0,0 +1,258 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include "drivers.h" +#include +#include + + +/*===========================================================================* + * fs_readsuper * + *===========================================================================*/ +PUBLIC int fs_readsuper() +{ +/* This function reads the superblock of the partition, gets the root inode + * and sends back the details of them. 
Note, that the FS process does not + * know the index of the vmnt object which refers to it, whenever the pathname + * lookup leaves a partition an ELEAVEMOUNT error is transferred back + * so that the VFS knows that it has to find the vnode on which this FS + * process' partition is mounted on. + */ + struct inode *root_ip; + cp_grant_id_t label_gid; + size_t label_len; + int r = OK; + endpoint_t driver_e; + int readonly, isroot; + u32_t mask; + + fs_dev = fs_m_in.REQ_DEV; + label_gid = fs_m_in.REQ_GRANT; + label_len = fs_m_in.REQ_PATH_LEN; + readonly = (fs_m_in.REQ_FLAGS & REQ_RDONLY) ? 1 : 0; + isroot = (fs_m_in.REQ_FLAGS & REQ_ISROOT) ? 1 : 0; + + if (label_len > sizeof(fs_dev_label)) + return(EINVAL); + + r = sys_safecopyfrom(fs_m_in.m_source, label_gid, 0, + (vir_bytes)fs_dev_label, label_len, D); + if (r != OK) { + printf("%s:%d fs_readsuper: safecopyfrom failed: %d\n", + __FILE__, __LINE__, r); + return(EINVAL); + } + + r= ds_retrieve_label_endpt(fs_dev_label, &driver_e); + if (r != OK) + { + printf("ext2:fs_readsuper: ds_retrieve_label_endpt failed for '%s': %d\n", + fs_dev_label, r); + return EINVAL; + } + + /* Map the driver endpoint for this major */ + driver_endpoints[(fs_dev >> MAJOR) & BYTE].driver_e = driver_e; + + /* Open the device the file system lives on. */ + if (dev_open(driver_e, fs_dev, driver_e, + readonly ? R_BIT : (R_BIT|W_BIT)) != OK) { + return(EINVAL); + } + + /* Fill in the super block. */ + STATICINIT(superblock, sizeof(struct super_block)); + if (!superblock) + panic("Can't allocate memory for superblock."); + superblock->s_dev = fs_dev; /* read_super() needs to know which dev */ + r = read_super(superblock); + + /* Is it recognized as a Minix filesystem? 
*/ + if (r != OK) { + superblock->s_dev = NO_DEV; + dev_close(driver_e, fs_dev); + return(r); + } + + if (superblock->s_rev_level != EXT2_GOOD_OLD_REV) { + struct super_block *sp = superblock; /* just shorter name */ + mask = ~SUPPORTED_INCOMPAT_FEATURES; + if (HAS_INCOMPAT_FEATURE(sp, mask)) { + if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_COMPRESSION & mask)) + printf("ext2: fs compression is not supported by server\n"); + if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_FILETYPE & mask)) + printf("ext2: fs in dir filetype is not supported by server\n"); + if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_RECOVER & mask)) + printf("ext2: fs recovery is not supported by server\n"); + if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_JOURNAL_DEV & mask)) + printf("ext2: fs journal dev is not supported by server\n"); + if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_META_BG & mask)) + printf("ext2: fs meta bg is not supported by server\n"); + /* Refusing the mount: invalidate the superblock and close + * the device, like every other failure path in this function. + */ + superblock->s_dev = NO_DEV; + dev_close(driver_e, fs_dev); + return(EINVAL); + } + mask = ~SUPPORTED_RO_COMPAT_FEATURES; + if (HAS_RO_COMPAT_FEATURE(sp, mask)) { + if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_SPARSE_SUPER & mask)) { + printf("ext2: sparse super is not supported by server, \ + remount read-only\n"); + } + if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_LARGE_FILE & mask)) { + printf("ext2: large files are not supported by server, \ + remount read-only\n"); + } + if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_BTREE_DIR & mask)) { + printf("ext2: dir's btree is not supported by server, \ + remount read-only\n"); + } + /* Same cleanup before refusing the mount. */ + superblock->s_dev = NO_DEV; + dev_close(driver_e, fs_dev); + return(EINVAL); + } + } + + if (superblock->s_state == EXT2_ERROR_FS) { + printf("ext2: filesystem wasn't cleanly unmounted previous time\n"); + superblock->s_dev = NO_DEV; + dev_close(driver_e, fs_dev); + return(EINVAL); + } + + + set_blocksize(superblock->s_block_size); + + /* Get the root inode of the mounted file system. 
*/ + if ( (root_ip = get_inode(fs_dev, ROOT_INODE)) == NULL) { + printf("ext2: couldn't get root inode\n"); + superblock->s_dev = NO_DEV; + dev_close(driver_e, fs_dev); + return(EINVAL); + } + + if (root_ip != NULL && root_ip->i_mode == 0) { + printf("%s:%d zero mode for root inode?\n", __FILE__, __LINE__); + put_inode(root_ip); + superblock->s_dev = NO_DEV; + dev_close(driver_e, fs_dev); + return(EINVAL); + } + + if (root_ip != NULL && (root_ip->i_mode & I_TYPE) != I_DIRECTORY) { + printf("%s:%d root inode has wrong type, it's not a DIR\n", + __FILE__, __LINE__); + put_inode(root_ip); + superblock->s_dev = NO_DEV; + dev_close(driver_e, fs_dev); + return(EINVAL); + } + + superblock->s_rd_only = readonly; + superblock->s_is_root = isroot; + + if (!readonly) { + superblock->s_state = EXT2_ERROR_FS; + superblock->s_mnt_count++; + superblock->s_mtime = clock_time(); + write_super(superblock); /* Commit info, we just set above */ + } + + /* Root inode properties */ + fs_m_out.RES_INODE_NR = root_ip->i_num; + fs_m_out.RES_MODE = root_ip->i_mode; + fs_m_out.RES_FILE_SIZE_LO = root_ip->i_size; + fs_m_out.RES_UID = root_ip->i_uid; + fs_m_out.RES_GID = root_ip->i_gid; + + return(r); +} + + +/*===========================================================================* + * fs_mountpoint * + *===========================================================================*/ +PUBLIC int fs_mountpoint() +{ +/* This function looks up the mount point, it checks the condition whether + * the partition can be mounted on the inode or not. + */ + register struct inode *rip; + int r = OK; + mode_t bits; + + /* Temporarily open the file. */ + if( (rip = get_inode(fs_dev, fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + + if(rip->i_mountpoint) r = EBUSY; + + /* It may not be special. 
*/ + bits = rip->i_mode & I_TYPE; + if (bits == I_BLOCK_SPECIAL || bits == I_CHAR_SPECIAL) r = ENOTDIR; + + /* Set the flag while our reference to rip is still held; rip must not + * be used after put_inode(). + */ + if(r == OK) rip->i_mountpoint = TRUE; + + put_inode(rip); + + return(r); +} + + +/*===========================================================================* + * fs_unmount * + *===========================================================================*/ +PUBLIC int fs_unmount() +{ +/* Unmount a file system by device number. */ + int count; + struct inode *rip, *root_ip; + + if(superblock->s_dev != fs_dev) return(EINVAL); + + /* See if the mounted device is busy. Only 1 inode using it should be + * open --the root inode-- and that inode only 1 time. */ + count = 0; + for (rip = &inode[0]; rip < &inode[NR_INODES]; rip++) + if (rip->i_count > 0 && rip->i_dev == fs_dev) count += rip->i_count; + + if ((root_ip = find_inode(fs_dev, ROOT_INODE)) == NULL) { + printf("ext2: couldn't find root inode. Unmount failed.\n"); + panic("ext2: couldn't find root inode"); + return(EINVAL); + } + + /* Sync fs data before checking count. In some cases VFS can force unmounting + * and it will damage unsynced FS. We don't sync before checking root_ip since + * if it is missing then something strange happened with FS, so it's better + * to not use possibly corrupted data for syncing. + */ + if (!superblock->s_rd_only) { + /* force any cached blocks out of memory */ + (void) fs_sync(); + } + + if (count > 1) return(EBUSY); /* can't umount a busy file system */ + + put_inode(root_ip); + + if (!superblock->s_rd_only) { + superblock->s_wtime = clock_time(); + superblock->s_state = EXT2_VALID_FS; + write_super(superblock); /* Commit info, we just set above */ + } + + /* Close the device the file system lives on. */ + dev_close(driver_endpoints[(fs_dev >> MAJOR) & BYTE].driver_e, fs_dev); + + /* Finish off the unmount. 
*/ + superblock->s_dev = NO_DEV; + unmountdone = TRUE; + + return(OK); +} diff --git a/servers/ext2/open.c b/servers/ext2/open.c new file mode 100644 index 000000000..0a6b2f27e --- /dev/null +++ b/servers/ext2/open.c @@ -0,0 +1,355 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include + +FORWARD _PROTOTYPE( struct inode *new_node, (struct inode *ldirp, + char *string, mode_t bits, block_t z0)); + + +/*===========================================================================* + * fs_create * + *===========================================================================*/ +PUBLIC int fs_create() +{ + phys_bytes len; + int r; + struct inode *ldirp; + struct inode *rip; + mode_t omode; + char lastc[NAME_MAX + 1]; + + /* Read request message */ + omode = (mode_t) fs_m_in.REQ_MODE; + caller_uid = (uid_t) fs_m_in.REQ_UID; + caller_gid = (gid_t) fs_m_in.REQ_GID; + + /* Try to make the file. */ + + /* Copy the last component (i.e., file name) */ + len = fs_m_in.REQ_PATH_LEN; /* including trailing '\0' */ + if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1) + return(ENAMETOOLONG); + + err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, + (vir_bytes) 0, (vir_bytes) lastc, (size_t) len, D); + if (err_code != OK) return err_code; + NUL(lastc, len, sizeof(lastc)); + + /* Get last directory inode (i.e., directory that will hold the new inode) */ + if ((ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(ENOENT); + + /* Create a new inode by calling new_node(). */ + rip = new_node(ldirp, lastc, omode, NO_BLOCK); + r = err_code; + + /* If an error occurred, release inode. 
*/ + if (r != OK) { + put_inode(ldirp); + put_inode(rip); + return(r); + } + + /* Reply message */ + fs_m_out.RES_INODE_NR = rip->i_num; + fs_m_out.RES_MODE = rip->i_mode; + fs_m_out.RES_FILE_SIZE_LO = rip->i_size; + + /* This values are needed for the execution */ + fs_m_out.RES_UID = rip->i_uid; + fs_m_out.RES_GID = rip->i_gid; + + /* Drop parent dir */ + put_inode(ldirp); + + return(OK); +} + + +/*===========================================================================* + * fs_mknod * + *===========================================================================*/ +PUBLIC int fs_mknod() +{ + struct inode *ip, *ldirp; + char lastc[NAME_MAX + 1]; + phys_bytes len; + + /* Copy the last component and set up caller's user and group id */ + len = fs_m_in.REQ_PATH_LEN; /* including trailing '\0' */ + if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1) + return(ENAMETOOLONG); + + err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, + (vir_bytes) 0, (vir_bytes) lastc, (size_t) len, D); + if (err_code != OK) return err_code; + NUL(lastc, len, sizeof(lastc)); + + caller_uid = (uid_t) fs_m_in.REQ_UID; + caller_gid = (gid_t) fs_m_in.REQ_GID; + + /* Get last directory inode */ + if((ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(ENOENT); + + /* Try to create the new node */ + ip = new_node(ldirp, lastc, (mode_t) fs_m_in.REQ_MODE, + (block_t) fs_m_in.REQ_DEV); + + put_inode(ip); + put_inode(ldirp); + return(err_code); +} + + +/*===========================================================================* + * fs_mkdir * + *===========================================================================*/ +PUBLIC int fs_mkdir() +{ + int r1, r2; /* status codes */ + ino_t dot, dotdot; /* inode numbers for . and .. 
*/ + struct inode *rip, *ldirp; + char lastc[NAME_MAX + 1]; /* last component */ + phys_bytes len; + + /* Copy the last component and set up caller's user and group id */ + len = fs_m_in.REQ_PATH_LEN; /* including trailing '\0' */ + if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1) + return(ENAMETOOLONG); + + err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, + (vir_bytes) 0, (vir_bytes) lastc, (phys_bytes) len, D); + if(err_code != OK) return(err_code); + NUL(lastc, len, sizeof(lastc)); + + caller_uid = (uid_t) fs_m_in.REQ_UID; + caller_gid = (gid_t) fs_m_in.REQ_GID; + + /* Get last directory inode */ + if((ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(ENOENT); + + /* Next make the inode. If that fails, return error code. */ + rip = new_node(ldirp, lastc, (ino_t) fs_m_in.REQ_MODE, (block_t) 0); + + if(rip == NULL || err_code == EEXIST) { + put_inode(rip); /* can't make dir: it already exists */ + put_inode(ldirp); + return(err_code); + } + + /* Get the inode numbers for . and .. to enter in the directory. */ + dotdot = ldirp->i_num; /* parent's inode number */ + dot = rip->i_num; /* inode number of the new dir itself */ + + /* Now make dir entries for . and .. unless the disk is completely full. */ + /* Use dot1 and dot2, so the mode of the directory isn't important. */ + rip->i_mode = (mode_t) fs_m_in.REQ_MODE; /* set mode */ + /* enter . in the new dir*/ + r1 = search_dir(rip, dot1, &dot, ENTER, IGN_PERM, I_DIRECTORY); + /* enter .. in the new dir */ + r2 = search_dir(rip, dot2, &dotdot, ENTER, IGN_PERM, I_DIRECTORY); + + /* If both . and .. were successfully entered, increment the link counts. */ + if (r1 == OK && r2 == OK) { + /* Normal case. It was possible to enter . and .. in the new dir. */ + rip->i_links_count++; /* this accounts for . */ + ldirp->i_links_count++; /* this accounts for .. */ + ldirp->i_dirt = DIRTY; /* mark parent's inode as dirty */ + } else { + /* It was not possible to enter . or .. 
probably disk was full - + * links counts haven't been touched. */ + if (search_dir(ldirp, lastc, NULL, DELETE, IGN_PERM, 0) != OK) + panic("Dir disappeared ", rip->i_num); + rip->i_links_count--; /* undo the increment done in new_node() */ + } + rip->i_dirt = DIRTY; /* either way, i_links_count has changed */ + + put_inode(ldirp); /* return the inode of the parent dir */ + put_inode(rip); /* return the inode of the newly made dir */ + return(err_code); /* new_node() always sets 'err_code' */ +} + + +/*===========================================================================* + * fs_slink * + *===========================================================================*/ +PUBLIC int fs_slink() +{ + phys_bytes len; + struct inode *sip; /* inode containing symbolic link */ + struct inode *ldirp; /* directory containing link */ + register int r; /* error code */ + char string[NAME_MAX]; /* last component of the new dir's path name */ + char* link_target_buf; /* either sip->i_block or bp->b_data */ + struct buf *bp = NULL; /* disk buffer for link */ + + caller_uid = (uid_t) fs_m_in.REQ_UID; + caller_gid = (gid_t) fs_m_in.REQ_GID; + + /* Copy the link name's last component */ + len = fs_m_in.REQ_PATH_LEN; + if (len > NAME_MAX || len > EXT2_NAME_MAX) + return(ENAMETOOLONG); + + r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, + (vir_bytes) 0, (vir_bytes) string, (size_t) len, D); + if (r != OK) return(r); + NUL(string, len, sizeof(string)); + + /* Temporarily open the dir. */ + if( (ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + /* Create the inode for the symlink. */ + sip = new_node(ldirp, string, (mode_t) (I_SYMBOLIC_LINK | RWX_MODES), + (block_t) 0); + + /* If we can then create fast symlink (store it in inode), + * Otherwise allocate a disk block for the contents of the symlink and + * copy contents of symlink (the name pointed to) into first disk block. 
*/ + if( (r = err_code) == OK) { + if ( (fs_m_in.REQ_MEM_SIZE + 1) > sip->i_sp->s_block_size) { + r = ENAMETOOLONG; + } else if ((fs_m_in.REQ_MEM_SIZE + 1) <= MAX_FAST_SYMLINK_LENGTH) { + r = sys_safecopyfrom(VFS_PROC_NR, + (cp_grant_id_t) fs_m_in.REQ_GRANT3, + (vir_bytes) 0, (vir_bytes) sip->i_block, + (vir_bytes) fs_m_in.REQ_MEM_SIZE, D); + sip->i_dirt = DIRTY; + link_target_buf = (char*) sip->i_block; + } else { + r = (bp = new_block(sip, (off_t) 0)) == NULL ? err_code : + sys_safecopyfrom(VFS_PROC_NR, + (cp_grant_id_t) fs_m_in.REQ_GRANT3, + (vir_bytes) 0, (vir_bytes) bp->b_data, + (vir_bytes) fs_m_in.REQ_MEM_SIZE, D); + bp->b_dirt = DIRTY; + link_target_buf = bp->b_data; + } + if (r == OK) { + link_target_buf[fs_m_in.REQ_MEM_SIZE] = '\0'; + sip->i_size = (off_t) strlen(link_target_buf); + if (sip->i_size != fs_m_in.REQ_MEM_SIZE) { + /* This can happen if the user provides a buffer + * with a \0 in it. This can cause a lot of trouble + * when the symlink is used later. We could just use + * the strlen() value, but we want to let the user + * know he did something wrong. ENAMETOOLONG doesn't + * exactly describe the error, but there is no + * ENAMETOOWRONG. + */ + r = ENAMETOOLONG; + } + } + + put_block(bp, DIRECTORY_BLOCK); /* put_block() accepts NULL. */ + + if(r != OK) { + sip->i_links_count = NO_LINK; + if (search_dir(ldirp, string, NULL, DELETE, IGN_PERM, 0) != OK) + panic("Symbolic link vanished"); + } + } + + /* put_inode() accepts NULL as a noop, so the below are safe. */ + put_inode(sip); + put_inode(ldirp); + + return(r); +} + +/*===========================================================================* + * new_node * + *===========================================================================*/ +PRIVATE struct inode *new_node(struct inode *ldirp, + char *string, mode_t bits, block_t b0) +{ +/* New_node() is called by fs_open(), fs_mknod(), and fs_mkdir(). 
+ * In all cases it allocates a new inode, makes a directory entry for it in + * the ldirp directory with string name, and initializes it. + * It returns a pointer to the inode if it can do this; + * otherwise it returns NULL. It always sets 'err_code' + * to an appropriate value (OK or an error code). + */ + + register struct inode *rip; + register int r; + + /* Get final component of the path. */ + rip = advance(ldirp, string, IGN_PERM); + + if (S_ISDIR(bits) && (ldirp->i_links_count >= USHRT_MAX || + ldirp->i_links_count >= LINK_MAX)) { + /* New entry is a directory, alas we can't give it a ".." */ + put_inode(rip); + err_code = EMLINK; + return(NULL); + } + + if ( rip == NULL && err_code == ENOENT) { + /* Last path component does not exist. Make new directory entry. */ + if ( (rip = alloc_inode(ldirp, bits)) == NULL) { + /* Can't creat new inode: out of inodes. */ + return(NULL); + } + + /* Force inode to the disk before making directory entry to make + * the system more robust in the face of a crash: an inode with + * no directory entry is much better than the opposite. + */ + rip->i_links_count++; + rip->i_block[0] = b0; /* major/minor device numbers */ + rw_inode(rip, WRITING); /* force inode to disk now */ + + /* New inode acquired. Try to make directory entry. */ + if ((r=search_dir(ldirp, string, &rip->i_num, ENTER, IGN_PERM, + rip->i_mode & I_TYPE)) != OK) { + rip->i_links_count--; /* pity, have to free disk inode */ + rip->i_dirt = DIRTY; /* dirty inodes are written out */ + put_inode(rip); /* this call frees the inode */ + err_code = r; + return(NULL); + } + + } else if (err_code == EENTERMOUNT || err_code == ELEAVEMOUNT) { + r = EEXIST; + } else { + /* Either last component exists, or there is some problem. */ + if (rip != NULL) + r = EEXIST; + else + r = err_code; + } + + /* The caller has to return the directory inode (*ldirp). 
*/ + err_code = r; + return(rip); +} + + +/*===========================================================================* + * fs_inhibread * + *===========================================================================*/ +PUBLIC int fs_inhibread() +{ + struct inode *rip; + + if((rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + /* inhibit read ahead */ + rip->i_seek = ISEEK; + + return(OK); +} diff --git a/servers/ext2/optset.c b/servers/ext2/optset.c new file mode 100644 index 000000000..a338a4717 --- /dev/null +++ b/servers/ext2/optset.c @@ -0,0 +1,128 @@ +/* This file provides functionality to parse strings of comma-separated + * options, each being either a single key name or a key=value pair, where the + * value may be enclosed in quotes. A table of optset entries is provided to + * determine which options are recognized, how to parse their values, and where + * to store those. Unrecognized options are silently ignored; improperly + * formatted options are silently set to reasonably acceptable values. + * + * The entry points into this file are: + * optset_parse parse the given options string using the given table + * + * Created: + * May 2009 (D.C. van Moolenbroek) + */ + +#define _MINIX 1 +#include +#include +#include +#include + +#include "optset.h" + +FORWARD _PROTOTYPE( void optset_parse_entry, (struct optset *entry, + char *ptr, int len) ); + +/*===========================================================================* + * optset_parse_entry * + *===========================================================================*/ +PRIVATE void optset_parse_entry(entry, ptr, len) +struct optset *entry; +char *ptr; +int len; +{ +/* Parse and store the value of a single option. 
+ */ + char *dst; + int val; + + switch (entry->os_type) { + case OPT_BOOL: + *((int *) entry->os_ptr) = entry->os_val; + + break; + + case OPT_STRING: + if (len >= entry->os_val) + len = entry->os_val - 1; + + dst = (char *) entry->os_ptr; + + if (len > 0) + memcpy(dst, ptr, len); + dst[len] = 0; + + break; + + case OPT_INT: + if (len > 0) + val = strtol(ptr, NULL, entry->os_val); + else + val = 0; + + *((int *) entry->os_ptr) = val; + + break; + } +} + +/*===========================================================================* + * optset_parse * + *===========================================================================*/ +PUBLIC void optset_parse(table, string) +struct optset *table; +char *string; +{ +/* Parse a string of options, using the provided table of optset entries. + */ + char *p, *kptr, *vptr; + int i, klen, vlen; + + for (p = string; *p; ) { + /* Get the key name for the field. */ + for (kptr = p, klen = 0; *p && *p != '=' && *p != ','; p++, klen++); + + if (*p == '=') { + /* The field has an associated value. */ + vptr = ++p; + + /* If the first character after the '=' is a quote character, + * find a matching quote character followed by either a comma + * or the terminating null character, and use the string in + * between. Otherwise, use the string up to the next comma or + * the terminating null character. + */ + if (*p == '\'' || *p == '"') { + p++; + + for (vlen = 0; *p && (*p != *vptr || + (p[1] && p[1] != ',')); p++, vlen++); + + if (*p) p++; + vptr++; + } + else + for (vlen = 0; *p && *p != ','; p++, vlen++); + } + else { + vptr = NULL; + vlen = 0; + } + + if (*p == ',') p++; + + /* Find a matching entry for this key in the given table. If found, + * call optset_parse_entry() on it. Silently ignore the option + * otherwise. 
+ */ + for (i = 0; table[i].os_name != NULL; i++) { + if (strlen(table[i].os_name) == klen && + !strncasecmp(table[i].os_name, kptr, klen)) { + + optset_parse_entry(&table[i], vptr, vlen); + + break; + } + } + } +} diff --git a/servers/ext2/optset.h b/servers/ext2/optset.h new file mode 100644 index 000000000..87ea4ce9f --- /dev/null +++ b/servers/ext2/optset.h @@ -0,0 +1,30 @@ +#ifndef _OPTSET_H +#define _OPTSET_H + +enum { + OPT_BOOL, + OPT_STRING, + OPT_INT +}; + +/* An entry for the parser of an options set. The 'os_name' field must point + * to a string, which is treated case-insensitively; the last entry of a table + * must have NULL name. The 'os_type' field must be set to one of the OPT_ + * values defined above. The 'os_ptr' field must point to the field that is to + * receive the value of a recognized option. For OPT_STRING, it must point to a + * string of a size set in 'os_val'; the resulting string may be truncated, but + * will always be null-terminated. For OPT_BOOL, it must point to an int which + * will be set to the value in 'os_val' if the option is present. For OPT_INT, + * it must point to an int which will be set to the provided option value; + * 'os_val' is then a base passed to strtol(). + */ +struct optset { + char *os_name; + int os_type; + void *os_ptr; + int os_val; +}; + +_PROTOTYPE( void optset_parse, (struct optset *table, char *string) ); + +#endif /* _OPTSET_H */ diff --git a/servers/ext2/path.c b/servers/ext2/path.c new file mode 100644 index 000000000..a0b6e688d --- /dev/null +++ b/servers/ext2/path.c @@ -0,0 +1,729 @@ +/* This file contains the procedures that look up path names in the directory + * system and determine the inode number that goes with a given path name. 
+ * + * The entry points into this file are + * eat_path: the 'main' routine of the path-to-inode conversion mechanism + * last_dir: find the final directory on a given path + * advance: parse one component of a path name + * search_dir: search a directory for a string and return its inode number + * + * Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include + +PUBLIC char dot1[2] = "."; /* used for search_dir to bypass the access */ +PUBLIC char dot2[3] = ".."; /* permissions for . and .. */ + +FORWARD _PROTOTYPE( char *get_name, (char *name, char string[NAME_MAX+1]) ); +FORWARD _PROTOTYPE( int ltraverse, (struct inode *rip, char *suffix) ); +FORWARD _PROTOTYPE( int parse_path, (ino_t dir_ino, ino_t root_ino, + int flags, struct inode **res_inop, + size_t *offsetp, int *symlinkp) ); + +/*===========================================================================* + * fs_lookup * + *===========================================================================*/ +PUBLIC int fs_lookup() +{ + cp_grant_id_t grant, grant2; + int r, r1, flags, symlinks; + unsigned int len; + size_t offset = 0, path_size, cred_size; + ino_t dir_ino, root_ino; + struct inode *rip; + + grant = (cp_grant_id_t) fs_m_in.REQ_GRANT; + path_size = (size_t) fs_m_in.REQ_PATH_SIZE; /* Size of the buffer */ + len = (int) fs_m_in.REQ_PATH_LEN; /* including terminating nul */ + dir_ino = (ino_t) fs_m_in.REQ_DIR_INO; + root_ino = (ino_t) fs_m_in.REQ_ROOT_INO; + flags = (int) fs_m_in.REQ_FLAGS; + + /* Check length. */ + if(len > sizeof(user_path)) return(E2BIG); /* too big for buffer */ + if(len == 0) return(EINVAL); /* too small */ + + /* Copy the pathname and set up caller's user and group id */ + r = sys_safecopyfrom(VFS_PROC_NR, grant, /*offset*/ 0, + (vir_bytes) user_path, (size_t) len, D); + if(r != OK) return(r); + + /* Verify this is a null-terminated path. 
*/ + if(user_path[len - 1] != '\0') return(EINVAL); + + if(flags & PATH_GET_UCRED) { /* Do we have to copy uid/gid credentials? */ + grant2 = (cp_grant_id_t) fs_m_in.REQ_GRANT2; + cred_size = (size_t) fs_m_in.REQ_UCRED_SIZE; + + if (cred_size > sizeof(credentials)) return(EINVAL); /* Too big. */ + r = sys_safecopyfrom(VFS_PROC_NR, grant2, (vir_bytes) 0, + (vir_bytes) &credentials, cred_size, D); + if (r != OK) return(r); + + caller_uid = (uid_t) credentials.vu_uid; + caller_gid = (gid_t) credentials.vu_gid; + } else { + memset(&credentials, 0, sizeof(credentials)); + caller_uid = fs_m_in.REQ_UID; + caller_gid = fs_m_in.REQ_GID; + } + + /* Lookup inode */ + rip = NULL; + r = parse_path(dir_ino, root_ino, flags, &rip, &offset, &symlinks); + + if(symlinks != 0 && (r == ELEAVEMOUNT || r == EENTERMOUNT || r == ESYMLINK)){ + len = strlen(user_path)+1; + if(len > path_size) return(ENAMETOOLONG); + + r1 = sys_safecopyto(VFS_PROC_NR, grant, (vir_bytes) 0, + (vir_bytes) user_path, (size_t) len, D); + if (r1 != OK) return(r1); + } + + if(r == ELEAVEMOUNT || r == ESYMLINK) { + /* Report offset and the error */ + fs_m_out.RES_OFFSET = offset; + fs_m_out.RES_SYMLOOP = symlinks; + + return(r); + } + + if (r != OK && r != EENTERMOUNT) return(r); + + fs_m_out.RES_INODE_NR = rip->i_num; + fs_m_out.RES_MODE = rip->i_mode; + fs_m_out.RES_FILE_SIZE_LO = rip->i_size; + fs_m_out.RES_SYMLOOP = symlinks; + fs_m_out.RES_UID = rip->i_uid; + fs_m_out.RES_GID = rip->i_gid; + + /* This is only valid for block and character specials. But it doesn't + * cause any harm to set RES_DEV always. 
*/ + fs_m_out.RES_DEV = (dev_t) rip->i_block[0]; + + if(r == EENTERMOUNT) { + fs_m_out.RES_OFFSET = offset; + put_inode(rip); /* Only return a reference to the final object */ + } + + return(r); +} + + +/*===========================================================================* + * parse_path * + *===========================================================================*/ +PRIVATE int parse_path(dir_ino, root_ino, flags, res_inop, offsetp, symlinkp) +ino_t dir_ino; +ino_t root_ino; +int flags; +struct inode **res_inop; +size_t *offsetp; +int *symlinkp; +{ + /* Parse the path in user_path, starting at dir_ino. If the path is the empty + * string, just return dir_ino. It is upto the caller to treat an empty + * path in a special way. Otherwise, if the path consists of just one or + * more slash ('/') characters, the path is replaced with ".". Otherwise, + * just look up the first (or only) component in path after skipping any + * leading slashes. + */ + int r, leaving_mount; + struct inode *rip, *dir_ip; + char *cp, *next_cp; /* component and next component */ + char component[NAME_MAX+1]; + + /* Start parsing path at the first component in user_path */ + cp = user_path; + + /* No symlinks encountered yet */ + *symlinkp = 0; + + /* Find starting inode inode according to the request message */ + if((rip = find_inode(fs_dev, dir_ino)) == NULL) + return(ENOENT); + + /* If dir has been removed return ENOENT. */ + if (rip->i_links_count == NO_LINK) return(ENOENT); + + dup_inode(rip); + + /* If the given start inode is a mountpoint, we must be here because the file + * system mounted on top returned an ELEAVEMOUNT error. In this case, we must + * only accept ".." as the first path component. + */ + leaving_mount = rip->i_mountpoint; /* True iff rip is a mountpoint */ + + /* Scan the path component by component. 
*/ + while (TRUE) { + if(cp[0] == '\0') { + /* We're done; either the path was empty or we've parsed all + components of the path */ + + *res_inop = rip; + *offsetp += cp - user_path; + + /* Return EENTERMOUNT if we are at a mount point */ + if (rip->i_mountpoint) return(EENTERMOUNT); + + return(OK); + } + + while(cp[0] == '/') cp++; + next_cp = get_name(cp, component); + if (next_cp == NULL) { + put_inode(rip); + return(err_code); + } + + /* Special code for '..'. A process is not allowed to leave a chrooted + * environment. A lookup of '..' at the root of a mounted filesystem + * has to return ELEAVEMOUNT. In both cases, the caller needs search + * permission for the current inode, as it is used as directory. + */ + if(strcmp(component, "..") == 0) { + /* 'rip' is now accessed as directory */ + if ((r = forbidden(rip, X_BIT)) != OK) { + put_inode(rip); + return(r); + } + + if (rip->i_num == root_ino) { + cp = next_cp; + continue; /* Ignore the '..' at a process' root + and move on to the next component */ + } + + if (rip->i_num == ROOT_INODE && !rip->i_sp->s_is_root) { + /* Climbing up to parent FS */ + + put_inode(rip); + *offsetp += cp - user_path; + return(ELEAVEMOUNT); + } + } + + /* Only check for a mount point if we are not coming from one. */ + if (!leaving_mount && rip->i_mountpoint) { + /* Going to enter a child FS */ + + *res_inop = rip; + *offsetp += cp - user_path; + return(EENTERMOUNT); + } + + /* There is more path. Keep parsing. + * If we're leaving a mountpoint, skip directory permission checks. + */ + dir_ip = rip; + rip = advance(dir_ip, leaving_mount ? dot2 : component, CHK_PERM); + if(err_code == ELEAVEMOUNT || err_code == EENTERMOUNT) + err_code = OK; + + if (err_code != OK) { + put_inode(dir_ip); + return(err_code); + } + + leaving_mount = 0; + + /* The call to advance() succeeded. Fetch next component. 
*/ + if (S_ISLNK(rip->i_mode)) { + + if (next_cp[0] == '\0' && (flags & PATH_RET_SYMLINK)) { + put_inode(dir_ip); + *res_inop = rip; + *offsetp += next_cp - user_path; + return(OK); + } + + /* Extract path name from the symlink file */ + r = ltraverse(rip, next_cp); + next_cp = user_path; + *offsetp = 0; + + /* Symloop limit reached? */ + if (++(*symlinkp) > SYMLOOP_MAX) + r = ELOOP; + + if (r != OK) { + put_inode(dir_ip); + put_inode(rip); + return(r); + } + + if (next_cp[0] == '/') { + put_inode(dir_ip); + put_inode(rip); + return(ESYMLINK); + } + + put_inode(rip); + dup_inode(dir_ip); + rip = dir_ip; + } + + put_inode(dir_ip); + cp = next_cp; /* Process subsequent component in next round */ + } + +} + + +/*===========================================================================* + * ltraverse * + *===========================================================================*/ +PRIVATE int ltraverse(rip, suffix) +register struct inode *rip; /* symbolic link */ +char *suffix; /* current remaining path. Has to point in the + * user_path buffer + */ +{ +/* Traverse a symbolic link. Copy the link text from the inode and insert + * the text into the path. Return error code or report success. Base + * directory has to be determined according to the first character of the + * new pathname. 
+ */
+
+  block_t blink;          /* block containing link text */
+  size_t llen;            /* length of link */
+  size_t slen;            /* length of suffix */
+  struct buf *bp = NULL;  /* buffer containing link text; NULL for fast links */
+  const char *sp;         /* start of link text */
+  int r = OK;             /* result reported to the caller */
+
+  llen = (size_t) rip->i_size;
+
+  /* This test has to mirror the one applied when a symlink is created: the
+   * target is kept in the inode only if it fits there together with its
+   * terminating nul, i.e. if llen + 1 <= MAX_FAST_SYMLINK_LENGTH. A target of
+   * exactly MAX_FAST_SYMLINK_LENGTH bytes therefore lives in a data block.
+   */
+  if (llen >= MAX_FAST_SYMLINK_LENGTH) {
+        /* normal symlink */
+        if ((blink = read_map(rip, (off_t) 0)) == NO_BLOCK)
+                return(EIO);
+        bp = get_block(rip->i_dev, blink, NORMAL);
+        sp = bp->b_data;
+  } else {
+        /* fast symlink, stored in inode */
+        sp = (const char*) rip->i_block;
+  }
+
+  slen = strlen(suffix);
+
+  /* The path we're parsing looks like this:
+   * /already/processed/path/<link> or
+   * /already/processed/path/<link>/not/yet/processed/path
+   * After expanding the <link>, the path will look like
+   * <expandedlink> or
+   * <expandedlink>/not/yet/processed
+   * In both cases user_path must have enough room to hold <expandedlink>.
+   * However, in the latter case we have to move /not/yet/processed to the
+   * right place first, before we expand <link>. When strlen(<expandedlink>)
+   * is smaller than strlen(/already/processed/path/<link>), we move the
+   * suffix to the left. If strlen(<expandedlink>) is greater, we move it to
+   * the right. Otherwise we do nothing.
+   */
+
+  if (slen > 0) { /* Do we have path after the link? */
+        /* For simplicity we require that suffix starts with a slash */
+        if (suffix[0] != '/') {
+                panic("ltraverse: suffix does not start with a slash");
+        }
+
+        /* To be able to expand the <link>, we have to move the 'suffix'
+         * to the right place.
+         */
+        if (slen + llen + 1 > sizeof(user_path)) {
+                r = ENAMETOOLONG; /* <expandedlink>+suffix+\0 does not fit */
+        } else if ((unsigned)(suffix - user_path) != llen) {
+                /* Move suffix left or right if needed */
+                memmove(&user_path[llen], suffix, slen+1);
+        }
+  } else {
+        if (llen + 1 > sizeof(user_path)) {
+                r = ENAMETOOLONG; /* <expandedlink> + \0 does not fit */
+        } else {
+                /* Set terminating nul */
+                user_path[llen]= '\0';
+        }
+  }
+
+  /* Everything is set, now copy the expanded link to user_path */
+  if (r == OK)
+        memmove(user_path, sp, llen);
+
+  /* Release the link's data block on every path, the error paths included;
+   * returning early without this would leave the block referenced.
+   */
+  if (bp != NULL)
+        put_block(bp, DIRECTORY_BLOCK);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *                              advance                                      *
+ *===========================================================================*/
+PUBLIC struct inode *advance(dirp, string, chk_perm)
+struct inode *dirp;             /* inode for directory to be searched */
+char string[NAME_MAX + 1];      /* component name to look for */
+int chk_perm;                   /* check permissions when string is looked up*/
+{
+/* Given a directory and a component of a path, look up the component in
+ * the directory, find the inode, open it, and return a pointer to its inode
+ * slot.
+ */
+  ino_t numb;
+  struct inode *rip;
+
+  /* If 'string' is empty, return an error. */
+  if (string[0] == '\0') {
+        err_code = ENOENT;
+        return(NULL);
+  }
+
+  /* Check for NULL. */
+  if (dirp == NULL) return(NULL);
+
+  /* If 'string' is not present in the directory, signal error. */
+  if ( (err_code = search_dir(dirp, string, &numb, LOOK_UP,
+                              chk_perm, 0)) != OK) {
+        return(NULL);
+  }
+
+  /* The component has been found in the directory. Get inode. */
+  if ( (rip = get_inode(dirp->i_dev, (int) numb)) == NULL) {
+        return(NULL);
+  }
+
+  /* The following test is for "mountpoint/.." where mountpoint is a
+   * mountpoint. ".." will refer to the root of the mounted filesystem,
+   * but has to become a reference to the parent of the 'mountpoint'
+   * directory.
+ * + * This case is recognized by the looked up name pointing to a + * root inode, and the directory in which it is held being a + * root inode, _and_ the name[1] being '.'. (This is a test for '..' + * and excludes '.'.) + */ + if (rip->i_num == ROOT_INODE) { + if (dirp->i_num == ROOT_INODE) { + if (string[1] == '.') { + if (!rip->i_sp->s_is_root) { + /* Climbing up mountpoint */ + err_code = ELEAVEMOUNT; + } + } + } + } + + /* See if the inode is mounted on. If so, switch to root directory of the + * mounted file system. The super_block provides the linkage between the + * inode mounted on and the root directory of the mounted file system. + */ + if (rip->i_mountpoint) { + /* Mountpoint encountered, report it */ + err_code = EENTERMOUNT; + } + + return(rip); +} + + +/*===========================================================================* + * get_name * + *===========================================================================*/ +PRIVATE char *get_name(path_name, string) +char *path_name; /* path name to parse */ +char string[NAME_MAX+1]; /* component extracted from 'old_name' */ +{ +/* Given a pointer to a path name in fs space, 'path_name', copy the first + * component to 'string' (truncated if necessary, always nul terminated). + * A pointer to the string after the first component of the name as yet + * unparsed is returned. Roughly speaking, + * 'get_name' = 'path_name' - 'string'. + * + * This routine follows the standard convention that /usr/ast, /usr//ast, + * //usr///ast and /usr/ast/ are all equivalent. + * + * If len of component is greater, than allowed, then return 0. 
+ */ + size_t len; + char *cp, *ep; + + cp = path_name; + + /* Skip leading slashes */ + while (cp[0] == '/') cp++; + + /* Find the end of the first component */ + ep = cp; + while(ep[0] != '\0' && ep[0] != '/') + ep++; + + len = (size_t) (ep - cp); + + if (len > NAME_MAX || len > EXT2_NAME_MAX) { + err_code = ENAMETOOLONG; + return(NULL); + } + + /* Special case of the string at cp is empty */ + if (len == 0) + strcpy(string, "."); /* Return "." */ + else { + memcpy(string, cp, len); + string[len]= '\0'; + } + + return(ep); +} + + +/*===========================================================================* + * search_dir * + *===========================================================================*/ +PUBLIC int search_dir(ldir_ptr, string, numb, flag, check_permissions, ftype) +register struct inode *ldir_ptr; /* ptr to inode for dir to search */ +char string[NAME_MAX + 1]; /* component to search for */ +ino_t *numb; /* pointer to inode number */ +int flag; /* LOOK_UP, ENTER, DELETE or IS_EMPTY */ +int check_permissions; /* check permissions when flag is !IS_EMPTY */ +int ftype; /* used when ENTER and + * INCOMPAT_FILETYPE */ +{ +/* This function searches the directory whose inode is pointed to by 'ldip': + * if (flag == ENTER) enter 'string' in the directory with inode # '*numb'; + * if (flag == DELETE) delete 'string' from the directory; + * if (flag == LOOK_UP) search for 'string' and return inode # in 'numb'; + * if (flag == IS_EMPTY) return OK if only . and .. in dir else ENOTEMPTY; + * + * if 'string' is dot1 or dot2, no access permissions are checked. + */ + + register struct ext2_disk_dir_desc *dp = NULL; + register struct ext2_disk_dir_desc *prev_dp = NULL; + register struct buf *bp = NULL; + int i, r, e_hit, t, match; + mode_t bits; + off_t pos; + unsigned new_slots; + block_t b; + int extended = 0; + int required_space = 0; + int string_len = 0; + + /* If 'ldir_ptr' is not a pointer to a dir inode, error. 
*/ + if ( (ldir_ptr->i_mode & I_TYPE) != I_DIRECTORY) { + return(ENOTDIR); + } + + r = OK; + + if (flag != IS_EMPTY) { + bits = (flag == LOOK_UP ? X_BIT : W_BIT | X_BIT); + + if (string == dot1 || string == dot2) { + if (flag != LOOK_UP) r = read_only(ldir_ptr); + /* only a writable device is required. */ + } else if(check_permissions) { + r = forbidden(ldir_ptr, bits); /* check access permissions */ + } + } + if (r != OK) return(r); + + new_slots = 0; + e_hit = FALSE; + match = 0; /* set when a string match occurs */ + pos = 0; + + if (flag == ENTER) { + string_len = strlen(string); + required_space = MIN_DIR_ENTRY_SIZE + string_len; + required_space += (required_space & 0x03) == 0 ? 0 : + (DIR_ENTRY_ALIGN - (required_space & 0x03) ); + + if (ldir_ptr->i_last_dpos < ldir_ptr->i_size && + ldir_ptr->i_last_dentry_size <= required_space) + pos = ldir_ptr->i_last_dpos; + } + + for (; pos < ldir_ptr->i_size; pos += ldir_ptr->i_sp->s_block_size) { + b = read_map(ldir_ptr, pos); /* get block number */ + + /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */ + bp = get_block(ldir_ptr->i_dev, b, NORMAL); /* get a dir block */ + prev_dp = NULL; /* New block - new first dentry, so no prev. */ + + if (bp == NO_BLOCK) + panic("get_block returned NO_BLOCK"); + assert(bp != NULL); + + /* Search a directory block. + * Note, we set prev_dp at the end of the loop. + */ + for (dp = (struct ext2_disk_dir_desc*) &bp->b_data; + CUR_DISC_DIR_POS(dp, &bp->b_data) < ldir_ptr->i_sp->s_block_size; + dp = NEXT_DISC_DIR_DESC(dp) ) { + /* Match occurs if string found. */ + if (flag != ENTER && dp->d_ino != NO_ENTRY) { + if (flag == IS_EMPTY) { + /* If this test succeeds, dir is not empty. */ + if (ansi_strcmp(dp->d_name, ".", dp->d_name_len) != 0 && + ansi_strcmp(dp->d_name, "..", dp->d_name_len) != 0) match = 1; + } else { + if (ansi_strcmp(dp->d_name, string, dp->d_name_len) == 0){ + match = 1; + } + } + } + + if (match) { + /* LOOK_UP or DELETE found what it wanted. 
*/ + r = OK; + if (flag == IS_EMPTY) r = ENOTEMPTY; + else if (flag == DELETE) { + if (dp->d_name_len >= sizeof(ino_t)) { + /* Save d_ino for recovery. */ + t = dp->d_name_len - sizeof(ino_t); + *((ino_t *) &dp->d_name[t]) = dp->d_ino; + } + dp->d_ino = NO_ENTRY; /* erase entry */ + bp->b_dirt = DIRTY; + + /* If we don't support HTree (directory index), + * which is fully compatible ext2 feature, + * we should reset EXT2_INDEX_FL, when modify + * linked directory structure. + * + * @TODO: actually we could just reset it for + * each directory, but I added if() to not + * forget about it later, when add HTree + * support. + */ + if (!HAS_COMPAT_FEATURE(ldir_ptr->i_sp, + COMPAT_DIR_INDEX)) + ldir_ptr->i_flags &= ~EXT2_INDEX_FL; + ldir_ptr->i_last_dpos = pos; + ldir_ptr->i_last_dentry_size = conv2(le_CPU, + dp->d_rec_len); + ldir_ptr->i_update |= CTIME | MTIME; + ldir_ptr->i_dirt = DIRTY; + /* Now we have cleared dentry, if it's not + * the first one, merge it with previous one. + * Since we assume, that existing dentry must be + * correct, there is no way to spann a data block. + */ + if (prev_dp) { + u16_t temp = conv2(le_CPU, + prev_dp->d_rec_len); + temp += conv2(le_CPU, + dp->d_rec_len); + prev_dp->d_rec_len = conv2(le_CPU, + temp); + } + } else { + /* 'flag' is LOOK_UP */ + *numb = (ino_t) conv4(le_CPU, dp->d_ino); + } + put_block(bp, DIRECTORY_BLOCK); + return(r); + } + + /* Check for free slot for the benefit of ENTER. */ + if (flag == ENTER && dp->d_ino == NO_ENTRY) { + /* we found a free slot, check if it has enough space */ + if (required_space <= conv2(le_CPU, dp->d_rec_len)) { + e_hit = TRUE; /* we found a free slot */ + break; + } + } + /* Can we shrink dentry? */ + if (flag == ENTER && required_space <= DIR_ENTRY_SHRINK(dp)) { + /* Shrink directory and create empty slot, now + * dp->d_rec_len = DIR_ENTRY_ACTUAL_SIZE + DIR_ENTRY_SHRINK. 
+ */ + int new_slot_size = conv2(le_CPU, dp->d_rec_len); + int actual_size = DIR_ENTRY_ACTUAL_SIZE(dp); + new_slot_size -= actual_size; + dp->d_rec_len = conv2(le_CPU, actual_size); + dp = NEXT_DISC_DIR_DESC(dp); + dp->d_rec_len = conv2(le_CPU, new_slot_size); + /* if we fail before writing real ino */ + dp->d_ino = NO_ENTRY; + bp->b_dirt = DIRTY; + e_hit = TRUE; /* we found a free slot */ + break; + } + + prev_dp = dp; + } + + /* The whole block has been searched or ENTER has a free slot. */ + if (e_hit) break; /* e_hit set if ENTER can be performed now */ + put_block(bp, DIRECTORY_BLOCK); /* otherwise, continue searching dir */ + } + + /* The whole directory has now been searched. */ + if (flag != ENTER) { + return(flag == IS_EMPTY ? OK : ENOENT); + } + + /* When ENTER next time, start searching for free slot from + * i_last_dpos. It gives solid performance improvement. + */ + ldir_ptr->i_last_dpos = pos; + ldir_ptr->i_last_dentry_size = required_space; + + /* This call is for ENTER. If no free slot has been found so far, try to + * extend directory. + */ + if (e_hit == FALSE) { /* directory is full and no room left in last block */ + new_slots++; /* increase directory size by 1 entry */ + if ( (bp = new_block(ldir_ptr, ldir_ptr->i_size)) == NULL) + return(err_code); + dp = (struct ext2_disk_dir_desc*) &bp->b_data; + dp->d_rec_len = conv2(le_CPU, ldir_ptr->i_sp->s_block_size); + dp->d_name_len = DIR_ENTRY_MAX_NAME_LEN(dp); /* for failure */ + extended = 1; + } + + /* 'bp' now points to a directory block with space. 'dp' points to slot. 
*/ + dp->d_name_len = string_len; + for (i = 0; i < NAME_MAX && i < dp->d_name_len && string[i]; i++) + dp->d_name[i] = string[i]; + dp->d_ino = (int) conv4(le_CPU, *numb); + if (HAS_INCOMPAT_FEATURE(ldir_ptr->i_sp, INCOMPAT_FILETYPE)) { + /* Convert ftype (from inode.i_mode) to dp->d_file_type */ + if (ftype == I_REGULAR) + dp->d_file_type = EXT2_FT_REG_FILE; + else if (ftype == I_DIRECTORY) + dp->d_file_type = EXT2_FT_DIR; + else if (ftype == I_SYMBOLIC_LINK) + dp->d_file_type = EXT2_FT_SYMLINK; + else if (ftype == I_BLOCK_SPECIAL) + dp->d_file_type = EXT2_FT_BLKDEV; + else if (ftype == I_CHAR_SPECIAL) + dp->d_file_type = EXT2_FT_CHRDEV; + else if (ftype == I_NAMED_PIPE) + dp->d_file_type = EXT2_FT_FIFO; + else + dp->d_file_type = EXT2_FT_UNKNOWN; + } + bp->b_dirt = DIRTY; + put_block(bp, DIRECTORY_BLOCK); + ldir_ptr->i_update |= CTIME | MTIME; /* mark mtime for update later */ + ldir_ptr->i_dirt = DIRTY; + + if (new_slots == 1) { + ldir_ptr->i_size += (off_t) conv2(le_CPU, dp->d_rec_len); + /* Send the change to disk if the directory is extended. */ + if (extended) rw_inode(ldir_ptr, WRITING); + } + return(OK); + +} diff --git a/servers/ext2/protect.c b/servers/ext2/protect.c new file mode 100644 index 000000000..952e6f74e --- /dev/null +++ b/servers/ext2/protect.c @@ -0,0 +1,154 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include "inode.h" +#include "super.h" +#include + +FORWARD _PROTOTYPE( int in_group, (gid_t grp) ); + + +/*===========================================================================* + * fs_chmod * + *===========================================================================*/ +PUBLIC int fs_chmod() +{ +/* Perform the chmod(name, mode) system call. */ + + register struct inode *rip; + mode_t mode; + + mode = (mode_t) fs_m_in.REQ_MODE; + + /* Temporarily open the file. */ + if( (rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + /* Now make the change. 
Only the bits covered by ALL_MODES are replaced by those of the requested mode; this routine itself does not clear the setgid bit. */
+  rip->i_mode = (rip->i_mode & ~ALL_MODES) | (mode & ALL_MODES);
+  rip->i_update |= CTIME;
+  rip->i_dirt = DIRTY;
+
+  /* Return full new mode to caller. */
+  fs_m_out.RES_MODE = rip->i_mode;
+
+  put_inode(rip);
+  return(OK);
+}
+
+
+/*===========================================================================*
+ *                              fs_chown                                     *
+ *===========================================================================*/
+PUBLIC int fs_chown()
+{
+/* Change the owner and group of the inode named in the request to the uid
+ * and gid given in the request, clearing its set-uid and set-gid bits.
+ * Returns EINVAL if the inode cannot be obtained and EROFS if the file
+ * system is mounted read-only; in the latter case the inode is unchanged.
+ * The resulting mode is always reported back in RES_MODE.
+ */
+  register struct inode *rip;
+  register int r;
+
+  /* Temporarily open the file. */
+  if( (rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
+        return(EINVAL);
+
+  /* Not permitted to change the owner of a file on a read-only file sys. */
+  r = read_only(rip);
+  if (r == OK) {
+        rip->i_uid = fs_m_in.REQ_UID;
+        rip->i_gid = fs_m_in.REQ_GID;
+        rip->i_mode &= ~(I_SET_UID_BIT | I_SET_GID_BIT);
+        rip->i_update |= CTIME;
+        rip->i_dirt = DIRTY;
+  }
+
+  /* Update caller on current mode, as it may have changed. */
+  fs_m_out.RES_MODE = rip->i_mode;
+  put_inode(rip);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *                              forbidden                                    *
+ *===========================================================================*/
+PUBLIC int forbidden(register struct inode *rip, mode_t access_desired)
+{
+/* Given a pointer to an inode, 'rip', and the access desired, determine
+ * if the access is allowed, and if not why not. The caller's identity is
+ * taken from 'caller_uid' and 'caller_gid', and from the supplementary
+ * groups in 'credentials' (see in_group()). If access is allowed, OK is
+ * returned; if it is forbidden, EACCES is returned. Write access to a file
+ * system that is mounted read-only yields EROFS. The inode reference held by
+ * the caller is left untouched.
+ */
+
+  register mode_t bits, perm_bits;
+  int r, shift;
+
+  /* Isolate the relevant rwx bits from the mode. */
+  bits = rip->i_mode;
+  if (caller_uid == SU_UID) {
+        /* Grant read and write permission.  Grant search permission for
+         * directories.  Grant execute permission (for non-directories) if
+         * and only if one of the 'X' bits is set.
+         */
+        if ( (bits & I_TYPE) == I_DIRECTORY ||
+             bits & ((X_BIT << 6) | (X_BIT << 3) | X_BIT))
+                perm_bits = R_BIT | W_BIT | X_BIT;
+        else
+                perm_bits = R_BIT | W_BIT;
+  } else {
+        if (caller_uid == rip->i_uid) shift = 6;            /* owner */
+        else if (caller_gid == rip->i_gid) shift = 3;       /* group */
+        else if (in_group(rip->i_gid) == OK) shift = 3;     /* other groups */
+        else shift = 0;                                     /* other */
+        perm_bits = (bits >> shift) & (R_BIT | W_BIT | X_BIT);
+  }
+
+  /* If access desired is not a subset of what is allowed, it is refused. */
+  r = OK;
+  if ((perm_bits | access_desired) != perm_bits) r = EACCES;
+
+  /* Check to see if someone is trying to write on a file system that is
+   * mounted read-only.
+   */
+  if (r == OK) {
+        if (access_desired & W_BIT) {
+                r = read_only(rip);
+        }
+  }
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ *                              in_group                                     *
+ *===========================================================================*/
+PRIVATE int in_group(gid_t grp)
+{
+/* Return OK if 'grp' is one of the vu_ngroups supplementary groups listed in
+ * 'credentials', EINVAL otherwise.
+ */
+  int i;
+  for(i = 0; i < credentials.vu_ngroups; i++)
+        if (credentials.vu_sgroups[i] == grp)
+                return(OK);
+
+  return(EINVAL);
+}
+
+
+/*===========================================================================*
+ *                              read_only                                    *
+ *===========================================================================*/
+PUBLIC int read_only(ip)
+struct inode *ip;               /* ptr to inode whose file sys is to be cked */
+{
+/* Check to see if the file system on which the inode 'ip' resides is mounted
+ * read only.  If so, return EROFS, else return OK.
+ */
+
+  register struct super_block *sp;
+
+  sp = ip->i_sp;
+  return(sp->s_rd_only ? EROFS : OK);
+}
diff --git a/servers/ext2/proto.h b/servers/ext2/proto.h
new file mode 100644
index 000000000..eda3f6358
--- /dev/null
+++ b/servers/ext2/proto.h
@@ -0,0 +1,135 @@
+#ifndef EXT2_PROTO_H
+#define EXT2_PROTO_H
+
+/* Function prototypes.
*/ + +/* Structs used in prototypes must be declared as such first. */ +struct buf; +struct filp; +struct inode; +struct super_block; + + +/* balloc.c */ +_PROTOTYPE( void discard_preallocated_blocks, (struct inode *rip) ); +_PROTOTYPE( block_t alloc_block, (struct inode *rip, block_t goal) ); +_PROTOTYPE( void free_block, (struct super_block *sp, bit_t bit) ); + +/* cache.c */ +_PROTOTYPE( void buf_pool, (int bufs) ); +_PROTOTYPE( void flushall, (dev_t dev) ); +_PROTOTYPE( struct buf *get_block, (dev_t dev, block_t block,int only_search)); +_PROTOTYPE( void invalidate, (dev_t device) ); +_PROTOTYPE( void put_block, (struct buf *bp, int block_type) ); +_PROTOTYPE( void set_blocksize, (unsigned int blocksize) ); +_PROTOTYPE( void rw_scattered, (dev_t dev, + struct buf **bufq, int bufqsize, int rw_flag) ); + +/* device.c */ +_PROTOTYPE( int block_dev_io, (int op, dev_t dev, endpoint_t proc_e, + void *buf, u64_t pos, size_t bytes) ); +_PROTOTYPE( int dev_open, (endpoint_t driver_e, dev_t dev, endpoint_t proc_e, + int flags) ); +_PROTOTYPE( void dev_close, (endpoint_t driver_e, dev_t dev) ); +_PROTOTYPE( int fs_new_driver, (void) ); + +/* ialloc.c */ +_PROTOTYPE( struct inode *alloc_inode, (struct inode *parent, mode_t bits)); +_PROTOTYPE( void free_inode, (struct inode *rip) ); + +/* inode.c */ +_PROTOTYPE( void dup_inode, (struct inode *ip) ); +_PROTOTYPE( struct inode *find_inode, (dev_t dev, ino_t numb) ); +_PROTOTYPE( int fs_putnode, (void) ); +_PROTOTYPE( void init_inode_cache, (void) ); +_PROTOTYPE( struct inode *get_inode, (dev_t dev, ino_t numb) ); +_PROTOTYPE( void put_inode, (struct inode *rip) ); +_PROTOTYPE( void update_times, (struct inode *rip) ); +_PROTOTYPE( void rw_inode, (struct inode *rip, int rw_flag) ); + +/* link.c */ +_PROTOTYPE( int fs_ftrunc, (void) ); +_PROTOTYPE( int fs_link, (void) ); +_PROTOTYPE( int fs_rdlink, (void) ); +_PROTOTYPE( int fs_rename, (void) ); +_PROTOTYPE( int fs_unlink, (void) ); +_PROTOTYPE( int truncate_inode, (struct 
inode *rip, off_t len) ); + +/* misc.c */ +_PROTOTYPE( int fs_flush, (void) ); +_PROTOTYPE( int fs_sync, (void) ); + +/* mount.c */ +_PROTOTYPE( int fs_mountpoint, (void) ); +_PROTOTYPE( int fs_readsuper, (void) ); +_PROTOTYPE( int fs_unmount, (void) ); + +/* open.c */ +_PROTOTYPE( int fs_create, (void) ); +_PROTOTYPE( int fs_inhibread, (void) ); +_PROTOTYPE( int fs_mkdir, (void) ); +_PROTOTYPE( int fs_mknod, (void) ); +_PROTOTYPE( int fs_slink, (void) ); + +/* path.c */ +_PROTOTYPE( int fs_lookup, (void) ); +_PROTOTYPE( struct inode *advance, (struct inode *dirp, + char string[NAME_MAX + 1], int chk_perm)); +_PROTOTYPE( int search_dir, (struct inode *ldir_ptr, + char string [NAME_MAX + 1], ino_t *numb, int flag, + int check_permissions, int ftype) ); + +/* protect.c */ +_PROTOTYPE( int fs_chmod, (void) ); +_PROTOTYPE( int fs_chown, (void) ); +_PROTOTYPE( int fs_getdents, (void) ); +_PROTOTYPE( int forbidden, (struct inode *rip, mode_t access_desired) ); +_PROTOTYPE( int read_only, (struct inode *ip) ); + +/* read.c */ +_PROTOTYPE( int fs_breadwrite, (void) ); +_PROTOTYPE( int fs_readwrite, (void) ); +_PROTOTYPE( void read_ahead, (void) ); +_PROTOTYPE( block_t read_map, (struct inode *rip, off_t pos) ); +_PROTOTYPE( block_t rd_indir, (struct buf *bp, int index) ); + +/* stadir.c */ +_PROTOTYPE( int fs_fstatfs, (void) ); +_PROTOTYPE( int fs_stat, (void) ); +_PROTOTYPE( int fs_statvfs, (void) ); + +/* super.c */ +_PROTOTYPE( unsigned int get_block_size, (dev_t dev) ); +_PROTOTYPE( struct super_block *get_super, (dev_t dev) ); +_PROTOTYPE( int read_super, (struct super_block *sp) ); +_PROTOTYPE( void write_super, (struct super_block *sp) ); +_PROTOTYPE( struct group_desc* get_group_desc, (unsigned int bnum) ); + +/* time.c */ +_PROTOTYPE( int fs_utime, (void) ); + +/* utility.c */ +_PROTOTYPE( time_t clock_time, (void) ); +_PROTOTYPE( unsigned conv2, (int norm, int w) ); +_PROTOTYPE( long conv4, (int norm, long x) ); +_PROTOTYPE( void mfs_nul_f, (char *file, int line, 
char *str, + unsigned int len, unsigned int maxlen) ); +_PROTOTYPE( int min, (unsigned int l, unsigned int r) ); +_PROTOTYPE( int no_sys, (void) ); +_PROTOTYPE( void sanitycheck, (char *file, int line) ); +#define SANITYCHECK sanitycheck(__FILE__, __LINE__) +_PROTOTYPE( int ansi_strcmp, (register const char* ansi_s, + register const char *s2, + register size_t ansi_s_length) ); +_PROTOTYPE( bit_t setbit, (bitchunk_t *bitmap, bit_t max_bits, + unsigned int word)); +_PROTOTYPE( bit_t setbyte, (bitchunk_t *bitmap, bit_t max_bits, + unsigned int word)); +_PROTOTYPE( int unsetbit, (bitchunk_t *bitmap, bit_t bit) ); + +/* write.c */ +_PROTOTYPE( struct buf *new_block, (struct inode *rip, off_t position) ); +_PROTOTYPE( void zero_block, (struct buf *bp) ); +_PROTOTYPE( int write_map, (struct inode *, off_t, block_t, int) ); + +#endif /* EXT2_PROTO_H */ diff --git a/servers/ext2/read.c b/servers/ext2/read.c new file mode 100644 index 000000000..d908beecf --- /dev/null +++ b/servers/ext2/read.c @@ -0,0 +1,685 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include +#include + + +FORWARD _PROTOTYPE( struct buf *rahead, (struct inode *rip, block_t baseblock, + u64_t position, unsigned bytes_ahead) ); +FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, u64_t position, + unsigned off, size_t chunk, unsigned left, int rw_flag, + cp_grant_id_t gid, unsigned buf_off, unsigned int block_size, + int *completed)); + +PRIVATE char getdents_buf[GETDENTS_BUFSIZ]; + +PRIVATE off_t rdahedpos; /* position to read ahead */ +PRIVATE struct inode *rdahed_inode; /* pointer to inode to read ahead */ + +/*===========================================================================* + * fs_readwrite * + *===========================================================================*/ +PUBLIC int fs_readwrite(void) +{ + int r, rw_flag, block_spec; + 
int regular; + cp_grant_id_t gid; + off_t position, f_size, bytes_left; + unsigned int off, cum_io, block_size, chunk; + mode_t mode_word; + int completed; + struct inode *rip; + size_t nrbytes; + + r = OK; + + /* Find the inode referred */ + if ((rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + mode_word = rip->i_mode & I_TYPE; + regular = (mode_word == I_REGULAR || mode_word == I_NAMED_PIPE); + block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0); + + /* Determine blocksize */ + if (block_spec) { + block_size = get_block_size( (dev_t) rip->i_block[0]); + f_size = MAX_FILE_POS; + } else { + block_size = rip->i_sp->s_block_size; + f_size = rip->i_size; + if (f_size < 0) f_size = MAX_FILE_POS; + } + + /* Get the values from the request message */ + rw_flag = (fs_m_in.m_type == REQ_READ ? READING : WRITING); + gid = (cp_grant_id_t) fs_m_in.REQ_GRANT; + position = (off_t) fs_m_in.REQ_SEEK_POS_LO; + nrbytes = (size_t) fs_m_in.REQ_NBYTES; + + rdwt_err = OK; /* set to EIO if disk error occurs */ + + if (rw_flag == WRITING && !block_spec) { + /* Check in advance to see if file will grow too big. */ + if (position > (off_t) (rip->i_sp->s_max_size - nrbytes)) + return(EFBIG); + } + + cum_io = 0; + /* Split the transfer into chunks that don't span two blocks. */ + while (nrbytes != 0) { + off = (unsigned int) (position % block_size);/* offset in blk*/ + chunk = MIN(nrbytes, block_size - off); + + if (rw_flag == READING) { + bytes_left = f_size - position; + if (position >= f_size) break; /* we are beyond EOF */ + if (chunk > bytes_left) chunk = (int) bytes_left; + } + + /* Read or write 'chunk' bytes. */ + r = rw_chunk(rip, cvul64((unsigned long) position), off, chunk, + nrbytes, rw_flag, gid, cum_io, block_size, &completed); + + if (r != OK) break; /* EOF reached */ + if (rdwt_err < 0) break; + + /* Update counters and pointers. 
*/ + nrbytes -= chunk; /* bytes yet to be read */ + cum_io += chunk; /* bytes read so far */ + position += (off_t) chunk; /* position within the file */ + } + + fs_m_out.RES_SEEK_POS_LO = position; /* It might change later and the VFS + has to know this value */ + + /* On write, update file size and access time. */ + if (rw_flag == WRITING) { + if (regular || mode_word == I_DIRECTORY) { + if (position > f_size) rip->i_size = position; + } + } + + /* Check to see if read-ahead is called for, and if so, set it up. */ + if(rw_flag == READING && rip->i_seek == NO_SEEK && + (unsigned int) position % block_size == 0 && + (regular || mode_word == I_DIRECTORY)) { + rdahed_inode = rip; + rdahedpos = position; + } + + rip->i_seek = NO_SEEK; + + if (rdwt_err != OK) r = rdwt_err; /* check for disk error */ + if (rdwt_err == END_OF_FILE) r = OK; + + if (r == OK) { + if (rw_flag == READING) rip->i_update |= ATIME; + if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME; + rip->i_dirt = DIRTY; /* inode is thus now dirty */ + } + + fs_m_out.RES_NBYTES = cum_io; + + return(r); +} + + +/*===========================================================================* + * fs_breadwrite * + *===========================================================================*/ +PUBLIC int fs_breadwrite(void) +{ + int r, rw_flag, completed; + cp_grant_id_t gid; + u64_t position; + unsigned int off, cum_io, chunk, block_size; + size_t nrbytes; + + /* Pseudo inode for rw_chunk */ + struct inode rip; + + r = OK; + + /* Get the values from the request message */ + rw_flag = (fs_m_in.m_type == REQ_BREAD ? 
READING : WRITING); + gid = (cp_grant_id_t) fs_m_in.REQ_GRANT; + position = make64((unsigned long) fs_m_in.REQ_SEEK_POS_LO, + (unsigned long) fs_m_in.REQ_SEEK_POS_HI); + nrbytes = (size_t) fs_m_in.REQ_NBYTES; + + block_size = get_block_size( (dev_t) fs_m_in.REQ_DEV2); + + rip.i_block[0] = (block_t) fs_m_in.REQ_DEV2; + rip.i_mode = I_BLOCK_SPECIAL; + rip.i_size = 0; + + rdwt_err = OK; /* set to EIO if disk error occurs */ + + cum_io = 0; + /* Split the transfer into chunks that don't span two blocks. */ + while (nrbytes > 0) { + off = rem64u(position, block_size); /* offset in blk*/ + chunk = min(nrbytes, block_size - off); + + /* Read or write 'chunk' bytes. */ + r = rw_chunk(&rip, position, off, chunk, nrbytes, rw_flag, gid, + cum_io, block_size, &completed); + + if (r != OK) break; /* EOF reached */ + if (rdwt_err < 0) break; + + /* Update counters and pointers. */ + nrbytes -= chunk; /* bytes yet to be read */ + cum_io += chunk; /* bytes read so far */ + position = add64ul(position, chunk); /* position within the file */ + } + + fs_m_out.RES_SEEK_POS_LO = ex64lo(position); + fs_m_out.RES_SEEK_POS_HI = ex64hi(position); + + if (rdwt_err != OK) r = rdwt_err; /* check for disk error */ + if (rdwt_err == END_OF_FILE) r = OK; + + fs_m_out.RES_NBYTES = cum_io; + + return(r); +} + + +/*===========================================================================* + * rw_chunk * + *===========================================================================*/ +PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, gid, + buf_off, block_size, completed) +register struct inode *rip; /* pointer to inode for file to be rd/wr */ +u64_t position; /* position within file to read or write */ +unsigned off; /* off within the current block */ +unsigned int chunk; /* number of bytes to read or write */ +unsigned left; /* max number of bytes wanted after position */ +int rw_flag; /* READING or WRITING */ +cp_grant_id_t gid; /* grant */ +unsigned buf_off; /* offset in grant */ 
+unsigned int block_size; /* block size of FS operating on */ +int *completed; /* number of bytes copied */ +{ +/* Read or write (part of) a block. */ + + register struct buf *bp; + register int r = OK; + int n, block_spec; + block_t b; + dev_t dev; + + *completed = 0; + + block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL; + + if (block_spec) { + b = div64u(position, block_size); + dev = (dev_t) rip->i_block[0]; + } else { + if (ex64hi(position) != 0) + panic("rw_chunk: position too high"); + b = read_map(rip, (off_t) ex64lo(position)); + dev = rip->i_dev; + } + + if (!block_spec && b == NO_BLOCK) { + if (rw_flag == READING) { + /* Reading from a nonexistent block. Must read as all zeros.*/ + bp = get_block(NO_DEV, NO_BLOCK, NORMAL); /* get a buffer */ + zero_block(bp); + } else { + /* Writing to a nonexistent block. Create and enter in inode.*/ + if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL) + return(err_code); + } + } else if (rw_flag == READING) { + /* Read and read ahead if convenient. */ + bp = rahead(rip, b, position, left); + } else { + /* Normally an existing block to be partially overwritten is first read + * in. However, a full block need not be read in. If it is already in + * the cache, acquire it, otherwise just acquire a free buffer. + */ + n = (chunk == block_size ? NO_READ : NORMAL); + if (!block_spec && off == 0 && (off_t) ex64lo(position) >= rip->i_size) + n = NO_READ; + bp = get_block(dev, b, n); + } + + /* In all cases, bp now points to a valid buffer. */ + if (bp == NULL) + panic("bp not valid in rw_chunk, this can't happen"); + + if (rw_flag == WRITING && chunk != block_size && !block_spec && + (off_t) ex64lo(position) >= rip->i_size && off == 0) { + zero_block(bp); + } + + if (rw_flag == READING) { + /* Copy a chunk from the block buffer to user space. 
*/ + r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) buf_off, + (vir_bytes) (bp->b_data+off), (size_t) chunk, D); + } else { + /* Copy a chunk from user space to the block buffer. */ + r = sys_safecopyfrom(VFS_PROC_NR, gid, (vir_bytes) buf_off, + (vir_bytes) (bp->b_data+off), (size_t) chunk, D); + bp->b_dirt = DIRTY; + } + + n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK); + put_block(bp, n); + + return(r); +} + + +/*===========================================================================* + * read_map * + *===========================================================================*/ +PUBLIC block_t read_map(rip, position) +register struct inode *rip; /* ptr to inode to map from */ +off_t position; /* position in file whose blk wanted */ +{ +/* Given an inode and a position within the corresponding file, locate the + * block number in which that position is to be found and return it. + */ + + struct buf *bp; + int index; + block_t b; + unsigned long excess, block_pos; + static char first_time = TRUE; + static long addr_in_block; + static long addr_in_block2; + static long doub_ind_s; + static long triple_ind_s; + static long out_range_s; + + if (first_time) { + addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES; + addr_in_block2 = addr_in_block * addr_in_block; + doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block; + triple_ind_s = doub_ind_s + addr_in_block2; + out_range_s = triple_ind_s + addr_in_block2 * addr_in_block; + first_time = FALSE; + } + + block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */ + + /* Is 'position' to be found in the inode itself? 
*/ + if (block_pos < EXT2_NDIR_BLOCKS) + return(rip->i_block[block_pos]); + + /* It is not in the inode, so it must be single, double or triple indirect */ + if (block_pos < doub_ind_s) { + b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */ + index = block_pos - EXT2_NDIR_BLOCKS; + } else if (block_pos >= out_range_s) { /* TODO: do we need it? */ + return(NO_BLOCK); + } else { + /* double or triple indirect block. At first if it's triple, + * find double indirect block. + */ + excess = block_pos - doub_ind_s; + b = rip->i_block[EXT2_DIND_BLOCK]; + if (block_pos >= triple_ind_s) { + b = rip->i_block[EXT2_TIND_BLOCK]; + if (b == NO_BLOCK) return(NO_BLOCK); + bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */ + ASSERT(bp->b_dev != NO_DEV); + ASSERT(bp->b_dev == rip->i_dev); + excess = block_pos - triple_ind_s; + index = excess / addr_in_block2; + b = rd_indir(bp, index); /* num of double ind block */ + put_block(bp, INDIRECT_BLOCK); /* release triple ind block */ + excess = excess % addr_in_block2; + } + if (b == NO_BLOCK) return(NO_BLOCK); + bp = get_block(rip->i_dev, b, NORMAL); /* get double indirect block */ + ASSERT(bp->b_dev != NO_DEV); + ASSERT(bp->b_dev == rip->i_dev); + index = excess / addr_in_block; + b = rd_indir(bp, index); /* num of single ind block */ + put_block(bp, INDIRECT_BLOCK); /* release double ind block */ + index = excess % addr_in_block; /* index into single ind blk */ + } + if (b == NO_BLOCK) return(NO_BLOCK); + bp = get_block(rip->i_dev, b, NORMAL); + ASSERT(bp->b_dev != NO_DEV); + ASSERT(bp->b_dev == rip->i_dev); + b = rd_indir(bp, index); + put_block(bp, INDIRECT_BLOCK); /* release single ind block */ + + return(b); +} + + +/*===========================================================================* + * rd_indir * + *===========================================================================*/ +PUBLIC block_t rd_indir(bp, index) +struct buf *bp; /* pointer to indirect block */ +int index; /* index 
into *bp */ +{ + if (bp == NULL) + panic("rd_indir() on NULL"); + /* TODO: use conv call */ + return conv4(le_CPU, bp->b_ind[index]); +} + + +/*===========================================================================* + * read_ahead * + *===========================================================================*/ +PUBLIC void read_ahead() +{ +/* Read a block into the cache before it is needed. */ + unsigned int block_size; + register struct inode *rip; + struct buf *bp; + block_t b; + + if(!rdahed_inode) + return; + + rip = rdahed_inode; /* pointer to inode to read ahead from */ + block_size = get_block_size(rip->i_dev); + rdahed_inode = NULL; /* turn off read ahead */ + if ( (b = read_map(rip, rdahedpos)) == NO_BLOCK) return; /* at EOF */ + + assert(rdahedpos > 0); /* So we can safely cast it to unsigned below */ + + bp = rahead(rip, b, cvul64((unsigned long) rdahedpos), block_size); + put_block(bp, PARTIAL_DATA_BLOCK); +} + + +/*===========================================================================* + * rahead * + *===========================================================================*/ +PRIVATE struct buf *rahead(rip, baseblock, position, bytes_ahead) +register struct inode *rip; /* pointer to inode for file to be read */ +block_t baseblock; /* block at current position */ +u64_t position; /* position within file */ +unsigned bytes_ahead; /* bytes beyond position for immediate use */ +{ +/* Fetch a block from the cache or the device. If a physical read is + * required, prefetch as many more blocks as convenient into the cache. + * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM. + * The device driver may decide it knows better and stop reading at a + * cylinder boundary (or after an error). Rw_scattered() puts an optional + * flag on all reads to allow this. + */ +/* Minimum number of blocks to prefetch. */ +# define BLOCKS_MINIMUM (nr_bufs < 50 ? 
18 : 32) + int block_spec, read_q_size; + unsigned int blocks_ahead, fragment, block_size; + block_t block, blocks_left; + off_t ind1_pos; + dev_t dev; + struct buf *bp; + static unsigned int readqsize = 0; + static struct buf **read_q; + + if(readqsize != nr_bufs) { + if(readqsize > 0) { + assert(read_q != NULL); + free(read_q); + } + if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs))) + panic("couldn't allocate read_q"); + readqsize = nr_bufs; + } + + block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL; + if (block_spec) + dev = (dev_t) rip->i_block[0]; + else + dev = rip->i_dev; + + block_size = get_block_size(dev); + + block = baseblock; + bp = get_block(dev, block, PREFETCH); + if (bp->b_dev != NO_DEV) return(bp); + + /* The best guess for the number of blocks to prefetch: A lot. + * It is impossible to tell what the device looks like, so we don't even + * try to guess the geometry, but leave it to the driver. + * + * The floppy driver can read a full track with no rotational delay, and it + * avoids reading partial tracks if it can, so handing it enough buffers to + * read two tracks is perfect. (Two, because some diskette types have + * an odd number of sectors per track, so a block may span tracks.) + * + * The disk drivers don't try to be smart. With todays disks it is + * impossible to tell what the real geometry looks like, so it is best to + * read as much as you can. With luck the caching on the drive allows + * for a little time to start the next read. + * + * The current solution below is a bit of a hack, it just reads blocks from + * the current file position hoping that more of the file can be found. A + * better solution must look at the already available + * indirect blocks (but don't call read_map!). 
+ */ + + fragment = rem64u(position, block_size); + position = sub64u(position, fragment); + bytes_ahead += fragment; + + blocks_ahead = (bytes_ahead + block_size - 1) / block_size; + + if (block_spec && rip->i_size == 0) { + blocks_left = (block_t) NR_IOREQS; + } else { + blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) / + block_size; + + /* Go for the first indirect block if we are in its neighborhood. */ + if (!block_spec) { + ind1_pos = (EXT2_NDIR_BLOCKS) * block_size; + if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) { + blocks_ahead++; + blocks_left++; + } + } + } + + /* No more than the maximum request. */ + if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS; + + /* Read at least the minimum number of blocks, but not after a seek. */ + if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK) + blocks_ahead = BLOCKS_MINIMUM; + + /* Can't go past end of file. */ + if (blocks_ahead > blocks_left) blocks_ahead = blocks_left; + + read_q_size = 0; + + /* Acquire block buffers. */ + for (;;) { + read_q[read_q_size++] = bp; + + if (--blocks_ahead == 0) break; + + /* Don't trash the cache, leave 4 free. */ + if (bufs_in_use >= nr_bufs - 4) break; + + block++; + + bp = get_block(dev, block, PREFETCH); + if (bp->b_dev != NO_DEV) { + /* Oops, block already in the cache, get out. 
*/ + put_block(bp, FULL_DATA_BLOCK); + break; + } + } + rw_scattered(dev, read_q, read_q_size, READING); + return(get_block(dev, baseblock, NORMAL)); +} + + +/*===========================================================================* + * fs_getdents * + *===========================================================================*/ +PUBLIC int fs_getdents(void) +{ + register struct inode *rip; + int o, r, done; + unsigned int block_size, len, reclen; + ino_t ino; + block_t b; + cp_grant_id_t gid; + size_t size, tmpbuf_off, userbuf_off; + off_t pos, off, block_pos, new_pos, ent_pos; + struct buf *bp; + struct ext2_disk_dir_desc *d_desc; + struct dirent *dep; + char *cp; + + ino = (ino_t) fs_m_in.REQ_INODE_NR; + gid = (gid_t) fs_m_in.REQ_GRANT; + size = (size_t) fs_m_in.REQ_MEM_SIZE; + pos = (off_t) fs_m_in.REQ_SEEK_POS_LO; + + /* Check whether the position is properly aligned */ + if ((unsigned int) pos % DIR_ENTRY_ALIGN) + return(ENOENT); + + if ((rip = get_inode(fs_dev, ino)) == NULL) + return(EINVAL); + + block_size = rip->i_sp->s_block_size; + off = (pos % block_size); /* Offset in block */ + block_pos = pos - off; + done = FALSE; /* Stop processing directory blocks when done is set */ + + memset(getdents_buf, '\0', GETDENTS_BUFSIZ); /* Avoid leaking any data */ + tmpbuf_off = 0; /* Offset in getdents_buf */ + userbuf_off = 0; /* Offset in the user's buffer */ + + /* The default position for the next request is EOF. If the user's buffer + * fills up before EOF, new_pos will be modified. */ + new_pos = rip->i_size; + + for (; block_pos < rip->i_size; block_pos += block_size) { + off_t temp_pos = block_pos; + b = read_map(rip, block_pos); /* get block number */ + /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */ + bp = get_block(rip->i_dev, b, NORMAL); /* get a dir block */ + + if (bp == NO_BLOCK) + panic("get_block returned NO_BLOCK"); + assert(bp != NULL); + + /* Search a directory block. 
*/ + d_desc = (struct ext2_disk_dir_desc*) &bp->b_data; + + /* we need to seek to entry at off bytes. + * when NEXT_DISC_DIR_POS == block_size it's last dentry. + */ + for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos + && NEXT_DISC_DIR_POS(d_desc, &bp->b_data) < block_size; + d_desc = NEXT_DISC_DIR_DESC(d_desc)) { + temp_pos += conv2(le_CPU, d_desc->d_rec_len); + } + + for (; CUR_DISC_DIR_POS(d_desc, &bp->b_data) < block_size; + d_desc = NEXT_DISC_DIR_DESC(d_desc)) { + if (d_desc->d_ino == 0) + continue; /* Entry is not in use */ + + if (d_desc->d_name_len > NAME_MAX || + d_desc->d_name_len > EXT2_NAME_MAX) { + len = min(NAME_MAX, EXT2_NAME_MAX); + } else { + len = d_desc->d_name_len; + } + + /* Compute record length */ + reclen = offsetof(struct dirent, d_name) + len + 1; + o = (reclen % sizeof(long)); + if (o != 0) + reclen += sizeof(long) - o; + + /* Need the position of this entry in the directory */ + ent_pos = block_pos + ((char *)d_desc - bp->b_data); + + if (tmpbuf_off + reclen > GETDENTS_BUFSIZ) { + r = sys_safecopyto(VFS_PROC_NR, gid, + (vir_bytes) userbuf_off, + (vir_bytes) getdents_buf, + (size_t) tmpbuf_off, D); + if (r != OK) { + put_inode(rip); + return(r); + } + userbuf_off += tmpbuf_off; + tmpbuf_off = 0; + } + + if (userbuf_off + tmpbuf_off + reclen > size) { + /* The user has no space for one more record */ + done = TRUE; + + /* Record the position of this entry, it is the + * starting point of the next request (unless the + * position is modified with lseek). 
+ */ + new_pos = ent_pos; + break; + } + + dep = (struct dirent *) &getdents_buf[tmpbuf_off]; + dep->d_ino = conv4(le_CPU, d_desc->d_ino); + dep->d_off = ent_pos; + dep->d_reclen = (unsigned short) reclen; + memcpy(dep->d_name, d_desc->d_name, len); + dep->d_name[len] = '\0'; + tmpbuf_off += reclen; + } + + put_block(bp, DIRECTORY_BLOCK); + if (done) + break; + } + + if (tmpbuf_off != 0) { + r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) userbuf_off, + (vir_bytes) getdents_buf, (size_t) tmpbuf_off, D); + if (r != OK) { + put_inode(rip); + return(r); + } + + userbuf_off += tmpbuf_off; + } + + if (done && userbuf_off == 0) + r = EINVAL; /* The user's buffer is too small */ + else { + fs_m_out.RES_NBYTES = userbuf_off; + fs_m_out.RES_SEEK_POS_LO = new_pos; + rip->i_update |= ATIME; + rip->i_dirt = DIRTY; + r = OK; + } + + put_inode(rip); /* release the inode */ + return(r); +} diff --git a/servers/ext2/stadir.c b/servers/ext2/stadir.c new file mode 100644 index 000000000..43036c1f0 --- /dev/null +++ b/servers/ext2/stadir.c @@ -0,0 +1,125 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include "inode.h" +#include "super.h" +#include + + +/*===========================================================================* + * stat_inode * + *===========================================================================*/ +PRIVATE int stat_inode( + register struct inode *rip, /* pointer to inode to stat */ + endpoint_t who_e, /* Caller endpoint */ + cp_grant_id_t gid /* grant for the stat buf */ +) +{ +/* Common code for stat and fstat system calls. */ + + struct stat statbuf; + mode_t mo; + int r, s; + + /* Update the atime, ctime, and mtime fields in the inode, if need be. */ + if (rip->i_update) update_times(rip); + + /* Fill in the statbuf struct. 
*/ + mo = rip->i_mode & I_TYPE; + + /* true iff special */ + s = (mo == I_CHAR_SPECIAL || mo == I_BLOCK_SPECIAL); + + statbuf.st_dev = rip->i_dev; + statbuf.st_ino = rip->i_num; + statbuf.st_mode = rip->i_mode; + statbuf.st_nlink = rip->i_links_count; + statbuf.st_uid = rip->i_uid; + statbuf.st_gid = rip->i_gid; + statbuf.st_rdev = (s ? rip->i_block[0] : NO_DEV); + statbuf.st_size = rip->i_size; + statbuf.st_atime = rip->i_atime; + statbuf.st_mtime = rip->i_mtime; + statbuf.st_ctime = rip->i_ctime; + + /* Copy the struct to user space. */ + r = sys_safecopyto(who_e, gid, (vir_bytes) 0, (vir_bytes) &statbuf, + (size_t) sizeof(statbuf), D); + + return(r); +} + + +/*===========================================================================* + * fs_fstatfs * + *===========================================================================*/ +PUBLIC int fs_fstatfs() +{ + struct statfs st; + struct inode *rip; + int r; + + if((rip = find_inode(fs_dev, ROOT_INODE)) == NULL) + return(EINVAL); + + st.f_bsize = rip->i_sp->s_block_size; + + /* Copy the struct to user space. 
*/ + r = sys_safecopyto(fs_m_in.m_source, (cp_grant_id_t) fs_m_in.REQ_GRANT, + (vir_bytes) 0, (vir_bytes) &st, (size_t) sizeof(st), D); + + return(r); +} + + +/*===========================================================================* + * fs_stat * + *===========================================================================*/ +PUBLIC int fs_stat() +{ + register int r; /* return value */ + register struct inode *rip; /* target inode */ + + if ((rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL) + return(EINVAL); + + r = stat_inode(rip, fs_m_in.m_source, (cp_grant_id_t) fs_m_in.REQ_GRANT); + put_inode(rip); /* release the inode */ + return(r); +} + +/*===========================================================================* + * fs_statvfs * + *===========================================================================*/ +PUBLIC int fs_statvfs() +{ + struct statvfs st; + struct super_block *sp; + int r; + + sp = get_super(fs_dev); + + st.f_bsize = sp->s_block_size; + st.f_frsize = sp->s_block_size; + st.f_blocks = sp->s_blocks_count; + st.f_bfree = sp->s_free_blocks_count; + st.f_bavail = sp->s_free_blocks_count - sp->s_r_blocks_count; + st.f_files = sp->s_inodes_count; + st.f_ffree = sp->s_free_inodes_count; + st.f_favail = sp->s_free_inodes_count; + st.f_fsid = fs_dev; + st.f_flag = (sp->s_rd_only == 1 ? ST_RDONLY : 0); + st.f_flag |= ST_NOTRUNC; + st.f_namemax = NAME_MAX; + + /* Copy the struct to user space. */ + r = sys_safecopyto(fs_m_in.m_source, fs_m_in.REQ_GRANT, 0, (vir_bytes) &st, + (phys_bytes) sizeof(st), D); + + return(r); +} diff --git a/servers/ext2/super.c b/servers/ext2/super.c new file mode 100644 index 000000000..cb1d83b94 --- /dev/null +++ b/servers/ext2/super.c @@ -0,0 +1,446 @@ +/* This file manages the super block structure. 
+ * + * The entry points into this file are + * get_super: search the 'superblock' table for a device + * read_super: read a superblock + * + * Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include +#include +#include +#include +#include "buf.h" +#include "inode.h" +#include "super.h" +#include "const.h" + +FORWARD _PROTOTYPE( off_t ext2_max_size, (int block_size) ); +FORWARD _PROTOTYPE( u32_t ext2_count_dirs, (struct super_block *sp) ); + +FORWARD _PROTOTYPE( void super_copy, (register struct super_block *dest, + register struct super_block *source)); +FORWARD _PROTOTYPE( void copy_group_descriptors, + (register struct group_desc *dest_array, + register struct group_desc *source_array, + unsigned int ngroups)); + +PRIVATE off_t super_block_offset; + + +/*===========================================================================* + * get_super * + *===========================================================================*/ +PUBLIC struct super_block *get_super( + dev_t dev /* device number whose super_block is sought */ +) +{ + if (dev == NO_DEV) + panic("request for super_block of NO_DEV"); + if (superblock->s_dev != dev) + panic("wrong superblock", (int) dev); + + return(superblock); +} + + +/*===========================================================================* + * get_block_size * + *===========================================================================*/ +PUBLIC unsigned int get_block_size(dev_t dev) +{ + if (dev == NO_DEV) + panic("request for block size of NO_DEV"); + return(fs_block_size); +} + +PRIVATE struct group_desc *ondisk_group_descs; + +/*===========================================================================* + * read_super * + *===========================================================================*/ +PUBLIC int read_super(sp) +register struct super_block *sp; /* pointer to a superblock */ +{ + /* Read a superblock. 
*/ + dev_t dev; + int r; + /* group descriptors, sp->s_group_desc points to this. */ + static struct group_desc *group_descs; + char *buf; + block_t gd_size; /* group descriptors table size in blocks */ + int gdt_position; + + dev = sp->s_dev; /* save device (will be overwritten by copy) */ + if (dev == NO_DEV) + panic("request for super_block of NO_DEV"); + + if (opt.block_with_super == 0) { + super_block_offset = SUPER_BLOCK_BYTES; + } else { + /* The block number here uses 1k units */ + super_block_offset = opt.block_with_super * 1024; + } + + STATICINIT(ondisk_superblock, sizeof(struct super_block)); + + if (!sp || !ondisk_superblock) + panic("can't allocate memory for super_block buffers"); + + r = block_dev_io(MFS_DEV_READ, dev, SELF_E, + (char*) ondisk_superblock, cvu64(super_block_offset), + _MIN_BLOCK_SIZE); + if (r != _MIN_BLOCK_SIZE) + return(EINVAL); + + super_copy(sp, ondisk_superblock); + + sp->s_dev = NO_DEV; /* restore later */ + + if (sp->s_magic != SUPER_MAGIC) + return(EINVAL); + + sp->s_block_size = 1024*(1<s_log_block_size); + + if (sp->s_block_size < _MIN_BLOCK_SIZE + || sp->s_block_size >_MAX_BLOCK_SIZE) { + return(EINVAL); + printf("data block size is too large\n"); + } + + if ((sp->s_block_size % 512) != 0) + return(EINVAL); + + if (SUPER_SIZE_D > sp->s_block_size) + return(EINVAL); + + /* Variable added for convinience (i_blocks counts 512-byte blocks). */ + sp->s_sectors_in_block = sp->s_block_size / 512; + + /* TODO: this code is for revision 1 (but bw compatible with 0) + * inode must be power of 2 and smaller, than block size. 
+ */ + if (EXT2_INODE_SIZE(sp) & (EXT2_INODE_SIZE(sp) - 1) != 0 + || EXT2_INODE_SIZE(sp) > sp->s_block_size) { + printf("superblock->s_inode_size is incorrect...\n"); + return(EINVAL); + } + + sp->s_blocksize_bits = sp->s_log_block_size + 10; + sp->s_max_size = ext2_max_size(sp->s_block_size); + sp->s_inodes_per_block = sp->s_block_size / EXT2_INODE_SIZE(sp); + if (sp->s_inodes_per_block == 0 || sp->s_inodes_per_group == 0) { + printf("either inodes_per_block or inodes_per_group count is 0\n"); + return(EINVAL); + } + + sp->s_itb_per_group = sp->s_inodes_per_group / sp->s_inodes_per_block; + sp->s_desc_per_block = sp->s_block_size / sizeof(struct group_desc); + + sp->s_groups_count = ((sp->s_blocks_count - sp->s_first_data_block - 1) + / sp->s_blocks_per_group) + 1; + + /* ceil(groups_count/desc_per_block) */ + sp->s_gdb_count = (sp->s_groups_count + sp->s_desc_per_block - 1) + / sp->s_desc_per_block; + + gd_size = sp->s_gdb_count * sp->s_block_size; + + buf = 0; + STATICINIT(buf, gd_size); + group_descs = (struct group_desc *) buf; + + buf = 0; + STATICINIT(buf, gd_size); + ondisk_group_descs = (struct group_desc *) buf; + + if (!group_descs || !ondisk_group_descs) + panic("can't allocate memory for gdt buffer"); + + /* s_first_data_block (block number, where superblock is stored) + * is 1 for 1Kb blocks and 0 for larger blocks. + * For fs with 1024-byte blocks first 1024 bytes (block0) used by MBR, + * and block1 stores superblock. When block size is larger, block0 stores + * both MBR and superblock, but gdt lives in next block anyway. + * If sb=N was specified, then gdt is stored in N+1 block, the block number + * here uses 1k units. 
+ * + */ + if (opt.block_with_super == 0) { + gdt_position = (sp->s_first_data_block + 1) * sp->s_block_size; + } else { + gdt_position = (opt.block_with_super + 1) * 1024; + } + + r = block_dev_io(MFS_DEV_READ, dev, SELF_E, + (char*) ondisk_group_descs, cvu64(gdt_position), + gd_size); + if (r != gd_size) { + printf("Can not read group descriptors\n"); + return(EINVAL); + } + + /* TODO: check descriptors we just read */ + + copy_group_descriptors(group_descs, ondisk_group_descs, sp->s_groups_count); + sp->s_group_desc = group_descs; + + /* Make a few basic checks to see if super block looks reasonable. */ + if (sp->s_inodes_count < 1 || sp->s_blocks_count < 1) { + printf("not enough inodes or data blocks, \n"); + return(EINVAL); + } + + sp->s_dirs_counter = ext2_count_dirs(sp); + + /* Start block search from this block. + * We skip superblock (1 block), group descriptors blocks (sp->s_gdb_count) + * block and inode bitmaps (2 blocks) and inode table. + */ + sp->s_bsearch = sp->s_first_data_block + 1 + sp->s_gdb_count + 2 + + sp->s_itb_per_group; + + sp->s_igsearch = 0; + + sp->s_dev = dev; /* restore device number */ + return(OK); +} + + +/*===========================================================================* + * write_super * + *===========================================================================*/ +PUBLIC void write_super(sp) +struct super_block *sp; /* pointer to a superblock */ +{ +/* Write a superblock and gdt. 
*/ + int r; + block_t gd_size; /* group descriptors table size in blocks */ + int gdt_position; + + if (sp->s_rd_only) + panic("can't write superblock on read-only filesys."); + + if (sp->s_dev == NO_DEV) + panic("request to write super_block, but NO_DEV"); + + super_copy(ondisk_superblock, sp); + + r = block_dev_io(MFS_DEV_WRITE, sp->s_dev, SELF_E, + sp, cvu64(super_block_offset), SUPER_SIZE_D); + if (r != SUPER_SIZE_D) + printf("ext2: Warning, failed to write superblock to the disk!\n"); + + if (group_descriptors_dirty == DIRTY) { + /* Locate the appropriate super_block. */ + gd_size = sp->s_gdb_count * sp->s_block_size; + + if (opt.block_with_super == 0) { + gdt_position = (sp->s_first_data_block + 1) * sp->s_block_size; + } else { + gdt_position = (opt.block_with_super + 1) * 1024; + } + + copy_group_descriptors(ondisk_group_descs, sp->s_group_desc, + sp->s_groups_count); + + r = block_dev_io(MFS_DEV_WRITE, sp->s_dev, SELF_E, + (char*) ondisk_group_descs, cvu64(gdt_position), + gd_size); + if (r != gd_size) { + printf("Can not write group descriptors\n"); + } + group_descriptors_dirty = CLEAN; + } +} + + +/*===========================================================================* + * get_group_desc * + *===========================================================================*/ +struct group_desc* get_group_desc(unsigned int bnum) +{ + if (bnum >= superblock->s_groups_count) { + printf("ext2, get_group_desc: wrong bnum (%d) requested\n", bnum); + return NULL; + } + return &superblock->s_group_desc[bnum]; +} + + +PRIVATE u32_t ext2_count_dirs(struct super_block *sp) +{ + u32_t count = 0; + int i; + + for (i = 0; i < sp->s_groups_count; i++) { + struct group_desc *desc = get_group_desc(i); + if (!desc) + continue; /* TODO: fail? 
*/ + count += desc->used_dirs_count; + } + return count; +} + + +/*===========================================================================* + * ext2_max_size * + *===========================================================================*/ +/* There are several things, which affect max filesize: + * - inode.i_blocks (512-byte blocks) is limited to (2^32 - 1). + * - number of addressed direct, single, double and triple indirect blocks. + * Number of addressed blocks depends on block_size only, thus unlike in + * linux (ext2_max_size) we do not make calculations, but use constants + * for different block sizes. Calculations (gcc code) are commented. + * Note: linux ext2_max_size makes calculated based on shifting, not + * arithmetics. + * (!!!)Note: constants hardly tight to EXT2_NDIR_BLOCKS, but I doubt its value + * will be changed someday. So if it's changed, then just recalculate constatns. + * Anyway this function is safe for any change. + * Note: there is also limitation from VFS (to LONG_MAX, i.e. 2GB). + */ +PRIVATE off_t ext2_max_size(int block_size) +{ + /* 12 is EXT2_NDIR_BLOCKS used in calculations. */ + if (EXT2_NDIR_BLOCKS != 12) + panic("ext2_max_size needs modification!"); + switch(block_size) { + case 1024: return LONG_MAX; /* actually 17247252480 */ + case 2048: return LONG_MAX; /* 275415851008 */ + case 4096: return LONG_MAX; /* 2194719883264 */ + default: { + ext2_debug("ext2_max_size: Unsupported block_size! 
\ + Assuming bs is 1024 bytes\n"); + return 67383296L; + } + } +#if 0 + long addr_in_block = block_size/4; /* 4 bytes per addr */ + long sectors_in_block = block_size/512; + long long meta_blocks; /* single, double and triple indirect blocks */ + unsigned long long out_range_s; /* max blocks addressed by inode */ + unsigned long long max_bytes; + unsigned long long upper_limit; + + /* 1 indirect block, 1 + addr_in_block dindirect and 1 + addr_in_block + + * + addr_in_block*addr_in_block triple indirect blocks */ + meta_blocks = 2*addr_in_block + addr_in_block*addr_in_block + 3; + out_range_s = EXT2_NDIR_BLOCKS + addr_in_block + addr_in_block * addr_in_block + + addr_in_block * addr_in_block * addr_in_block; + max_bytes = out_range_s * block_size; + + upper_limit = (1LL << 32) - 1; /* max 512-byte blocks by i_blocks */ + upper_limit /= sectors_in_block; /* total block_size blocks */ + upper_limit -= meta_blocks; /* total data blocks */ + upper_limit *= (long long)block_size; /* max size in bytes */ + + if (max_bytes > upper_limit) + max_bytes = upper_limit; + + /* Limit s_max_size to LONG_MAX */ + if (max_bytes > LONG_MAX) + max_bytes = LONG_MAX; + + return max_bytes; +#endif +} + + +/*===========================================================================* + * super_copy * + *===========================================================================*/ +PRIVATE void super_copy( + register struct super_block *dest, + register struct super_block *source +) +/* Note: we don't convert stuff, used in ext3. */ +{ +/* Copy super_block to the in-core table, swapping bytes if need be. 
*/ + if (le_CPU) { + /* Just use memcpy */ + memcpy(dest, source, SUPER_SIZE_D); + return; + } + dest->s_inodes_count = conv4(le_CPU, source->s_inodes_count); + dest->s_blocks_count = conv4(le_CPU, source->s_blocks_count); + dest->s_r_blocks_count = conv4(le_CPU, source->s_r_blocks_count); + dest->s_free_blocks_count = conv4(le_CPU, source->s_free_blocks_count); + dest->s_free_inodes_count = conv4(le_CPU, source->s_free_inodes_count); + dest->s_first_data_block = conv4(le_CPU, source->s_first_data_block); + dest->s_log_block_size = conv4(le_CPU, source->s_log_block_size); + dest->s_log_frag_size = conv4(le_CPU, source->s_log_frag_size); + dest->s_blocks_per_group = conv4(le_CPU, source->s_blocks_per_group); + dest->s_frags_per_group = conv4(le_CPU, source->s_frags_per_group); + dest->s_inodes_per_group = conv4(le_CPU, source->s_inodes_per_group); + dest->s_mtime = conv4(le_CPU, source->s_mtime); + dest->s_wtime = conv4(le_CPU, source->s_wtime); + dest->s_mnt_count = conv2(le_CPU, source->s_mnt_count); + dest->s_max_mnt_count = conv2(le_CPU, source->s_max_mnt_count); + dest->s_magic = conv2(le_CPU, source->s_magic); + dest->s_state = conv2(le_CPU, source->s_state); + dest->s_errors = conv2(le_CPU, source->s_errors); + dest->s_minor_rev_level = conv2(le_CPU, source->s_minor_rev_level); + dest->s_lastcheck = conv4(le_CPU, source->s_lastcheck); + dest->s_checkinterval = conv4(le_CPU, source->s_checkinterval); + dest->s_creator_os = conv4(le_CPU, source->s_creator_os); + dest->s_rev_level = conv4(le_CPU, source->s_rev_level); + dest->s_def_resuid = conv2(le_CPU, source->s_def_resuid); + dest->s_def_resgid = conv2(le_CPU, source->s_def_resgid); + dest->s_first_ino = conv4(le_CPU, source->s_first_ino); + dest->s_inode_size = conv2(le_CPU, source->s_inode_size); + dest->s_block_group_nr = conv2(le_CPU, source->s_block_group_nr); + dest->s_feature_compat = conv4(le_CPU, source->s_feature_compat); + dest->s_feature_incompat = conv4(le_CPU, source->s_feature_incompat); + 
dest->s_feature_ro_compat = conv4(le_CPU, source->s_feature_ro_compat); + memcpy(dest->s_uuid, source->s_uuid, sizeof(dest->s_uuid)); + memcpy(dest->s_volume_name, source->s_volume_name, + sizeof(dest->s_volume_name)); + memcpy(dest->s_last_mounted, source->s_last_mounted, + sizeof(dest->s_last_mounted)); + dest->s_algorithm_usage_bitmap = + conv4(le_CPU, source->s_algorithm_usage_bitmap); + dest->s_prealloc_blocks = source->s_prealloc_blocks; + dest->s_prealloc_dir_blocks = source->s_prealloc_dir_blocks; + dest->s_padding1 = conv2(le_CPU, source->s_padding1); +} + + +/*===========================================================================* + * gd_copy * + *===========================================================================*/ +PRIVATE void gd_copy( + register struct group_desc *dest, + register struct group_desc *source +) +{ + /* Copy super_block to the in-core table, swapping bytes if need be. */ + if (le_CPU) { + /* Just use memcpy */ + memcpy(dest, source, sizeof(struct group_desc)); + return; + } + dest->block_bitmap = conv4(le_CPU, source->block_bitmap); + dest->inode_bitmap = conv4(le_CPU, source->inode_bitmap); + dest->inode_table = conv4(le_CPU, source->inode_table); + dest->free_blocks_count = conv2(le_CPU, source->free_blocks_count); + dest->free_inodes_count = conv2(le_CPU, source->free_inodes_count); + dest->used_dirs_count = conv2(le_CPU, source->used_dirs_count); +} + + +/*===========================================================================* + * copy_group_descriptors * + *===========================================================================*/ +PRIVATE void copy_group_descriptors( + register struct group_desc *dest_array, + register struct group_desc *source_array, + unsigned int ngroups +) +{ + int i; + for (i = 0; i < ngroups; i++) + gd_copy(&dest_array[i], &source_array[i]); +} diff --git a/servers/ext2/super.h b/servers/ext2/super.h new file mode 100644 index 000000000..09192a4e0 --- /dev/null +++ b/servers/ext2/super.h 
@@ -0,0 +1,128 @@
+/* Super block structure. This server keeps one in-core super block
+ * ('superblock') and one buffer in on-disk byte order ('ondisk_superblock').
+ * The struct starts with the on-disk ext2 fields; in-memory-only state follows.
+ *
+ * read_super() leaves s_dev == NO_DEV until the super block has been accepted.
+ *
+ */
+
+#ifndef EXT2_SUPER_H
+#define EXT2_SUPER_H
+
+/* super_block (on-disk part) was taken from linux/include/linux/ext2_fs.h */
+EXTERN struct super_block {
+  u32_t s_inodes_count;      /* Inodes count */
+  u32_t s_blocks_count;      /* Blocks count */
+  u32_t s_r_blocks_count;    /* Reserved blocks count */
+  u32_t s_free_blocks_count; /* Free blocks count */
+  u32_t s_free_inodes_count; /* Free inodes count */
+  u32_t s_first_data_block;  /* First Data Block */
+  u32_t s_log_block_size;    /* Block size is 1024 << s_log_block_size */
+  u32_t s_log_frag_size;     /* Fragment size */
+  u32_t s_blocks_per_group;  /* # Blocks per group */
+  u32_t s_frags_per_group;   /* # Fragments per group */
+  u32_t s_inodes_per_group;  /* # Inodes per group */
+  u32_t s_mtime;             /* Mount time */
+  u32_t s_wtime;             /* Write time */
+  u16_t s_mnt_count;         /* Mount count */
+  u16_t s_max_mnt_count;     /* Maximal mount count */
+  u16_t s_magic;             /* Magic signature */
+  u16_t s_state;             /* File system state */
+  u16_t s_errors;            /* Behaviour when detecting errors */
+  u16_t s_minor_rev_level;   /* minor revision level */
+  u32_t s_lastcheck;         /* time of last check */
+  u32_t s_checkinterval;     /* max. time between checks */
+  u32_t s_creator_os;        /* OS */
+  u32_t s_rev_level;         /* Revision level */
+  u16_t s_def_resuid;        /* Default uid for reserved blocks */
+  u16_t s_def_resgid;        /* Default gid for reserved blocks */
+  /*
+   * These fields are for EXT2_DYNAMIC_REV superblocks only.
+   *
+   * Note: the difference between the compatible feature set and
+   * the incompatible feature set is that if there is a bit set
+   * in the incompatible feature set that the kernel doesn't
+   * know about, it should refuse to mount the filesystem.
+   *
+   * e2fsck's requirements are more strict; if it doesn't know
+   * about a feature in either the compatible or incompatible
+   * feature set, it must abort and not try to meddle with
+   * things it doesn't understand...
+   */
+  u32_t s_first_ino;         /* First non-reserved inode */
+  u16_t s_inode_size;        /* size of inode structure */
+  u16_t s_block_group_nr;    /* block group # of this superblock */
+  u32_t s_feature_compat;    /* compatible feature set */
+  u32_t s_feature_incompat;  /* incompatible feature set */
+  u32_t s_feature_ro_compat; /* readonly-compatible feature set */
+  u8_t  s_uuid[16];          /* 128-bit uuid for volume */
+  char  s_volume_name[16];   /* volume name */
+  char  s_last_mounted[64];  /* directory where last mounted */
+  u32_t s_algorithm_usage_bitmap; /* For compression */
+  /*
+   * Performance hints.  Directory preallocation should only
+   * happen if the EXT2_COMPAT_PREALLOC flag is on.
+   */
+  u8_t  s_prealloc_blocks;     /* Nr of blocks to try to preallocate */
+  u8_t  s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
+  u16_t s_padding1;
+  /*
+   * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
+   */
+  u8_t  s_journal_uuid[16];  /* uuid of journal superblock */
+  u32_t s_journal_inum;      /* inode number of journal file */
+  u32_t s_journal_dev;       /* device number of journal file */
+  u32_t s_last_orphan;       /* start of list of inodes to delete */
+  u32_t s_hash_seed[4];      /* HTREE hash seed */
+  u8_t  s_def_hash_version;  /* Default hash version to use */
+  u8_t  s_reserved_char_pad;
+  u16_t s_reserved_word_pad;
+  u32_t s_default_mount_opts;
+  u32_t s_first_meta_bg;     /* First metablock block group */
+  u32_t s_reserved[190];     /* Padding to the end of the block */
+
+  /* The following items are only used when the super_block is in memory.
+   * read_super() computes them after the on-disk part has been copied in.
+   */
+  u32_t s_inodes_per_block;  /* Number of inodes per block */
+  u32_t s_itb_per_group;     /* Number of inode table blocks per group */
+  u32_t s_gdb_count;         /* Number of group descriptor blocks */
+  u32_t s_desc_per_block;    /* Number of group descriptors per block */
+  u32_t s_groups_count;      /* Number of groups in the fs */
+  u8_t  s_blocksize_bits;    /* Used to calculate offsets
+                              * (e.g. inode block),
+                              * always s_log_block_size+10.
+                              */
+  struct group_desc *s_group_desc; /* Group descriptors read into RAM */
+
+  u16_t s_block_size;        /* block size in bytes (16 bits wide). */
+  u16_t s_sectors_in_block;  /* s_block_size / 512 */
+  u32_t s_max_size;          /* maximum file size on this device */
+  dev_t s_dev;               /* whose super block is this? */
+  int s_rd_only;             /* set to 1 if file sys mounted read only */
+  block_t s_bsearch;         /* all data blocks below this block are in use */
+  int s_igsearch;            /* all groups below this one have no free inodes */
+  char s_is_root;
+  u32_t s_dirs_counter;      /* sum of used_dirs_count over all groups */
+
+} *superblock, *ondisk_superblock;
+
+
+/* Structure of a block group descriptor.
+ * On disk stored in little endian format.
+ */
+struct group_desc
+{
+  u32_t block_bitmap;        /* Blocks bitmap block */
+  u32_t inode_bitmap;        /* Inodes bitmap block */
+  u32_t inode_table;         /* Inodes table block */
+  u16_t free_blocks_count;   /* Free blocks count */
+  u16_t free_inodes_count;   /* Free inodes count */
+  u16_t used_dirs_count;     /* Directories count */
+  u16_t pad;
+  u32_t reserved[3];
+};
+
+#define IMAP  0  /* operating on the inode bit map */
+#define BMAP  1  /* operating on the block bit map */
+#define IMAPD 2  /* operating on the inode bit map, inode is dir */
+
+#endif /* EXT2_SUPER_H */
diff --git a/servers/ext2/table.c b/servers/ext2/table.c
new file mode 100644
index 000000000..222bd6d03
--- /dev/null
+++ b/servers/ext2/table.c
@@ -0,0 +1,50 @@
+/* This file contains the table used to map system call numbers onto the
+ * routines that perform them.
+ *
+ * Created (MFS based):
+ *   February 2010 (Evgeniy Ivanov)
+ */
+
+#define _TABLE
+
+#include "fs.h"
+#include "inode.h"
+#include "buf.h"
+#include "super.h"
+#include "drivers.h"
+
+PUBLIC _PROTOTYPE (int (*fs_call_vec[]), (void) ) = {
+        no_sys,             /* 0   not used; the index is the request number */
+        no_sys,             /* 1   */       /* Was: fs_getnode */
+        fs_putnode,         /* 2   */
+        fs_slink,           /* 3   */
+        fs_ftrunc,          /* 4   */
+        fs_chown,           /* 5   */
+        fs_chmod,           /* 6   */
+        fs_inhibread,       /* 7   */
+        fs_stat,            /* 8   */
+        fs_utime,           /* 9   */
+        fs_fstatfs,         /* 10  */
+        fs_breadwrite,      /* 11  */
+        fs_breadwrite,      /* 12  */
+        fs_unlink,          /* 13  */
+        fs_unlink,          /* 14  */
+        fs_unmount,         /* 15  */
+        fs_sync,            /* 16  */
+        fs_new_driver,      /* 17  */
+        fs_flush,           /* 18  */
+        fs_readwrite,       /* 19  */
+        fs_readwrite,       /* 20  */
+        fs_mknod,           /* 21  */
+        fs_mkdir,           /* 22  */
+        fs_create,          /* 23  */
+        fs_link,            /* 24  */
+        fs_rename,          /* 25  */
+        fs_lookup,          /* 26  */
+        fs_mountpoint,      /* 27  */
+        fs_readsuper,       /* 28  */
+        no_sys,             /* 29  */       /* Was: fs_newnode */
+        fs_rdlink,          /* 30  */
+        fs_getdents,        /* 31  */
+        fs_statvfs,         /* 32  */
+};
diff --git a/servers/ext2/time.c b/servers/ext2/time.c
new file mode 100644
index 000000000..9ae6170cd
--- /dev/null
+++ b/servers/ext2/time.c
@@ -0,0 +1,36 @@
+/* Created (MFS based):
+ *   February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include
+#include
+#include "inode.h"
+#include
+
+
+/*===========================================================================*
+ *                              fs_utime                                     *
+ *===========================================================================*/
+PUBLIC int fs_utime()
+{
+  register struct inode *rip;
+  register int r;
+
+  /* Temporarily open the file. */
+  if( (rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
+        return(EINVAL);
+
+  /* Ownership is not checked in this function; the only refusal made here is
+   * EROFS when read_only() rejects the inode.  Otherwise the access and
+   * modification times from the request are stored and the inode is marked
+   * dirty.
+   */
+  r = OK;
+  if(read_only(rip) != OK) r = EROFS;  /* not even su can touch if R/O */
+  if(r == OK) {
+        rip->i_atime = fs_m_in.REQ_ACTIME;
+        rip->i_mtime = fs_m_in.REQ_MODTIME;
+        rip->i_update = CTIME; /* discard any stale ATIME and MTIME flags */
+        rip->i_dirt = DIRTY;
+  }
+
+  put_inode(rip);
+  return(r);
+}
diff --git a/servers/ext2/type.h b/servers/ext2/type.h
new file mode 100644
index 000000000..f2498c707
--- /dev/null
+++ b/servers/ext2/type.h
@@ -0,0 +1,116 @@
+#ifndef EXT2_TYPE_H
+#define EXT2_TYPE_H
+
+/* On the disk all attributes are stored in little endian format.
+ * Inode structure was taken from linux/include/linux/ext2_fs.h.
+ */
+typedef struct {
+  u16_t i_mode;          /* File mode */
+  u16_t i_uid;           /* Low 16 bits of Owner Uid */
+  u32_t i_size;          /* Size in bytes */
+  u32_t i_atime;         /* Access time */
+  u32_t i_ctime;         /* Creation time */
+  u32_t i_mtime;         /* Modification time */
+  u32_t i_dtime;         /* Deletion Time */
+  u16_t i_gid;           /* Low 16 bits of Group Id */
+  u16_t i_links_count;   /* Links count */
+  u32_t i_blocks;        /* Blocks count */
+  u32_t i_flags;         /* File flags */
+  union {
+        struct {
+                u32_t l_i_reserved1;
+        } linux1;
+        struct {
+                u32_t h_i_translator;
+        } hurd1;
+        struct {
+                u32_t m_i_reserved1;
+        } masix1;
+  } osd1;                /* OS dependent 1 */
+  u32_t i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
+  u32_t i_generation;    /* File version (for NFS) */
+  u32_t i_file_acl;      /* File ACL */
+  u32_t i_dir_acl;       /* Directory ACL */
+  u32_t i_faddr;         /* Fragment address */
+  union {
+        struct {
+                u8_t l_i_frag;      /* Fragment number */
+                u8_t l_i_fsize;     /* Fragment size */
+                u16_t i_pad1;
+                u16_t l_i_uid_high; /* these 2 fields */
+                u16_t l_i_gid_high; /* were reserved2[0] */
+                u32_t l_i_reserved2;
+        } linux2;
+        struct {
+                u8_t h_i_frag;      /* Fragment number */
+                u8_t h_i_fsize;     /* Fragment size */
+                u16_t h_i_mode_high;
+                u16_t h_i_uid_high;
+                u16_t h_i_gid_high;
+                u32_t h_i_author;
+        } hurd2;
+        struct {
+                u8_t m_i_frag;      /* Fragment number */
+                u8_t m_i_fsize;     /* Fragment
size */ + u16_t m_pad1; + u32_t m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ +} d_inode; + + +/* Part of on disk directory (entry description). + * It includes all fields except name (since size is unknown. + * In revision 0 name_len is u16_t (here is structure of rev >= 0.5, + * where name_len was truncated with the upper 8 bit to add file_type). + * MIN_DIR_ENTRY_SIZE depends on this structure. + */ +struct ext2_disk_dir_desc { + u32_t d_ino; + u16_t d_rec_len; + u8_t d_name_len; + u8_t d_file_type; + char d_name[1]; +}; + +/* Current position in block */ +#define CUR_DISC_DIR_POS(cur_desc, base) ((char*)cur_desc - (char*)base) +/* Return pointer to the next dentry */ +#define NEXT_DISC_DIR_DESC(cur_desc) ((struct ext2_disk_dir_desc*)\ + ((char*)cur_desc + cur_desc->d_rec_len)) +/* Return next dentry's position in block */ +#define NEXT_DISC_DIR_POS(cur_desc, base) (cur_desc->d_rec_len +\ + CUR_DISC_DIR_POS(cur_desc, base)) + +struct buf { + /* Data portion of the buffer. */ + union fsdata_u *bp; + + /* Header portion of the buffer. */ + struct buf *b_next; /* used to link all free bufs in a chain */ + struct buf *b_prev; /* used to link all free bufs the other way */ + struct buf *b_hash; /* used to link bufs on hash chains */ + block_t b_blocknr; /* block number of its (minor) device */ + dev_t b_dev; /* major | minor device where block resides */ + char b_dirt; /* CLEAN or DIRTY */ + char b_count; /* number of users of this buffer */ + unsigned int b_bytes; /* Number of bytes allocated in bp */ +}; + + +/* Structure with options affecting global behavior. */ +struct opt { + int use_orlov; /* Bool: Use Orlov allocator */ + /* In ext2 there are reserved blocks, which can be used by super user only or + * user specified by resuid/resgid. Right now we can't check what user + * requested operation (VFS limitation), so it's a small warkaround. 
+ */ + int mfsalloc; /* Bool: use mfslike allocator */ + int use_reserved_blocks; /* Bool: small workaround */ + unsigned int block_with_super;/* Int: where to read super block, + * uses 1k units. */ + int use_prealloc; /* Bool: use preallocation */ +}; + + +#endif /* EXT2_TYPE_H */ diff --git a/servers/ext2/utility.c b/servers/ext2/utility.c new file mode 100644 index 000000000..08f12c074 --- /dev/null +++ b/servers/ext2/utility.c @@ -0,0 +1,255 @@ +/* Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include "buf.h" +#include "inode.h" +#include "super.h" + + +/*===========================================================================* + * no_sys * + *===========================================================================*/ +PUBLIC int no_sys() +{ +/* Somebody has used an illegal system call number */ + printf("no_sys: invalid call %d\n", req_nr); + return(EINVAL); +} + + +/*===========================================================================* + * conv2 * + *===========================================================================*/ +PUBLIC unsigned conv2(norm, w) +int norm; /* TRUE if no swap, FALSE for byte swap */ +int w; /* promotion of 16-bit word to be swapped */ +{ +/* Possibly swap a 16-bit word between 8086 and 68000 byte order. */ + if (norm) return( (unsigned) w & 0xFFFF); + return( ((w&BYTE) << 8) | ( (w>>8) & BYTE)); +} + + +/*===========================================================================* + * conv4 * + *===========================================================================*/ +PUBLIC long conv4(norm, x) +int norm; /* TRUE if no swap, FALSE for byte swap */ +long x; /* 32-bit long to be byte swapped */ +{ +/* Possibly swap a 32-bit long between 8086 and 68000 byte order. 
*/ + unsigned lo, hi; + long l; + + if (norm) return(x); /* byte order was already ok */ + lo = conv2(FALSE, (int) x & 0xFFFF); /* low-order half, byte swapped */ + hi = conv2(FALSE, (int) (x>>16) & 0xFFFF); /* high-order half, swapped */ + l = ( (long) lo <<16) | hi; + return(l); +} + + +/*===========================================================================* + * clock_time * + *===========================================================================*/ +PUBLIC time_t clock_time() +{ +/* This routine returns the time in seconds since 1.1.1970. MINIX is an + * astrophysically naive system that assumes the earth rotates at a constant + * rate and that such things as leap seconds do not exist. + */ + + register int k; + clock_t uptime; + time_t boottime; + + if ( (k=getuptime2(&uptime, &boottime)) != OK) + panic("clock_time: getuptme2 failed: %d", k); + + return( (time_t) (boottime + (uptime/sys_hz()))); +} + + +/*===========================================================================* + * mfs_min * + *===========================================================================*/ +PUBLIC int min(unsigned int l, unsigned int r) +{ + if(r >= l) return(l); + + return(r); +} + + +/*===========================================================================* + * mfs_nul * + *===========================================================================*/ +PUBLIC void mfs_nul_f(char *file, int line, char *str, unsigned int len, + unsigned int maxlen) +{ + if(len < maxlen && str[len-1] != '\0') { + printf("ext2 %s:%d string (length %d, maxlen %d) not null-terminated\n", + file, line, len, maxlen); + } +} + +#define MYASSERT(c) if(!(c)) { printf("ext2:%s:%d: sanity check: %s failed\n", \ + file, line, #c); panic("sanity check " #c " failed: %d", __LINE__); } + + +/*===========================================================================* + * sanity_check * + *===========================================================================*/ +PUBLIC void 
sanitycheck(char *file, int line) +{ + MYASSERT(SELF_E > 0); + if(superblock->s_dev != NO_DEV) { + MYASSERT(superblock->s_dev == fs_dev); + MYASSERT(superblock->s_block_size == fs_block_size); + } else { + MYASSERT(_MIN_BLOCK_SIZE == fs_block_size); + } +} + +/*===========================================================================* + * ansi_strcmp * + *===========================================================================*/ +PUBLIC int ansi_strcmp(register const char* ansi_s, register const char *s2, + register size_t ansi_s_length) +{ +/* Compare non null-terminated string ansi_s (length=ansi_s_length) + * with C-string s2. + * It returns 0 if strings are equal, otherwise -1 is returned. + */ + if (ansi_s_length) { + do { + if (*s2 == '\0') + return -1; + if (*ansi_s++ != *s2++) + return -1; + } while (--ansi_s_length > 0); + + if (*s2 == '\0') + return 0; + else + return -1; + } + return 0; +} + + +/*===========================================================================* + * setbit * + *===========================================================================*/ +PUBLIC bit_t setbit(bitchunk_t *bitmap, bit_t max_bits, unsigned int word) +{ + /* Find free bit in bitmap and set. Return number of the bit, + * if failed return -1. + */ + bitchunk_t *wptr, *wlim; + bit_t b = -1; + + /* TODO: do we need to add 1? I saw a situation, when it was + * required, and since we check bit number with max_bits it + * should be safe. + */ + wlim = &bitmap[FS_BITMAP_CHUNKS(max_bits >> 3)]; + + /* Iterate over the words in block. */ + for (wptr = &bitmap[word]; wptr < wlim; wptr++) { + bit_t i; + bitchunk_t k; + + /* Does this word contain a free bit? */ + if (*wptr == (bitchunk_t) ~0) + continue; + + /* Find and allocate the free bit. */ + k = (int) *wptr; + for (i = 0; (k & (1 << i)) != 0; ++i) {} + + /* Bit number from the start of the bit map. */ + b = (wptr - &bitmap[0]) * FS_BITCHUNK_BITS + i; + + /* Don't allocate bits beyond the end of the map. 
*/ + if (b >= max_bits) { + b = -1; + continue; + } + + /* Allocate bit number. */ + k |= 1 << i; + *wptr = (int) k; + break; + } + + return b; +} + + +/*===========================================================================* + * setbyte * + *===========================================================================*/ +PUBLIC bit_t setbyte(bitchunk_t *bitmap, bit_t max_bits, unsigned int word) +{ + /* Find free byte in bitmap and set it. Return number of the starting bit, + * if failed return -1. + */ + unsigned char *wptr, *wlim; + bit_t b = -1; + + wptr = (unsigned char*) &bitmap[0]; + /* TODO: do we need to add 1? I saw a situation, when it was + * required, and since we check bit number with max_bits it + * should be safe. + */ + wlim = &wptr[(max_bits >> 3)]; + + /* Iterate over the words in block. */ + for ( ; wptr < wlim; wptr++) { + /* Is it a free byte? */ + if (*wptr | 0) + continue; + + /* Bit number from the start of the bit map. */ + b = (wptr - (unsigned char*) &bitmap[0]) * CHAR_BIT; + + /* Don't allocate bits beyond the end of the map. */ + if (b + CHAR_BIT >= max_bits) { + b = -1; + continue; + } + + /* Allocate byte number. */ + *wptr = (unsigned char) ~0; + break; + } + return b; +} + + +/*===========================================================================* + * unsetbit * + *===========================================================================*/ +PUBLIC int unsetbit(bitchunk_t *bitmap, bit_t bit) +{ + /* Unset specified bit. If requested bit is already free return -1, + * otherwise return 0. 
+ */ + unsigned int word; /* index of the bitmap word that holds the bit */ + bitchunk_t k, mask; + + word = bit / FS_BITCHUNK_BITS; + bit = bit % FS_BITCHUNK_BITS; /* index in word */ + mask = (bitchunk_t) 1 << bit; /* built in bitchunk_t: no signed shift into the sign bit */ + + k = (int) bitmap[word]; + if (!(k & mask)) + return -1; + + k &= ~mask; + bitmap[word] = (int) k; + return 0; +} diff --git a/servers/ext2/write.c b/servers/ext2/write.c new file mode 100644 index 000000000..9dc8c49d1 --- /dev/null +++ b/servers/ext2/write.c @@ -0,0 +1,375 @@ +/* This file is the counterpart of "read.c". It contains the code for writing + * insofar as this is not contained in fs_readwrite(). + * + * The entry points into this file are + * write_map: write a new block into an inode + * new_block: acquire a new block + * zero_block: overwrite a block with zeroes + * + * Created (MFS based): + * February 2010 (Evgeniy Ivanov) + */ + +#include "fs.h" +#include <string.h> +#include "buf.h" +#include "inode.h" +#include "super.h" + +FORWARD _PROTOTYPE( void wr_indir, (struct buf *bp, int index, block_t block) ); +FORWARD _PROTOTYPE( int empty_indir, (struct buf *, struct super_block *) ); + +/*===========================================================================* + * write_map * + *===========================================================================*/ +PUBLIC int write_map(rip, position, new_block, op) +struct inode *rip; /* pointer to inode to be changed */ +off_t position; /* file address to be mapped */ +block_t new_block; /* block # to be inserted */ +int op; /* special actions */ +{ +/* Write a new block into an inode. + * + * If op includes WMAP_FREE, free the block corresponding to that position + * in the inode ('new_block' is ignored then). Also free the indirect block + * if that was the last entry in the indirect block. + * Also free the double/triple indirect block if that was the last entry in + * the double/triple indirect block. + * It's the only function which should take care of the rip->i_blocks counter.
+ */ + int index1, index2, index3; /* indexes in single..triple indirect blocks */ + long excess, block_pos; + char new_ind = 0, new_dbl = 0, new_triple = 0; + int single = 0, triple = 0; + register block_t old_block, b1, b2, b3; + struct buf *bp = NULL, + *bp_dindir = NULL, + *bp_tindir = NULL; + static char first_time = TRUE; + static long addr_in_block; + static long addr_in_block2; + static long doub_ind_s; + static long triple_ind_s; + static long out_range_s; + + if (first_time) { + addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES; + addr_in_block2 = addr_in_block * addr_in_block; + doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block; + triple_ind_s = doub_ind_s + addr_in_block2; + out_range_s = triple_ind_s + addr_in_block2 * addr_in_block; + first_time = FALSE; + } + + block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */ + rip->i_dirt = DIRTY; /* inode will be changed */ + + /* Is 'position' to be found in the inode itself? Freeing a position that is already a hole leaves i_block[] and i_blocks untouched, as in the indirect case below. */ + if (block_pos < EXT2_NDIR_BLOCKS) { + if (rip->i_block[block_pos] != NO_BLOCK && (op & WMAP_FREE)) { + free_block(rip->i_sp, rip->i_block[block_pos]); + rip->i_block[block_pos] = NO_BLOCK; + rip->i_blocks -= rip->i_sp->s_sectors_in_block; + } else if (!(op & WMAP_FREE)) { + rip->i_block[block_pos] = new_block; + rip->i_blocks += rip->i_sp->s_sectors_in_block; + } + return(OK); + } + + /* It is not in the inode, so it must be single, double or triple indirect */ + if (block_pos < doub_ind_s) { + b1 = rip->i_block[EXT2_NDIR_BLOCKS]; /* addr of single indirect block */ + index1 = block_pos - EXT2_NDIR_BLOCKS; + single = TRUE; + } else if (block_pos >= out_range_s) { /* TODO: do we need it? */ + return(EFBIG); + } else { + /* double or triple indirect block. At first if it's triple, + * find double indirect block.
+ */ + excess = block_pos - doub_ind_s; + b2 = rip->i_block[EXT2_DIND_BLOCK]; + if (block_pos >= triple_ind_s) { + b3 = rip->i_block[EXT2_TIND_BLOCK]; + if (b3 == NO_BLOCK && !(op & WMAP_FREE)) { + /* Create triple indirect block. */ + if ( (b3 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) { + ext2_debug("failed to allocate tblock near %d\n", rip->i_block[0]); + return(ENOSPC); + } + rip->i_block[EXT2_TIND_BLOCK] = b3; + rip->i_blocks += rip->i_sp->s_sectors_in_block; + new_triple = TRUE; + } + /* 'b3' is block number for triple indirect block, either old + * or newly created. + * If there wasn't one and WMAP_FREE is set, 'b3' is NO_BLOCK. + */ + if (b3 == NO_BLOCK) { + /* WMAP_FREE and no triple indirect block - then no + * double and single indirect blocks either. + */ + b1 = b2 = NO_BLOCK; + } else { + bp_tindir = get_block(rip->i_dev, b3, (new_triple ? NO_READ : NORMAL)); + if (new_triple) { + zero_block(bp_tindir); + bp_tindir->b_dirt = DIRTY; + } + excess = block_pos - triple_ind_s; + index3 = excess / addr_in_block2; + b2 = rd_indir(bp_tindir, index3); + excess = excess % addr_in_block2; + } + triple = TRUE; + } + + if (b2 == NO_BLOCK && !(op & WMAP_FREE)) { + /* Create the double indirect block. */ + if ( (b2 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) { + /* Release triple ind blk. */ + put_block(bp_tindir, INDIRECT_BLOCK); + ext2_debug("failed to allocate dblock near %d\n", rip->i_block[0]); + return(ENOSPC); + } + if (triple) { + wr_indir(bp_tindir, index3, b2); /* update triple indir */ + bp_tindir->b_dirt = DIRTY; + } else { + rip->i_block[EXT2_DIND_BLOCK] = b2; + } + rip->i_blocks += rip->i_sp->s_sectors_in_block; + new_dbl = TRUE; /* set flag for later */ + } + + /* 'b2' is block number for double indirect block, either old + * or newly created. + * If there wasn't one and WMAP_FREE is set, 'b2' is NO_BLOCK. + */ + if (b2 == NO_BLOCK) { + /* WMAP_FREE and no double indirect block - then no + * single indirect block either.
+ */ + b1 = NO_BLOCK; + } else { + bp_dindir = get_block(rip->i_dev, b2, (new_dbl ? NO_READ : NORMAL)); + if (new_dbl) { + zero_block(bp_dindir); + bp_dindir->b_dirt = DIRTY; + } + index2 = excess / addr_in_block; + b1 = rd_indir(bp_dindir, index2); + index1 = excess % addr_in_block; + } + single = FALSE; + } + + /* b1 is now single indirect block or NO_BLOCK; 'index1' is the index into it. + * We have to create the indirect block if it's NO_BLOCK. Unless + * we're freeing (WMAP_FREE). + */ + if (b1 == NO_BLOCK && !(op & WMAP_FREE)) { + if ( (b1 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) { + /* Release dbl and triple indirect blks. */ + put_block(bp_dindir, INDIRECT_BLOCK); + put_block(bp_tindir, INDIRECT_BLOCK); + ext2_debug("failed to allocate indirect block near %d\n", rip->i_block[0]); + return(ENOSPC); + } + if (single) { + rip->i_block[EXT2_NDIR_BLOCKS] = b1; /* update inode single indirect */ + } else { + wr_indir(bp_dindir, index2, b1); /* update dbl indir */ + bp_dindir->b_dirt = DIRTY; + } + rip->i_blocks += rip->i_sp->s_sectors_in_block; + new_ind = TRUE; + } + + /* b1 is indirect block's number (unless it's NO_BLOCK when we're + * freeing). + */ + if (b1 != NO_BLOCK) { + bp = get_block(rip->i_dev, b1, (new_ind ? NO_READ : NORMAL) ); + if (new_ind) + zero_block(bp); + if (op & WMAP_FREE) { + if ((old_block = rd_indir(bp, index1)) != NO_BLOCK) { + free_block(rip->i_sp, old_block); + rip->i_blocks -= rip->i_sp->s_sectors_in_block; + wr_indir(bp, index1, NO_BLOCK); + } + + /* Last reference in the indirect block gone? Then + * free the indirect block. + */ + if (empty_indir(bp, rip->i_sp)) { + free_block(rip->i_sp, b1); + rip->i_blocks -= rip->i_sp->s_sectors_in_block; + b1 = NO_BLOCK; + /* Update the reference to the indirect block to + * NO_BLOCK - in the double indirect block if there + * is one, otherwise in the inode directly.
+ */ + if (single) { + rip->i_block[EXT2_NDIR_BLOCKS] = b1; + } else { + wr_indir(bp_dindir, index2, b1); + bp_dindir->b_dirt = DIRTY; + } + } + } else { + wr_indir(bp, index1, new_block); + rip->i_blocks += rip->i_sp->s_sectors_in_block; + } + /* b1 equals NO_BLOCK only when we are freeing up the indirect block. */ + bp->b_dirt = (b1 == NO_BLOCK) ? CLEAN : DIRTY; + put_block(bp, INDIRECT_BLOCK); + } + + /* If the single indirect block isn't there (or was just freed), + * see if we have to keep the double indirect block, if any. + * If we don't have to keep it, don't bother writing it out. + */ + if (b1 == NO_BLOCK && !single && b2 != NO_BLOCK && + empty_indir(bp_dindir, rip->i_sp)) { + bp_dindir->b_dirt = CLEAN; + free_block(rip->i_sp, b2); + rip->i_blocks -= rip->i_sp->s_sectors_in_block; + b2 = NO_BLOCK; + if (triple) { + wr_indir(bp_tindir, index3, b2); /* update triple indir */ + bp_tindir->b_dirt = DIRTY; + } else { + rip->i_block[EXT2_DIND_BLOCK] = b2; + } + } + /* If the double indirect block isn't there (or was just freed), + * see if we have to keep the triple indirect block, if any. + * If we don't have to keep it, don't bother writing it out. + */ + if (b2 == NO_BLOCK && triple && b3 != NO_BLOCK && + empty_indir(bp_tindir, rip->i_sp)) { + bp_tindir->b_dirt = CLEAN; + free_block(rip->i_sp, b3); + rip->i_blocks -= rip->i_sp->s_sectors_in_block; + rip->i_block[EXT2_TIND_BLOCK] = NO_BLOCK; + } + + put_block(bp_dindir, INDIRECT_BLOCK); /* release double indirect blk */ + put_block(bp_tindir, INDIRECT_BLOCK); /* release triple indirect blk */ + + return(OK); +} + + +/*===========================================================================* + * wr_indir * + *===========================================================================*/ +PRIVATE void wr_indir(bp, index, block) +struct buf *bp; /* pointer to indirect block */ +int index; /* index into *bp */ +block_t block; /* block to write */ +{ +/* Given a pointer to an indirect block, write one entry.
*/ + + if(bp == NULL) + panic("wr_indir() on NULL"); + + /* Store the block number in slot 'index', passed through conv4(le_CPU, ...) first. */ + bp->b_ind[index] = conv4(le_CPU, block); +} + + +/*===========================================================================* + * empty_indir * + *===========================================================================*/ +PRIVATE int empty_indir(bp, sb) +struct buf *bp; /* pointer to indirect block */ +struct super_block *sb; /* superblock of device block resides on */ +{ +/* Return nonzero if the indirect block pointed to by bp contains + * only NO_BLOCK entries. Entries are compared as stored, without conv4. + */ + long addr_in_block = sb->s_block_size/4; /* 4 bytes per addr */ + int i; + for(i = 0; i < addr_in_block; i++) + if(bp->b_ind[i] != NO_BLOCK) + return(0); + return(1); +} + +/*===========================================================================* + * new_block * + *===========================================================================*/ +PUBLIC struct buf *new_block(rip, position) +register struct inode *rip; /* pointer to inode */ +off_t position; /* file pointer */ +{ +/* Acquire a new block and return a pointer to it. If no block is mapped at 'position' yet, one is allocated and entered with write_map(); on failure err_code is set and NULL is returned. The returned buffer is zeroed. */ + register struct buf *bp; + int r; + block_t b; + + /* Is a block already mapped at this position? If not, allocate one. */ + if ( (b = read_map(rip, position)) == NO_BLOCK) { + /* Check if this position follows last allocated + * block. + */ + block_t goal = NO_BLOCK; + if (rip->i_last_pos_bl_alloc != 0) { + off_t position_diff = position - rip->i_last_pos_bl_alloc; + if (rip->i_bsearch == 0) { + /* Should never happen, but not critical */ + ext2_debug("warning, i_bsearch is 0, while\ i_last_pos_bl_alloc is not!"); + } + if (position_diff <= rip->i_sp->s_block_size) { + goal = rip->i_bsearch + 1; + } else { + /* Non-sequential write operation, + * disable preallocation + * for this inode.
+ */ + rip->i_preallocation = 0; + discard_preallocated_blocks(rip); + } + } + + if ( (b = alloc_block(rip, goal) ) == NO_BLOCK) { + err_code = ENOSPC; + return(NULL); + } + if ( (r = write_map(rip, position, b, 0)) != OK) { + free_block(rip->i_sp, b); + err_code = r; + ext2_debug("write_map failed\n"); + return(NULL); + } + rip->i_last_pos_bl_alloc = position; + if (position == 0) { + /* rip->i_last_pos_bl_alloc points to the block position, + * and zero indicates first usage, thus just increment. + */ + rip->i_last_pos_bl_alloc++; + } + } + + bp = get_block(rip->i_dev, b, NO_READ); + zero_block(bp); + return(bp); +} + +/*===========================================================================* + * zero_block * + *===========================================================================*/ +PUBLIC void zero_block(bp) +register struct buf *bp; /* pointer to buffer to zero */ +{ +/* Zero the b_bytes bytes of the buffer's data and mark the buffer dirty. */ + ASSERT(bp->b_bytes > 0); + ASSERT(bp->bp); + memset(bp->b_data, 0, (size_t) bp->b_bytes); + bp->b_dirt = DIRTY; +}