+20100802:
+ /usr/src/etc/system.conf updated to include ext2 file server: copy it
+ (or merge it) to /etc/system.conf.
+
20100719:
If you installed using a 3.1.6 image (or earlier) and haven't updated
the boot monitor since r6246 you need to do so now:
quantum 500; # default server quantum
};
+# Privileges and scheduling parameters granted to the ext2 file server.
+service ext2
+{
+ ipc ALL; # ALL ipc targets allowed
+ system BASIC; # Only basic kernel calls allowed
+ vm BASIC; # Only basic VM calls allowed
+ io NONE; # No I/O range allowed
+ irq NONE; # No IRQ allowed
+ sigmgr rs; # Signal manager is RS
+ scheduler sched; # Scheduler is sched
+ priority 5; # priority queue 5
+ quantum 500; # default server quantum
+};
+
service pfs
{
uid 0;
.include <bsd.own.mk>
-SUBDIR= ds hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm
+SUBDIR= ds ext2 hgfs inet init ipc is iso9660fs mfs pfs pm rs sched vfs vm
IMAGE_SUBDIR= ds init mfs pfs pm rs sched vfs vm
--- /dev/null
+# Makefile for ext2 filesystem
+PROG= ext2
+SRCS= balloc.c cache.c device.c link.c \
+ mount.c misc.c open.c protect.c read.c \
+ stadir.c table.c time.c utility.c \
+ write.c ialloc.c inode.c main.c path.c \
+ super.c optset.c
+# The server is linked against libsys.
+DPADD+= ${LIBSYS}
+LDADD+= -lsys
+
+MAN=
+
+BINDIR?= /sbin
+INSTALLFLAGS+= -S 128k
+
+# Default size of the buffer cache (number of buffers); handed to the C
+# sources as the preprocessor macro DEFAULT_NR_BUFS.
+DEFAULT_NR_BUFS= 1024
+CPPFLAGS+= -DDEFAULT_NR_BUFS=${DEFAULT_NR_BUFS}
+
+.include <bsd.prog.mk>
--- /dev/null
+/* This file manages block allocation and deallocation.
+ *
+ * The entry points into this file are:
+ * discard_preallocated_blocks: Discard preallocated blocks.
+ * alloc_block: somebody wants to allocate a block; find one.
+ * free_block: indicate that a block is available for new allocation.
+ *
+ * Created:
+ * June 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <string.h>
+#include <stdlib.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include "const.h"
+
+
+FORWARD _PROTOTYPE( block_t alloc_block_bit, (struct super_block *sp,
+ block_t origin,
+ struct inode *rip));
+
+/*===========================================================================*
+ * discard_preallocated_blocks *
+ *===========================================================================*/
+PUBLIC void discard_preallocated_blocks(struct inode *rip)
+{
+/* Give back blocks that were preallocated but never handed out.
+ * With a non-NULL rip only that inode's preallocation window is released.
+ * With rip == NULL every slot of the in-core inode table is swept and
+ * preallocation is additionally switched off for each slot, on the
+ * assumption that the disk is running short of free blocks.
+ * Expected callers: file close, truncate, non-sequential write, inode
+ * being dropped from memory, and the "no free blocks left" path.
+ */
+  struct inode *cur, *end;
+  int sweep_all = (rip == NULL);
+  int slot;
+
+  /* Pick the range of inodes to process: one inode or the whole table. */
+  if (sweep_all) {
+    cur = &inode[0];
+    end = &inode[NR_INODES];
+  } else {
+    cur = rip;
+    end = rip + 1;
+  }
+
+  for (; cur < end; cur++) {
+    cur->i_prealloc_index = 0;
+    cur->i_prealloc_count = 0;
+    if (sweep_all)
+      cur->i_preallocation = 0; /* forbid preallocation */
+
+    for (slot = 0; slot < EXT2_PREALLOC_BLOCKS; slot++) {
+      if (cur->i_prealloc_blocks[slot] == NO_BLOCK)
+        continue;
+      free_block(cur->i_sp, cur->i_prealloc_blocks[slot]);
+      cur->i_prealloc_blocks[slot] = NO_BLOCK;
+    }
+  }
+}
+
+
+/*===========================================================================*
+ * alloc_block *
+ *===========================================================================*/
+PUBLIC block_t alloc_block(struct inode *rip, block_t block)
+{
+/* Allocate a block for inode. If block is provided, then use it as a goal:
+ * try to allocate this block or its neighbors.
+ * If block is not provided then goal is group, where inode lives.
+ *
+ * Returns the allocated block number, or NO_BLOCK when the free-block
+ * counters do not allow an allocation. On success rip->i_bsearch is set
+ * to the returned block. Panics if the file system is read-only.
+ */
+ block_t goal;
+ block_t b;
+ struct super_block *sp = rip->i_sp;
+
+ if (sp->s_rd_only)
+ panic("can't alloc block on read-only filesys.");
+
+ /* Check for free blocks. First time discard preallocation,
+ * next time return NO_BLOCK
+ */
+ if (!opt.use_reserved_blocks &&
+ sp->s_free_blocks_count <= sp->s_r_blocks_count) {
+ discard_preallocated_blocks(NULL);
+ } else if (sp->s_free_blocks_count <= EXT2_PREALLOC_BLOCKS) {
+ discard_preallocated_blocks(NULL);
+ }
+
+ /* Same test again: discarding above may have raised the free count. */
+ if (!opt.use_reserved_blocks &&
+ sp->s_free_blocks_count <= sp->s_r_blocks_count) {
+ return(NO_BLOCK);
+ } else if (sp->s_free_blocks_count == 0) {
+ return(NO_BLOCK);
+ }
+
+ if (block != NO_BLOCK) {
+ goal = block;
+ if (rip->i_preallocation && rip->i_prealloc_count > 0) {
+ /* check if goal is preallocated */
+ b = rip->i_prealloc_blocks[rip->i_prealloc_index];
+ /* Only the next unused entry of the window is considered; it is
+ * taken when it equals the goal or the block right after it.
+ */
+ if (block == b || (block + 1) == b) {
+ /* use preallocated block */
+ rip->i_prealloc_blocks[rip->i_prealloc_index] = NO_BLOCK;
+ rip->i_prealloc_count--;
+ rip->i_prealloc_index++;
+ if (rip->i_prealloc_index >= EXT2_PREALLOC_BLOCKS) {
+ rip->i_prealloc_index = 0;
+ ASSERT(rip->i_prealloc_count == 0);
+ }
+ rip->i_bsearch = b;
+ return b;
+ } else {
+ /* probably non-sequential write operation,
+ * disable preallocation for this inode.
+ */
+ rip->i_preallocation = 0;
+ discard_preallocated_blocks(rip);
+ }
+ }
+ } else {
+ /* No goal given: aim at the first block of the group that holds
+ * the inode (the "- 1" suggests inode numbers are 1-based).
+ */
+ int group = (rip->i_num - 1) / sp->s_inodes_per_group;
+ goal = sp->s_blocks_per_group*group + sp->s_first_data_block;
+ }
+
+ /* Preallocated blocks should have been consumed or discarded by now. */
+ if (rip->i_preallocation && rip->i_prealloc_count) {
+ ext2_debug("There're preallocated blocks, but they're\
+ neither used or freed!");
+ }
+
+ /* Fall back to a bitmap search near the goal. */
+ b = alloc_block_bit(sp, goal, rip);
+
+ if (b != NO_BLOCK)
+ rip->i_bsearch = b;
+
+ return b;
+}
+
+
+FORWARD _PROTOTYPE( void check_block_number, (block_t block,
+ struct super_block *sp,
+ struct group_desc *gd) );
+
+/*===========================================================================*
+ * alloc_block_bit *
+ *===========================================================================*/
+PRIVATE block_t alloc_block_bit(sp, goal, rip)
+struct super_block *sp; /* the filesystem to allocate from */
+block_t goal; /* try to allocate near this block */
+struct inode *rip; /* used for preallocation */
+{
+/* Search the block bitmaps for a free block. The search starts in goal's
+ * group at goal's bitmap word and then walks the remaining groups,
+ * wrapping around; goal's group is visited once more from word 0 at the
+ * very end.
+ *
+ * If preallocation is enabled for rip and the current group has at least
+ * 4 * EXT2_PREALLOC_BLOCKS free blocks, a whole free bitmap byte is
+ * claimed: its first block is returned and the others are remembered in
+ * rip->i_prealloc_blocks.
+ *
+ * Returns the allocated block number, or NO_BLOCK if no group yielded a
+ * block. The free counters in the group descriptor and the superblock are
+ * updated and the group descriptors are marked dirty on success.
+ */
+  block_t block = NO_BLOCK; /* allocated block */
+  int word; /* word in block bitmap */
+  bit_t bit = -1;
+  int group;
+  char update_bsearch = FALSE;
+  int i; /* number of groups visited so far */
+  int k; /* index into the preallocation window */
+
+  /* An out-of-range goal is replaced by the superblock's search hint. */
+  if (goal >= sp->s_blocks_count ||
+      (goal < sp->s_first_data_block && goal != 0)) {
+    goal = sp->s_bsearch;
+  }
+
+  if (goal <= sp->s_bsearch) {
+    /* No reason to search in a place with no free blocks */
+    goal = sp->s_bsearch;
+    update_bsearch = TRUE;
+  }
+
+  /* Figure out where to start the bit search. */
+  word = ((goal - sp->s_first_data_block) % sp->s_blocks_per_group)
+         / FS_BITCHUNK_BITS;
+
+  /* Try to allocate block at any group starting from the goal's group.
+   * First time goal's group is checked from the word=goal, after all
+   * groups checked, it's checked again from word=0, that's why "i <=".
+   */
+  group = (goal - sp->s_first_data_block) / sp->s_blocks_per_group;
+  for (i = 0; i <= sp->s_groups_count; i++, group++) {
+    struct buf *bp;
+    struct group_desc *gd;
+
+    if (group >= sp->s_groups_count)
+      group = 0;
+
+    gd = get_group_desc(group);
+    if (gd == NULL)
+      panic("can't get group_desc to alloc block");
+
+    if (gd->free_blocks_count == 0) {
+      word = 0;
+      continue;
+    }
+
+    bp = get_block(sp->s_dev, gd->block_bitmap, NORMAL);
+
+    if (rip->i_preallocation &&
+        gd->free_blocks_count >= (EXT2_PREALLOC_BLOCKS * 4) ) {
+      /* Try to preallocate blocks */
+      if (rip->i_prealloc_count != 0) {
+        /* kind of glitch... */
+        discard_preallocated_blocks(rip);
+        ext2_debug("warning, discarding previously preallocated\
+ blocks! It had to be done by another code.");
+      }
+      ASSERT(rip->i_prealloc_count == 0);
+      /* we preallocate bytes only */
+      ASSERT(EXT2_PREALLOC_BLOCKS == sizeof(char)*CHAR_BIT);
+
+      bit = setbyte(bp->b_bitmap, sp->s_blocks_per_group, word);
+      if (bit != -1) {
+        block = bit + sp->s_first_data_block +
+                group * sp->s_blocks_per_group;
+        check_block_number(block, sp, gd);
+
+        /* We preallocate a byte starting from block.
+         * First preallocated block will be returned as
+         * normally allocated block.
+         * A dedicated index is used so the group counter 'i' of the
+         * enclosing loop is not clobbered.
+         */
+        for (k = 1; k < EXT2_PREALLOC_BLOCKS; k++) {
+          check_block_number(block + k, sp, gd);
+          rip->i_prealloc_blocks[k-1] = block + k;
+        }
+        rip->i_prealloc_index = 0;
+        rip->i_prealloc_count = EXT2_PREALLOC_BLOCKS - 1;
+
+        bp->b_dirt = DIRTY; /* by setbyte */
+        put_block(bp, MAP_BLOCK);
+
+        gd->free_blocks_count -= EXT2_PREALLOC_BLOCKS;
+        sp->s_free_blocks_count -= EXT2_PREALLOC_BLOCKS;
+        group_descriptors_dirty = DIRTY;
+        return block;
+      }
+    }
+
+    bit = setbit(bp->b_bitmap, sp->s_blocks_per_group, word);
+    if (bit == -1) {
+      if (word == 0) {
+        panic("ext2: allocator failed to allocate a bit in bitmap\
+ with free bits.");
+      } else {
+        /* Nothing free from 'word' onwards in this group. Release the
+         * bitmap buffer before moving on: every get_block() must be
+         * paired with a put_block(), otherwise the buffer's use count
+         * never drops and it can never be reused.
+         */
+        put_block(bp, MAP_BLOCK);
+        word = 0;
+        continue;
+      }
+    }
+
+    block = sp->s_first_data_block + group * sp->s_blocks_per_group + bit;
+    check_block_number(block, sp, gd);
+
+    bp->b_dirt = DIRTY; /* Now it's safe to mark it as dirty */
+    put_block(bp, MAP_BLOCK);
+
+    gd->free_blocks_count--;
+    sp->s_free_blocks_count--;
+    group_descriptors_dirty = DIRTY;
+
+    if (update_bsearch && block != -1 && block != NO_BLOCK) {
+      /* We searched from the beginning, update bsearch. */
+      sp->s_bsearch = block;
+    }
+
+    return block;
+  }
+
+  return block;
+}
+
+
+/*===========================================================================*
+ * free_block *
+ *===========================================================================*/
+PUBLIC void free_block(struct super_block *sp, bit_t bit_returned)
+{
+/* Return a block by turning off its bitmap bit.
+ *
+ * sp is the file system the block belongs to and bit_returned is the
+ * absolute block number to release. The free counters of the owning group
+ * and of the superblock are incremented, the group descriptors are marked
+ * dirty, and the superblock search hint s_bsearch is lowered if the freed
+ * block lies below it.
+ * Panics on a read-only file system, on a block number outside
+ * [s_first_data_block, s_blocks_count), on an attempt to free a bitmap or
+ * inode-table block, and when the block is not marked as in use.
+ */
+  int group; /* group number of bit_returned */
+  int bit; /* bit_returned number within its group */
+  struct buf *bp;
+  struct group_desc *gd;
+
+  if (sp->s_rd_only)
+    panic("can't free bit on read-only filesys.");
+
+  if (bit_returned >= sp->s_blocks_count ||
+      bit_returned < sp->s_first_data_block)
+    panic("trying to free block %d beyond blocks scope.",
+          bit_returned);
+
+  /* At first search group, to which bit_returned belongs to
+   * and figure out in what word bit is stored.
+   */
+  group = (bit_returned - sp->s_first_data_block) / sp->s_blocks_per_group;
+  bit = (bit_returned - sp->s_first_data_block) % sp->s_blocks_per_group;
+
+  gd = get_group_desc(group);
+  if (gd == NULL)
+    panic("can't get group_desc to free block");
+
+  /* We might be buggy (No way! :P), so check if we deallocate
+   * data block, but not control (system) block.
+   * This should never happen.
+   */
+  if (bit_returned == gd->inode_bitmap || bit_returned == gd->block_bitmap
+      || (bit_returned >= gd->inode_table
+          && bit_returned < (gd->inode_table + sp->s_itb_per_group))) {
+    ext2_debug("ext2: freeing non-data block %d\n", bit_returned);
+    panic("trying to deallocate \
+ system/control block, hardly poke author.");
+  }
+
+  bp = get_block(sp->s_dev, gd->block_bitmap, NORMAL);
+
+  /* A non-zero result from unsetbit() is treated as "bit was not set". */
+  if (unsetbit(bp->b_bitmap, bit))
+    panic("Tried to free unused block %d", bit_returned);
+
+  bp->b_dirt = DIRTY;
+  put_block(bp, MAP_BLOCK);
+
+  gd->free_blocks_count++;
+  sp->s_free_blocks_count++;
+
+  group_descriptors_dirty = DIRTY;
+
+  /* Let the next allocation start its search no later than this block. */
+  if (bit_returned < sp->s_bsearch)
+    sp->s_bsearch = bit_returned;
+}
+
+
+PRIVATE void check_block_number(block_t block, struct super_block *sp,
+                                struct group_desc *gd)
+{
+/* Sanity check on a block number the allocator is about to hand out.
+ * The block must not be one of the group's bitmap blocks, must not lie
+ * inside the group's inode table, and must be below the total block
+ * count. A violation means an allocator bug, so we panic before the
+ * result can reach the on-disk bitmap.
+ */
+  int is_bitmap_block;
+  int in_inode_table;
+
+  is_bitmap_block = (block == gd->inode_bitmap) ||
+                    (block == gd->block_bitmap);
+  in_inode_table = (block >= gd->inode_table) &&
+                   (block < (gd->inode_table + sp->s_itb_per_group));
+
+  if (is_bitmap_block || in_inode_table) {
+    ext2_debug("ext2: allocating non-data block %d\n", block);
+    panic("ext2: block allocator tryed to return \
+ system/control block, poke author.\n");
+  }
+
+  if (!(block < sp->s_blocks_count)) {
+    panic("ext2: allocator returned blocknum greater, than \
+ total number of blocks.\n");
+  }
+}
--- /dev/null
+/* Buffer (block) cache. To acquire a block, a routine calls get_block(),
+ * telling which block it wants. The block is then regarded as "in use"
+ * and has its 'b_count' field incremented. All the blocks that are not
+ * in use are chained together in an LRU list, with 'front' pointing
+ * to the least recently used block, and 'rear' to the most recently used
+ * block. A reverse chain, using the field b_prev is also maintained.
+ * Usage for LRU is measured by the time the put_block() is done. The second
+ * parameter to put_block() can violate the LRU order and put a block on the
+ * front of the list, if it will probably not be needed soon. If a block
+ * is modified, the modifying routine must set b_dirt to DIRTY, so the block
+ * will eventually be rewritten to the disk.
+ */
+
+#ifndef EXT2_BUF_H
+#define EXT2_BUF_H
+
+#include <sys/dir.h> /* need struct direct */
+#include <dirent.h>
+
+/* Contents of one cached block, viewable as raw bytes, as an array of
+ * block numbers, or as an array of bitmap chunks. All members are sized
+ * from _MAX_BLOCK_SIZE.
+ */
+union fsdata_u {
+ char b__data[_MAX_BLOCK_SIZE]; /* ordinary user data */
+/* indirect block */
+ block_t b__ind[_MAX_BLOCK_SIZE/sizeof(block_t)];
+/* bit map block */
+ bitchunk_t b__bitmap[FS_BITMAP_CHUNKS(_MAX_BLOCK_SIZE)];
+};
+
+/* A block is free if b_dev == NO_DEV. */
+
+/* These defs make it possible to write bp->b_data instead of bp->bp->b__data */
+#define b_data bp->b__data
+#define b_ind bp->b__ind
+#define b_ino bp->b__ino
+#define b_bitmap bp->b__bitmap
+
+#define BUFHASH(b) ((b) % nr_bufs)
+
+EXTERN struct buf *front; /* points to least recently used free block */
+EXTERN struct buf *rear; /* points to most recently used free block */
+EXTERN unsigned int bufs_in_use; /* # bufs currently in use (not on free list)*/
+
+/* When a block is released, the type of usage is passed to put_block(). */
+#define WRITE_IMMED 0100 /* block should be written to disk now */
+#define ONE_SHOT 0200 /* set if block not likely to be needed soon */
+
+#define INODE_BLOCK 0 /* inode block */
+#define DIRECTORY_BLOCK 1 /* directory block */
+#define INDIRECT_BLOCK 2 /* pointer block */
+#define MAP_BLOCK 3 /* bit map */
+#define FULL_DATA_BLOCK 5 /* data, fully used */
+#define PARTIAL_DATA_BLOCK 6 /* data, partly used*/
+
+#endif /* EXT2_BUF_H */
--- /dev/null
+/* The file system maintains a buffer cache to reduce the number of disk
+ * accesses needed. Whenever a read or write to the disk is done, a check is
+ * first made to see if the block is in the cache. This file manages the
+ * cache.
+ *
+ * The entry points into this file are:
+ * get_block: request to fetch a block for reading or writing from cache
+ * put_block: return a block previously requested with get_block
+ * invalidate: remove all the cache blocks on some device
+ *
+ * Private functions:
+ * rw_block: read or write a block from the disk itself
+ *
+ * Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <minix/u64.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "buf.h"
+#include "super.h"
+#include "inode.h"
+
+FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) );
+FORWARD _PROTOTYPE( void rw_block, (struct buf *, int) );
+
+PRIVATE int vmcache_avail = -1; /* 0 if not available, >0 if available. */
+
+/*===========================================================================*
+ * get_block *
+ *===========================================================================*/
+PUBLIC struct buf *get_block(
+  register dev_t dev, /* on which device is the block? */
+  register block_t block, /* which block is wanted? */
+  int only_search /* if NO_READ, don't read, else act normal */
+)
+{
+/* Check to see if the requested block is in the block cache. If so, return
+ * a pointer to it. If not, evict some other block and fetch it (unless
+ * 'only_search' is NO_READ). All the blocks in the cache that are not in
+ * use are linked together in a chain, with 'front' pointing to the least
+ * recently used block and 'rear' to the most recently used block.
+ * If 'only_search' is NO_READ, the block being requested will be
+ * overwritten in its entirety, so it is only necessary to see if it is in
+ * the cache; if it is not, any free buffer will do. It is not necessary to
+ * actually read the block in from disk.
+ * If 'only_search' is PREFETCH, the block need not be read from the disk,
+ * and the device is not to be marked on the block, so callers can tell if
+ * the block returned is valid.
+ * In addition to the LRU chain, there is also a hash chain to link together
+ * blocks whose block numbers end with the same bit strings, for fast lookup.
+ *
+ * The returned buffer has its b_count incremented; the caller releases it
+ * with put_block(). Panics if no buffer can be taken from the LRU chain or
+ * if 'only_search' holds an unknown value.
+ */
+
+  int b;
+  static struct buf *bp, *prev_ptr;
+  u64_t yieldid = VM_BLOCKID_NONE, getid = make64(dev, block);
+  int vmcache = 0;
+
+  assert(buf_hash);
+  assert(buf);
+  assert(nr_bufs > 0);
+
+  if(vmcache_avail < 0) {
+    /* Test once for the availability of the vm yield block feature. */
+    if(vm_forgetblock(VM_BLOCKID_NONE) == ENOSYS) {
+      vmcache_avail = 0;
+    } else {
+      vmcache_avail = 1;
+    }
+  }
+
+  /* use vmcache if it's available, and allowed, and we're not doing
+   * i/o on a ram disk device.
+   */
+  if(vmcache_avail && may_use_vmcache && major(dev) != MEMORY_MAJOR)
+    vmcache = 1;
+
+  ASSERT(fs_block_size > 0);
+
+  /* Search the hash chain for (dev, block). Do_read() can use
+   * get_block(NO_DEV ...) to get an unnamed block to fill with zeros when
+   * someone wants to read from a hole in a file, in which case this search
+   * is skipped
+   */
+  if (dev != NO_DEV) {
+    b = BUFHASH(block);
+    bp = buf_hash[b];
+    while (bp != NULL) {
+      if (bp->b_blocknr == block && bp->b_dev == dev) {
+        /* Block needed has been found. */
+        if (bp->b_count == 0) rm_lru(bp);
+        bp->b_count++; /* record that block is in use */
+        ASSERT(bp->b_bytes == fs_block_size);
+        ASSERT(bp->b_dev == dev);
+        ASSERT(bp->b_dev != NO_DEV);
+        ASSERT(bp->bp);
+        return(bp);
+      } else {
+        /* This block is not the one sought. */
+        bp = bp->b_hash; /* move to next block on hash chain */
+      }
+    }
+  }
+
+  /* Desired block is not on available chain. Take oldest block ('front').
+   * The panic message now carries a conversion for the buffer count that
+   * was previously passed without one.
+   */
+  if ((bp = front) == NULL) panic("all buffers in use: %d", nr_bufs);
+
+  /* The chosen buffer has no (or too little) memory attached yet: try to
+   * allocate it, and if that fails fall back to the first buffer on the
+   * LRU chain that already owns enough memory.
+   */
+  if(bp->b_bytes < fs_block_size) {
+    ASSERT(!bp->bp);
+    ASSERT(bp->b_bytes == 0);
+    if(!(bp->bp = alloc_contig( (size_t) fs_block_size, 0, NULL))) {
+      ext2_debug("ext2: couldn't allocate a new block.\n");
+      for(bp = front;
+          bp && bp->b_bytes < fs_block_size; bp = bp->b_next)
+        ;
+      if(!bp) {
+        panic("no buffer available");
+      }
+    } else {
+      bp->b_bytes = fs_block_size;
+    }
+  }
+
+  ASSERT(bp);
+  ASSERT(bp->bp);
+  ASSERT(bp->b_bytes == fs_block_size);
+  ASSERT(bp->b_count == 0);
+
+  rm_lru(bp);
+
+  /* Remove the block that was just taken from its hash chain. */
+  b = BUFHASH(bp->b_blocknr);
+  prev_ptr = buf_hash[b];
+  if (prev_ptr == bp) {
+    buf_hash[b] = bp->b_hash;
+  } else {
+    /* The block just taken is not on the front of its hash chain. */
+    while (prev_ptr->b_hash != NULL)
+      if (prev_ptr->b_hash == bp) {
+        prev_ptr->b_hash = bp->b_hash; /* found it */
+        break;
+      } else {
+        prev_ptr = prev_ptr->b_hash; /* keep looking */
+      }
+  }
+
+  /* If the block taken is dirty, make it clean by writing it to the disk.
+   * Avoid hysteresis by flushing all other dirty blocks for the same device.
+   */
+  if (bp->b_dev != NO_DEV) {
+    if (bp->b_dirt == DIRTY) flushall(bp->b_dev);
+
+    /* Are we throwing out a block that contained something?
+     * Give it to VM for the second-layer cache.
+     */
+    yieldid = make64(bp->b_dev, bp->b_blocknr);
+    assert(bp->b_bytes == fs_block_size);
+    bp->b_dev = NO_DEV;
+  }
+
+  /* Fill in block's parameters and add it to the hash chain where it goes. */
+  bp->b_dev = dev; /* fill in device number */
+  bp->b_blocknr = block; /* fill in block number */
+  bp->b_count++; /* record that block is being used */
+  b = BUFHASH(bp->b_blocknr);
+  bp->b_hash = buf_hash[b];
+
+  buf_hash[b] = bp; /* add to hash list */
+
+  if(dev == NO_DEV) {
+    if(vmcache && cmp64(yieldid, VM_BLOCKID_NONE) != 0) {
+      vm_yield_block_get_block(yieldid, VM_BLOCKID_NONE,
+                               bp->bp, fs_block_size);
+    }
+    return(bp); /* If the caller wanted a NO_DEV block, work is done. */
+  }
+
+  /* Go get the requested block unless searching or prefetching. */
+  if(only_search == PREFETCH || only_search == NORMAL) {
+    /* Block is not found in our cache, but we do want it
+     * if it's in the vm cache.
+     */
+    if(vmcache) {
+      /* If we can satisfy the PREFETCH or NORMAL request
+       * from the vm cache, work is done.
+       */
+      if(vm_yield_block_get_block(yieldid, getid,
+                                  bp->bp, fs_block_size) == OK) {
+        return bp;
+      }
+    }
+  }
+
+  if(only_search == PREFETCH) {
+    /* PREFETCH: don't do i/o. */
+    bp->b_dev = NO_DEV;
+  } else if (only_search == NORMAL) {
+    rw_block(bp, READING);
+  } else if(only_search == NO_READ) {
+    /* we want this block, but its contents
+     * will be overwritten. VM has to forget
+     * about it.
+     */
+    if(vmcache) {
+      vm_forgetblock(getid);
+    }
+  } else
+    panic("unexpected only_search value: %d", only_search);
+
+  assert(bp->bp);
+
+  return(bp); /* return the newly acquired block */
+}
+
+/*===========================================================================*
+ * put_block *
+ *===========================================================================*/
+PUBLIC void put_block(
+  register struct buf *bp, /* pointer to the buffer to be released */
+  int block_type /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */
+)
+{
+/* Release one reference to a buffer. When the last reference goes away
+ * the buffer is linked back into the LRU chain: at the front (first to be
+ * evicted) for RAM-disk blocks and for ONE_SHOT releases, at the rear
+ * (evicted last) otherwise. If WRITE_IMMED is requested and the buffer is
+ * dirty and still names a device, it is written out at once.
+ * A NULL bp is accepted and ignored.
+ */
+  int evict_first;
+
+  if (bp == NULL) return; /* it is easier to check here than in caller */
+
+  /* Drop our reference; nothing more to do while others still hold one. */
+  if (--bp->b_count != 0) return;
+
+  bufs_in_use--; /* one fewer block buffers in use */
+
+  evict_first = (bp->b_dev == DEV_RAM || (block_type & ONE_SHOT));
+
+  if (!evict_first) {
+    /* Likely to be wanted again soon: append at the rear of the chain. */
+    bp->b_next = NULL;
+    bp->b_prev = rear;
+    if (rear != NULL)
+      rear->b_next = bp;
+    else
+      front = bp; /* LRU chain was empty */
+    rear = bp;
+  } else {
+    /* Unlikely to be wanted again soon: push at the front of the chain,
+     * where it becomes the next eviction candidate.
+     */
+    bp->b_next = front;
+    bp->b_prev = NULL;
+    if (front != NULL)
+      front->b_prev = bp;
+    else
+      rear = bp; /* LRU chain was empty */
+    front = bp;
+  }
+
+  /* Blocks that matter for file system integrity are flushed right away. */
+  if (bp->b_dev == NO_DEV || bp->b_dirt != DIRTY) return;
+  if (block_type & WRITE_IMMED) {
+    rw_block(bp, WRITING);
+  }
+}
+
+
+/*===========================================================================*
+ * rw_block *
+ *===========================================================================*/
+PRIVATE void rw_block(
+  register struct buf *bp, /* buffer pointer */
+  int rw_flag /* READING or WRITING */
+)
+{
+/* Transfer a single block between its buffer and the device. Nothing is
+ * transferred for a buffer whose b_dev is NO_DEV. A failed or short
+ * transfer invalidates the buffer (b_dev = NO_DEV); for reads the error is
+ * additionally left in rdwt_err. The error is not returned to the caller.
+ * The buffer is always marked CLEAN on the way out.
+ */
+  int r, op;
+  int failed;
+  u64_t pos;
+  dev_t dev = bp->b_dev;
+
+  if (dev != NO_DEV) {
+    pos = mul64u(bp->b_blocknr, fs_block_size);
+
+    op = MFS_DEV_WRITE;
+    if (rw_flag == READING) op = MFS_DEV_READ;
+
+    r = block_dev_io(op, dev, SELF_E, bp->b_data, pos, fs_block_size);
+
+    failed = 1;
+    if (r < 0) {
+      printf("Ext2(%d) I/O error on device %d/%d, block %lu\n",
+             SELF_E, major(dev), minor(dev), bp->b_blocknr);
+    } else if ((unsigned) r == fs_block_size) {
+      failed = 0; /* complete transfer */
+    } else {
+      r = END_OF_FILE; /* short transfer */
+    }
+
+    if (failed) {
+      bp->b_dev = NO_DEV; /* invalidate block */
+
+      /* Report read errors to interested parties. */
+      if (rw_flag == READING) rdwt_err = r;
+    }
+  }
+
+  bp->b_dirt = CLEAN;
+}
+
+/*===========================================================================*
+ * invalidate *
+ *===========================================================================*/
+PUBLIC void invalidate(
+  dev_t device /* device whose blocks are to be purged */
+)
+{
+/* Detach every cached buffer that belongs to 'device' by resetting its
+ * device field to NO_DEV, then tell VM to forget its cached blocks.
+ */
+  register struct buf *bp = &buf[0];
+  struct buf *limit = &buf[nr_bufs];
+
+  while (bp < limit) {
+    if (bp->b_dev == device)
+      bp->b_dev = NO_DEV;
+    bp++;
+  }
+
+  vm_forgetblocks();
+}
+
+/*===========================================================================*
+ * flushall *
+ *===========================================================================*/
+PUBLIC void flushall(
+  dev_t dev /* device to flush */
+)
+{
+/* Write out every dirty buffer of one device. The buffers are collected
+ * in a scratch list that is kept between calls and reallocated whenever
+ * the size of the buffer pool has changed, then passed to rw_scattered().
+ */
+  static struct buf **dirty; /* static so it isn't on stack */
+  static unsigned int dirtylistsize = 0;
+  register struct buf *bp;
+  int ndirty = 0;
+
+  /* (Re)size the scratch list to match the current pool size. */
+  if (dirtylistsize != nr_bufs) {
+    if (dirtylistsize > 0) {
+      assert(dirty != NULL);
+      free(dirty);
+    }
+    dirty = malloc(sizeof(dirty[0])*nr_bufs);
+    if (dirty == NULL)
+      panic("couldn't allocate dirty buf list");
+    dirtylistsize = nr_bufs;
+  }
+
+  /* Gather the dirty buffers that belong to 'dev'. */
+  for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
+    if (bp->b_dirt != DIRTY) continue;
+    if (bp->b_dev != dev) continue;
+    dirty[ndirty++] = bp;
+  }
+
+  rw_scattered(dev, dirty, ndirty, WRITING);
+}
+
+/*===========================================================================*
+ * rw_scattered *
+ *===========================================================================*/
+PUBLIC void rw_scattered(
+ dev_t dev, /* major-minor device number */
+ struct buf **bufq, /* pointer to array of buffers */
+ int bufqsize, /* number of buffers */
+ int rw_flag /* READING or WRITING */
+)
+{
+/* Read or write scattered data from a device.
+ *
+ * The buffers in bufq are sorted in place by block number and then
+ * transferred in runs of consecutive blocks, at most NR_IOREQS per request.
+ * When reading, transferred buffers are stamped with 'dev' and released
+ * with put_block(); buffers beyond the first request are released without
+ * being read. When writing, transferred buffers are marked CLEAN and the
+ * loop stops as soon as a request makes no progress.
+ */
+
+ register struct buf *bp;
+ int gap;
+ register int i;
+ register iovec_t *iop;
+ static iovec_t *iovec = NULL;
+ int j, r;
+
+ /* NOTE(review): presumably allocates NR_IOREQS entries for iovec on the
+ * first call; confirm against the STATICINIT definition.
+ */
+ STATICINIT(iovec, NR_IOREQS);
+
+ /* (Shell) sort buffers on b_blocknr. */
+ gap = 1;
+ do
+ gap = 3 * gap + 1;
+ while (gap <= bufqsize);
+ while (gap != 1) {
+ gap /= 3;
+ for (j = gap; j < bufqsize; j++) {
+ for (i = j - gap;
+ i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr;
+ i -= gap) {
+ bp = bufq[i];
+ bufq[i] = bufq[i + gap];
+ bufq[i + gap] = bp;
+ }
+ }
+ }
+
+ /* Set up I/O vector and do I/O. The result of dev_io is OK if everything
+ * went fine, otherwise the error code for the first failed transfer.
+ */
+ while (bufqsize > 0) {
+ /* Collect the run of consecutive blocks that starts at bufq[0]; the
+ * run ends at the first gap in block numbers or after NR_IOREQS entries.
+ */
+ for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) {
+ bp = bufq[j];
+ if (bp->b_blocknr != (block_t) bufq[0]->b_blocknr + j) break;
+ iop->iov_addr = (vir_bytes) bp->b_data;
+ iop->iov_size = (vir_bytes) fs_block_size;
+ }
+ r = block_dev_io(rw_flag == WRITING ? MFS_DEV_SCATTER : MFS_DEV_GATHER,
+ dev, SELF_E, iovec,
+ mul64u(bufq[0]->b_blocknr, fs_block_size), j);
+
+ /* Harvest the results. Dev_io reports the first error it may have
+ * encountered, but we only care if it's the first block that failed.
+ * An entry whose iov_size is still non-zero is treated as not
+ * transferred, and harvesting stops there.
+ */
+ for (i = 0, iop = iovec; i < j; i++, iop++) {
+ bp = bufq[i];
+ if (iop->iov_size != 0) {
+ /* Transfer failed. An error? Do we care? */
+ if (r != OK && i == 0) {
+ printf(
+ "fs: I/O error on device %d/%d, block %lu\n",
+ major(dev), minor(dev), bp->b_blocknr);
+ bp->b_dev = NO_DEV; /* invalidate block */
+ vm_forgetblocks();
+ }
+ break;
+ }
+ if (rw_flag == READING) {
+ bp->b_dev = dev; /* validate block */
+ put_block(bp, PARTIAL_DATA_BLOCK);
+ } else {
+ bp->b_dirt = CLEAN;
+ }
+ }
+ /* Skip past the i buffers that were handled in this round. */
+ bufq += i;
+ bufqsize -= i;
+ if (rw_flag == READING) {
+ /* Don't bother reading more than the device is willing to
+ * give at this time. Don't forget to release those extras.
+ */
+ while (bufqsize > 0) {
+ put_block(*bufq++, PARTIAL_DATA_BLOCK);
+ bufqsize--;
+ }
+ }
+ if (rw_flag == WRITING && i == 0) {
+ /* We're not making progress, this means we might keep
+ * looping. Buffers remain dirty if un-written. Buffers are
+ * lost if invalidate()d or LRU-removed while dirty. This
+ * is better than keeping unwritable blocks around forever..
+ */
+ break;
+ }
+ }
+}
+
+/*===========================================================================*
+ * rm_lru *
+ *===========================================================================*/
+PRIVATE void rm_lru(
+  struct buf *bp
+)
+{
+/* Unlink a buffer from the LRU chain and count it as in use. 'front' and
+ * 'rear' are adjusted when the buffer was at either end of the chain.
+ */
+  struct buf *succ = bp->b_next; /* successor on LRU chain */
+  struct buf *pred = bp->b_prev; /* predecessor on LRU chain */
+
+  bufs_in_use++;
+
+  if (pred == NULL)
+    front = succ; /* this block was at front of chain */
+  else
+    pred->b_next = succ;
+
+  if (succ == NULL)
+    rear = pred; /* this block was at rear of chain */
+  else
+    succ->b_prev = pred;
+}
+
+/*===========================================================================*
+ * set_blocksize *
+ *===========================================================================*/
+PUBLIC void set_blocksize(unsigned int blocksize)
+{
+/* Switch the cache to a new block size. This is only legal while no
+ * buffer and no in-core inode is in use; otherwise we panic. The buffer
+ * pool is rebuilt with its current number of buffers before the new size
+ * is recorded in fs_block_size.
+ */
+  struct buf *bp = &buf[0];
+  struct inode *rip = &inode[0];
+
+  ASSERT(blocksize > 0);
+
+  while (bp < &buf[nr_bufs]) {
+    if (bp->b_count != 0)
+      panic("change blocksize with buffer in use");
+    bp++;
+  }
+
+  while (rip < &inode[NR_INODES]) {
+    if (rip->i_count > 0)
+      panic("change blocksize with inode in use");
+    rip++;
+  }
+
+  buf_pool(nr_bufs);
+  fs_block_size = blocksize;
+}
+
+/*===========================================================================*
+ * buf_pool *
+ *===========================================================================*/
+PUBLIC void buf_pool(int new_nr_bufs)
+{
+/* (Re)build the buffer pool with new_nr_bufs buffers. An existing pool is
+ * synced and its block memory released first. Afterwards every buffer is
+ * unnamed (NO_DEV / NO_BLOCK), owns no block memory, and sits on both the
+ * LRU chain and hash chain 0 in array order. VM is told to forget its
+ * cached blocks.
+ */
+  register struct buf *bp;
+  struct buf *pool_end;
+
+  assert(new_nr_bufs > 0);
+
+  /* Tear down the old pool, if there is one. */
+  if (nr_bufs > 0) {
+    assert(buf);
+    (void) fs_sync();
+    for (bp = &buf[0]; bp < &buf[nr_bufs]; bp++) {
+      if (!bp->bp) continue;
+      assert(bp->b_bytes > 0);
+      free_contig(bp->bp, bp->b_bytes);
+    }
+  }
+
+  if (buf)
+    free(buf);
+  buf = calloc(sizeof(buf[0]), new_nr_bufs);
+  if (buf == NULL)
+    panic("couldn't allocate buf list (%d)", new_nr_bufs);
+
+  if (buf_hash)
+    free(buf_hash);
+  buf_hash = calloc(sizeof(buf_hash[0]), new_nr_bufs);
+  if (buf_hash == NULL)
+    panic("couldn't allocate buf hash list (%d)", new_nr_bufs);
+
+  nr_bufs = new_nr_bufs;
+
+  bufs_in_use = 0;
+  front = &buf[0];
+  rear = &buf[nr_bufs - 1];
+
+  /* Chain all buffers together in array order; the hash link of each
+   * buffer mirrors its LRU successor.
+   */
+  pool_end = &buf[nr_bufs];
+  for (bp = &buf[0]; bp < pool_end; bp++) {
+    bp->b_blocknr = NO_BLOCK;
+    bp->b_dev = NO_DEV;
+    bp->b_prev = (bp == &buf[0]) ? NULL : bp - 1;
+    bp->b_next = (bp + 1 == pool_end) ? NULL : bp + 1;
+    bp->b_hash = bp->b_next;
+    bp->bp = NULL;
+    bp->b_bytes = 0;
+  }
+  buf_hash[0] = front;
+
+  vm_forgetblocks();
+}
--- /dev/null
+#ifndef EXT2_CONST_H
+#define EXT2_CONST_H
+
+/* Table sizes */
+
+#define NR_INODES 256 /* # slots in "in core" inode table */
+#define GETDENTS_BUFSIZ 257
+
+#define INODE_HASH_LOG2 7 /* 2 based logarithm of the inode hash size */
+#define INODE_HASH_SIZE ((unsigned long)1<<INODE_HASH_LOG2)
+#define INODE_HASH_MASK (((unsigned long)1<<INODE_HASH_LOG2)-1)
+
+
+/* The type of sizeof may be (unsigned) long. Use the following macro for
+ * taking the sizes of small objects so that there are no surprises like
+ * (small) long constants being passed to routines expecting an int.
+ */
+#define usizeof(t) ((unsigned) sizeof(t))
+
+#define SUPER_MAGIC 0xEF53 /* magic number contained in super-block */
+
+#define EXT2_NAME_MAX 255
+
+/* Miscellaneous constants */
+#define SU_UID ((uid_t) 0) /* super_user's uid_t */
+#define NORMAL 0 /* forces get_block to do disk read */
+#define NO_READ 1 /* prevents get_block from doing disk read */
+#define PREFETCH 2 /* tells get_block not to read or mark dev */
+
+#define NO_BIT ((bit_t) 0) /* returned by alloc_bit() to signal failure */
+
+#define LOOK_UP 0 /* tells search_dir to lookup string */
+#define ENTER 1 /* tells search_dir to make dir entry */
+#define DELETE 2 /* tells search_dir to delete entry */
+#define IS_EMPTY 3 /* tells search_dir to ret. OK or ENOTEMPTY */
+
+/* write_map() args */
+#define WMAP_FREE (1 << 0)
+
+#define IGN_PERM 0
+#define CHK_PERM 1
+
+#define CLEAN 0 /* disk and memory copies identical */
+#define DIRTY 1 /* disk and memory copies differ */
+#define ATIME 002 /* set if atime field needs updating */
+#define CTIME 004 /* set if ctime field needs updating */
+#define MTIME 010 /* set if mtime field needs updating */
+
+#define BYTE_SWAP 0 /* tells conv2/conv4 to swap bytes */
+
+#define END_OF_FILE (-104) /* eof detected */
+
+#define SUPER_BLOCK_BYTES (1024) /* bytes offset */
+
+#define ROOT_INODE ((ino_t) 2) /* inode number for root directory */
+#define BOOT_BLOCK ((block_t) 0) /* block number of boot block */
+#define START_BLOCK ((block_t) 2) /* first block of FS (not counting SB) */
+#define BLOCK_ADDRESS_BYTES 4 /* bytes per address */
+
+#define SUPER_SIZE usizeof (struct super_block) /* sb size in RAM */
+#define SUPER_SIZE_D (1024) /* max size of superblock stored on disk */
+
+/* Directory-related macros */
+
+#define DIR_ENTRY_ALIGN 4
+
+/* ino + rec_len + name_len + file_type, doesn't include name and padding */
+#define MIN_DIR_ENTRY_SIZE 8
+
+#define DIR_ENTRY_CONTENTS_SIZE(d) (MIN_DIR_ENTRY_SIZE + (d)->d_name_len)
+
+/* size with padding */
+#define DIR_ENTRY_ACTUAL_SIZE(d) (DIR_ENTRY_CONTENTS_SIZE(d) + \
+ ((DIR_ENTRY_CONTENTS_SIZE(d) & 0x03) == 0 ? 0 : \
+ DIR_ENTRY_ALIGN - (DIR_ENTRY_CONTENTS_SIZE(d) & 0x03) ))
+
+/* How many bytes can be taken from the end of dentry */
+#define DIR_ENTRY_SHRINK(d) (conv2(le_CPU, (d)->d_rec_len) \
+ - DIR_ENTRY_ACTUAL_SIZE(d))
+
+/* Dentry can have padding, which can be used to enlarge namelen */
+#define DIR_ENTRY_MAX_NAME_LEN(d) (conv2(le_CPU, (d)->d_rec_len) \
+ - MIN_DIR_ENTRY_SIZE)
+
+/* Constants relative to the data blocks */
+/* When changing EXT2_NDIR_BLOCKS, also update ext2_max_size()! */
+#define EXT2_NDIR_BLOCKS 12
+#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS
+#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1)
+#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1)
+#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1)
+
+#define FS_BITMAP_CHUNKS(b) ((b)/usizeof (bitchunk_t))/* # map chunks/blk */
+#define FS_BITCHUNK_BITS (usizeof(bitchunk_t) * CHAR_BIT)
+#define FS_BITS_PER_BLOCK(b) (FS_BITMAP_CHUNKS(b) * FS_BITCHUNK_BITS)
+
+/* Inodes */
+
+/* The next 4 macros were taken from Linux's ext2_fs.h */
+#define EXT2_GOOD_OLD_INODE_SIZE 128
+#define EXT2_GOOD_OLD_FIRST_INO 11
+
+#define EXT2_INODE_SIZE(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \
+ EXT2_GOOD_OLD_INODE_SIZE : \
+ (s)->s_inode_size)
+#define EXT2_FIRST_INO(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \
+ EXT2_GOOD_OLD_FIRST_INO : \
+ (s)->s_first_ino)
+
+/* Maximum size of a fast symlink including trailing '\0' */
+#define MAX_FAST_SYMLINK_LENGTH \
+ ( sizeof(((d_inode *)0)->i_block[0]) * EXT2_N_BLOCKS )
+
+#define NUL(str,l,m) mfs_nul_f(__FILE__,__LINE__,(str), (l), (m))
+
+/* Args to dev_bio/dev_io */
+#define MFS_DEV_READ 10001
+#define MFS_DEV_WRITE 10002
+#define MFS_DEV_SCATTER 10003
+#define MFS_DEV_GATHER 10004
+
+/* FS states */
+#define EXT2_VALID_FS 0x0001 /* Cleanly unmounted */
+#define EXT2_ERROR_FS 0x0002 /* Errors detected */
+
+#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */
+#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
+
+/* ext2 features, names shortened (EXT2_ prefix dropped) */
+#define COMPAT_DIR_PREALLOC 0x0001
+#define COMPAT_IMAGIC_INODES 0x0002
+#define COMPAT_HAS_JOURNAL 0x0004
+#define COMPAT_EXT_ATTR 0x0008
+#define COMPAT_RESIZE_INO 0x0010
+#define COMPAT_DIR_INDEX 0x0020
+#define COMPAT_ANY 0xffffffff
+
+#define RO_COMPAT_SPARSE_SUPER 0x0001
+#define RO_COMPAT_LARGE_FILE 0x0002
+#define RO_COMPAT_BTREE_DIR 0x0004
+#define RO_COMPAT_ANY 0xffffffff
+
+#define INCOMPAT_COMPRESSION 0x0001
+#define INCOMPAT_FILETYPE 0x0002
+#define INCOMPAT_RECOVER 0x0004
+#define INCOMPAT_JOURNAL_DEV 0x0008
+#define INCOMPAT_META_BG 0x0010
+#define INCOMPAT_ANY 0xffffffff
+
+/* What do we support? */
+#define SUPPORTED_INCOMPAT_FEATURES (INCOMPAT_FILETYPE)
+#define SUPPORTED_RO_COMPAT_FEATURES (RO_COMPAT_SPARSE_SUPER | \
+ RO_COMPAT_LARGE_FILE)
+
+/* Ext2 directory file types. Only the low 3 bits are used.
+ * The other bits are reserved for now.
+ */
+#define EXT2_FT_UNKNOWN 0
+#define EXT2_FT_REG_FILE 1
+#define EXT2_FT_DIR 2
+#define EXT2_FT_CHRDEV 3
+#define EXT2_FT_BLKDEV 4
+#define EXT2_FT_FIFO 5
+#define EXT2_FT_SOCK 6
+#define EXT2_FT_SYMLINK 7
+
+#define EXT2_FT_MAX 8
+
+#define HAS_COMPAT_FEATURE(sp, mask) \
+ ( (sp)->s_feature_compat & (mask) )
+#define HAS_RO_COMPAT_FEATURE(sp, mask) \
+ ( (sp)->s_feature_ro_compat & (mask) )
+#define HAS_INCOMPAT_FEATURE(sp, mask) \
+ ( (sp)->s_feature_incompat & (mask) )
+
+
+/* hash-indexed directory */
+#define EXT2_INDEX_FL 0x00001000
+/* Top of directory hierarchies*/
+#define EXT2_TOPDIR_FL 0x00020000
+
+#define EXT2_PREALLOC_BLOCKS 8
+
+
+#endif /* EXT2_CONST_H */
--- /dev/null
+#include "fs.h"
+#include <minix/com.h>
+#include <minix/endpoint.h>
+#include <minix/safecopies.h>
+#include <minix/u64.h>
+#include <string.h>
+#include "inode.h"
+#include "super.h"
+#include "const.h"
+#include "drivers.h"
+
+#include <minix/vfsif.h>
+
+FORWARD _PROTOTYPE( int safe_io_conversion, (endpoint_t driver,
+ cp_grant_id_t *gid, int *op, cp_grant_id_t *gids, endpoint_t *io_ept,
+ void **buffer, int *vec_grants, size_t bytes));
+FORWARD _PROTOTYPE( void safe_io_cleanup, (cp_grant_id_t, cp_grant_id_t *,
+ int));
+FORWARD _PROTOTYPE( int gen_opcl, (endpoint_t driver_e, int op,
+ dev_t dev, endpoint_t proc_e, int flags) );
+FORWARD _PROTOTYPE( int gen_io, (endpoint_t task_nr, message *mess_ptr) );
+
+
+/*===========================================================================*
+ * fs_new_driver *
+ *===========================================================================*/
+PUBLIC int fs_new_driver(void)
+{
+/* Record a new driver endpoint for a device. The device number and the new
+ * endpoint are taken from the request message (fs_m_in); the endpoint is
+ * stored in driver_endpoints[], which block_dev_io() consults on every
+ * transfer.
+ *
+ * Returns OK, or EINVAL if the major number in the request does not fit in
+ * driver_endpoints[].
+ */
+  dev_t dev;
+  int maj;
+
+  dev = (dev_t) fs_m_in.REQ_DEV;
+  maj = major(dev);
+
+  /* driver_endpoints[] has exactly NR_DEVICES slots; never index past it. */
+  if (maj < 0 || maj >= NR_DEVICES) {
+    printf("ext2(%d) fs_new_driver: major device number %d not in range\n",
+           SELF_E, maj);
+    return(EINVAL);
+  }
+
+  driver_endpoints[maj].driver_e = (endpoint_t) fs_m_in.REQ_DRIVER_E;
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * safe_io_conversion *
+ *===========================================================================*/
+PRIVATE int safe_io_conversion(driver, gid, op, gids, io_ept, buffer,
+                               vec_grants, bytes)
+endpoint_t driver;
+cp_grant_id_t *gid;
+int *op;
+cp_grant_id_t *gids;
+endpoint_t *io_ept;
+void **buffer;
+int *vec_grants;
+size_t bytes;
+{
+/* Rewrite an MFS_DEV_* request into its grant-based DEV_*_S equivalent.
+ *
+ *   driver      endpoint that receives the grants
+ *   gid         out: grant for the data buffer (READ/WRITE) or for the
+ *               rewritten i/o vector (GATHER/SCATTER)
+ *   op          in: MFS_DEV_* code; out: the matching DEV_*_S code
+ *   gids        out: one grant per vector element (GATHER/SCATTER only)
+ *   io_ept      out: set to SELF_E once the request has been converted
+ *   buffer      in: data buffer or the caller's iovec array; out: for vector
+ *               requests, replaced by the private vector of grants
+ *   vec_grants  out: number of entries filled in 'gids'
+ *   bytes       byte count for READ/WRITE, element count for GATHER/SCATTER
+ *
+ * Returns 1 if the request was converted, 0 otherwise. A failed grant, an
+ * oversized vector or an unknown operation is fatal (panic).
+ */
+  unsigned int j;
+  int access;
+  iovec_t *v;
+  static iovec_t *new_iovec;
+
+  STATICINIT(new_iovec, NR_IOREQS);
+
+  /* Number of grants allocated in vector I/O. */
+  *vec_grants = 0;
+
+  *gid = GRANT_INVALID;
+
+  switch(*op) {
+  case MFS_DEV_READ:
+  case MFS_DEV_WRITE:
+    /* Change to safe op. A read needs the driver to write into our buffer
+     * and vice versa.
+     */
+    *op = *op == MFS_DEV_READ ? DEV_READ_S : DEV_WRITE_S;
+    *gid = cpf_grant_direct(driver, (vir_bytes) *buffer, bytes,
+                            *op == DEV_READ_S ? CPF_WRITE : CPF_READ);
+    if(*gid == GRANT_INVALID) {
+      panic("cpf_grant_direct of buffer failed");
+    }
+
+    break;
+  case MFS_DEV_GATHER:
+  case MFS_DEV_SCATTER:
+    /* new_iovec and gids only hold NR_IOREQS entries; refuse larger vectors
+     * before any grant is handed out.
+     */
+    if(bytes > NR_IOREQS)
+      panic("vec too big: %u", (unsigned) bytes);
+
+    /* Change to safe op. */
+    *op = *op == MFS_DEV_GATHER ? DEV_GATHER_S : DEV_SCATTER_S;
+
+    /* Grant access to my new i/o vector. */
+    *gid = cpf_grant_direct(driver, (vir_bytes) new_iovec,
+                            bytes * sizeof(iovec_t), CPF_READ|CPF_WRITE);
+    if(*gid == GRANT_INVALID) {
+      panic("cpf_grant_direct of vector failed");
+    }
+
+    v = (iovec_t *) *buffer;
+    access = (*op == DEV_GATHER_S) ? CPF_WRITE : CPF_READ;
+
+    /* Grant access to i/o buffers; the new vector carries the grant ids in
+     * place of the buffer addresses.
+     */
+    for(j = 0; j < bytes; j++) {
+      new_iovec[j].iov_addr = gids[j] =
+        cpf_grant_direct(driver, (vir_bytes) v[j].iov_addr,
+                         (size_t) v[j].iov_size, access);
+
+      if(!GRANT_VALID(gids[j])) {
+        panic("ext2: grant to iovec buf failed");
+      }
+      new_iovec[j].iov_size = v[j].iov_size;
+      (*vec_grants)++;
+    }
+
+    /* Set user's vector to the new one. */
+    *buffer = new_iovec;
+    break;
+  default:
+    panic("Illegal operation %d\n", *op);
+    break;
+  }
+
+  /* If we have converted to a safe operation, I/O
+   * endpoint becomes FS if it wasn't already.
+   */
+  if(GRANT_VALID(*gid)) {
+    *io_ept = SELF_E;
+    return 1;
+  }
+
+  /* Not converted to a safe operation (because there is no
+   * copying involved in this operation).
+   */
+  return 0;
+}
+
+/*===========================================================================*
+ * safe_io_cleanup *
+ *===========================================================================*/
+PRIVATE void safe_io_cleanup(gid, gids, gids_size)
+cp_grant_id_t gid;
+cp_grant_id_t *gids;
+int gids_size;
+{
+/* Undo safe_io_conversion(): revoke the main grant 'gid' and then the first
+ * 'gids_size' per-buffer grants stored in 'gids'. Revocation results are
+ * deliberately not inspected.
+ */
+  int idx = 0;
+
+  (void) cpf_revoke(gid);
+
+  while (idx < gids_size) {
+    (void) cpf_revoke(gids[idx]);
+    idx++;
+  }
+}
+
+/*===========================================================================*
+ * block_dev_io *
+ *===========================================================================*/
+PUBLIC int block_dev_io(
+  int op,                       /* MFS_DEV_READ, MFS_DEV_WRITE, etc. */
+  dev_t dev,                    /* major-minor device number */
+  endpoint_t proc_e,            /* in whose address space is buf? */
+  void *buffer,                 /* virtual address of the buffer */
+  u64_t pos,                    /* byte position */
+  size_t bytes                  /* how many bytes to transfer */
+)
+{
+/* Read or write from a device. The parameter 'dev' tells which one.
+ *
+ * The request is converted to its grant-based form, sent to the driver
+ * registered for major(dev), and the grants are revoked again afterwards.
+ * For vector requests 'bytes' is the number of iovec elements and the
+ * (possibly updated) vector is copied back into 'buffer' on success.
+ *
+ * Returns the driver's reply status on success; EDEADEPT if no driver is
+ * registered; the IPC error if the driver died or is locked (a dead driver's
+ * slot is reset to NONE); EIO if the reply names an unexpected endpoint.
+ * Other IPC failures and a SUSPEND reply are fatal.
+ */
+  int r, safe;
+  message m;
+  cp_grant_id_t gid = GRANT_INVALID;
+  int vec_grants;
+  int op_used;
+  void *buf_used;
+  static cp_grant_id_t *gids;
+  endpoint_t driver_e;
+
+  STATICINIT(gids, NR_IOREQS);
+
+  /* Determine driver endpoint for this device */
+  driver_e = driver_endpoints[major(dev)].driver_e;
+
+  /* See if driver is roughly valid. */
+  if (driver_e == NONE) {
+    printf("ext2(%d) block_dev_io: no driver for dev %x\n", SELF_E, dev);
+    return(EDEADEPT);
+  }
+
+  /* The io vector copying relies on this I/O being for FS itself. */
+  if(proc_e != SELF_E) {
+    printf("ext2(%d) doing block_dev_io for non-self %d\n", SELF_E, proc_e);
+    panic("doing block_dev_io for non-self: %d", proc_e);
+  }
+
+  /* By default, these are right. */
+  m.IO_ENDPT = proc_e;
+  m.ADDRESS = buffer;
+  buf_used = buffer;
+
+  /* Convert parameters to 'safe mode'. */
+  op_used = op;
+  safe = safe_io_conversion(driver_e, &gid, &op_used, gids, &m.IO_ENDPT,
+                            &buf_used, &vec_grants, bytes);
+
+  /* Set up rest of the message. */
+  if (safe) m.IO_GRANT = (char *) gid;
+
+  m.m_type = op_used;
+  m.DEVICE = minor(dev);
+  m.POSITION = ex64lo(pos);
+  m.COUNT = bytes;
+  m.HIGHPOS = ex64hi(pos);
+
+  /* Call the task. */
+  r = sendrec(driver_e, &m);
+  if(r == OK && m.REP_STATUS == ERESTART) r = EDEADEPT;
+
+  /* As block I/O never SUSPENDs, safe cleanup must be done whether
+   * the I/O succeeded or not. */
+  if (safe) safe_io_cleanup(gid, gids, vec_grants);
+
+  /* RECOVERY:
+   * - send back dead driver number
+   * - VFS unmaps it, waits for new driver
+   * - VFS sends the new driver endp for the FS proc and the request again
+   */
+  if (r != OK) {
+    if (r == EDEADSRCDST || r == EDEADEPT) {
+      printf("ext2(%d) dead driver %d\n", SELF_E, driver_e);
+      driver_endpoints[major(dev)].driver_e = NONE;
+      return(r);
+    } else if (r == ELOCKED) {
+      printf("ext2(%d) ELOCKED talking to %d\n", SELF_E, driver_e);
+      return(r);
+    } else
+      panic("call_task: can't send/receive: %d", r);
+  }
+
+  /* Did the process we did the sendrec() for get a result? A reply for some
+   * other endpoint says nothing about our request, so its status must not
+   * be handed to the caller; report an I/O error instead, as gen_io() does.
+   */
+  if (m.REP_ENDPT != proc_e) {
+    printf("ext2(%d) strange device reply from %d, type = %d, proc "
+           "= %d (not %d) (2) ignored\n", SELF_E, m.m_source,
+           m.m_type, proc_e, m.REP_ENDPT);
+    return(EIO);
+  }
+
+  /* Task has completed. See if call completed. */
+  if (m.REP_STATUS == SUSPEND) {
+    panic("ext2 block_dev_io: driver returned SUSPEND");
+  }
+
+  /* Only vector requests get a substitute buffer; copy it back so that the
+   * caller sees the vector as the driver left it.
+   */
+  if(buffer != buf_used) {
+    memcpy(buffer, buf_used, bytes * sizeof(iovec_t));
+  }
+
+  return(m.REP_STATUS);
+}
+
+/*===========================================================================*
+ * dev_open *
+ *===========================================================================*/
+PUBLIC int dev_open(
+  endpoint_t driver_e,
+  dev_t dev,                    /* device to open */
+  endpoint_t proc_e,            /* process to open for */
+  int flags                     /* mode bits and flags */
+)
+{
+/* Send a DEV_OPEN request for 'dev' to the driver at 'driver_e'.
+ *
+ * Returns EIO if the major number is out of range, otherwise whatever
+ * gen_opcl() reports. A SUSPEND answer to an open is fatal.
+ */
+  int maj, r;
+
+  /* Determine the major device number call the device class specific
+   * open/close routine. (This is the only routine that must check the
+   * device number for being in range. All others can trust this check.)
+   */
+  maj = major(dev);
+  if (maj >= NR_DEVICES) {
+    printf("Major device number %d not in range\n", major(dev));
+    return(EIO);
+  }
+  r = gen_opcl(driver_e, DEV_OPEN, dev, proc_e, flags);
+  if (r == SUSPEND) panic("suspend on open from driver %d", driver_e);
+  return(r);
+}
+
+
+/*===========================================================================*
+ * dev_close *
+ *===========================================================================*/
+PUBLIC void dev_close(
+ endpoint_t driver_e, /* driver endpoint the request is sent to */
+ dev_t dev /* device to close */
+)
+{
+/* Send a DEV_CLOSE request for 'dev' to the driver at 'driver_e'. The result
+ * of gen_opcl() is discarded, so a failed close goes unreported.
+ */
+ (void) gen_opcl(driver_e, DEV_CLOSE, dev, 0, 0);
+}
+
+
+/*===========================================================================*
+ * gen_opcl *
+ *===========================================================================*/
+PRIVATE int gen_opcl(
+  endpoint_t driver_e,
+  int op,                       /* operation, DEV_OPEN or DEV_CLOSE */
+  dev_t dev,                    /* device to open or close */
+  endpoint_t proc_e,            /* process to open/close for */
+  int flags                     /* mode bits and flags */
+)
+{
+/* Build an open/close request for the minor device of 'dev' and send it to
+ * the driver at 'driver_e'. Used by dev_open() and dev_close().
+ *
+ * Returns the error from gen_io() if the exchange itself failed, otherwise
+ * the status the driver put in its reply.
+ */
+  message dev_mess;
+  int r;
+
+  dev_mess.m_type = op;
+  dev_mess.DEVICE = minor(dev);
+  dev_mess.IO_ENDPT = proc_e;
+  dev_mess.COUNT = flags;
+
+  /* Call the task. When gen_io() fails, dev_mess holds no usable reply, so
+   * REP_STATUS must not be read.
+   */
+  r = gen_io(driver_e, &dev_mess);
+  if (r != OK)
+    return(r);
+
+  return(dev_mess.REP_STATUS);
+}
+
+
+/*===========================================================================*
+ * gen_io *
+ *===========================================================================*/
+PRIVATE int gen_io(
+  endpoint_t task_nr,           /* which task to call */
+  message *mess_ptr             /* pointer to message for task */
+)
+{
+/* Exchange one message with driver 'task_nr' via sendrec() and validate the
+ * reply. An ERESTART reply status is treated like a dead endpoint.
+ *
+ * Returns OK on a well-formed reply, ELOCKED if the IPC call reports it, or
+ * EIO if the reply names a different endpoint than the request did. A dead
+ * driver or any other IPC failure is fatal.
+ */
+  int status, wanted_ept;
+
+  /* Remember whom the request was for; the reply overwrites the message. */
+  wanted_ept = mess_ptr->IO_ENDPT;
+
+  status = sendrec(task_nr, mess_ptr);
+  if (status == OK && mess_ptr->REP_STATUS == ERESTART)
+    status = EDEADEPT;
+
+  if (status == EDEADSRCDST || status == EDEADEPT) {
+    printf("fs: dead driver %d\n", task_nr);
+    panic("should handle crashed drivers");
+    return(status);
+  }
+
+  if (status == ELOCKED) {
+    printf("fs: ELOCKED talking to %d\n", task_nr);
+    return(status);
+  }
+
+  if (status != OK)
+    panic("call_task: can't send/receive: %d", status);
+
+  /* Did the process we did the sendrec() for get a result? */
+  if (mess_ptr->REP_ENDPT == wanted_ept)
+    return(OK);
+
+  printf("fs: strange device reply from %d, type = %d, proc = %d (not "
+         "%d) (2) ignored\n", mess_ptr->m_source, mess_ptr->m_type,
+         wanted_ept,
+         mess_ptr->REP_ENDPT);
+  return(EIO);
+}
--- /dev/null
+#ifndef EXT2_DRIVERS_H
+#define EXT2_DRIVERS_H
+
+/* Driver endpoints for major devices. Only the block devices
+ * are mapped here, it's a subset of the mapping in the VFS */
+
+EXTERN struct driver_endpoints {
+ endpoint_t driver_e;
+} driver_endpoints[NR_DEVICES];
+
+#endif /* EXT2_DRIVERS_H */
--- /dev/null
+/* This is the master header for fs. It includes some other files
+ * and defines the principal constants.
+ */
+
+#ifndef EXT2_FS_H
+#define EXT2_FS_H
+
+#define _POSIX_SOURCE 1 /* tell headers to include POSIX stuff */
+#define _MINIX 1 /* tell headers to include MINIX stuff */
+#define _SYSTEM 1 /* tell headers that this is the kernel */
+
+#define VERBOSE 0 /* show messages during initialization? */
+
+/* The following are so basic, all the *.c files get them automatically. */
+#include <minix/config.h> /* MUST be first */
+#include <ansi.h> /* MUST be second */
+#include <sys/types.h>
+#include <minix/const.h>
+#include <minix/type.h>
+#include <minix/dmap.h>
+
+#include <limits.h>
+#include <errno.h>
+
+#include <minix/syslib.h>
+#include <minix/sysutil.h>
+
+#include "const.h"
+#include "type.h"
+#include "proto.h"
+#include "glo.h"
+
+#define ext2_debug printf
+
+#endif /* EXT2_FS_H */
--- /dev/null
+/* EXTERN should be extern except for the table file */
+
+#ifndef EXT2_GLO_H
+#define EXT2_GLO_H
+
+#ifdef _TABLE
+#undef EXTERN
+#define EXTERN
+#endif
+
+#include <minix/vfsif.h>
+
+/* The following variables are used for returning results to the caller. */
+EXTERN int err_code; /* temporary storage for error number */
+EXTERN int rdwt_err; /* status of last disk i/o request */
+
+EXTERN int cch[NR_INODES];
+
+extern char dot1[2]; /* dot1 (&dot1[0]) and dot2 (&dot2[0]) have a special */
+extern char dot2[3]; /* meaning to search_dir: no access permission check. */
+
+extern _PROTOTYPE (int (*fs_call_vec[]), (void) ); /* fs call table */
+
+EXTERN message fs_m_in;
+EXTERN message fs_m_out;
+EXTERN vfs_ucred_t credentials;
+
+EXTERN uid_t caller_uid;
+EXTERN gid_t caller_gid;
+
+EXTERN int req_nr;
+
+EXTERN endpoint_t SELF_E;
+
+EXTERN char user_path[PATH_MAX+1]; /* pathname to be processed */
+
+EXTERN dev_t fs_dev; /* The device that is handled by this FS proc
+ */
+EXTERN char fs_dev_label[16]; /* Name of the device driver that is handled
+ * by this FS proc.
+ */
+EXTERN int unmountdone;
+EXTERN int exitsignaled;
+
+/* our block size. */
+EXTERN unsigned int fs_block_size;
+
+/* Buffer cache. */
+EXTERN struct buf *buf;
+EXTERN struct buf **buf_hash; /* the buffer hash table */
+EXTERN unsigned int nr_bufs;
+EXTERN int may_use_vmcache;
+/* Little hack for syncing group descriptors. */
+EXTERN int group_descriptors_dirty;
+
+EXTERN struct opt opt; /* global options */
+
+/* On ext2, metadata is stored in little-endian format, so we must take
+ * care of byte swapping when running on a big-endian CPU. */
+EXTERN int le_CPU; /* little/big endian, if TRUE do not swap bytes */
+
+#endif /* EXT2_GLO_H */
--- /dev/null
+/* This file manages inode allocation and deallocation.
+ *
+ * The entry points into this file are:
+ * alloc_inode: allocate a new, unused inode.
+ * free_inode: mark an inode as available for a new file.
+ *
+ * Created (alloc_inode/free_inode/wipe_inode are from MFS):
+ * June 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <string.h>
+#include <stdlib.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include "const.h"
+
+
+FORWARD _PROTOTYPE( bit_t alloc_inode_bit, (struct super_block *sp,
+ struct inode *parent,
+ int is_dir));
+FORWARD _PROTOTYPE( void free_inode_bit, (struct super_block *sp,
+ bit_t bit_returned,
+ int is_dir));
+FORWARD _PROTOTYPE( void wipe_inode, (struct inode *rip));
+
+
+/*===========================================================================*
+ * alloc_inode *
+ *===========================================================================*/
+PUBLIC struct inode *alloc_inode(struct inode *parent, mode_t bits)
+{
+/* Allocate a free inode on parent's device and return its in-core slot.
+ *
+ * 'bits' becomes the mode of the new inode and also decides whether it is
+ * accounted as a directory. On failure NULL is returned; err_code is set to
+ * EROFS for a read-only file system and to ENFILE when the bitmaps have no
+ * free inode.
+ */
+  struct inode *rip;
+  struct super_block *sp;
+  int major, minor, inumb;
+  int is_dir;
+  bit_t b;
+
+  sp = get_super(parent->i_dev);        /* get pointer to super_block */
+
+  /* Nothing can be allocated on a read-only device. */
+  if (sp->s_rd_only) {
+    err_code = EROFS;
+    return(NULL);
+  }
+
+  is_dir = ((bits & I_TYPE) == I_DIRECTORY);
+
+  /* Acquire an inode from the bit map. */
+  b = alloc_inode_bit(sp, parent, is_dir);
+  if (b == NO_BIT) {
+    err_code = ENFILE;
+    major = (int) (sp->s_dev >> MAJOR) & BYTE;
+    minor = (int) (sp->s_dev >> MINOR) & BYTE;
+    ext2_debug("Out of i-nodes on device %d/%d\n", major, minor);
+    return(NULL);
+  }
+
+  inumb = (int) b;              /* be careful not to pass unshort as param */
+
+  /* Try to acquire a slot in the inode table. */
+  rip = get_inode(NO_DEV, inumb);
+  if (rip == NULL) {
+    /* No inode table slots available. Give the bitmap bit back. */
+    free_inode_bit(sp, b, is_dir);
+    return(rip);
+  }
+
+  /* A slot is available. Put the inode just allocated into it. */
+  rip->i_mode = bits;                   /* set up RWX bits */
+  rip->i_links_count = NO_LINK;         /* initial no links */
+  rip->i_uid = caller_uid;              /* file's uid is owner's */
+  rip->i_gid = caller_gid;              /* ditto group id */
+  rip->i_dev = parent->i_dev;           /* mark which device it is on */
+  rip->i_sp = sp;                       /* pointer to super block */
+
+  /* The remaining fields are reset by wipe_inode(). */
+  wipe_inode(rip);
+
+  return(rip);
+}
+
+
+/*===========================================================================*
+ * free_inode *
+ *===========================================================================*/
+PUBLIC void free_inode(
+  register struct inode *rip    /* inode to free */
+)
+{
+/* Give an inode back to the pool of unallocated inodes: clear its bit in the
+ * inode bitmap and mark the in-core copy as not allocated. Inode numbers
+ * outside (NO_ENTRY, s_inodes_count] are silently ignored.
+ */
+  struct super_block *sp;
+  bit_t b = rip->i_num;
+  u16_t mode = rip->i_mode;
+  dev_t dev = rip->i_dev;
+
+  /* Locate the appropriate super_block. */
+  sp = get_super(dev);
+
+  if (b > NO_ENTRY && b <= sp->s_inodes_count) {
+    free_inode_bit(sp, b, (mode & I_TYPE) == I_DIRECTORY);
+
+    rip->i_mode = I_NOT_ALLOC;  /* clear I_TYPE field */
+  }
+}
+
+
+FORWARD _PROTOTYPE( int find_group_dir, (struct super_block *sp,
+ struct inode *parent) );
+FORWARD _PROTOTYPE( int find_group_hashalloc, (struct super_block *sp,
+ struct inode *parent) );
+FORWARD _PROTOTYPE( int find_group_any, (struct super_block *sp,
+ struct inode *parent) );
+FORWARD _PROTOTYPE( int find_group_orlov, (struct super_block *sp,
+ struct inode *parent) );
+
+
+/*===========================================================================*
+ * alloc_inode_bit *
+ *===========================================================================*/
+PRIVATE bit_t alloc_inode_bit(sp, parent, is_dir)
+struct super_block *sp;         /* the filesystem to allocate from */
+struct inode *parent;           /* parent of newly allocated inode */
+int is_dir;                     /* inode will be a directory if it is TRUE */
+{
+/* Pick a block group for a new inode, set a free bit in that group's inode
+ * bitmap and update the free-inode (and, for directories, used-directory)
+ * counters of the group descriptor and the super block.
+ *
+ * The group is chosen by find_group_any() when opt.mfsalloc is set; else by
+ * find_group_orlov() or find_group_dir() for directories (depending on
+ * opt.use_orlov) and by find_group_hashalloc() for everything else.
+ *
+ * Returns the new inode number, or NO_BIT if no group has a free inode.
+ */
+  int group;
+  ino_t inumber = NO_BIT;
+  bit_t bit;
+  struct buf *bp;
+  struct group_desc *gd;
+
+  if (sp->s_rd_only)
+    panic("can't alloc inode on read-only filesys.");
+
+  if (opt.mfsalloc) {
+    group = find_group_any(sp, parent);
+  } else {
+    if (is_dir) {
+      if (opt.use_orlov) {
+        group = find_group_orlov(sp, parent);
+      } else {
+        group = find_group_dir(sp, parent);
+      }
+    } else {
+      group = find_group_hashalloc(sp, parent);
+    }
+  }
+  /* Check if we have a group where to allocate an inode */
+  if (group == -1)
+    return(NO_BIT);             /* no bit could be allocated */
+
+  gd = get_group_desc(group);
+  if (gd == NULL)
+    panic("can't get group_desc to alloc inode");
+
+  /* find_group_* should always return either a group with
+   * a free inode slot or -1, which we checked earlier.
+   */
+  ASSERT(gd->free_inodes_count);
+
+  bp = get_block(sp->s_dev, gd->inode_bitmap, NORMAL);
+  bit = setbit(bp->b_bitmap, sp->s_inodes_per_group, 0);
+  ASSERT(bit != -1);            /* group definitely contains a free inode */
+
+  /* Bitmap bits are counted from 0 within a group, inode numbers from 1. */
+  inumber = group * sp->s_inodes_per_group + bit + 1;
+
+  /* Extra checks before real allocation.
+   * Only major bug can cause problems. Since setbit changed
+   * bp->b_bitmap there is no way to recover from this bug.
+   * Should never happen.
+   */
+  if (inumber > sp->s_inodes_count) {
+    panic("ext2: allocator returned inum greater than "
+          "total number of inodes.\n");
+  }
+
+  if (inumber < EXT2_FIRST_INO(sp)) {
+    panic("ext2: allocator tried to use reserved inode.\n");
+  }
+
+  bp->b_dirt = DIRTY;
+  put_block(bp, MAP_BLOCK);
+
+  gd->free_inodes_count--;
+  sp->s_free_inodes_count--;
+  if (is_dir) {
+    gd->used_dirs_count++;
+    sp->s_dirs_counter++;
+  }
+
+  group_descriptors_dirty = DIRTY;
+
+  /* Almost the same as previous 'group' ASSERT */
+  ASSERT(inumber != NO_BIT);
+  return inumber;
+}
+
+
+/*===========================================================================*
+ * free_inode_bit *
+ *===========================================================================*/
+PRIVATE void free_inode_bit(struct super_block *sp, bit_t bit_returned,
+                            int is_dir)
+{
+/* Return inode number 'bit_returned' by turning off its bit in the inode
+ * bitmap of its block group, and update the free-inode (and, if 'is_dir',
+ * used-directory) counters of the group descriptor and the super block.
+ *
+ * A read-only file system, an inode number outside
+ * [EXT2_FIRST_INO(sp), s_inodes_count], or unsetbit() reporting a problem
+ * (presumably: the bit was not set) is fatal.
+ */
+  int group;                    /* group number of bit_returned */
+  int bit;                      /* bit_returned number within its group */
+  struct buf *bp;
+  struct group_desc *gd;
+
+  if (sp->s_rd_only)
+    panic("can't free bit on read-only filesys.");
+
+  /* At first search group, to which bit_returned belongs to
+   * and figure out in what word bit is stored.
+   */
+  if (bit_returned > sp->s_inodes_count ||
+      bit_returned < EXT2_FIRST_INO(sp))
+    panic("trying to free inode %d beyond inodes scope.", bit_returned);
+
+  /* Inode numbers start at 1, bitmap indices at 0. */
+  group = (bit_returned - 1) / sp->s_inodes_per_group;
+  bit = (bit_returned - 1) % sp->s_inodes_per_group;   /* index in bitmap */
+
+  gd = get_group_desc(group);
+  if (gd == NULL)
+    panic("can't get group_desc to free inode");
+
+  bp = get_block(sp->s_dev, gd->inode_bitmap, NORMAL);
+
+  if (unsetbit(bp->b_bitmap, bit))
+    panic("Tried to free unused inode %d", bit_returned);
+
+  bp->b_dirt = DIRTY;
+  put_block(bp, MAP_BLOCK);
+
+  gd->free_inodes_count++;
+  sp->s_free_inodes_count++;
+
+  if (is_dir) {
+    gd->used_dirs_count--;
+    sp->s_dirs_counter--;
+  }
+
+  group_descriptors_dirty = DIRTY;
+
+  /* find_group_any() starts scanning at s_igsearch; pull it back so that it
+   * sees the inode that just became free.
+   */
+  if (group < sp->s_igsearch)
+    sp->s_igsearch = group;
+}
+
+
+/* Choose a block group for a new directory: among the groups that have at
+ * least one free inode and at least the average number of free inodes, take
+ * the one with the most free blocks (the first such group wins ties).
+ * Returns the group number, or -1 if no group qualifies. 'parent' is not
+ * used. Modelled closely on Linux' find_group_dir().
+ */
+PRIVATE int find_group_dir(struct super_block *sp, struct inode *parent)
+{
+  int avefreei = sp->s_free_inodes_count / sp->s_groups_count;
+  struct group_desc *cur, *winner = NULL;
+  int g, winner_group = -1;
+
+  for (g = 0; g < sp->s_groups_count; ++g) {
+    cur = get_group_desc(g);
+    if (cur == NULL)
+      panic("can't get group_desc to alloc inode");
+
+    /* Skip groups that are full or below the average. */
+    if (cur->free_inodes_count == 0 || cur->free_inodes_count < avefreei)
+      continue;
+
+    if (winner == NULL ||
+        cur->free_blocks_count > winner->free_blocks_count) {
+      winner = cur;
+      winner_group = g;
+    }
+  }
+
+  return winner_group;          /* group or -1 */
+}
+
+
+/* Choose a block group for a new non-directory inode (an analogue of
+ * ffs_hashalloc() from *BSD):
+ * 1) Use the parent's group if it has both free inodes and free blocks.
+ * 2) Otherwise rehash quadratically on the group number.
+ * 3) Otherwise search linearly for any group with a free inode.
+ * Returns the group number, or -1 if no group has a free inode.
+ */
+PRIVATE int find_group_hashalloc(struct super_block *sp, struct inode *parent)
+{
+  int ngroups = sp->s_groups_count;
+  int parent_group = (parent->i_num - 1) / sp->s_inodes_per_group;
+  struct group_desc *gd;
+  int group, step, tried;
+
+  /* Try to place new inode in its parent group */
+  gd = get_group_desc(parent_group);
+  if (gd == NULL)
+    panic("can't get group_desc to alloc inode");
+  if (gd->free_inodes_count && gd->free_blocks_count)
+    return parent_group;
+
+  /* The parent's group is unusable, so pick another one. Files of one
+   * directory should still end up together, while files of other
+   * directories living in the same group as our parent should go elsewhere;
+   * hashing on the parent's inode number spreads things over the disk.
+   */
+  group = (parent_group + parent->i_num) % ngroups;
+
+  /* Make quadratic probing to find a group with free inodes and blocks. */
+  step = 1;
+  while (step < ngroups) {
+    group += step;
+    if (group >= ngroups)
+      group -= ngroups;
+    gd = get_group_desc(group);
+    if (gd == NULL)
+      panic("can't get group_desc to alloc inode");
+    if (gd->free_inodes_count && gd->free_blocks_count)
+      return group;
+    step <<= 1;
+  }
+
+  /* Still no group for new inode, try linear search.
+   * Also check parent again (but for free inodes only).
+   */
+  group = parent_group;
+  tried = 0;
+  while (tried < ngroups) {
+    if (group >= ngroups)
+      group = 0;
+    gd = get_group_desc(group);
+    if (gd == NULL)
+      panic("can't get group_desc to alloc inode");
+    if (gd->free_inodes_count)
+      return group;
+    tried++;
+    group++;
+  }
+
+  return -1;
+}
+
+
+/* Return the first group at or after sp->s_igsearch that has a free inode
+ * and remember it in s_igsearch for the next call; -1 if there is none.
+ * 'parent' is not used. This is similar to what MFS does.
+ */
+PRIVATE int find_group_any(struct super_block *sp, struct inode *parent)
+{
+  int ngroups = sp->s_groups_count;
+  struct group_desc *gd;
+  int group = sp->s_igsearch;
+
+  while (group < ngroups) {
+    gd = get_group_desc(group);
+    if (gd == NULL)
+      panic("can't get group_desc to alloc inode");
+
+    if (gd->free_inodes_count) {
+      sp->s_igsearch = group;
+      return group;
+    }
+
+    group++;
+  }
+
+  return -1;
+}
+
+
+/* Choose a block group for a new directory (Orlov-style allocation).
+ *
+ * First-level directories (those created in the root or in a directory
+ * flagged EXT2_TOPDIR_FL) are spread over the disk: starting from a random
+ * group, all groups are scanned and, among those with at least the average
+ * number of free inodes and free blocks, the one with the fewest directories
+ * is returned. If no group beats the initial directory bound, the last
+ * above-average group seen is returned; failing that, the last group seen
+ * with any free inode.
+ *
+ * For other directories the scan starts at the parent's group and returns
+ * the first 'good' group, i.e. one with at least min_inodes free inodes and
+ * min_blocks free blocks; failing that, the last group seen with any free
+ * inode.
+ *
+ * Returns the group number, or -1 if no group has a free inode.
+ */
+PRIVATE int find_group_orlov(struct super_block *sp, struct inode *parent)
+{
+  int avefreei = sp->s_free_inodes_count / sp->s_groups_count;
+  int avefreeb = sp->s_free_blocks_count / sp->s_groups_count;
+
+  int group = -1;
+  int fallback_group = -1;      /* Group with at least 1 free inode */
+  struct group_desc *gd;
+  int i;
+
+  if (parent->i_num == ROOT_INODE ||
+      parent->i_flags & EXT2_TOPDIR_FL) {
+    int best_group = -1;
+    int best_avefree_group = -1;        /* Best value of avefreei/avefreeb */
+    int best_ndir = sp->s_inodes_per_group;
+
+    /* Reduce the random value to a valid group number. Without this the
+     * value is almost always out of range and the wrap-around check below
+     * would restart every scan at group 0.
+     */
+    group = (unsigned int) random() % sp->s_groups_count;
+    for (i = 0; i < sp->s_groups_count; i++, group++) {
+      if (group >= sp->s_groups_count)
+        group = 0;
+      gd = get_group_desc(group);
+      if (gd == NULL)
+        panic("can't get group_desc to alloc inode");
+      if (gd->free_inodes_count == 0)
+        continue;
+
+      fallback_group = group;
+
+      if (gd->free_inodes_count < avefreei ||
+          gd->free_blocks_count < avefreeb)
+        continue;
+
+      best_avefree_group = group;
+
+      if (gd->used_dirs_count >= best_ndir)
+        continue;
+      best_ndir = gd->used_dirs_count;
+      best_group = group;
+    }
+    if (best_group >= 0)
+      return best_group;
+    if (best_avefree_group >= 0)
+      return best_avefree_group;
+    return fallback_group;
+  } else {
+    int parent_group = (parent->i_num - 1) / sp->s_inodes_per_group;
+    /* 2 is kind of random thing for now,
+     * but performance results are still good.
+     */
+    int min_blocks = avefreeb / 2;
+    int min_inodes = avefreei / 2;
+
+    group = parent_group;
+    for (i = 0; i < sp->s_groups_count; i++, group++) {
+      if (group >= sp->s_groups_count)
+        group = 0;
+      gd = get_group_desc(group);
+      if (gd == NULL)
+        panic("can't get group_desc to alloc inode");
+      if (gd->free_inodes_count == 0)
+        continue;
+
+      fallback_group = group;
+
+      if (gd->free_inodes_count >= min_inodes &&
+          gd->free_blocks_count >= min_blocks)
+        return group;
+    }
+    return fallback_group;
+  }
+
+  return -1;                    /* not reached: both branches return */
+}
+
+
+/*===========================================================================*
+ * wipe_inode *
+ *===========================================================================*/
+PRIVATE void wipe_inode(
+  register struct inode *rip    /* the inode to be erased */
+)
+{
+/* Reset the fields of a freshly allocated inode that alloc_inode() does not
+ * fill in itself: size, block count, flags, generation, ACL and fragment
+ * fields and every block pointer. All three timestamps are scheduled for
+ * update and the inode is marked dirty.
+ */
+  register int i;
+
+  rip->i_size = 0;
+  rip->i_update = ATIME | CTIME | MTIME;        /* update all times later */
+  rip->i_blocks = 0;
+  rip->i_flags = 0;
+  rip->i_generation = 0;
+  rip->i_file_acl = 0;
+  rip->i_dir_acl = 0;
+  rip->i_faddr = 0;
+
+  /* Direct, indirect, double and triple indirect pointers alike. */
+  for (i = 0; i < EXT2_N_BLOCKS; i++)
+    rip->i_block[i] = NO_BLOCK;
+
+  rip->i_dirt = DIRTY;
+}
--- /dev/null
+/* This file manages the inode table. There are procedures to allocate and
+ * deallocate inodes, acquire, erase, and release them, and read and write
+ * them from the disk.
+ *
+ * The entry points into this file are
+ * get_inode: search inode table for a given inode; if not there,
+ * read it
+ * put_inode: indicate that an inode is no longer needed in memory
+ * update_times: update atime, ctime, and mtime
+ * rw_inode: read a disk block and extract an inode, or corresp. write
+ * dup_inode: indicate that someone else is using an inode table entry
+ * find_inode: retrieve pointer to inode in inode cache
+ *
+ * Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <string.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include <minix/vfsif.h>
+
+/* Helpers private to this file (defined further down):
+ *   icopy:         copy fields between an in-core inode and a disk inode
+ *   addhash_inode: insert an inode into the hash table
+ *   unhash_inode:  remove an inode from the hash table
+ */
+FORWARD _PROTOTYPE( void icopy, (struct inode *rip, d_inode *dip,
+ int direction, int norm));
+FORWARD _PROTOTYPE( void addhash_inode, (struct inode *node) );
+FORWARD _PROTOTYPE( void unhash_inode, (struct inode *node) );
+
+
+/*===========================================================================*
+ * fs_putnode *
+ *===========================================================================*/
+PUBLIC int fs_putnode(void)
+{
+/* Release REQ_COUNT references on the inode named in the request message.
+ * The inode must currently be in use and the count must lie in the range
+ * 1..i_count; anything else is reported and treated as fatal.
+ */
+  struct inode *node;
+  int nrefs;
+
+  node = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (node == NULL) {
+    printf("%s:%d put_inode: inode #%d dev: %d not found\n", __FILE__,
+           __LINE__, (ino_t) fs_m_in.REQ_INODE_NR, fs_dev);
+    panic("fs_putnode failed");
+  }
+
+  nrefs = fs_m_in.REQ_COUNT;
+  if (nrefs <= 0) {
+    printf("%s:%d put_inode: bad value for count: %d\n", __FILE__,
+           __LINE__, nrefs);
+    panic("fs_putnode failed");
+  } else if (nrefs > node->i_count) {
+    printf("%s:%d put_inode: count too high: %d > %d\n", __FILE__,
+           __LINE__, nrefs, node->i_count);
+    panic("fs_putnode failed");
+  }
+
+  /* Drop all but one of the references directly; put_inode() consumes the
+   * last one so that its release logic runs exactly once.
+   */
+  node->i_count = node->i_count - (nrefs - 1);
+  put_inode(node);
+
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * init_inode_cache *
+ *===========================================================================*/
+PUBLIC void init_inode_cache()
+{
+/* Bring the inode cache to its initial state: zero the hit/miss counters,
+ * empty every hash bucket and put all table slots on the unused list,
+ * marked as holding no inode.
+ */
+  int n;
+
+  inode_cache_miss = 0;
+  inode_cache_hit = 0;
+
+  /* Start with an empty unused/free list. */
+  TAILQ_INIT(&unused_inodes);
+
+  /* Start with empty hash buckets. */
+  for (n = 0; n < INODE_HASH_SIZE; n++) {
+    struct inodelist *bucket = &hash_inodes[n];
+
+    LIST_INIT(bucket);
+  }
+
+  /* Every slot of the inode table is free; each one is pushed at the head
+   * of the unused list in table order.
+   */
+  for (n = 0; n < NR_INODES; n++) {
+    struct inode *slot = &inode[n];
+
+    slot->i_num = NO_ENTRY;
+    TAILQ_INSERT_HEAD(&unused_inodes, slot, i_unused);
+  }
+}
+
+
+/*===========================================================================*
+ * addhash_inode *
+ *===========================================================================*/
+PRIVATE void addhash_inode(struct inode *node)
+{
+/* Put 'node' at the head of the hash bucket selected by its inode number. */
+  int bucket;
+
+  bucket = node->i_num & INODE_HASH_MASK;
+  LIST_INSERT_HEAD(&hash_inodes[bucket], node, i_hash);
+}
+
+
+/*===========================================================================*
+ * unhash_inode *
+ *===========================================================================*/
+PRIVATE void unhash_inode(struct inode *node)
+{
+/* Take 'node' out of whichever hash bucket it is currently linked into. */
+  LIST_REMOVE(node, i_hash);
+}
+
+
+/*===========================================================================*
+ * get_inode *
+ *===========================================================================*/
+PUBLIC struct inode *get_inode(
+  dev_t dev,                     /* device on which inode resides */
+  ino_t numb                     /* inode number (ANSI: may not be unshort) */
+)
+{
+/* Return a referenced in-core inode for (dev, numb).
+ *
+ * If the inode is already hashed its reference count is bumped (after
+ * taking it off the unused list when nobody was using it).  Otherwise the
+ * first slot on the unused list is recycled: it is unhashed if it still
+ * caches another inode, loaded from disk unless dev is NO_DEV, its in-core
+ * bookkeeping is reset, and it is hashed under the new number.
+ *
+ * Returns NULL with err_code set to ENFILE when no slot is available.
+ */
+  register struct inode *rip;
+  int bucket;
+  int k;
+
+  bucket = (int) numb & INODE_HASH_MASK;
+
+  /* First try the hash table. */
+  LIST_FOREACH(rip, &hash_inodes[bucket], i_hash) {
+    if (rip->i_num != numb || rip->i_dev != dev)
+      continue;
+
+    /* Found it.  A cached but unused inode leaves the unused list. */
+    if (rip->i_count == 0) {
+      inode_cache_hit++;
+      TAILQ_REMOVE(&unused_inodes, rip, i_unused);
+    }
+    rip->i_count++;
+    return(rip);
+  }
+
+  inode_cache_miss++;
+
+  /* Not hashed: a slot from the unused list is needed. */
+  if (TAILQ_EMPTY(&unused_inodes)) {
+    err_code = ENFILE;
+    return(NULL);
+  }
+  rip = TAILQ_FIRST(&unused_inodes);
+
+  /* The slot may still cache some other inode; forget that one. */
+  if (rip->i_num != NO_ENTRY)
+    unhash_inode(rip);
+
+  /* The slot is in use from now on. */
+  TAILQ_REMOVE(&unused_inodes, rip, i_unused);
+
+  /* Identify the slot and fetch the disk part of the inode. */
+  rip->i_dev = dev;
+  rip->i_num = numb;
+  rip->i_count = 1;
+  if (dev != NO_DEV)
+    rw_inode(rip, READING);      /* get inode from disk */
+
+  /* Reset the fields that exist only in memory. */
+  rip->i_update = 0;             /* all the times are initially up-to-date */
+  rip->i_last_dpos = 0;          /* no dentries searched for yet */
+  rip->i_bsearch = NO_BLOCK;
+  rip->i_last_pos_bl_alloc = 0;
+  rip->i_last_dentry_size = 0;
+  rip->i_mountpoint = FALSE;
+
+  rip->i_preallocation = opt.use_prealloc;
+  rip->i_prealloc_count = rip->i_prealloc_index = 0;
+
+  /* A recycled slot is not expected to still own preallocated blocks;
+   * if it does, hand them back and complain.
+   */
+  for (k = 0; k < EXT2_PREALLOC_BLOCKS; k++) {
+    if (rip->i_prealloc_blocks[k] == NO_BLOCK)
+      continue;
+    free_block(rip->i_sp, rip->i_prealloc_blocks[k]);
+    rip->i_prealloc_blocks[k] = NO_BLOCK;
+    ext2_debug("Warning: Unexpected preallocated block.");
+  }
+
+  /* Make the inode findable. */
+  addhash_inode(rip);
+
+  return(rip);
+}
+
+
+/*===========================================================================*
+ * find_inode *
+ *===========================================================================*/
+PUBLIC struct inode *find_inode(
+  dev_t dev,                     /* device on which inode resides */
+  ino_t numb                     /* inode number (ANSI: may not be unshort) */
+)
+{
+/* Look up an inode that is currently in use (i_count > 0) by device and
+ * inode number.  The reference count is not changed.  Returns NULL when no
+ * such inode is hashed; cached-but-unused entries are ignored.
+ */
+  struct inode *cand;
+  int bucket;
+
+  bucket = (int) numb & INODE_HASH_MASK;
+
+  LIST_FOREACH(cand, &hash_inodes[bucket], i_hash) {
+    if (cand->i_count <= 0)
+      continue;
+    if (cand->i_num != numb)
+      continue;
+    if (cand->i_dev != dev)
+      continue;
+    return(cand);
+  }
+
+  return(NULL);
+}
+
+
+/*===========================================================================*
+ * put_inode *
+ *===========================================================================*/
+PUBLIC void put_inode(
+  register struct inode *rip     /* pointer to inode to be released */
+)
+{
+/* The caller is no longer using this inode.  A NULL pointer is accepted and
+ * ignored.  When the last reference goes away:
+ *   - an inode without links is truncated to size 0 and freed on disk;
+ *   - a dirty inode is written back;
+ *   - its preallocated blocks are returned to the file system;
+ *   - the slot goes to the front of the unused list (and is unhashed) if
+ *     the inode has no links, or to the back of the list, still hashed,
+ *     so it stays cached.
+ * Releasing an inode whose count is already below 1 is a fatal error.
+ */
+
+  if (rip == NULL)
+    return;                      /* checking here is easier than in caller */
+
+  /* NOTE(review): assumes panic() is printf-style; without the conversion
+   * the count passed here was never printed.
+   */
+  if (rip->i_count < 1)
+    panic("put_inode: i_count already below 1: %d", rip->i_count);
+
+  if (--rip->i_count == 0) {     /* i_count == 0 means no one is using it now */
+    if (rip->i_links_count == NO_LINK) {
+      /* i_links_count == NO_LINK means free the inode. */
+      /* return all the disk blocks */
+
+      /* Ignore errors by truncate_inode in case inode is a block
+       * special or character special file.
+       */
+      (void) truncate_inode(rip, (off_t) 0);
+      /* free inode clears I_TYPE field, since it's used there */
+      rip->i_dirt = DIRTY;
+      free_inode(rip);
+    }
+
+    rip->i_mountpoint = FALSE;
+    if (rip->i_dirt == DIRTY) rw_inode(rip, WRITING);
+
+    discard_preallocated_blocks(rip);  /* Return blocks to the filesystem */
+
+    if (rip->i_links_count == NO_LINK) {
+      /* free, put at the front of the LRU list */
+      unhash_inode(rip);
+      rip->i_num = NO_ENTRY;
+      TAILQ_INSERT_HEAD(&unused_inodes, rip, i_unused);
+    } else {
+      /* unused, put at the back of the LRU (cache it) */
+      TAILQ_INSERT_TAIL(&unused_inodes, rip, i_unused);
+    }
+  }
+}
+
+
+/*===========================================================================*
+ * update_times *
+ *===========================================================================*/
+PUBLIC void update_times(
+  register struct inode *rip     /* pointer to inode to be read/written */
+)
+{
+/* Fill in the timestamps that were flagged for update in i_update.
+ * Callers only set ATIME/CTIME/MTIME bits when a time must change; the
+ * clock is read here, once, and stored in every flagged field.  Nothing is
+ * touched on a read-only file system (the flags stay set in that case).
+ */
+  time_t now;
+
+  if (rip->i_sp->s_rd_only)
+    return;                      /* no updates for read-only file systems */
+
+  now = clock_time();
+
+  if (rip->i_update & ATIME) rip->i_atime = now;
+  if (rip->i_update & CTIME) rip->i_ctime = now;
+  if (rip->i_update & MTIME) rip->i_mtime = now;
+
+  rip->i_update = 0;             /* they are all up-to-date now */
+}
+
+/*===========================================================================*
+ * rw_inode *
+ *===========================================================================*/
+PUBLIC void rw_inode(
+  register struct inode *rip,    /* pointer to inode to be read/written */
+  int rw_flag                    /* READING or WRITING */
+)
+{
+/* Transfer one inode between the in-core table and its slot in the on-disk
+ * inode table.  The inode's super block pointer (i_sp) is (re)set as a side
+ * effect, and the in-core copy is marked CLEAN afterwards.  When writing,
+ * pending time updates are applied first and the buffer is marked dirty
+ * unless the file system is read-only.
+ */
+  register struct super_block *sp;
+  register struct group_desc *gd;
+  register struct buf *bp;
+  register d_inode *dip;
+  u32_t block_group_number;
+  block_t b, offset, in_block;
+
+  /* Every inode carries a pointer to the super block of its device. */
+  sp = get_super(rip->i_dev);
+  rip->i_sp = sp;
+
+  /* Inode numbers start at 1; find the group holding this one. */
+  block_group_number = (rip->i_num - 1) / sp->s_inodes_per_group;
+  gd = get_group_desc(block_group_number);
+  if (gd == NULL)
+    panic("can't get group_desc to read/write inode");
+
+  /* Byte offset of the inode within its group's inode table.  A block
+   * holds several inodes (e.g. inode 2 is stored in block 0 of the table),
+   * so the offset is split into a block number and a position inside it.
+   */
+  offset = ((rip->i_num - 1) % sp->s_inodes_per_group) * EXT2_INODE_SIZE(sp);
+  b = (block_t) gd->inode_table + (offset >> sp->s_blocksize_bits);
+  in_block = offset & (sp->s_block_size - 1);
+
+  bp = get_block(rip->i_dev, b, NORMAL);
+  dip = (d_inode*) (bp->b_data + in_block);
+
+  /* Writing: settle the timestamps and flag the buffer for write-back. */
+  if (rw_flag == WRITING) {
+    if (rip->i_update)
+      update_times(rip);
+    if (!sp->s_rd_only)
+      bp->b_dirt = DIRTY;
+  }
+
+  icopy(rip, dip, rw_flag, TRUE);
+
+  put_block(bp, INODE_BLOCK);
+  rip->i_dirt = CLEAN;
+}
+
+
+/*===========================================================================*
+ * icopy *
+ *===========================================================================*/
+PRIVATE void icopy(
+  register struct inode *rip,    /* pointer to the in-core inode struct */
+  register d_inode *dip,         /* pointer to the on-disk struct */
+  int direction,                 /* READING (from disk) or WRITING (to disk) */
+  int norm                       /* TRUE = do not swap bytes; FALSE = swap */
+)
+{
+/* Copy the disk-resident fields of an inode, one by one, between the
+ * in-core entry and the on-disk image.  READING fills 'rip' from 'dip';
+ * any other direction fills 'dip' from 'rip'.  Integer fields go through
+ * conv2()/conv4() with 'norm'; the OS dependent unions are copied as raw
+ * bytes.
+ */
+  int k;
+
+  if (direction != READING) {
+    /* In-core table -> disk image. */
+    dip->i_mode        = conv2(norm, rip->i_mode);
+    dip->i_uid         = conv2(norm, rip->i_uid);
+    dip->i_size        = conv4(norm, rip->i_size);
+    dip->i_atime       = conv4(norm, rip->i_atime);
+    dip->i_ctime       = conv4(norm, rip->i_ctime);
+    dip->i_mtime       = conv4(norm, rip->i_mtime);
+    dip->i_dtime       = conv4(norm, rip->i_dtime);
+    dip->i_gid         = conv2(norm, rip->i_gid);
+    dip->i_links_count = conv2(norm, rip->i_links_count);
+    dip->i_blocks      = conv4(norm, rip->i_blocks);
+    dip->i_flags       = conv4(norm, rip->i_flags);
+    /* osd1 is opaque to Minix: copy it untouched. */
+    memcpy(&dip->osd1, &rip->osd1, sizeof(dip->osd1));
+    for (k = 0; k < EXT2_N_BLOCKS; k++)
+      dip->i_block[k] = conv4(norm, rip->i_block[k]);
+    dip->i_generation  = conv4(norm, rip->i_generation);
+    dip->i_file_acl    = conv4(norm, rip->i_file_acl);
+    dip->i_dir_acl     = conv4(norm, rip->i_dir_acl);
+    dip->i_faddr       = conv4(norm, rip->i_faddr);
+    /* osd2 is opaque to Minix as well. */
+    memcpy(&dip->osd2, &rip->osd2, sizeof(dip->osd2));
+    return;
+  }
+
+  /* Disk image -> in-core table. */
+  rip->i_mode        = conv2(norm, dip->i_mode);
+  rip->i_uid         = conv2(norm, dip->i_uid);
+  rip->i_size        = conv4(norm, dip->i_size);
+  rip->i_atime       = conv4(norm, dip->i_atime);
+  rip->i_ctime       = conv4(norm, dip->i_ctime);
+  rip->i_mtime       = conv4(norm, dip->i_mtime);
+  rip->i_dtime       = conv4(norm, dip->i_dtime);
+  rip->i_gid         = conv2(norm, dip->i_gid);
+  rip->i_links_count = conv2(norm, dip->i_links_count);
+  rip->i_blocks      = conv4(norm, dip->i_blocks);
+  rip->i_flags       = conv4(norm, dip->i_flags);
+  /* osd1 is opaque to Minix: copy it untouched. */
+  memcpy(&rip->osd1, &dip->osd1, sizeof(rip->osd1));
+  for (k = 0; k < EXT2_N_BLOCKS; k++)
+    rip->i_block[k] = conv4(norm, dip->i_block[k]);
+  rip->i_generation  = conv4(norm, dip->i_generation);
+  rip->i_file_acl    = conv4(norm, dip->i_file_acl);
+  rip->i_dir_acl     = conv4(norm, dip->i_dir_acl);
+  rip->i_faddr       = conv4(norm, dip->i_faddr);
+  /* osd2 is opaque to Minix as well. */
+  memcpy(&rip->osd2, &dip->osd2, sizeof(rip->osd2));
+}
+
+
+/*===========================================================================*
+ * dup_inode *
+ *===========================================================================*/
+PUBLIC void dup_inode(
+  struct inode *ip               /* The inode to be duplicated. */
+)
+{
+/* Take one more reference on an inode the caller already holds a pointer
+ * to; no lookup is done.  The reference is given back with put_inode().
+ */
+  ip->i_count += 1;
+}
--- /dev/null
+/* Inode table. This table holds inodes that are currently in use. In some
+ * cases they have been opened by an open() or creat() system call, in other
+ * cases the file system itself needs the inode for one reason or another,
+ * such as to search a directory for a path name.
+ * The first part of the struct holds fields that are present on the
+ * disk; the second part holds fields not present on the disk.
+ * The disk inode part is also declared in "type.h" as 'd_inode'
+ *
+ */
+
+#ifndef EXT2_INODE_H
+#define EXT2_INODE_H
+
+#include <sys/queue.h>
+
+/* Disk part of inode structure was taken from
+ * linux/include/linux/ext2_fs.h.
+ */
+/* One entry of the in-core inode table 'inode[NR_INODES]'.  The fields up to
+ * and including 'osd2' are the ones icopy() transfers to and from the disk
+ * inode; everything after that is bookkeeping kept only in memory.
+ */
+EXTERN struct inode {
+ u16_t i_mode; /* File mode */
+ u16_t i_uid; /* Low 16 bits of Owner Uid */
+ u32_t i_size; /* Size in bytes */
+ u32_t i_atime; /* Access time */
+ u32_t i_ctime; /* Inode change time (the Linux header calls it
+ * "creation time"; it is flagged for update
+ * whenever the link count changes)
+ */
+ u32_t i_mtime; /* Modification time */
+ u32_t i_dtime; /* Deletion Time */
+ u16_t i_gid; /* Low 16 bits of Group Id */
+ u16_t i_links_count; /* Links count */
+ u32_t i_blocks; /* 512-byte blocks count */
+ u32_t i_flags; /* File flags */
+ union {
+ struct {
+ u32_t l_i_reserved1;
+ } linux1;
+ struct {
+ u32_t h_i_translator;
+ } hurd1;
+ struct {
+ u32_t m_i_reserved1;
+ } masix1;
+ } osd1; /* OS dependent 1 (copied as raw bytes) */
+ u32_t i_block[EXT2_N_BLOCKS]; /* Pointers to blocks; a fast symlink keeps
+ * its link text here instead
+ */
+ u32_t i_generation; /* File version (for NFS) */
+ u32_t i_file_acl; /* File ACL */
+ u32_t i_dir_acl; /* Directory ACL */
+ u32_t i_faddr; /* Fragment address */
+ union {
+ struct {
+ u8_t l_i_frag; /* Fragment number */
+ u8_t l_i_fsize; /* Fragment size */
+ u16_t i_pad1;
+ u16_t l_i_uid_high; /* these 2 fields */
+ u16_t l_i_gid_high; /* were reserved2[0] */
+ u32_t l_i_reserved2;
+ } linux2;
+ struct {
+ u8_t h_i_frag; /* Fragment number */
+ u8_t h_i_fsize; /* Fragment size */
+ u16_t h_i_mode_high;
+ u16_t h_i_uid_high;
+ u16_t h_i_gid_high;
+ u32_t h_i_author;
+ } hurd2;
+ struct {
+ u8_t m_i_frag; /* Fragment number */
+ u8_t m_i_fsize; /* Fragment size */
+ u16_t m_pad1;
+ u32_t m_i_reserved2[2];
+ } masix2;
+ } osd2; /* OS dependent 2 (copied as raw bytes) */
+
+ /* The following items are not present on the disk. */
+ dev_t i_dev; /* which device is the inode on */
+ ino_t i_num; /* inode number on its (minor) device; NO_ENTRY
+ * when the slot holds no inode
+ */
+ int i_count; /* # times inode used; 0 means nobody uses it and
+ * the slot is on the unused list (it may still be
+ * hashed, i.e. cached)
+ */
+ struct super_block *i_sp; /* pointer to super block for inode's device */
+ char i_dirt; /* CLEAN or DIRTY */
+ block_t i_bsearch; /* where to start search for new blocks,
+ * also this is last allocated block.
+ */
+ off_t i_last_pos_bl_alloc; /* last write position for which we allocated
+ * a new block (should be block i_bsearch).
+ * used to check for sequential operation.
+ */
+ off_t i_last_dpos; /* where to start dentry search */
+ int i_last_dentry_size; /* size of last found dentry */
+
+ char i_mountpoint; /* true if mounted on */
+
+ char i_seek; /* set on LSEEK, cleared on READ/WRITE */
+ char i_update; /* the ATIME, CTIME, and MTIME bits are here */
+
+ block_t i_prealloc_blocks[EXT2_PREALLOC_BLOCKS]; /* preallocated blocks */
+ int i_prealloc_count; /* number of preallocated blocks */
+ int i_prealloc_index; /* index into i_prealloc_blocks */
+ int i_preallocation; /* use preallocation for this inode, normally
+ * it's reset only when non-sequential write
+ * happens.
+ */
+
+ LIST_ENTRY(inode) i_hash; /* hash list */
+ TAILQ_ENTRY(inode) i_unused; /* free and unused list */
+
+} inode[NR_INODES];
+
+
+/* List of inode slots nobody is using.  Slots holding no inode are pushed
+ * at the head, cached-but-unused inodes are appended at the tail, and new
+ * slots are taken from the head.
+ */
+EXTERN TAILQ_HEAD(unused_inodes_t, inode) unused_inodes;
+
+/* Inode hash table; the bucket is chosen by inode number & INODE_HASH_MASK. */
+EXTERN LIST_HEAD(inodelist, inode) hash_inodes[INODE_HASH_SIZE];
+
+/* Cache statistics kept by get_inode(): 'hit' counts requests satisfied by a
+ * hashed inode that nobody was using, 'miss' counts requests for inodes that
+ * were not hashed at all.
+ */
+EXTERN unsigned int inode_cache_hit;
+EXTERN unsigned int inode_cache_miss;
+
+/* Field values. Note that CLEAN and DIRTY are defined in "const.h" */
+#define NO_SEEK 0 /* i_seek = NO_SEEK if last op was not SEEK */
+#define ISEEK 1 /* i_seek = ISEEK if last op was SEEK */
+
+#endif /* EXT2_INODE_H */
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <sys/stat.h>
+#include <string.h>
+#include <minix/com.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include <minix/vfsif.h>
+
+/* Internal pseudo status used by fs_rename() when the old and the new name
+ * refer to the same inode: nothing is done and OK is reported to the caller.
+ */
+#define SAME 1000
+
+/* Helpers private to this file. */
+FORWARD _PROTOTYPE( int freesp_inode, (struct inode *rip, off_t st,
+ off_t end) );
+FORWARD _PROTOTYPE( int remove_dir, (struct inode *rldirp,
+ struct inode *rip, char dir_name[NAME_MAX + 1]) );
+FORWARD _PROTOTYPE( int unlink_file, (struct inode *dirp,
+ struct inode *rip, char file_name[NAME_MAX + 1]));
+FORWARD _PROTOTYPE( off_t nextblock, (off_t pos, int blocksize) );
+FORWARD _PROTOTYPE( void zeroblock_half, (struct inode *i, off_t p, int l));
+FORWARD _PROTOTYPE( void zeroblock_range, (struct inode *i, off_t p, off_t h));
+
+/* Args to zeroblock_half() */
+#define FIRST_HALF 0
+#define LAST_HALF 1
+
+
+/*===========================================================================*
+ * fs_link *
+ *===========================================================================*/
+PUBLIC int fs_link()
+{
+/* Perform the link(name1, name2) system call.
+ *
+ * The request carries the inode to link to (REQ_INODE_NR), the directory
+ * that receives the new entry (REQ_DIR_INO) and, through a grant, the last
+ * component of the new name.  Returns OK or an error code:
+ *   ENAMETOOLONG  the name does not fit,
+ *   EINVAL        one of the two inodes could not be obtained,
+ *   EMLINK        the file already has the maximum number of links,
+ *   EPERM         a non-superuser tries to link to a directory,
+ *   EEXIST        the new name already exists,
+ * or whatever the copy, advance() or search_dir() report.
+ * Every inode reference taken here is released before returning.
+ */
+
+  struct inode *ip, *rip;
+  register int r;
+  char string[NAME_MAX + 1];
+  struct inode *new_ip;
+  phys_bytes len;
+
+  /* Copy the link name's last component */
+  len = fs_m_in.REQ_PATH_LEN;    /* including trailing '\0' */
+  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
+    return(ENAMETOOLONG);
+
+  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT, 0,
+                       (vir_bytes) string, (size_t) len, D);
+  if (r != OK) return r;
+  NUL(string, len, sizeof(string));
+
+  /* Temporarily open the file. */
+  if( (rip = get_inode(fs_dev, fs_m_in.REQ_INODE_NR)) == NULL)
+    return(EINVAL);
+
+  /* Check to see if the file has maximum number of links already. */
+  r = OK;
+  if (rip->i_links_count >= USHRT_MAX)
+    r = EMLINK;
+  if(rip->i_links_count >= LINK_MAX)
+    r = EMLINK;
+
+  /* Only super_user may link to directories. */
+  if(r == OK)
+    if( (rip->i_mode & I_TYPE) == I_DIRECTORY && caller_uid != SU_UID)
+      r = EPERM;
+
+  /* If error with 'name', return the inode. */
+  if (r != OK) {
+    put_inode(rip);
+    return(r);
+  }
+
+  /* Temporarily open the last dir.  On failure the file inode obtained
+   * above must be given back, or its reference would never be released.
+   */
+  if( (ip = get_inode(fs_dev, fs_m_in.REQ_DIR_INO)) == NULL) {
+    put_inode(rip);
+    return(EINVAL);
+  }
+
+  /* If 'name2' exists in full (even if no space) set 'r' to error. */
+  if ((new_ip = advance(ip, string, IGN_PERM)) == NULL) {
+    r = err_code;
+    if(r == ENOENT)
+      r = OK;
+  } else {
+    put_inode(new_ip);
+    r = EEXIST;
+  }
+
+  /* Try to link. */
+  if(r == OK)
+    r = search_dir(ip, string, &rip->i_num, ENTER, IGN_PERM,
+                   rip->i_mode & I_TYPE);
+
+  /* If success, register the linking. */
+  if(r == OK) {
+    rip->i_links_count++;
+    rip->i_update |= CTIME;
+    rip->i_dirt = DIRTY;
+  }
+
+  /* Done.  Release both inodes. */
+  put_inode(rip);
+  put_inode(ip);
+  return(r);
+}
+
+
+/*===========================================================================*
+ * fs_unlink *
+ *===========================================================================*/
+PUBLIC int fs_unlink()
+{
+/* Perform the unlink(name) or rmdir(name) system call; the request type
+ * tells which.  Both remove the entry named by the last path component
+ * from the directory REQ_INODE_NR.  A REQ_UNLINK request on a directory is
+ * refused with EPERM; any other request type is handled as rmdir.  Names
+ * that cross a mount point yield EBUSY.
+ */
+  register struct inode *victim;
+  struct inode *parent;
+  int status;
+  char string[NAME_MAX + 1];
+  phys_bytes len;
+
+  /* Fetch the last path component from VFS. */
+  len = fs_m_in.REQ_PATH_LEN;    /* including trailing '\0' */
+  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
+    return(ENAMETOOLONG);
+
+  status = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                            (vir_bytes) 0, (vir_bytes) string,
+                            (size_t) len, D);
+  if (status != OK) return status;
+  NUL(string, len, sizeof(string));
+
+  /* Temporarily open the directory holding the entry. */
+  parent = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (parent == NULL)
+    return(EINVAL);
+
+  /* The directory exists; look the name up in it. */
+  victim = advance(parent, string, IGN_PERM);
+  status = err_code;
+
+  if (status != OK) {
+    /* Lookup failed.  When the name is a mount point an inode was
+     * still handed out and has to be given back.
+     */
+    if (status == EENTERMOUNT || status == ELEAVEMOUNT) {
+      put_inode(victim);
+      status = EBUSY;
+    }
+    put_inode(parent);
+    return(status);
+  }
+
+  /* The checks differ between unlink() and rmdir(). */
+  if (fs_m_in.m_type != REQ_UNLINK) {
+    status = remove_dir(parent, victim, string);   /* call is RMDIR */
+  } else if ((victim->i_mode & I_TYPE) == I_DIRECTORY) {
+    /* unlink() is never allowed to remove a directory here. */
+    status = EPERM;
+  } else {
+    /* Actually try to unlink; fails if parent is mode 0 etc. */
+    status = unlink_file(parent, victim, string);
+  }
+
+  /* If unlink was possible, it has been done, otherwise it has not. */
+  put_inode(victim);
+  put_inode(parent);
+  return(status);
+}
+
+
+/*===========================================================================*
+ * fs_rdlink *
+ *===========================================================================*/
+PUBLIC int fs_rdlink()
+{
+/* Copy the target text of the symbolic link REQ_INODE_NR to the caller's
+ * buffer (grant REQ_GRANT), at most REQ_MEM_SIZE bytes.  On success the
+ * number of bytes copied is returned in RES_NBYTES.
+ *
+ * Links longer than MAX_FAST_SYMLINK_LENGTH keep their text in the first
+ * data block; shorter ("fast") ones keep it in the inode's i_block area.
+ *
+ * Returns EINVAL if the inode cannot be obtained, EACCES if it is not a
+ * symbolic link, EIO if the link text cannot be read, or the result of the
+ * copy to the caller.
+ */
+  block_t b;                     /* block containing link text */
+  struct buf *bp = NULL;         /* buffer containing link text, if any */
+  char *link_text = NULL;        /* either bp->b_data or rip->i_block */
+  register struct inode *rip;    /* target inode */
+  register int r;                /* return value */
+  size_t copylen;
+
+  copylen = min( (size_t) fs_m_in.REQ_MEM_SIZE, UMAX_FILE_POS);
+
+  /* Temporarily open the file. */
+  if( (rip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
+    return(EINVAL);
+
+  if (!S_ISLNK(rip->i_mode)) {
+    /* Not a symbolic link: there is nothing to read. */
+    r = EACCES;
+  } else if (rip->i_size > MAX_FAST_SYMLINK_LENGTH) {
+    /* normal symlink: the text lives in the first data block */
+    if ((b = read_map(rip, (off_t) 0)) == NO_BLOCK) {
+      r = EIO;
+    } else if ((bp = get_block(rip->i_dev, b, NORMAL)) == NULL) {
+      r = EIO;
+    } else {
+      link_text = bp->b_data;
+      r = OK;
+    }
+  } else {
+    /* fast symlink, stored in inode; no block is involved */
+    link_text = (char*) rip->i_block;
+    r = OK;
+  }
+
+  if (r == OK) {
+    /* Passed all checks */
+    /* We can safely cast to unsigned, because copylen is guaranteed to be
+     * below max file size
+     */
+    copylen = min( copylen, (unsigned) rip->i_size);
+    r = sys_safecopyto(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                       (vir_bytes) 0, (vir_bytes) link_text,
+                       (size_t) copylen, D);
+    if (r == OK)
+      fs_m_out.RES_NBYTES = copylen;
+  }
+
+  /* Give back the one buffer obtained above (normal symlinks only). */
+  if (bp != NULL)
+    put_block(bp, DIRECTORY_BLOCK);
+
+  put_inode(rip);
+  return(r);
+}
+
+
+/*===========================================================================*
+ * remove_dir *
+ *===========================================================================*/
+PRIVATE int remove_dir(rldirp, rip, dir_name)
+struct inode *rldirp;                   /* parent directory */
+struct inode *rip;                      /* directory to be removed */
+char dir_name[NAME_MAX + 1];            /* name of directory to be removed */
+{
+/* Remove the directory 'rip', known as 'dir_name' in 'rldirp'.  The checks
+ * made here, in this order:
+ *   - search_dir(IS_EMPTY) must succeed (it also rejects non-directories);
+ *   - the name must not be "." or ".."  (EINVAL);
+ *   - the directory must not be the root inode  (EBUSY).
+ * Mount points and root/working directories of processes are the business
+ * of VFS.  Returns OK or the first error encountered.
+ */
+  int status;
+
+  status = search_dir(rip, "", NULL, IS_EMPTY, IGN_PERM, 0);
+  if (status != OK)
+    return status;
+
+  if (strcmp(dir_name, ".") == 0)
+    return(EINVAL);
+  if (strcmp(dir_name, "..") == 0)
+    return(EINVAL);
+
+  if (rip->i_num == ROOT_INODE)
+    return(EBUSY);               /* can't remove 'root' */
+
+  /* Actually try to unlink the file; fails if parent is mode 0 etc. */
+  status = unlink_file(rldirp, rip, dir_name);
+  if (status != OK)
+    return status;
+
+  /* Drop the directory's own "." and ".." entries as well.  Failures are
+   * ignored on purpose: the super user can link and unlink any dir, so
+   * the entries may not be what one expects.
+   */
+  (void) unlink_file(rip, NULL, dot1);
+  (void) unlink_file(rip, NULL, dot2);
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * unlink_file *
+ *===========================================================================*/
+PRIVATE int unlink_file(dirp, rip, file_name)
+struct inode *dirp;                     /* parent directory of file */
+struct inode *rip;                      /* inode of file, may be NULL too. */
+char file_name[NAME_MAX + 1];           /* name of file to be removed */
+{
+/* Delete the entry 'file_name' from directory 'dirp' and, on success, drop
+ * one link from the file's inode.  'rip' is the inode of 'file_name' when
+ * the caller already has it, or NULL to have it looked up here.  The
+ * caller's own reference on 'rip' is left untouched.
+ */
+  ino_t inum;                    /* inode number found by the lookup */
+  int status;
+
+  if (rip != NULL) {
+    /* Take a private reference; it is returned with put_inode below. */
+    dup_inode(rip);
+  } else {
+    /* Find the file in the directory and get hold of its inode. */
+    err_code = search_dir(dirp, file_name, &inum, LOOK_UP, IGN_PERM, 0);
+    if (err_code != OK)
+      return(err_code);
+    rip = get_inode(dirp->i_dev, (int) inum);
+    if (rip == NULL)
+      return(err_code);
+  }
+
+  status = search_dir(dirp, file_name, NULL, DELETE, IGN_PERM, 0);
+  if (status == OK) {
+    /* Entry deleted from parent's dir: one link less. */
+    rip->i_links_count -= 1;
+    rip->i_update |= CTIME;
+    rip->i_dirt = DIRTY;
+  }
+
+  put_inode(rip);
+  return(status);
+}
+
+
+/*===========================================================================*
+ * fs_rename *
+ *===========================================================================*/
+PUBLIC int fs_rename()
+{
+/* Perform the rename(name1, name2) system call.
+ *
+ * The request names the old and the new parent directory by inode number
+ * and passes the last component of both names through grants.  Returns OK
+ * (also when old and new are the same inode) or an error code such as
+ * ENAMETOOLONG, EXDEV, EINVAL, EBUSY, EMLINK, ENOTDIR, EISDIR, or whatever
+ * the helpers report.
+ *
+ * Reference handling: each inode obtained here is released exactly once.
+ * Pointers whose reference has already been given back are set to NULL so
+ * that the common release code at the end (put_inode() accepts NULL) does
+ * not release them a second time.
+ */
+  struct inode *old_dirp, *old_ip;      /* ptrs to old dir, file inodes */
+  struct inode *new_dirp, *new_ip;      /* ptrs to new dir, file inodes */
+  struct inode *new_superdirp, *next_new_superdirp;
+  int r = OK;                           /* error flag; initially no error */
+  int odir, ndir;                       /* TRUE iff {old|new} file is dir */
+  int same_pdir;                        /* TRUE iff parent dirs are the same */
+  char old_name[NAME_MAX + 1], new_name[NAME_MAX + 1];
+  ino_t numb;
+  phys_bytes len;
+
+  /* Copy the last component of the old name */
+  len = fs_m_in.REQ_REN_LEN_OLD; /* including trailing '\0' */
+  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
+    return(ENAMETOOLONG);
+
+  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_REN_GRANT_OLD,
+                       (vir_bytes) 0, (vir_bytes) old_name, (size_t) len, D);
+  if (r != OK) return r;
+  NUL(old_name, len, sizeof(old_name));
+
+  /* Copy the last component of the new name */
+  len = fs_m_in.REQ_REN_LEN_NEW; /* including trailing '\0' */
+  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
+    return(ENAMETOOLONG);
+
+  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_REN_GRANT_NEW,
+                       (vir_bytes) 0, (vir_bytes) new_name, (size_t) len, D);
+  if (r != OK) return r;
+  NUL(new_name, len, sizeof(new_name));
+
+  /* Get old dir inode */
+  if( (old_dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_REN_OLD_DIR)) == NULL)
+    return(err_code);
+
+  old_ip = advance(old_dirp, old_name, IGN_PERM);
+  r = err_code;
+
+  if (r == EENTERMOUNT || r == ELEAVEMOUNT) {
+    put_inode(old_ip);
+    old_ip = NULL;               /* released; must not be released again */
+    if (r == EENTERMOUNT) r = EXDEV;       /* should this fail at all? */
+    else if (r == ELEAVEMOUNT) r = EINVAL; /* rename on dot-dot */
+  }
+
+  /* Get new dir inode.  Without it nothing below can work (the lookup of
+   * the new name needs a directory to search), so give back what is held
+   * and stop.  The error is saved first in case releasing changes err_code.
+   */
+  if( (new_dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_REN_NEW_DIR)) == NULL) {
+    r = err_code;
+    put_inode(old_ip);
+    put_inode(old_dirp);
+    return(r);
+  }
+  new_ip = advance(new_dirp, new_name, IGN_PERM); /* not required to exist */
+
+  /* However, if the check failed because the file does exist, don't continue.
+   * Note that ELEAVEMOUNT is covered by the dot-dot check later. */
+  if(err_code == EENTERMOUNT) {
+    put_inode(new_ip);
+    new_ip = NULL;               /* released; must not be released again */
+    r = EBUSY;
+  }
+
+  if(old_ip != NULL)
+    odir = ((old_ip->i_mode & I_TYPE) == I_DIRECTORY); /* TRUE iff dir */
+  else
+    odir = FALSE;
+
+  /* If it is ok, check for a variety of possible errors. */
+  if(r == OK) {
+    same_pdir = (old_dirp == new_dirp);
+
+    /* The old inode must not be a superdirectory of the new last dir. */
+    if (odir && !same_pdir) {
+      dup_inode(new_superdirp = new_dirp);
+      while (TRUE) {             /* may hang in a file system loop */
+        if (new_superdirp == old_ip) {
+          put_inode(new_superdirp);
+          r = EINVAL;
+          break;
+        }
+        next_new_superdirp = advance(new_superdirp, dot2,
+                                     IGN_PERM);
+
+        put_inode(new_superdirp);
+        if(next_new_superdirp == new_superdirp) {
+          /* ".." leads to itself: release advance()'s reference. */
+          put_inode(new_superdirp);
+          break;
+        }
+        if(err_code == ELEAVEMOUNT) {
+          /* imitate that we are back at the root,
+           * cross device checked already on VFS */
+          put_inode(next_new_superdirp);
+          err_code = OK;
+          break;
+        }
+        new_superdirp = next_new_superdirp;
+        if(new_superdirp == NULL) {
+          /* Missing ".." entry.  Assume the worst. */
+          r = EINVAL;
+          break;
+        }
+      }
+    }
+
+    /* The old or new name must not be . or .. */
+    if(strcmp(old_name, ".") == 0 || strcmp(old_name, "..") == 0 ||
+       strcmp(new_name, ".") == 0 || strcmp(new_name, "..") == 0) {
+      r = EINVAL;
+    }
+    /* Both parent directories must be on the same device.
+    if(old_dirp->i_dev != new_dirp->i_dev) r = EXDEV; */
+
+    /* Some tests apply only if the new path exists. */
+    if(new_ip == NULL) {
+      /* don't rename a file with a file system mounted on it.
+      if (old_ip->i_dev != old_dirp->i_dev) r = EXDEV;*/
+      if(odir && (new_dirp->i_links_count >= SHRT_MAX ||
+                  new_dirp->i_links_count >= LINK_MAX) &&
+         !same_pdir && r == OK) {
+        r = EMLINK;
+      }
+    } else {
+      if(old_ip == new_ip) r = SAME; /* old=new */
+
+      ndir = ((new_ip->i_mode & I_TYPE) == I_DIRECTORY);/* dir ? */
+      if(odir == TRUE && ndir == FALSE) r = ENOTDIR;
+      if(odir == FALSE && ndir == TRUE) r = EISDIR;
+    }
+  }
+
+  /* If a process has another root directory than the system root, we might
+   * "accidentally" be moving its working directory to a place where its
+   * root directory isn't a super directory of it anymore.  This can make
+   * the function chroot useless.  If chroot will be used often we should
+   * probably check for it here. */
+
+  /* The rename will probably work.  Only two things can go wrong now:
+   * 1. being unable to remove the new file. (when new file already exists)
+   * 2. being unable to make the new directory entry. (new file doesn't exist)
+   *     [directory has to grow by one block and cannot because the disk
+   *      is completely full].
+   */
+  if(r == OK) {
+    if(new_ip != NULL) {
+      /* There is already an entry for 'new'.  Try to remove it. */
+      if(odir)
+        r = remove_dir(new_dirp, new_ip, new_name);
+      else
+        r = unlink_file(new_dirp, new_ip, new_name);
+    }
+    /* if r is OK, the rename will succeed, while there is now an
+     * unused entry in the new parent directory. */
+  }
+
+  if(r == OK) {
+    /* If the new name will be in the same parent directory as the old
+     * one, first remove the old name to free an entry for the new name,
+     * otherwise first try to create the new name entry to make sure
+     * the rename will succeed.
+     */
+    numb = old_ip->i_num;        /* inode number of old file */
+
+    if(same_pdir) {
+      r = search_dir(old_dirp,old_name, NULL, DELETE,IGN_PERM, 0);
+      /* shouldn't go wrong. */
+      if(r == OK)
+        (void) search_dir(old_dirp, new_name, &numb, ENTER, IGN_PERM,
+                          old_ip->i_mode & I_TYPE);
+    } else {
+      r = search_dir(new_dirp, new_name, &numb, ENTER, IGN_PERM,
+                     old_ip->i_mode & I_TYPE);
+      if(r == OK)
+        (void) search_dir(old_dirp, old_name, (ino_t *) 0, DELETE,
+                          IGN_PERM, 0);
+    }
+  }
+  /* If r is OK, the ctime and mtime of old_dirp and new_dirp have been marked
+   * for update in search_dir. */
+
+  if(r == OK && odir && !same_pdir) {
+    /* Update the .. entry in the directory (still points to old_dirp).*/
+    numb = new_dirp->i_num;
+    (void) unlink_file(old_ip, NULL, dot2);
+    if(search_dir(old_ip, dot2, &numb, ENTER, IGN_PERM, I_DIRECTORY) == OK) {
+      /* New link created. */
+      new_dirp->i_links_count++;
+      new_dirp->i_dirt = DIRTY;
+    }
+  }
+
+  /* Release the inodes (NULL pointers are ignored by put_inode). */
+  put_inode(old_dirp);
+  put_inode(old_ip);
+  put_inode(new_dirp);
+  put_inode(new_ip);
+  return(r == SAME ? OK : r);
+}
+
+
+/*===========================================================================*
+ * fs_ftrunc *
+ *===========================================================================*/
+PUBLIC int fs_ftrunc(void)
+{
+/* Serve a truncate / free-space request. The inode number and the two byte
+ * offsets are taken from the request message. An end offset of zero means
+ * "set the size to 'start'" and is passed to truncate_inode(); any other end
+ * offset asks for the range start..end to be freed by freesp_inode().
+ * Returns EINVAL if the inode is not in the inode table, otherwise the result
+ * of the routine that did the work.
+ */
+  struct inode *ip;
+  off_t from, to;
+
+  ip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (ip == NULL) return(EINVAL);
+
+  from = fs_m_in.REQ_TRC_START_LO;
+  to = fs_m_in.REQ_TRC_END_LO;
+
+  if (to != 0) return(freesp_inode(ip, from, to));
+
+  return(truncate_inode(ip, from));
+}
+
+
+/*===========================================================================*
+ * truncate_inode *
+ *===========================================================================*/
+PUBLIC int truncate_inode(rip, newsize)
+register struct inode *rip;    /* inode whose size is to be changed */
+off_t newsize;                 /* size the inode must end up with */
+{
+/* Give the inode the size 'newsize'. When the inode shrinks, the blocks that
+ * are no longer referenced are released; when it grows, the added space is
+ * a hole that reads as zeroes. The size field and the time stamps of the
+ * inode are updated and the inode is marked dirty.
+ *
+ * File pointers of descriptors opened with O_APPEND need no attention here:
+ * that is handled per descriptor at write time.
+ *
+ * Returns EINVAL for character and block special files, EFBIG if 'newsize'
+ * exceeds the maximum file size of the file system, the error reported by
+ * freesp_inode() if releasing space fails, and OK otherwise.
+ */
+  mode_t kind;
+  int status;
+
+  discard_preallocated_blocks(rip);
+
+  /* Special files have no size that could be changed. */
+  kind = rip->i_mode & I_TYPE;
+  if (kind == I_CHAR_SPECIAL || kind == I_BLOCK_SPECIAL) return(EINVAL);
+
+  /* Do not let the inode grow beyond the supported maximum. */
+  if (newsize > rip->i_sp->s_max_size) return(EFBIG);
+
+  /* Shrinking: give back everything from the new end to the old end. */
+  if (newsize < rip->i_size) {
+    status = freesp_inode(rip, newsize, rip->i_size);
+    if (status != OK) return(status);
+  }
+
+  /* Growing: wipe the tail of the block holding the old end of file. */
+  if (newsize > rip->i_size) {
+    zeroblock_half(rip, rip->i_size, LAST_HALF);
+  }
+
+  /* Record the new size and schedule the inode for writing. */
+  rip->i_size = newsize;
+  rip->i_update |= CTIME | MTIME;
+  rip->i_dirt = DIRTY;
+
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * freesp_inode *
+ *===========================================================================*/
+PRIVATE int freesp_inode(rip, start, end)
+register struct inode *rip; /* pointer to inode to be partly freed */
+off_t start, end; /* range of bytes to free (end uninclusive) */
+{
+/* Cut an arbitrary hole in an inode. The caller is responsible for checking
+ * the reasonableness of the inode type of rip. The reason is that
+ * this function can be called for different reasons, for which different
+ * sets of inode types are reasonable. Adjusting the final size of the inode
+ * is to be done by the caller too, if wished.
+ *
+ * Consumers of this function currently are truncate_inode() (used to
+ * free indirect and data blocks for any type of inode, but also to
+ * implement the ftruncate() and truncate() system calls) and the F_FREESP
+ * fcntl().
+ *
+ * Return values: OK when the inode owns no blocks at all (nothing is
+ * touched in that case, not even the time stamps), EINVAL when the range
+ * is empty after clamping 'end' to the file size, the error code of
+ * write_map() when freeing a block fails, and OK after the range has been
+ * zeroed and/or freed. On the last path ctime and mtime are marked for
+ * update and the inode is marked dirty.
+ */
+ off_t p, e; /* p: block index being freed, e: first block index not freed */
+ int r;
+ unsigned short block_size = rip->i_sp->s_block_size;
+ int zero_last, zero_first;
+
+ discard_preallocated_blocks(rip);
+
+ if (rip->i_blocks == 0) {
+ /* Either hole or symlink. Freeing fast symlink using
+ * write_map() causes segfaults since it doesn't use any
+ * blocks, but uses i_block[] to store target.
+ */
+ return(OK);
+ }
+
+ if(end > rip->i_size) /* freeing beyond end makes no sense */
+ end = rip->i_size;
+ if(end <= start) /* end is uninclusive, so start<end */
+ return(EINVAL);
+
+ /* If freeing doesn't cross a block boundary, then we may only zero
+ * a range of the block.
+ */
+ zero_last = start % block_size; /* nonzero: start lies inside a block */
+ zero_first = end % block_size && end < rip->i_size; /* end inside a block that keeps data after it */
+ if (start/block_size == (end-1)/block_size && (zero_last || zero_first)) {
+ zeroblock_range(rip, start, end-start); /* same block, part of it stays in use */
+ } else {
+ /* First zero unused part of partly used blocks. */
+ if (zero_last)
+ zeroblock_half(rip, start, LAST_HALF);
+ if (zero_first)
+ zeroblock_half(rip, end, FIRST_HALF);
+
+ /* Now completely free the completely unused blocks.
+ * write_map() will free unused indirect
+ * blocks too. Converting the range to block numbers avoids
+ * overflow on p when doing e.g. 'p += block_size'.
+ * When the range runs up to the end of the file and that end lies
+ * inside a block, that last block is included in the blocks to free.
+ */
+ e = end / block_size;
+ if (end == rip->i_size && (end % block_size))
+ e++;
+ for (p = nextblock(start, block_size)/block_size; p < e; p++) {
+ if ((r = write_map(rip, p*block_size, NO_BLOCK, WMAP_FREE)) != OK)
+ return(r);
+ }
+ }
+
+ rip->i_update |= CTIME | MTIME;
+ rip->i_dirt = DIRTY;
+
+ return(OK);
+}
+
+
+/*===========================================================================*
+ * nextblock *
+ *===========================================================================*/
+PRIVATE off_t nextblock(pos, block_size)
+off_t pos;
+unsigned short block_size;
+{
+/* Round 'pos' up to a block boundary: a position that already is the first
+ * byte of a block is returned unchanged, any other position yields the first
+ * byte of the following block. The computation is split in steps because
+ * the single-expression form can overflow.
+ */
+  off_t rem;
+
+  rem = pos % block_size;
+  if (rem == 0) return(pos);            /* already on a boundary */
+
+  return((pos - rem) + block_size);     /* start of this block + one block */
+}
+
+
+/*===========================================================================*
+ * zeroblock_half *
+ *===========================================================================*/
+PRIVATE void zeroblock_half(rip, pos, half)
+struct inode *rip;
+off_t pos;
+int half;
+{
+/* Clear one side of the block that contains file position 'pos'.
+ *
+ * LAST_HALF:  bytes from 'pos' up to the end of the block are zeroed.
+ * any other value (FIRST_HALF): bytes from the start of the block up to,
+ *             but not including, 'pos' are zeroed.
+ *
+ * The actual zeroing is delegated to zeroblock_range().
+ */
+  off_t in_block;      /* offset of 'pos' within its block */
+  off_t count;         /* number of bytes to clear */
+
+  in_block = pos % rip->i_sp->s_block_size;
+
+  if (half != LAST_HALF) {
+    /* Start at the beginning of the block, stop right before 'pos'. */
+    count = in_block;
+    zeroblock_range(rip, pos - in_block, count);
+    return;
+  }
+
+  /* Start at 'pos', run to the end of the block. */
+  count = rip->i_sp->s_block_size - in_block;
+  zeroblock_range(rip, pos, count);
+}
+
+
+/*===========================================================================*
+ * zeroblock_range *
+ *===========================================================================*/
+PRIVATE void zeroblock_range(rip, pos, len)
+struct inode *rip;
+off_t pos;
+off_t len;
+{
+/* Zero 'len' bytes starting at file position 'pos'. The whole range must lie
+ * within a single block. Nothing happens when 'len' is zero or when the
+ * position is not mapped to a block (read_map() returns NO_BLOCK). The
+ * block is marked dirty and released after it has been modified.
+ *
+ * Panics if the block cannot be obtained or if the range would run past the
+ * end of the block.
+ */
+  block_t b;
+  struct buf *bp;
+  off_t offset;
+
+  if (!len) return;    /* no zeroing to be done. */
+  if ( (b = read_map(rip, pos)) == NO_BLOCK) return;
+  if ( (bp = get_block(rip->i_dev, b, NORMAL)) == NULL)
+    panic("zeroblock_range: no block");
+  offset = pos % rip->i_sp->s_block_size;
+  if (offset + len > rip->i_sp->s_block_size) {
+    /* panic() takes a printf-style format (see its other uses in this
+     * server), so the offending length needs a conversion to be shown.
+     */
+    panic("zeroblock_range: len too long: %ld", (long) len);
+  }
+  memset(bp->b_data + offset, 0, len);
+  bp->b_dirt = DIRTY;
+  put_block(bp, FULL_DATA_BLOCK);
+}
--- /dev/null
+#include "fs.h"
+#include <assert.h>
+#include <minix/callnr.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <minix/dmap.h>
+#include <minix/endpoint.h>
+#include <minix/vfsif.h>
+#include "buf.h"
+#include "inode.h"
+#include "drivers.h"
+#include "optset.h"
+
+
+/* Declare some local functions. */
+FORWARD _PROTOTYPE(void get_work, (message *m_in) );
+FORWARD _PROTOTYPE(void cch_check, (void) );
+FORWARD _PROTOTYPE( void reply, (endpoint_t who, message *m_out) );
+
+/* SEF functions and variables. */
+FORWARD _PROTOTYPE( void sef_local_startup, (void) );
+FORWARD _PROTOTYPE( int sef_cb_init_fresh, (int type, sef_init_info_t *info) );
+FORWARD _PROTOTYPE( void sef_cb_signal_handler, (int signo) );
+
+EXTERN int env_argc;
+EXTERN char **env_argv;
+
+PRIVATE struct optset optset_table[] = { /* passed to optset_parse() for the "-o" argument */
+ { "sb", OPT_INT, &opt.block_with_super, 0 },
+ { "orlov", OPT_BOOL, &opt.use_orlov, TRUE },
+ { "oldalloc", OPT_BOOL, &opt.use_orlov, FALSE }, /* same flag as "orlov", opposite value */
+ { "mfsalloc", OPT_BOOL, &opt.mfsalloc, TRUE },
+ { "reserved", OPT_BOOL, &opt.use_reserved_blocks, TRUE },
+ { "prealloc", OPT_BOOL, &opt.use_prealloc, TRUE },
+ { "noprealloc", OPT_BOOL, &opt.use_prealloc, FALSE }, /* same flag as "prealloc", opposite value */
+ { NULL } /* presumably the end-of-table marker; confirm against optset.c */
+};
+
+/*===========================================================================*
+ * main *
+ *===========================================================================*/
+PUBLIC int main(int argc, char *argv[])
+{
+/* This is the main routine of this service. The main loop consists of
+ * three major activities: getting new work, processing the work, and
+ * sending the reply. The loop runs until the file system has been unmounted
+ * and a termination signal has been seen (or a panic occurs).
+ */
+  int error, ind;
+  unsigned short test_endian = 1;
+
+  /* SEF local startup. */
+  env_setargs(argc, argv);
+  sef_local_startup();
+
+  /* The first byte of a 16-bit 1 is nonzero only on a little endian CPU. */
+  le_CPU = (*(unsigned char *) &test_endian == 0 ? 0 : 1);
+
+  /* Server isn't tested on big endian CPU */
+  ASSERT(le_CPU == 1);
+
+  while(!unmountdone || !exitsignaled) {
+    endpoint_t src;
+
+    /* Wait for request message. */
+    get_work(&fs_m_in);
+
+    src = fs_m_in.m_source;
+    error = OK;
+    caller_uid = INVAL_UID;    /* To trap errors */
+    caller_gid = INVAL_GID;
+    req_nr = fs_m_in.m_type;
+
+    /* Request numbers below VFS_BASE are shifted up by VFS_BASE. */
+    if (req_nr < VFS_BASE) {
+      fs_m_in.m_type += VFS_BASE;
+      req_nr = fs_m_in.m_type;
+    }
+    ind = req_nr - VFS_BASE;
+
+    if (ind < 0 || ind >= NREQS) {
+      /* Identify this server (ext2, not mfs) in the diagnostic. */
+      printf("ext2: bad request %d\n", req_nr);
+      printf("ind = %d\n", ind);
+      error = EINVAL;
+    } else {
+      error = (*fs_call_vec[ind])();
+      /*cch_check();*/
+    }
+
+    fs_m_out.m_type = error;
+    reply(src, &fs_m_out);
+
+    if (error == OK)
+      read_ahead();    /* do block read ahead */
+  }
+
+  /* Unmounted and told to terminate: leave with a defined exit status. */
+  return(OK);
+}
+
+/*===========================================================================*
+ * sef_local_startup *
+ *===========================================================================*/
+PRIVATE void sef_local_startup()
+{
+/* Install this server's SEF callbacks and then hand control to SEF for the
+ * startup sequence. Live update is not supported, so no callbacks are
+ * registered for it.
+ */
+
+  /* Initialization: a fresh start runs sef_cb_init_fresh(); a restart is
+   * given sef_cb_init_fail as its callback.
+   */
+  sef_setcb_init_fresh(sef_cb_init_fresh);
+  sef_setcb_init_restart(sef_cb_init_fail);
+
+  /* Signals are delivered to sef_cb_signal_handler(). */
+  sef_setcb_signal_handler(sef_cb_signal_handler);
+
+  /* With everything registered, let SEF do its part. */
+  sef_startup();
+}
+
+/*===========================================================================*
+ * sef_cb_init_fresh *
+ *===========================================================================*/
+PRIVATE int sef_cb_init_fresh(int type, sef_init_info_t *info)
+{
+/* Fresh-start initialization of the ext2 file server: set the option
+ * defaults, apply any "-o" option strings from the command line, reset the
+ * inode table and the driver mapping, set up the buffer pool and finally
+ * announce this server to VFS. Panics if that announcement cannot be sent;
+ * returns OK otherwise. Neither 'type' nor 'info' is used.
+ */
+  int i, r;
+
+  /* Option defaults, possibly overridden below. */
+  opt.use_orlov = TRUE;
+  opt.mfsalloc = FALSE;
+  opt.use_reserved_blocks = FALSE;
+  opt.block_with_super = 0;
+  opt.use_prealloc = FALSE;
+
+  /* Every "-o" that is followed by another argument introduces an option
+   * string; that following argument is consumed as well.
+   */
+  i = 1;
+  while (i < env_argc - 1) {
+    if (strcmp(env_argv[i], "-o") == 0) {
+      i++;
+      optset_parse(optset_table, env_argv[i]);
+    }
+    i++;
+  }
+
+  may_use_vmcache = 1;
+
+  /* No inode slot is in use yet; the debug counters start at zero too. */
+  i = 0;
+  while (i < NR_INODES) {
+    inode[i].i_count = 0;
+    cch[i] = 0;
+    ++i;
+  }
+
+  init_inode_cache();
+
+  /* No driver is known for any device yet. */
+  i = 0;
+  while (i < NR_DEVICES) {
+    driver_endpoints[i].driver_e = NONE;
+    ++i;
+  }
+
+  SELF_E = getprocnr();
+  buf_pool(DEFAULT_NR_BUFS);
+  fs_block_size = _MIN_BLOCK_SIZE;
+
+  /* Tell VFS that this server is ready. */
+  fs_m_in.m_type = FS_READY;
+  r = send(VFS_PROC_NR, &fs_m_in);
+  if (r != OK) {
+    panic("Error sending login to VFS: %d", r);
+  }
+
+  return(OK);
+}
+
+/*===========================================================================*
+ * sef_cb_signal_handler *
+ *===========================================================================*/
+PRIVATE void sef_cb_signal_handler(int signo)
+{
+/* React to SIGTERM only; every other signal is ignored. The termination
+ * request is remembered in 'exitsignaled' and the file system is synced.
+ * If the file system has already been unmounted the server exits right
+ * away, because no further message may arrive to end the main loop.
+ */
+  if (signo == SIGTERM) {
+    exitsignaled = 1;
+    (void) fs_sync();
+
+    if (unmountdone) exit(0);
+  }
+}
+
+/*===========================================================================*
+ * get_work *
+ *===========================================================================*/
+PRIVATE void get_work(m_in)
+message *m_in;                 /* where the request is stored */
+{
+/* Block until a request from VFS has been received into 'm_in'. Messages
+ * from any other source, and messages from VFS that arrive after the file
+ * system has been unmounted, are reported and dropped. Panics if receiving
+ * fails.
+ */
+  int r;
+  endpoint_t src;
+
+  for (;;) {
+    r = sef_receive(ANY, m_in);        /* wait for message */
+    if (r != OK) panic("sef_receive failed: %d", r);
+
+    src = m_in->m_source;
+
+    if (src != VFS_PROC_NR) {
+      printf("ext2: unexpected source %d\n", src);
+      continue;
+    }
+
+    if (unmountdone) {
+      printf("ext2: unmounted: unexpected message from FS\n");
+      continue;
+    }
+
+    break;     /* Normal FS request. */
+  }
+
+  assert((src == VFS_PROC_NR && !unmountdone));
+}
+
+
+/*===========================================================================*
+ * reply *
+ *===========================================================================*/
+PRIVATE void reply(
+  endpoint_t who,
+  message *m_out                       /* report result */
+)
+{
+/* Send the reply message 'm_out' to endpoint 'who'. A failure to send is
+ * only reported on the console; nothing is returned to the caller.
+ */
+  int status;
+
+  status = send(who, m_out);           /* send the message */
+  if (status != OK) {
+    printf("ext2(%d) was unable to send reply\n", SELF_E);
+  }
+}
+
+
+/*===========================================================================*
+ * cch_check *
+ *===========================================================================*/
+PRIVATE void cch_check(void)
+{
+/* Debugging aid: for every inode slot compare the current reference count
+ * with the count remembered in cch[] and print the difference, unless the
+ * request being handled is GETNODE, PUTNODE, READSUPER, MOUNTPOINT, UNMOUNT,
+ * SYNC or LOOKUP. Afterwards the current counts are remembered in cch[].
+ */
+  int i;
+
+  for (i = 0; i < NR_INODES; ++i) {
+    if (inode[i].i_count != cch[i] && req_nr != REQ_GETNODE &&
+        req_nr != REQ_PUTNODE && req_nr != REQ_READSUPER &&
+        req_nr != REQ_MOUNTPOINT && req_nr != REQ_UNMOUNT &&
+        req_nr != REQ_SYNC && req_nr != REQ_LOOKUP) {
+      /* "%ul" would print an unsigned int followed by a literal 'l';
+       * the inode number is printed as an unsigned long instead.
+       */
+      printf("ext2(%d) inode(%lu) cc: %d req_nr: %d\n", SELF_E,
+             (unsigned long) inode[i].i_num,
+             inode[i].i_count - cch[i], req_nr);
+    }
+    cch[i] = inode[i].i_count;
+  }
+}
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <assert.h>
+#include <minix/vfsif.h>
+#include "inode.h"
+#include "super.h"
+
+/*===========================================================================*
+ * fs_sync *
+ *===========================================================================*/
+PUBLIC int fs_sync()
+{
+/* Perform the sync() system call. Flush all the tables.
+ * The order in which the various tables are flushed is critical. The
+ * blocks must be flushed last, since rw_inode() leaves its results in
+ * the block cache.
+ *
+ * Nothing is done when no superblock has been set up yet (this routine is
+ * also called from the SIGTERM handler, which may run before any mount
+ * request) or when the file system is mounted read-only. Always returns OK.
+ */
+  struct inode *rip;
+  struct buf *bp;
+
+  assert(nr_bufs > 0);
+  assert(buf);
+
+  /* The superblock is only allocated by fs_readsuper(). */
+  if (superblock == NULL)
+    return(OK);        /* never mounted: nothing to sync */
+
+  if (superblock->s_rd_only)
+    return(OK);        /* nothing to sync */
+
+  /* Write all the dirty inodes to the disk. */
+  for(rip = &inode[0]; rip < &inode[NR_INODES]; rip++)
+    if(rip->i_count > 0 && rip->i_dirt == DIRTY) rw_inode(rip, WRITING);
+
+  /* Write all the dirty blocks to the disk, one drive at a time. */
+  for(bp = &buf[0]; bp < &buf[nr_bufs]; bp++)
+    if(bp->b_dev != NO_DEV && bp->b_dirt == DIRTY)
+      flushall(bp->b_dev);
+
+  /* Record the write time in the superblock while the device is mounted. */
+  if (superblock->s_dev != NO_DEV) {
+    superblock->s_wtime = clock_time();
+    write_super(superblock);
+  }
+
+  return(OK);          /* sync() can't fail */
+}
+
+
+/*===========================================================================*
+ * fs_flush *
+ *===========================================================================*/
+PUBLIC int fs_flush()
+{
+/* Write out the dirty cached blocks of the device named in the request and
+ * then drop that device's blocks from the cache. The device this file system
+ * itself is mounted from is refused with EBUSY; otherwise OK is returned.
+ */
+  dev_t target;
+
+  target = (dev_t) fs_m_in.REQ_DEV;
+
+  if (target != fs_dev) {
+    flushall(target);
+    invalidate(target);
+    return(OK);
+  }
+
+  return(EBUSY);
+}
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <fcntl.h>
+#include <string.h>
+#include <minix/com.h>
+#include <sys/stat.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include "drivers.h"
+#include <minix/ds.h>
+#include <minix/vfsif.h>
+
+
+/*===========================================================================*
+ * fs_readsuper *
+ *===========================================================================*/
+PUBLIC int fs_readsuper()
+{
+/* This function reads the superblock of the partition, gets the root inode
+ * and sends back the details of them. Note, that the FS process does not
+ * know the index of the vmnt object which refers to it, whenever the pathname
+ * lookup leaves a partition an ELEAVEMOUNT error is transferred back
+ * so that the VFS knows that it has to find the vnode on which this FS
+ * process' partition is mounted on.
+ *
+ * Returns OK on success, the error of read_super() if the superblock is not
+ * accepted, and EINVAL for every other failure. Every failure after the
+ * device has been opened marks the superblock as unused (s_dev = NO_DEV)
+ * and closes the device again.
+ */
+  struct inode *root_ip;
+  cp_grant_id_t label_gid;
+  size_t label_len;
+  int r = OK;
+  endpoint_t driver_e;
+  int readonly, isroot;
+  u32_t mask;
+
+  fs_dev = fs_m_in.REQ_DEV;
+  label_gid = fs_m_in.REQ_GRANT;
+  label_len = fs_m_in.REQ_PATH_LEN;
+  readonly = (fs_m_in.REQ_FLAGS & REQ_RDONLY) ? 1 : 0;
+  isroot = (fs_m_in.REQ_FLAGS & REQ_ISROOT) ? 1 : 0;
+
+  if (label_len > sizeof(fs_dev_label))
+    return(EINVAL);
+
+  /* Fetch the label of the driver from the caller. */
+  r = sys_safecopyfrom(fs_m_in.m_source, label_gid, 0,
+                       (vir_bytes)fs_dev_label, label_len, D);
+  if (r != OK) {
+    printf("%s:%d fs_readsuper: safecopyfrom failed: %d\n",
+           __FILE__, __LINE__, r);
+    return(EINVAL);
+  }
+
+  /* Translate the label into the endpoint of the driver. */
+  r = ds_retrieve_label_endpt(fs_dev_label, &driver_e);
+  if (r != OK) {
+    printf("ext2:fs_readsuper: ds_retrieve_label_endpt failed for '%s': %d\n",
+           fs_dev_label, r);
+    return EINVAL;
+  }
+
+  /* Map the driver endpoint for this major */
+  driver_endpoints[(fs_dev >> MAJOR) & BYTE].driver_e = driver_e;
+
+  /* Open the device the file system lives on. */
+  if (dev_open(driver_e, fs_dev, driver_e,
+               readonly ? R_BIT : (R_BIT|W_BIT)) != OK) {
+    return(EINVAL);
+  }
+
+  /* Fill in the super block. */
+  STATICINIT(superblock, sizeof(struct super_block));
+  if (!superblock)
+    panic("Can't allocate memory for superblock.");
+  superblock->s_dev = fs_dev;  /* read_super() needs to know which dev */
+  r = read_super(superblock);
+
+  /* Is it recognized as an ext2 file system? */
+  if (r != OK) {
+    superblock->s_dev = NO_DEV;
+    dev_close(driver_e, fs_dev);
+    return(r);
+  }
+
+  if (superblock->s_rev_level != EXT2_GOOD_OLD_REV) {
+    struct super_block *sp = superblock;       /* just shorter name */
+
+    /* Refuse file systems that use incompatible features we don't know. */
+    mask = ~SUPPORTED_INCOMPAT_FEATURES;
+    if (HAS_INCOMPAT_FEATURE(sp, mask)) {
+      if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_COMPRESSION & mask))
+        printf("ext2: fs compression is not supported by server\n");
+      if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_FILETYPE & mask))
+        printf("ext2: fs in dir filetype is not supported by server\n");
+      if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_RECOVER & mask))
+        printf("ext2: fs recovery is not supported by server\n");
+      if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_JOURNAL_DEV & mask))
+        printf("ext2: fs journal dev is not supported by server\n");
+      if (HAS_INCOMPAT_FEATURE(sp, INCOMPAT_META_BG & mask))
+        printf("ext2: fs meta bg is not supported by server\n");
+      /* Undo the open, like the other failure paths do. */
+      superblock->s_dev = NO_DEV;
+      dev_close(driver_e, fs_dev);
+      return(EINVAL);
+    }
+
+    /* NOTE(review): the messages below suggest a read-only remount, but
+     * the mount is refused regardless of the 'readonly' flag.
+     */
+    mask = ~SUPPORTED_RO_COMPAT_FEATURES;
+    if (HAS_RO_COMPAT_FEATURE(sp, mask)) {
+      if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_SPARSE_SUPER & mask)) {
+        printf("ext2: sparse super is not supported by server, "
+               "remount read-only\n");
+      }
+      if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_LARGE_FILE & mask)) {
+        printf("ext2: large files are not supported by server, "
+               "remount read-only\n");
+      }
+      if (HAS_RO_COMPAT_FEATURE(sp, RO_COMPAT_BTREE_DIR & mask)) {
+        printf("ext2: dir's btree is not supported by server, "
+               "remount read-only\n");
+      }
+      /* Undo the open, like the other failure paths do. */
+      superblock->s_dev = NO_DEV;
+      dev_close(driver_e, fs_dev);
+      return(EINVAL);
+    }
+  }
+
+  if (superblock->s_state == EXT2_ERROR_FS) {
+    printf("ext2: filesystem wasn't cleanly unmounted previous time\n");
+    superblock->s_dev = NO_DEV;
+    dev_close(driver_e, fs_dev);
+    return(EINVAL);
+  }
+
+  set_blocksize(superblock->s_block_size);
+
+  /* Get the root inode of the mounted file system. */
+  if ( (root_ip = get_inode(fs_dev, ROOT_INODE)) == NULL) {
+    printf("ext2: couldn't get root inode\n");
+    superblock->s_dev = NO_DEV;
+    dev_close(driver_e, fs_dev);
+    return(EINVAL);
+  }
+
+  /* From here on root_ip is known to be valid. */
+  if (root_ip->i_mode == 0) {
+    printf("%s:%d zero mode for root inode?\n", __FILE__, __LINE__);
+    put_inode(root_ip);
+    superblock->s_dev = NO_DEV;
+    dev_close(driver_e, fs_dev);
+    return(EINVAL);
+  }
+
+  if ((root_ip->i_mode & I_TYPE) != I_DIRECTORY) {
+    printf("%s:%d root inode has wrong type, it's not a DIR\n",
+           __FILE__, __LINE__);
+    put_inode(root_ip);
+    superblock->s_dev = NO_DEV;
+    dev_close(driver_e, fs_dev);
+    return(EINVAL);
+  }
+
+  superblock->s_rd_only = readonly;
+  superblock->s_is_root = isroot;
+
+  /* A writable mount is flagged as "in error" on disk until fs_unmount()
+   * marks it valid again.
+   */
+  if (!readonly) {
+    superblock->s_state = EXT2_ERROR_FS;
+    superblock->s_mnt_count++;
+    superblock->s_mtime = clock_time();
+    write_super(superblock);   /* Commit info, we just set above */
+  }
+
+  /* Root inode properties */
+  fs_m_out.RES_INODE_NR = root_ip->i_num;
+  fs_m_out.RES_MODE = root_ip->i_mode;
+  fs_m_out.RES_FILE_SIZE_LO = root_ip->i_size;
+  fs_m_out.RES_UID = root_ip->i_uid;
+  fs_m_out.RES_GID = root_ip->i_gid;
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ * fs_mountpoint *
+ *===========================================================================*/
+PUBLIC int fs_mountpoint()
+{
+/* This function looks up the mount point, it checks the condition whether
+ * the partition can be mounted on the inode or not. On success the inode is
+ * flagged as a mount point.
+ *
+ * Returns EINVAL if the inode cannot be obtained, EBUSY if it already is a
+ * mount point, ENOTDIR if it is a block or character special file, and OK
+ * otherwise.
+ */
+  register struct inode *rip;
+  int r = OK;
+  mode_t bits;
+
+  /* Temporarily open the file. */
+  if( (rip = get_inode(fs_dev, fs_m_in.REQ_INODE_NR)) == NULL)
+    return(EINVAL);
+
+  if(rip->i_mountpoint) r = EBUSY;
+
+  /* It may not be special. */
+  bits = rip->i_mode & I_TYPE;
+  if (bits == I_BLOCK_SPECIAL || bits == I_CHAR_SPECIAL) r = ENOTDIR;
+
+  /* Set the flag while our reference is still held; the inode must not be
+   * touched anymore once it has been released.
+   */
+  if(r == OK) rip->i_mountpoint = TRUE;
+
+  put_inode(rip);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ * fs_unmount *
+ *===========================================================================*/
+PUBLIC int fs_unmount()
+{
+/* Unmount the file system that lives on fs_dev.
+ *
+ * Returns EINVAL if the superblock does not belong to fs_dev, EBUSY if more
+ * than one inode reference is still held on the device (only the root inode,
+ * held once, is allowed), and OK once the device has been closed. Panics if
+ * the root inode is not found in the inode table.
+ */
+  struct inode *ip, *root;
+  int in_use;
+
+  if (fs_dev != superblock->s_dev) return(EINVAL);
+
+  /* Add up the reference counts of all in-use inodes of this device. */
+  in_use = 0;
+  for (ip = &inode[0]; ip < &inode[NR_INODES]; ip++) {
+    if (ip->i_count <= 0) continue;
+    if (ip->i_dev != fs_dev) continue;
+    in_use += ip->i_count;
+  }
+
+  root = find_inode(fs_dev, ROOT_INODE);
+  if (root == NULL) {
+    printf("ext2: couldn't find root inode. Unmount failed.\n");
+    panic("ext2: couldn't find root inode");
+    return(EINVAL);
+  }
+
+  /* Sync before looking at the count: VFS can force the unmount in some
+   * cases, which would damage a file system with unsynced data. The sync is
+   * not done before the root inode check above, because a missing root
+   * inode means something strange happened and possibly corrupted data
+   * should not be used for syncing.
+   */
+  if (!superblock->s_rd_only) {
+    (void) fs_sync();          /* force any cached blocks out of memory */
+  }
+
+  if (in_use > 1) return(EBUSY);       /* can't umount a busy file system */
+
+  put_inode(root);
+
+  /* On a writable mount, stamp the superblock and mark it valid again. */
+  if (!superblock->s_rd_only) {
+    superblock->s_wtime = clock_time();
+    superblock->s_state = EXT2_VALID_FS;
+    write_super(superblock);   /* Commit info, we just set above */
+  }
+
+  /* Close the device the file system lives on. */
+  dev_close(driver_endpoints[(fs_dev >> MAJOR) & BYTE].driver_e, fs_dev);
+
+  /* Finish off the unmount. */
+  superblock->s_dev = NO_DEV;
+  unmountdone = TRUE;
+
+  return(OK);
+}
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <sys/stat.h>
+#include <string.h>
+#include <minix/com.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include <minix/vfsif.h>
+
+FORWARD _PROTOTYPE( struct inode *new_node, (struct inode *ldirp,
+ char *string, mode_t bits, block_t z0));
+
+
+/*===========================================================================*
+ * fs_create *
+ *===========================================================================*/
+PUBLIC int fs_create()
+{
+/* Create a new file in the directory named by the request. The last path
+ * component is copied in from VFS, the node is made with new_node() and the
+ * properties of the new inode are returned in the reply message. On success
+ * the new inode stays referenced; only the parent directory is released.
+ *
+ * Returns ENAMETOOLONG if the name does not fit, the copy error if the name
+ * cannot be fetched, ENOENT if the directory inode cannot be obtained, the
+ * error left in 'err_code' by new_node() on failure, and OK otherwise.
+ */
+  char name[NAME_MAX + 1];
+  struct inode *dirp, *newp;
+  phys_bytes namelen;
+  mode_t mode;
+  int status;
+
+  /* Read request message */
+  mode = (mode_t) fs_m_in.REQ_MODE;
+  caller_uid = (uid_t) fs_m_in.REQ_UID;
+  caller_gid = (gid_t) fs_m_in.REQ_GID;
+
+  /* Fetch the file name; the length includes the trailing '\0'. */
+  namelen = fs_m_in.REQ_PATH_LEN;
+  if (namelen > NAME_MAX + 1 || namelen > EXT2_NAME_MAX + 1) {
+    return(ENAMETOOLONG);
+  }
+
+  err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                              (vir_bytes) 0, (vir_bytes) name,
+                              (size_t) namelen, D);
+  if (err_code != OK) return err_code;
+  NUL(name, namelen, sizeof(name));
+
+  /* The directory that will hold the new entry. */
+  dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (dirp == NULL) return(ENOENT);
+
+  /* Make the node; new_node() reports its status through err_code. */
+  newp = new_node(dirp, name, mode, NO_BLOCK);
+  status = err_code;
+
+  if (status == OK) {
+    /* Describe the new inode to the caller. */
+    fs_m_out.RES_INODE_NR = newp->i_num;
+    fs_m_out.RES_MODE = newp->i_mode;
+    fs_m_out.RES_FILE_SIZE_LO = newp->i_size;
+
+    /* These values are needed for the execution */
+    fs_m_out.RES_UID = newp->i_uid;
+    fs_m_out.RES_GID = newp->i_gid;
+
+    put_inode(dirp);           /* drop parent dir */
+    return(OK);
+  }
+
+  /* Failure: release both inodes. */
+  put_inode(dirp);
+  put_inode(newp);
+  return(status);
+}
+
+
+/*===========================================================================*
+ * fs_mknod *
+ *===========================================================================*/
+PUBLIC int fs_mknod()
+{
+/* Make a new node with the mode and device number given in the request.
+ * The last path component is copied in from VFS and the work is done by
+ * new_node(). Both the new inode and the parent directory are released
+ * before returning.
+ *
+ * Returns ENAMETOOLONG if the name does not fit, the copy error if the name
+ * cannot be fetched, ENOENT if the directory inode cannot be obtained, and
+ * otherwise the value new_node() left in 'err_code'.
+ */
+  char name[NAME_MAX + 1];
+  struct inode *dirp, *newp;
+  phys_bytes namelen;
+
+  /* Fetch the name; the length includes the trailing '\0'. */
+  namelen = fs_m_in.REQ_PATH_LEN;
+  if (namelen > NAME_MAX + 1 || namelen > EXT2_NAME_MAX + 1) {
+    return(ENAMETOOLONG);
+  }
+
+  err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                              (vir_bytes) 0, (vir_bytes) name,
+                              (size_t) namelen, D);
+  if (err_code != OK) return err_code;
+  NUL(name, namelen, sizeof(name));
+
+  /* Identity of the caller. */
+  caller_uid = (uid_t) fs_m_in.REQ_UID;
+  caller_gid = (gid_t) fs_m_in.REQ_GID;
+
+  /* The directory that will hold the new entry. */
+  dirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (dirp == NULL) return(ENOENT);
+
+  /* Try to create the new node */
+  newp = new_node(dirp, name, (mode_t) fs_m_in.REQ_MODE,
+                  (block_t) fs_m_in.REQ_DEV);
+
+  put_inode(newp);
+  put_inode(dirp);
+  return(err_code);
+}
+
+
+/*===========================================================================*
+ * fs_mkdir *
+ *===========================================================================*/
+PUBLIC int fs_mkdir()
+{
+/* Make a new directory in the directory named by the request. The inode is
+ * created with new_node(); then the "." and ".." entries are entered and the
+ * link counts of the new directory and of its parent are incremented. If
+ * either entry cannot be made, the directory entry of the new directory is
+ * removed again and the link taken in new_node() is undone.
+ *
+ * Returns ENAMETOOLONG if the name does not fit, the copy error if the name
+ * cannot be fetched, ENOENT if the parent inode cannot be obtained, the
+ * value of 'err_code' if new_node() fails or the name already exists, the
+ * error of the failed "." / ".." insertion, and OK otherwise.
+ */
+  int r;                       /* value to return */
+  int r1, r2;                  /* status codes */
+  ino_t dot, dotdot;           /* inode numbers for . and .. */
+  struct inode *rip, *ldirp;
+  char lastc[NAME_MAX + 1];    /* last component */
+  phys_bytes len;
+
+  /* Copy the last component and set up caller's user and group id */
+  len = fs_m_in.REQ_PATH_LEN;  /* including trailing '\0' */
+  if (len > NAME_MAX + 1 || len > EXT2_NAME_MAX + 1)
+    return(ENAMETOOLONG);
+
+  err_code = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                              (vir_bytes) 0, (vir_bytes) lastc,
+                              (phys_bytes) len, D);
+  if(err_code != OK) return(err_code);
+  NUL(lastc, len, sizeof(lastc));
+
+  caller_uid = (uid_t) fs_m_in.REQ_UID;
+  caller_gid = (gid_t) fs_m_in.REQ_GID;
+
+  /* Get last directory inode */
+  if((ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
+    return(ENOENT);
+
+  /* Next make the inode. If that fails, return error code. */
+  rip = new_node(ldirp, lastc, (mode_t) fs_m_in.REQ_MODE, (block_t) 0);
+
+  if(rip == NULL || err_code == EEXIST) {
+    put_inode(rip);            /* can't make dir: it already exists */
+    put_inode(ldirp);
+    return(err_code);
+  }
+  r = err_code;                /* new_node() always sets 'err_code' */
+
+  /* Get the inode numbers for . and .. to enter in the directory. */
+  dotdot = ldirp->i_num;       /* parent's inode number */
+  dot = rip->i_num;            /* inode number of the new dir itself */
+
+  /* Now make dir entries for . and .. unless the disk is completely full. */
+  /* Use dot1 and dot2, so the mode of the directory isn't important. */
+  rip->i_mode = (mode_t) fs_m_in.REQ_MODE;     /* set mode */
+  /* enter . in the new dir*/
+  r1 = search_dir(rip, dot1, &dot, ENTER, IGN_PERM, I_DIRECTORY);
+  /* enter .. in the new dir */
+  r2 = search_dir(rip, dot2, &dotdot, ENTER, IGN_PERM, I_DIRECTORY);
+
+  /* If both . and .. were successfully entered, increment the link counts. */
+  if (r1 == OK && r2 == OK) {
+    /* Normal case. It was possible to enter . and .. in the new dir. */
+    rip->i_links_count++;      /* this accounts for . */
+    ldirp->i_links_count++;    /* this accounts for .. */
+    ldirp->i_dirt = DIRTY;     /* mark parent's inode as dirty */
+  } else {
+    /* It was not possible to enter . or .. probably disk was full -
+     * links counts haven't been touched. */
+    if (search_dir(ldirp, lastc, NULL, DELETE, IGN_PERM, 0) != OK)
+      panic("Dir disappeared: %lu", (unsigned long) rip->i_num);
+    rip->i_links_count--;      /* undo the increment done in new_node() */
+
+    /* Report the failure; 'err_code' may still say OK at this point. */
+    r = (r1 != OK) ? r1 : r2;
+  }
+  rip->i_dirt = DIRTY;         /* either way, i_links_count has changed */
+
+  put_inode(ldirp);            /* return the inode of the parent dir */
+  put_inode(rip);              /* return the inode of the newly made dir */
+  return(r);
+}
+
+
+/*===========================================================================*
+ * fs_slink *
+ *===========================================================================*/
+PUBLIC int fs_slink()
+{
+/* Create a symbolic link. The name of the link is copied in from VFS and an
+ * inode is made for it with new_node(). A target that (with its terminating
+ * '\0') fits in MAX_FAST_SYMLINK_LENGTH bytes is stored in the inode's
+ * i_block[] area; a longer one is stored in a newly allocated first block.
+ * If anything fails after the inode has been made, the link count is reset
+ * to NO_LINK and the directory entry is removed again.
+ *
+ * Returns ENAMETOOLONG if the link name or the target is too long or if the
+ * target contains a '\0', EINVAL if the directory inode cannot be obtained,
+ * the error of a failed copy or of new_node()/new_block(), and OK otherwise.
+ */
+  phys_bytes len;
+  struct inode *sip;           /* inode containing symbolic link */
+  struct inode *ldirp;         /* directory containing link */
+  register int r;              /* error code */
+  char string[NAME_MAX];       /* last component of the new dir's path name */
+  char *link_target_buf = NULL;        /* either sip->i_block or bp->b_data */
+  struct buf *bp = NULL;       /* disk buffer for link */
+
+  caller_uid = (uid_t) fs_m_in.REQ_UID;
+  caller_gid = (gid_t) fs_m_in.REQ_GID;
+
+  /* Copy the link name's last component */
+  len = fs_m_in.REQ_PATH_LEN;
+  if (len > NAME_MAX || len > EXT2_NAME_MAX)
+    return(ENAMETOOLONG);
+
+  r = sys_safecopyfrom(VFS_PROC_NR, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                       (vir_bytes) 0, (vir_bytes) string, (size_t) len, D);
+  if (r != OK) return(r);
+  NUL(string, len, sizeof(string));
+
+  /* Temporarily open the dir. */
+  if( (ldirp = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
+    return(EINVAL);
+
+  /* Create the inode for the symlink. */
+  sip = new_node(ldirp, string, (mode_t) (I_SYMBOLIC_LINK | RWX_MODES),
+                 (block_t) 0);
+
+  /* If we can then create fast symlink (store it in inode),
+   * Otherwise allocate a disk block for the contents of the symlink and
+   * copy contents of symlink (the name pointed to) into first disk block. */
+  if( (r = err_code) == OK) {
+    if ( (fs_m_in.REQ_MEM_SIZE + 1) > sip->i_sp->s_block_size) {
+      r = ENAMETOOLONG;
+    } else if ((fs_m_in.REQ_MEM_SIZE + 1) <= MAX_FAST_SYMLINK_LENGTH) {
+      r = sys_safecopyfrom(VFS_PROC_NR,
+                           (cp_grant_id_t) fs_m_in.REQ_GRANT3,
+                           (vir_bytes) 0, (vir_bytes) sip->i_block,
+                           (vir_bytes) fs_m_in.REQ_MEM_SIZE, D);
+      sip->i_dirt = DIRTY;
+      link_target_buf = (char*) sip->i_block;
+    } else {
+      bp = new_block(sip, (off_t) 0);
+      if (bp == NULL) {
+        /* No block could be had: there is no buffer to touch. */
+        r = err_code;
+      } else {
+        r = sys_safecopyfrom(VFS_PROC_NR,
+                             (cp_grant_id_t) fs_m_in.REQ_GRANT3,
+                             (vir_bytes) 0, (vir_bytes) bp->b_data,
+                             (vir_bytes) fs_m_in.REQ_MEM_SIZE, D);
+        bp->b_dirt = DIRTY;
+        link_target_buf = bp->b_data;
+      }
+    }
+    if (r == OK && link_target_buf != NULL) {
+      link_target_buf[fs_m_in.REQ_MEM_SIZE] = '\0';
+      sip->i_size = (off_t) strlen(link_target_buf);
+      if (sip->i_size != fs_m_in.REQ_MEM_SIZE) {
+        /* This can happen if the user provides a buffer
+         * with a \0 in it. This can cause a lot of trouble
+         * when the symlink is used later. We could just use
+         * the strlen() value, but we want to let the user
+         * know he did something wrong. ENAMETOOLONG doesn't
+         * exactly describe the error, but there is no
+         * ENAMETOOWRONG.
+         */
+        r = ENAMETOOLONG;
+      }
+    }
+
+    put_block(bp, DIRECTORY_BLOCK);    /* put_block() accepts NULL. */
+
+    if(r != OK) {
+      sip->i_links_count = NO_LINK;
+      if (search_dir(ldirp, string, NULL, DELETE, IGN_PERM, 0) != OK)
+        panic("Symbolic link vanished");
+    }
+  }
+
+  /* put_inode() accepts NULL as a noop, so the below are safe. */
+  put_inode(sip);
+  put_inode(ldirp);
+
+  return(r);
+}
+
+/*===========================================================================*
+ * new_node *
+ *===========================================================================*/
+PRIVATE struct inode *new_node(struct inode *ldirp,
+                               char *string, mode_t bits, block_t b0)
+{
+/* Create a directory entry called 'string' in directory 'ldirp', backed by a
+ * freshly allocated inode with mode 'bits' and i_block[0] set to 'b0'.
+ * fs_slink() is one of the callers.
+ *
+ * Results:
+ *   - new inode returned, err_code == OK: the entry was created;
+ *   - existing inode returned, err_code == EEXIST: the name is already
+ *     present (also when advance() reported a mount crossing);
+ *   - NULL returned, err_code == EMLINK: a directory was requested but the
+ *     parent cannot take another link;
+ *   - NULL returned otherwise: lookup, inode allocation or directory entry
+ *     creation failed (on allocation failure err_code is left as
+ *     alloc_inode() set it -- presumably an error code; verify there).
+ *
+ * The caller remains responsible for releasing 'ldirp' and any inode
+ * returned from here.
+ */
+  struct inode *node;
+  int status;
+
+  /* See whether the final path component already exists. */
+  node = advance(ldirp, string, IGN_PERM);
+
+  if (S_ISDIR(bits)) {
+    if (ldirp->i_links_count >= USHRT_MAX ||
+        ldirp->i_links_count >= LINK_MAX) {
+      /* A new directory needs a ".." link to the parent; none left. */
+      put_inode(node);
+      err_code = EMLINK;
+      return(NULL);
+    }
+  }
+
+  if (node != NULL || err_code != ENOENT) {
+    /* Either the name exists already, or the lookup itself failed. */
+    if (err_code == EENTERMOUNT || err_code == ELEAVEMOUNT || node != NULL)
+      status = EEXIST;
+    else
+      status = err_code;
+
+    err_code = status;
+    return(node);
+  }
+
+  /* The name is free: allocate an inode for it. */
+  node = alloc_inode(ldirp, bits);
+  if (node == NULL)
+    return(NULL);		/* no inode could be allocated */
+
+  /* Write the inode out before creating the directory entry. After a crash
+   * an inode without a directory entry is far less harmful than a directory
+   * entry without an inode.
+   */
+  node->i_links_count++;
+  node->i_block[0] = b0;	/* major/minor device numbers */
+  rw_inode(node, WRITING);
+
+  status = search_dir(ldirp, string, &node->i_num, ENTER, IGN_PERM,
+                      node->i_mode & I_TYPE);
+  if (status != OK) {
+    /* Undo the allocation: with no links left, put_inode() frees it. */
+    node->i_links_count--;
+    node->i_dirt = DIRTY;
+    put_inode(node);
+    err_code = status;
+    return(NULL);
+  }
+
+  err_code = status;
+  return(node);
+}
+
+
+/*===========================================================================*
+ * fs_inhibread *
+ *===========================================================================*/
+PUBLIC int fs_inhibread()
+{
+/* Inhibit read ahead on the inode named in the request message by setting its
+ * i_seek field to ISEEK. Returns EINVAL when find_inode() does not find the
+ * inode, OK otherwise.
+ */
+  struct inode *target;
+
+  target = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (target == NULL)
+    return(EINVAL);
+
+  target->i_seek = ISEEK;
+  return(OK);
+}
--- /dev/null
+/* This file provides functionality to parse strings of comma-separated
+ * options, each being either a single key name or a key=value pair, where the
+ * value may be enclosed in quotes. A table of optset entries is provided to
+ * determine which options are recognized, how to parse their values, and where
+ * to store those. Unrecognized options are silently ignored; improperly
+ * formatted options are silently set to reasonably acceptable values.
+ *
+ * The entry points into this file are:
+ * optset_parse parse the given options string using the given table
+ *
+ * Created:
+ * May 2009 (D.C. van Moolenbroek)
+ */
+
+#define _MINIX 1
+#include <stdlib.h>
+#include <string.h>
+#include <minix/config.h>
+#include <minix/const.h>
+
+#include "optset.h"
+
+FORWARD _PROTOTYPE( void optset_parse_entry, (struct optset *entry,
+ char *ptr, int len) );
+
+/*===========================================================================*
+ * optset_parse_entry *
+ *===========================================================================*/
+PRIVATE void optset_parse_entry(entry, ptr, len)
+struct optset *entry;		/* table entry that matched the key */
+char *ptr;			/* start of the value text; may be NULL if len is 0 */
+int len;			/* number of value characters at 'ptr' */
+{
+/* Parse and store the value of a single option, according to the type of
+ * the matching table entry:
+ *   OPT_BOOL:   store 'os_val' in the int that 'os_ptr' points to;
+ *   OPT_STRING: copy at most 'os_val' - 1 value characters to the buffer that
+ *               'os_ptr' points to, and always nul-terminate it;
+ *   OPT_INT:    convert the value with strtol() using base 'os_val' and store
+ *               it in the int that 'os_ptr' points to (0 for an empty value).
+ * Entries of any other type are ignored.
+ */
+  char *dst;
+  int val;
+
+  switch (entry->os_type) {
+  case OPT_BOOL:
+    *((int *) entry->os_ptr) = entry->os_val;
+
+    break;
+
+  case OPT_STRING:
+    /* A buffer size of zero or less has no room for even the terminating
+     * nul; storing one would write in front of the buffer.
+     */
+    if (entry->os_val <= 0)
+      break;
+
+    if (len < 0)
+      len = 0;
+    if (len >= entry->os_val)
+      len = entry->os_val - 1;	/* truncate to fit */
+
+    dst = (char *) entry->os_ptr;
+
+    if (len > 0)
+      memcpy(dst, ptr, len);
+    dst[len] = 0;
+
+    break;
+
+  case OPT_INT:
+    /* strtol() stops at the first character that is not part of a number,
+     * which includes the comma or quote that ends the value.
+     */
+    if (len > 0)
+      val = strtol(ptr, NULL, entry->os_val);
+    else
+      val = 0;
+
+    *((int *) entry->os_ptr) = val;
+
+    break;
+
+  default:
+    /* Unknown option type: nothing to store. */
+    break;
+  }
+}
+
+/*===========================================================================*
+ * optset_parse *
+ *===========================================================================*/
+PUBLIC void optset_parse(table, string)
+struct optset *table;
+char *string;
+{
+/* Walk over a comma-separated options string. Each field is either "key" or
+ * "key=value", where the value may be wrapped in single or double quotes.
+ * Every key is looked up (case-insensitively) in 'table', which ends with an
+ * entry whose name is NULL; the first matching entry has its value stored by
+ * optset_parse_entry(). Keys without a match are skipped silently.
+ */
+  char *p, *kptr, *vptr;
+  char quote;
+  int klen, vlen;
+  struct optset *entry;
+
+  p = string;
+  while (*p != '\0') {
+    /* The key runs up to the next '=', ',' or end of string. */
+    kptr = p;
+    klen = 0;
+    while (*p != '\0' && *p != '=' && *p != ',') {
+      p++;
+      klen++;
+    }
+
+    vptr = NULL;
+    vlen = 0;
+
+    if (*p == '=') {
+      /* A value follows the key. */
+      vptr = ++p;
+
+      if (*p == '\'' || *p == '"') {
+        /* Quoted value: it ends at the first matching quote that is
+         * directly followed by a comma or by the end of the string.
+         */
+        quote = *p;
+        p++;
+
+        while (*p != '\0' &&
+               (*p != quote || (p[1] != '\0' && p[1] != ','))) {
+          p++;
+          vlen++;
+        }
+
+        if (*p != '\0') p++;	/* step over the closing quote */
+        vptr++;			/* step over the opening quote */
+      } else {
+        /* Plain value: it ends at the next comma or end of string. */
+        while (*p != '\0' && *p != ',') {
+          p++;
+          vlen++;
+        }
+      }
+    }
+
+    if (*p == ',') p++;
+
+    /* Hand the value to the first table entry with the same name. */
+    for (entry = table; entry->os_name != NULL; entry++) {
+      if (strlen(entry->os_name) != klen)
+        continue;
+      if (strncasecmp(entry->os_name, kptr, klen) != 0)
+        continue;
+
+      optset_parse_entry(entry, vptr, vlen);
+      break;
+    }
+  }
+}
--- /dev/null
+#ifndef _OPTSET_H
+#define _OPTSET_H
+
+/* Value types for the 'os_type' field of struct optset. */
+enum {
+ OPT_BOOL, /* presence of the option stores 'os_val' in an int */
+ OPT_STRING, /* value is copied into a buffer of 'os_val' bytes */
+ OPT_INT /* value is converted by strtol() with base 'os_val' */
+};
+
+/* An entry for the parser of an options set. The 'os_name' field must point
+ * to a string, which is treated case-insensitively; the last entry of a table
+ * must have NULL name. The 'os_type' field must be set to one of the OPT_
+ * values defined above. The 'os_ptr' field must point to the field that is to
+ * receive the value of a recognized option. For OPT_STRING, it must point to a
+ * string of a size set in 'os_val'; the resulting string may be truncated, but
+ * will always be null-terminated. For OPT_BOOL, it must point to an int which
+ * will be set to the value in 'os_val' if the option is present. For OPT_INT,
+ * it must point to an int which will be set to the provided option value;
+ * 'os_val' is then a base passed to strtol().
+ */
+struct optset {
+ char *os_name; /* option key, matched case-insensitively; NULL ends a table */
+ int os_type; /* one of the OPT_ values */
+ void *os_ptr; /* where to store the result: an int, or a string buffer */
+ int os_val; /* OPT_BOOL: value to store; OPT_STRING: buffer size;
+ * OPT_INT: base passed to strtol()
+ */
+};
+
+_PROTOTYPE( void optset_parse, (struct optset *table, char *string) );
+
+#endif /* _OPTSET_H */
--- /dev/null
+/* This file contains the procedures that look up path names in the directory
+ * system and determine the inode number that goes with a given path name.
+ *
+ * The entry points into this file are
+ * eat_path: the 'main' routine of the path-to-inode conversion mechanism
+ * last_dir: find the final directory on a given path
+ * advance: parse one component of a path name
+ * search_dir: search a directory for a string and return its inode number
+ *
+ * Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <assert.h>
+#include <string.h>
+#include <minix/endpoint.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include <minix/vfsif.h>
+
+/* Special "." and ".." strings. search_dir() recognizes them by address
+ * (string == dot1 || string == dot2), not by content: when one of these exact
+ * objects is passed, the forbidden() access check on the directory is skipped
+ * and only read_only() is consulted for modifying operations.
+ */
+PUBLIC char dot1[2] = "."; /* used for search_dir to bypass the access */
+PUBLIC char dot2[3] = ".."; /* permissions for . and .. */
+
+FORWARD _PROTOTYPE( char *get_name, (char *name, char string[NAME_MAX+1]) );
+FORWARD _PROTOTYPE( int ltraverse, (struct inode *rip, char *suffix) );
+FORWARD _PROTOTYPE( int parse_path, (ino_t dir_ino, ino_t root_ino,
+ int flags, struct inode **res_inop,
+ size_t *offsetp, int *symlinkp) );
+
+/*===========================================================================*
+ * fs_lookup *
+ *===========================================================================*/
+PUBLIC int fs_lookup()
+{
+/* Handle a path lookup request. The path is copied from the caller's grant
+ * into user_path, the caller's credentials are set up, and parse_path() does
+ * the actual resolution starting at REQ_DIR_INO.
+ *
+ * Results:
+ *   OK           - the inode details are stored in the reply and a reference
+ *                  to the inode is kept for the caller;
+ *   EENTERMOUNT  - the inode details and the path offset are stored in the
+ *                  reply; the reference to the mountpoint inode is dropped;
+ *   ELEAVEMOUNT,
+ *   ESYMLINK     - only the path offset and symlink count are stored;
+ *   anything else is an error.
+ * If symbolic links were expanded and the lookup has to continue elsewhere,
+ * the rewritten path is copied back to the caller first.
+ */
+  cp_grant_id_t grant, grant2;
+  int r, r1, flags, symlinks;
+  unsigned int len;
+  size_t offset = 0, path_size, cred_size;
+  ino_t dir_ino, root_ino;
+  struct inode *rip;
+
+  grant = (cp_grant_id_t) fs_m_in.REQ_GRANT;
+  path_size = (size_t) fs_m_in.REQ_PATH_SIZE;	/* Size of the buffer */
+  len = (int) fs_m_in.REQ_PATH_LEN;	/* including terminating nul */
+  dir_ino = (ino_t) fs_m_in.REQ_DIR_INO;
+  root_ino = (ino_t) fs_m_in.REQ_ROOT_INO;
+  flags = (int) fs_m_in.REQ_FLAGS;
+
+  /* Check length. */
+  if(len > sizeof(user_path)) return(E2BIG);	/* too big for buffer */
+  if(len == 0) return(EINVAL);			/* too small */
+
+  /* Copy the pathname and set up caller's user and group id */
+  r = sys_safecopyfrom(VFS_PROC_NR, grant, /*offset*/ 0,
+                       (vir_bytes) user_path, (size_t) len, D);
+  if(r != OK) return(r);
+
+  /* Verify this is a null-terminated path. */
+  if(user_path[len - 1] != '\0') return(EINVAL);
+
+  if(flags & PATH_GET_UCRED) { /* Do we have to copy uid/gid credentials? */
+    grant2 = (cp_grant_id_t) fs_m_in.REQ_GRANT2;
+    cred_size = (size_t) fs_m_in.REQ_UCRED_SIZE;
+
+    if (cred_size > sizeof(credentials)) return(EINVAL); /* Too big. */
+    r = sys_safecopyfrom(VFS_PROC_NR, grant2, (vir_bytes) 0,
+                         (vir_bytes) &credentials, cred_size, D);
+    if (r != OK) return(r);
+
+    caller_uid = (uid_t) credentials.vu_uid;
+    caller_gid = (gid_t) credentials.vu_gid;
+  } else {
+    memset(&credentials, 0, sizeof(credentials));
+    caller_uid = fs_m_in.REQ_UID;
+    caller_gid = fs_m_in.REQ_GID;
+  }
+
+  /* Lookup inode */
+  rip = NULL;
+  r = parse_path(dir_ino, root_ino, flags, &rip, &offset, &symlinks);
+
+  if(symlinks != 0 && (r == ELEAVEMOUNT || r == EENTERMOUNT || r == ESYMLINK)){
+    /* The path was rewritten by symlink expansion; hand it back. On
+     * EENTERMOUNT parse_path() left us holding a reference to the mountpoint
+     * inode, which must not be leaked when we bail out here.
+     */
+    len = strlen(user_path)+1;
+    if(len > path_size) {
+      if (r == EENTERMOUNT) put_inode(rip);
+      return(ENAMETOOLONG);
+    }
+
+    r1 = sys_safecopyto(VFS_PROC_NR, grant, (vir_bytes) 0,
+                        (vir_bytes) user_path, (size_t) len, D);
+    if (r1 != OK) {
+      if (r == EENTERMOUNT) put_inode(rip);
+      return(r1);
+    }
+  }
+
+  if(r == ELEAVEMOUNT || r == ESYMLINK) {
+    /* Report offset and the error */
+    fs_m_out.RES_OFFSET = offset;
+    fs_m_out.RES_SYMLOOP = symlinks;
+
+    return(r);
+  }
+
+  if (r != OK && r != EENTERMOUNT) return(r);
+
+  fs_m_out.RES_INODE_NR = rip->i_num;
+  fs_m_out.RES_MODE = rip->i_mode;
+  fs_m_out.RES_FILE_SIZE_LO = rip->i_size;
+  fs_m_out.RES_SYMLOOP = symlinks;
+  fs_m_out.RES_UID = rip->i_uid;
+  fs_m_out.RES_GID = rip->i_gid;
+
+  /* This is only valid for block and character specials. But it doesn't
+   * cause any harm to set RES_DEV always. */
+  fs_m_out.RES_DEV = (dev_t) rip->i_block[0];
+
+  if(r == EENTERMOUNT) {
+    fs_m_out.RES_OFFSET = offset;
+    put_inode(rip); /* Only return a reference to the final object */
+  }
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ * parse_path *
+ *===========================================================================*/
+PRIVATE int parse_path(dir_ino, root_ino, flags, res_inop, offsetp, symlinkp)
+ino_t dir_ino; /* inode number of the directory to start from */
+ino_t root_ino; /* inode number at which ".." is ignored (process root) */
+int flags; /* only PATH_RET_SYMLINK is examined here */
+struct inode **res_inop; /* out: resulting inode (OK and EENTERMOUNT only) */
+size_t *offsetp; /* in/out: number of characters of user_path consumed */
+int *symlinkp; /* out: number of symbolic links expanded */
+{
+ /* Parse the path in user_path, starting at dir_ino. If the path is the empty
+ * string, just return dir_ino. It is up to the caller to treat an empty
+ * path in a special way. Otherwise, if the path consists of just one or
+ * more slash ('/') characters, the path is replaced with ".". Otherwise,
+ * just look up the first (or only) component in path after skipping any
+ * leading slashes.
+ *
+ * Return values:
+ * OK - *res_inop holds a referenced inode for the whole path;
+ * EENTERMOUNT - *res_inop holds a referenced mountpoint inode and *offsetp
+ * tells how much of user_path was consumed;
+ * ELEAVEMOUNT - ".." was requested at the root of a file system that is not
+ * the root file system; no inode reference is returned;
+ * ESYMLINK - a symbolic link expanded to a path starting with '/';
+ * user_path holds the new path, no inode is returned;
+ * ELOOP - more than SYMLOOP_MAX symbolic links were expanded;
+ * otherwise an error from get_name(), forbidden(), advance() or ltraverse(),
+ * or ENOENT if the starting inode is not in use or has no links left.
+ * On every return other than OK and EENTERMOUNT all inode references taken
+ * here have been released again.
+ */
+ int r, leaving_mount;
+ struct inode *rip, *dir_ip;
+ char *cp, *next_cp; /* component and next component */
+ char component[NAME_MAX+1];
+
+ /* Start parsing path at the first component in user_path */
+ cp = user_path;
+
+ /* No symlinks encountered yet */
+ *symlinkp = 0;
+
+ /* Find starting inode according to the request message */
+ if((rip = find_inode(fs_dev, dir_ino)) == NULL)
+ return(ENOENT);
+
+ /* If dir has been removed return ENOENT. */
+ if (rip->i_links_count == NO_LINK) return(ENOENT);
+
+ /* Take our own reference; it is handed to the caller or released below. */
+ dup_inode(rip);
+
+ /* If the given start inode is a mountpoint, we must be here because the file
+ * system mounted on top returned an ELEAVEMOUNT error. In this case, we must
+ * only accept ".." as the first path component.
+ */
+ leaving_mount = rip->i_mountpoint; /* True iff rip is a mountpoint */
+
+ /* Scan the path component by component. */
+ while (TRUE) {
+ if(cp[0] == '\0') {
+ /* We're done; either the path was empty or we've parsed all
+ components of the path */
+
+ *res_inop = rip;
+ *offsetp += cp - user_path;
+
+ /* Return EENTERMOUNT if we are at a mount point */
+ if (rip->i_mountpoint) return(EENTERMOUNT);
+
+ return(OK);
+ }
+
+ while(cp[0] == '/') cp++;
+ next_cp = get_name(cp, component);
+ if (next_cp == NULL) {
+ /* get_name() failed and set err_code. */
+ put_inode(rip);
+ return(err_code);
+ }
+
+ /* Special code for '..'. A process is not allowed to leave a chrooted
+ * environment. A lookup of '..' at the root of a mounted filesystem
+ * has to return ELEAVEMOUNT. In both cases, the caller needs search
+ * permission for the current inode, as it is used as directory.
+ */
+ if(strcmp(component, "..") == 0) {
+ /* 'rip' is now accessed as directory */
+ if ((r = forbidden(rip, X_BIT)) != OK) {
+ put_inode(rip);
+ return(r);
+ }
+
+ if (rip->i_num == root_ino) {
+ cp = next_cp;
+ continue; /* Ignore the '..' at a process' root
+ and move on to the next component */
+ }
+
+ if (rip->i_num == ROOT_INODE && !rip->i_sp->s_is_root) {
+ /* Climbing up to parent FS */
+
+ put_inode(rip);
+ *offsetp += cp - user_path;
+ return(ELEAVEMOUNT);
+ }
+ }
+
+ /* Only check for a mount point if we are not coming from one. */
+ if (!leaving_mount && rip->i_mountpoint) {
+ /* Going to enter a child FS */
+
+ *res_inop = rip;
+ *offsetp += cp - user_path;
+ return(EENTERMOUNT);
+ }
+
+ /* There is more path. Keep parsing.
+ * If we're leaving a mountpoint, skip directory permission checks:
+ * passing dot2 itself makes search_dir() bypass forbidden().
+ */
+ dir_ip = rip;
+ rip = advance(dir_ip, leaving_mount ? dot2 : component, CHK_PERM);
+ /* Mount crossings reported by advance() are not errors at this point;
+ * they are detected again from the inode in the next round.
+ */
+ if(err_code == ELEAVEMOUNT || err_code == EENTERMOUNT)
+ err_code = OK;
+
+ if (err_code != OK) {
+ put_inode(dir_ip);
+ return(err_code);
+ }
+
+ leaving_mount = 0;
+
+ /* The call to advance() succeeded. Fetch next component. */
+ if (S_ISLNK(rip->i_mode)) {
+
+ /* The link itself is wanted if it is the last component and the
+ * caller asked for symlinks to be returned rather than followed.
+ */
+ if (next_cp[0] == '\0' && (flags & PATH_RET_SYMLINK)) {
+ put_inode(dir_ip);
+ *res_inop = rip;
+ *offsetp += next_cp - user_path;
+ return(OK);
+ }
+
+ /* Extract path name from the symlink file. This rewrites user_path,
+ * so parsing restarts at its beginning with a zero offset.
+ */
+ r = ltraverse(rip, next_cp);
+ next_cp = user_path;
+ *offsetp = 0;
+
+ /* Symloop limit reached? */
+ if (++(*symlinkp) > SYMLOOP_MAX)
+ r = ELOOP;
+
+ if (r != OK) {
+ put_inode(dir_ip);
+ put_inode(rip);
+ return(r);
+ }
+
+ /* An absolute link target cannot be resolved from here. */
+ if (next_cp[0] == '/') {
+ put_inode(dir_ip);
+ put_inode(rip);
+ return(ESYMLINK);
+ }
+
+ /* Relative link target: continue from the directory that held the
+ * link. The extra reference taken here balances the put_inode(dir_ip)
+ * right below.
+ */
+ put_inode(rip);
+ dup_inode(dir_ip);
+ rip = dir_ip;
+ }
+
+ put_inode(dir_ip);
+ cp = next_cp; /* Process subsequent component in next round */
+ }
+
+}
+
+
+/*===========================================================================*
+ * ltraverse *
+ *===========================================================================*/
+PRIVATE int ltraverse(rip, suffix)
+register struct inode *rip;	/* symbolic link */
+char *suffix;			/* current remaining path. Has to point in the
+				 * user_path buffer
+				 */
+{
+/* Traverse a symbolic link. Copy the link text from the inode and insert
+ * the text into the path. Return error code or report success. Base
+ * directory has to be determined according to the first character of the
+ * new pathname.
+ *
+ * Returns OK on success, EIO if the block with the link text cannot be
+ * mapped, and ENAMETOOLONG if the expanded path does not fit in user_path.
+ * The block buffer holding the link text, if any, is released on every
+ * return path.
+ */
+  block_t blink;		/* block containing link text */
+  size_t llen;			/* length of link */
+  size_t slen;			/* length of suffix */
+  struct buf *bp = NULL;	/* buffer containing link text, if any */
+  const char *sp;		/* start of link text */
+  int r = OK;
+
+  llen = (size_t) rip->i_size;
+
+  if (llen > MAX_FAST_SYMLINK_LENGTH) {
+    /* normal symlink, link text is in the first data block */
+    if ((blink = read_map(rip, (off_t) 0)) == NO_BLOCK)
+      return(EIO);
+    bp = get_block(rip->i_dev, blink, NORMAL);
+    sp = bp->b_data;
+  } else {
+    /* fast symlink, stored in inode */
+    sp = (const char*) rip->i_block;
+  }
+
+  slen = strlen(suffix);
+
+  /* The path we're parsing looks like this:
+   * /already/processed/path/<link> or
+   * /already/processed/path/<link>/not/yet/processed/path
+   * After expanding the <link>, the path will look like
+   * <expandedlink> or
+   * <expandedlink>/not/yet/processed
+   * In both cases user_path must have enough room to hold <expandedlink>.
+   * However, in the latter case we have to move /not/yet/processed to the
+   * right place first, before we expand <link>. When strlen(<expandedlink>)
+   * is smaller than strlen(/already/processed/path), we move the suffix to
+   * the left. If strlen(<expandedlink>) is greater, we move it to the right.
+   * Otherwise we do nothing.
+   */
+
+  if (slen > 0) {	/* Do we have path after the link? */
+    /* For simplicity we require that suffix starts with a slash */
+    if (suffix[0] != '/') {
+      panic("ltraverse: suffix does not start with a slash");
+    }
+
+    /* To be able to expand the <link>, we have to move the 'suffix'
+     * to the right place.
+     */
+    if (slen + llen + 1 > sizeof(user_path)) {
+      r = ENAMETOOLONG;	/* <expandedlink>+suffix+\0 does not fit */
+    } else if ((unsigned)(suffix - user_path) != llen) {
+      /* Move suffix left or right if needed */
+      memmove(&user_path[llen], suffix, slen+1);
+    }
+  } else {
+    if (llen + 1 > sizeof(user_path)) {
+      r = ENAMETOOLONG;	/* <expandedlink> + \0 does not fit */
+    } else {
+      /* Set terminating nul */
+      user_path[llen]= '\0';
+    }
+  }
+
+  /* Everything is set, now copy the expanded link to user_path */
+  if (r == OK)
+    memmove(user_path, sp, llen);
+
+  /* Release the link text block on success and on failure alike. */
+  if (bp != NULL)
+    put_block(bp, DIRECTORY_BLOCK);
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ * advance *
+ *===========================================================================*/
+PUBLIC struct inode *advance(dirp, string, chk_perm)
+struct inode *dirp;		/* inode for directory to be searched */
+char string[NAME_MAX + 1];	/* component name to look for */
+int chk_perm;			/* check permissions when string is looked up*/
+{
+/* Look up one path component, 'string', in directory 'dirp' and return a
+ * referenced inode for it, or NULL on failure. err_code receives the result
+ * of the directory search; on success it is additionally set to ELEAVEMOUNT
+ * or EENTERMOUNT when the component crosses a mount boundary. The inode is
+ * still returned in those two cases.
+ */
+  ino_t ino_nr;
+  struct inode *found;
+
+  /* An empty component cannot be looked up. */
+  if (string[0] == '\0') {
+    err_code = ENOENT;
+    return(NULL);
+  }
+
+  /* No directory to search in. */
+  if (dirp == NULL)
+    return(NULL);
+
+  /* Find the inode number that belongs to the name. */
+  err_code = search_dir(dirp, string, &ino_nr, LOOK_UP, chk_perm, 0);
+  if (err_code != OK)
+    return(NULL);
+
+  /* Open the inode that was found. */
+  found = get_inode(dirp->i_dev, (int) ino_nr);
+  if (found == NULL)
+    return(NULL);
+
+  /* "mountpoint/..": the lookup of ".." in the root directory of a mounted
+   * file system yields that same root, but it has to become a reference to
+   * the parent of the directory mounted on. This is recognized by both the
+   * directory and the result being a root inode, the second character of
+   * the name being '.' (which tells ".." from "."), and the file system not
+   * being the root file system.
+   */
+  if (found->i_num == ROOT_INODE && dirp->i_num == ROOT_INODE &&
+      string[1] == '.' && !found->i_sp->s_is_root)
+    err_code = ELEAVEMOUNT;	/* climbing up mountpoint */
+
+  /* Report it when another file system is mounted on the inode found. */
+  if (found->i_mountpoint)
+    err_code = EENTERMOUNT;
+
+  return(found);
+}
+
+
+/*===========================================================================*
+ * get_name *
+ *===========================================================================*/
+PRIVATE char *get_name(path_name, string)
+char *path_name;		/* path name to parse */
+char string[NAME_MAX+1];	/* receives the first component of 'path_name' */
+{
+/* Copy the first component of 'path_name' to 'string', nul terminated, and
+ * return a pointer to the first character after that component (a '/' or
+ * the terminating nul). Leading slashes are skipped, so /usr/ast, /usr//ast,
+ * //usr///ast and /usr/ast/ are all treated alike. If nothing is left after
+ * the leading slashes, 'string' is set to ".".
+ *
+ * A component longer than NAME_MAX or EXT2_NAME_MAX is not truncated:
+ * err_code is set to ENAMETOOLONG and NULL is returned.
+ */
+  char *start, *end;
+  size_t n;
+
+  /* Skip leading slashes */
+  for (start = path_name; *start == '/'; start++)
+    ;
+
+  /* Find the end of the first component */
+  for (end = start; *end != '\0' && *end != '/'; end++)
+    ;
+
+  n = (size_t) (end - start);
+  if (n > NAME_MAX || n > EXT2_NAME_MAX) {
+    err_code = ENAMETOOLONG;
+    return(NULL);
+  }
+
+  if (n == 0) {
+    /* Nothing left: stand in "." for the empty component */
+    strcpy(string, ".");
+    return(end);
+  }
+
+  memcpy(string, start, n);
+  string[n] = '\0';
+  return(end);
+}
+
+
+/*===========================================================================*
+ * search_dir *
+ *===========================================================================*/
+PUBLIC int search_dir(ldir_ptr, string, numb, flag, check_permissions, ftype)
+register struct inode *ldir_ptr; /* ptr to inode for dir to search */
+char string[NAME_MAX + 1];	 /* component to search for */
+ino_t *numb;			 /* pointer to inode number */
+int flag;			 /* LOOK_UP, ENTER, DELETE or IS_EMPTY */
+int check_permissions;		 /* check permissions when flag is !IS_EMPTY */
+int ftype;			 /* used when ENTER and
+				  * INCOMPAT_FILETYPE */
+{
+/* This function searches the directory whose inode is pointed to by
+ * 'ldir_ptr':
+ * if (flag == ENTER) enter 'string' in the directory with inode # '*numb';
+ * if (flag == DELETE) delete 'string' from the directory;
+ * if (flag == LOOK_UP) search for 'string' and return inode # in 'numb';
+ * if (flag == IS_EMPTY) return OK if only . and .. in dir else ENOTEMPTY;
+ *
+ * if 'string' is dot1 or dot2 (the very same objects, compared by address),
+ * no access permissions are checked.
+ *
+ * Returns ENOTDIR if 'ldir_ptr' is not a directory, the error reported by
+ * read_only() or forbidden() if access is refused, ENOENT if LOOK_UP or
+ * DELETE does not find the name, err_code from new_block() if the directory
+ * cannot be extended for ENTER, and OK otherwise (ENOTEMPTY for IS_EMPTY on
+ * a directory with other entries).
+ */
+
+  register struct ext2_disk_dir_desc *dp = NULL;
+  register struct ext2_disk_dir_desc *prev_dp = NULL;
+  register struct buf *bp = NULL;
+  int i, r, e_hit, t, match;
+  mode_t bits;
+  off_t pos;
+  off_t grown = 0;		/* bytes by which the directory grows */
+  unsigned new_slots;
+  block_t b;
+  int extended = 0;
+  int required_space = 0;
+  int string_len = 0;
+
+  /* If 'ldir_ptr' is not a pointer to a dir inode, error. */
+  if ( (ldir_ptr->i_mode & I_TYPE) != I_DIRECTORY) {
+    return(ENOTDIR);
+  }
+
+  r = OK;
+
+  if (flag != IS_EMPTY) {
+    bits = (flag == LOOK_UP ? X_BIT : W_BIT | X_BIT);
+
+    if (string == dot1 || string == dot2) {
+      if (flag != LOOK_UP) r = read_only(ldir_ptr);
+      /* only a writable device is required. */
+    } else if(check_permissions) {
+      r = forbidden(ldir_ptr, bits); /* check access permissions */
+    }
+  }
+  if (r != OK) return(r);
+
+  new_slots = 0;
+  e_hit = FALSE;
+  match = 0;			/* set when a string match occurs */
+  pos = 0;
+
+  if (flag == ENTER) {
+    /* Space needed for the new entry, rounded up to the entry alignment. */
+    string_len = strlen(string);
+    required_space = MIN_DIR_ENTRY_SIZE + string_len;
+    required_space += (required_space & 0x03) == 0 ? 0 :
+                      (DIR_ENTRY_ALIGN - (required_space & 0x03) );
+
+    /* Possibly resume the search at the position remembered from the
+     * previous ENTER or DELETE instead of at the start of the directory.
+     */
+    if (ldir_ptr->i_last_dpos < ldir_ptr->i_size &&
+        ldir_ptr->i_last_dentry_size <= required_space)
+      pos = ldir_ptr->i_last_dpos;
+  }
+
+  for (; pos < ldir_ptr->i_size; pos += ldir_ptr->i_sp->s_block_size) {
+    b = read_map(ldir_ptr, pos);	/* get block number */
+
+    /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
+    bp = get_block(ldir_ptr->i_dev, b, NORMAL);	/* get a dir block */
+    prev_dp = NULL; /* New block - new first dentry, so no prev. */
+
+    if (bp == NO_BLOCK)
+      panic("get_block returned NO_BLOCK");
+    assert(bp != NULL);
+
+    /* Search a directory block.
+     * Note, we set prev_dp at the end of the loop.
+     */
+    for (dp = (struct ext2_disk_dir_desc*) &bp->b_data;
+         CUR_DISC_DIR_POS(dp, &bp->b_data) < ldir_ptr->i_sp->s_block_size;
+         dp = NEXT_DISC_DIR_DESC(dp) ) {
+      /* Match occurs if string found. */
+      if (flag != ENTER && dp->d_ino != NO_ENTRY) {
+        if (flag == IS_EMPTY) {
+          /* If this test succeeds, dir is not empty. */
+          if (ansi_strcmp(dp->d_name, ".", dp->d_name_len) != 0 &&
+              ansi_strcmp(dp->d_name, "..", dp->d_name_len) != 0) match = 1;
+        } else {
+          if (ansi_strcmp(dp->d_name, string, dp->d_name_len) == 0){
+            match = 1;
+          }
+        }
+      }
+
+      if (match) {
+        /* LOOK_UP or DELETE found what it wanted. */
+        r = OK;
+        if (flag == IS_EMPTY) r = ENOTEMPTY;
+        else if (flag == DELETE) {
+          if (dp->d_name_len >= sizeof(ino_t)) {
+            /* Save d_ino for recovery. */
+            t = dp->d_name_len - sizeof(ino_t);
+            *((ino_t *) &dp->d_name[t]) = dp->d_ino;
+          }
+          dp->d_ino = NO_ENTRY;	/* erase entry */
+          bp->b_dirt = DIRTY;
+
+          /* If we don't support HTree (directory index),
+           * which is fully compatible ext2 feature,
+           * we should reset EXT2_INDEX_FL, when modify
+           * linked directory structure.
+           *
+           * @TODO: actually we could just reset it for
+           * each directory, but I added if() to not
+           * forget about it later, when add HTree
+           * support.
+           */
+          if (!HAS_COMPAT_FEATURE(ldir_ptr->i_sp,
+                                  COMPAT_DIR_INDEX))
+            ldir_ptr->i_flags &= ~EXT2_INDEX_FL;
+          ldir_ptr->i_last_dpos = pos;
+          ldir_ptr->i_last_dentry_size = conv2(le_CPU,
+                                               dp->d_rec_len);
+          ldir_ptr->i_update |= CTIME | MTIME;
+          ldir_ptr->i_dirt = DIRTY;
+          /* Now we have cleared dentry, if it's not
+           * the first one, merge it with previous one.
+           * Since we assume, that existing dentry must be
+           * correct, there is no way to span a data block.
+           */
+          if (prev_dp) {
+            u16_t temp = conv2(le_CPU,
+                               prev_dp->d_rec_len);
+            temp += conv2(le_CPU,
+                          dp->d_rec_len);
+            prev_dp->d_rec_len = conv2(le_CPU,
+                                       temp);
+          }
+        } else {
+          /* 'flag' is LOOK_UP */
+          *numb = (ino_t) conv4(le_CPU, dp->d_ino);
+        }
+        put_block(bp, DIRECTORY_BLOCK);
+        return(r);
+      }
+
+      /* Check for free slot for the benefit of ENTER. */
+      if (flag == ENTER && dp->d_ino == NO_ENTRY) {
+        /* we found a free slot, check if it has enough space */
+        if (required_space <= conv2(le_CPU, dp->d_rec_len)) {
+          e_hit = TRUE;	/* we found a free slot */
+          break;
+        }
+      }
+      /* Can we shrink dentry? */
+      if (flag == ENTER && required_space <= DIR_ENTRY_SHRINK(dp)) {
+        /* Shrink directory and create empty slot, now
+         * dp->d_rec_len = DIR_ENTRY_ACTUAL_SIZE + DIR_ENTRY_SHRINK.
+         */
+        int new_slot_size = conv2(le_CPU, dp->d_rec_len);
+        int actual_size = DIR_ENTRY_ACTUAL_SIZE(dp);
+        new_slot_size -= actual_size;
+        dp->d_rec_len = conv2(le_CPU, actual_size);
+        dp = NEXT_DISC_DIR_DESC(dp);
+        dp->d_rec_len = conv2(le_CPU, new_slot_size);
+        /* if we fail before writing real ino */
+        dp->d_ino = NO_ENTRY;
+        bp->b_dirt = DIRTY;
+        e_hit = TRUE;	/* we found a free slot */
+        break;
+      }
+
+      prev_dp = dp;
+    }
+
+    /* The whole block has been searched or ENTER has a free slot. */
+    if (e_hit) break;	/* e_hit set if ENTER can be performed now */
+    put_block(bp, DIRECTORY_BLOCK); /* otherwise, continue searching dir */
+  }
+
+  /* The whole directory has now been searched. */
+  if (flag != ENTER) {
+    return(flag == IS_EMPTY ? OK : ENOENT);
+  }
+
+  /* When ENTER next time, start searching for free slot from
+   * i_last_dpos. It gives solid performance improvement.
+   */
+  ldir_ptr->i_last_dpos = pos;
+  ldir_ptr->i_last_dentry_size = required_space;
+
+  /* This call is for ENTER. If no free slot has been found so far, try to
+   * extend directory.
+   */
+  if (e_hit == FALSE) { /* directory is full and no room left in last block */
+    new_slots++;		/* increase directory size by 1 entry */
+    if ( (bp = new_block(ldir_ptr, ldir_ptr->i_size)) == NULL)
+      return(err_code);
+    dp = (struct ext2_disk_dir_desc*) &bp->b_data;
+    dp->d_rec_len = conv2(le_CPU, ldir_ptr->i_sp->s_block_size);
+    dp->d_name_len = DIR_ENTRY_MAX_NAME_LEN(dp); /* for failure */
+    extended = 1;
+  }
+
+  /* 'bp' now points to a directory block with space. 'dp' points to slot. */
+  dp->d_name_len = string_len;
+  for (i = 0; i < NAME_MAX && i < dp->d_name_len && string[i]; i++)
+    dp->d_name[i] = string[i];
+  dp->d_ino = (int) conv4(le_CPU, *numb);
+  if (HAS_INCOMPAT_FEATURE(ldir_ptr->i_sp, INCOMPAT_FILETYPE)) {
+    /* Convert ftype (from inode.i_mode) to dp->d_file_type */
+    if (ftype == I_REGULAR)
+      dp->d_file_type = EXT2_FT_REG_FILE;
+    else if (ftype == I_DIRECTORY)
+      dp->d_file_type = EXT2_FT_DIR;
+    else if (ftype == I_SYMBOLIC_LINK)
+      dp->d_file_type = EXT2_FT_SYMLINK;
+    else if (ftype == I_BLOCK_SPECIAL)
+      dp->d_file_type = EXT2_FT_BLKDEV;
+    else if (ftype == I_CHAR_SPECIAL)
+      dp->d_file_type = EXT2_FT_CHRDEV;
+    else if (ftype == I_NAMED_PIPE)
+      dp->d_file_type = EXT2_FT_FIFO;
+    else
+      dp->d_file_type = EXT2_FT_UNKNOWN;
+  }
+  bp->b_dirt = DIRTY;
+
+  /* 'dp' points into the data of 'bp', so fetch the record length that is
+   * needed below while the buffer is still held.
+   */
+  if (new_slots == 1)
+    grown = (off_t) conv2(le_CPU, dp->d_rec_len);
+
+  put_block(bp, DIRECTORY_BLOCK);
+  ldir_ptr->i_update |= CTIME | MTIME;	/* mark mtime for update later */
+  ldir_ptr->i_dirt = DIRTY;
+
+  if (new_slots == 1) {
+    ldir_ptr->i_size += grown;
+    /* Send the change to disk if the directory is extended. */
+    if (extended) rw_inode(ldir_ptr, WRITING);
+  }
+  return(OK);
+
+}
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include "inode.h"
+#include "super.h"
+#include <minix/vfsif.h>
+
+FORWARD _PROTOTYPE( int in_group, (gid_t grp) );
+
+
+/*===========================================================================*
+ * fs_chmod *
+ *===========================================================================*/
+PUBLIC int fs_chmod()
+{
+/* Perform the chmod(name, mode) system call: replace the ALL_MODES bits of
+ * the inode named in the request by those of REQ_MODE, leaving all other
+ * mode bits alone, and report the resulting full mode in the reply.
+ * Returns EINVAL when the inode cannot be opened, OK otherwise.
+ */
+  struct inode *ip;
+  mode_t new_bits;
+
+  new_bits = (mode_t) fs_m_in.REQ_MODE;
+
+  /* Temporarily open the file. */
+  ip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (ip == NULL)
+    return(EINVAL);
+
+  /* Install the new permission bits and note the change. */
+  ip->i_mode = (new_bits & ALL_MODES) | (ip->i_mode & ~ALL_MODES);
+  ip->i_dirt = DIRTY;
+  ip->i_update |= CTIME;
+
+  /* Return full new mode to caller. */
+  fs_m_out.RES_MODE = ip->i_mode;
+
+  put_inode(ip);
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * fs_chown *
+ *===========================================================================*/
+PUBLIC int fs_chown()
+{
+/* Change the owner and group of the inode named in the request to REQ_UID
+ * and REQ_GID, clearing the set-uid and set-gid bits while doing so. Nothing
+ * is changed when read_only() refuses; its result is then returned. The
+ * (possibly changed) mode is reported in the reply either way. Returns
+ * EINVAL when the inode cannot be opened.
+ */
+  struct inode *ip;
+  int status;
+
+  /* Temporarily open the file. */
+  ip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (ip == NULL)
+    return(EINVAL);
+
+  /* Not permitted to change the owner of a file on a read-only file sys. */
+  status = read_only(ip);
+  if (status == OK) {
+    ip->i_uid = fs_m_in.REQ_UID;
+    ip->i_gid = fs_m_in.REQ_GID;
+    ip->i_mode &= ~(I_SET_UID_BIT | I_SET_GID_BIT);
+    ip->i_dirt = DIRTY;
+    ip->i_update |= CTIME;
+  }
+
+  /* Update caller on current mode, as it may have changed. */
+  fs_m_out.RES_MODE = ip->i_mode;
+
+  put_inode(ip);
+  return(status);
+}
+
+
+/*===========================================================================*
+ * forbidden *
+ *===========================================================================*/
+PUBLIC int forbidden(register struct inode *rip, mode_t access_desired)
+{
+/* Decide whether the caller may access inode 'rip' in the way described by
+ * 'access_desired' (a combination of R_BIT, W_BIT and X_BIT). The caller is
+ * identified by caller_uid, caller_gid and the group list consulted by
+ * in_group(). Returns OK when access is allowed, EACCES when the permission
+ * bits refuse it, or the result of read_only() when write access is wanted
+ * (EROFS on a file system mounted read-only).
+ */
+
+  mode_t bits, perm_bits;
+  int shift;
+
+  /* Isolate the relevant rwx bits from the mode. */
+  bits = rip->i_mode;
+  if (caller_uid == SU_UID) {
+      /* Grant read and write permission. Grant search permission for
+       * directories. Grant execute permission (for non-directories) if
+       * and only if one of the 'X' bits is set.
+       */
+      if ( (bits & I_TYPE) == I_DIRECTORY ||
+           bits & ((X_BIT << 6) | (X_BIT << 3) | X_BIT))
+          perm_bits = R_BIT | W_BIT | X_BIT;
+      else
+          perm_bits = R_BIT | W_BIT;
+  } else {
+      if (caller_uid == rip->i_uid) shift = 6;            /* owner */
+      else if (caller_gid == rip->i_gid) shift = 3;       /* group */
+      else if (in_group(rip->i_gid) == OK) shift = 3;     /* group list */
+      else shift = 0;                                     /* other */
+      perm_bits = (bits >> shift) & (R_BIT | W_BIT | X_BIT);
+  }
+
+  /* If access desired is not a subset of what is allowed, it is refused. */
+  if ((perm_bits | access_desired) != perm_bits) return(EACCES);
+
+  /* Writing is additionally refused on a file system mounted read-only. */
+  if (access_desired & W_BIT) return(read_only(rip));
+
+  /* 'rip' is never replaced in here, so there is no inode to release. */
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * in_group *
+ *===========================================================================*/
+PRIVATE int in_group(gid_t grp)
+{
+/* Look for 'grp' among the vu_ngroups entries of credentials.vu_sgroups.
+ * Returns OK when it is found and EINVAL when it is not.
+ */
+  int idx = 0;
+
+  while (idx < credentials.vu_ngroups) {
+      if (credentials.vu_sgroups[idx] == grp) return(OK);
+      idx++;
+  }
+
+  return(EINVAL);
+}
+
+
+/*===========================================================================*
+ * read_only *
+ *===========================================================================*/
+PUBLIC int read_only(ip)
+struct inode *ip;		/* inode whose file system is to be checked */
+{
+/* Report whether the file system holding inode 'ip' is mounted read-only:
+ * EROFS when its super block has s_rd_only set, OK otherwise.
+ */
+
+  if (ip->i_sp->s_rd_only) return(EROFS);
+
+  return(OK);
+}
--- /dev/null
+#ifndef EXT2_PROTO_H
+#define EXT2_PROTO_H
+
+/* Function prototypes for the ext2 file server, grouped by source file. */
+
+/* Structs used in prototypes must be declared as such first. */
+struct buf;
+struct filp;
+struct inode;
+struct super_block;
+
+
+/* balloc.c */
+_PROTOTYPE( void discard_preallocated_blocks, (struct inode *rip) );
+_PROTOTYPE( block_t alloc_block, (struct inode *rip, block_t goal) );
+_PROTOTYPE( void free_block, (struct super_block *sp, bit_t bit) );
+
+/* cache.c */
+_PROTOTYPE( void buf_pool, (int bufs) );
+_PROTOTYPE( void flushall, (dev_t dev) );
+_PROTOTYPE( struct buf *get_block, (dev_t dev, block_t block,int only_search));
+_PROTOTYPE( void invalidate, (dev_t device) );
+_PROTOTYPE( void put_block, (struct buf *bp, int block_type) );
+_PROTOTYPE( void set_blocksize, (unsigned int blocksize) );
+_PROTOTYPE( void rw_scattered, (dev_t dev,
+ struct buf **bufq, int bufqsize, int rw_flag) );
+
+/* device.c */
+_PROTOTYPE( int block_dev_io, (int op, dev_t dev, endpoint_t proc_e,
+ void *buf, u64_t pos, size_t bytes) );
+_PROTOTYPE( int dev_open, (endpoint_t driver_e, dev_t dev, endpoint_t proc_e,
+ int flags) );
+_PROTOTYPE( void dev_close, (endpoint_t driver_e, dev_t dev) );
+_PROTOTYPE( int fs_new_driver, (void) );
+
+/* ialloc.c */
+_PROTOTYPE( struct inode *alloc_inode, (struct inode *parent, mode_t bits));
+_PROTOTYPE( void free_inode, (struct inode *rip) );
+
+/* inode.c */
+_PROTOTYPE( void dup_inode, (struct inode *ip) );
+_PROTOTYPE( struct inode *find_inode, (dev_t dev, ino_t numb) );
+_PROTOTYPE( int fs_putnode, (void) );
+_PROTOTYPE( void init_inode_cache, (void) );
+_PROTOTYPE( struct inode *get_inode, (dev_t dev, ino_t numb) );
+_PROTOTYPE( void put_inode, (struct inode *rip) );
+_PROTOTYPE( void update_times, (struct inode *rip) );
+_PROTOTYPE( void rw_inode, (struct inode *rip, int rw_flag) );
+
+/* link.c */
+_PROTOTYPE( int fs_ftrunc, (void) );
+_PROTOTYPE( int fs_link, (void) );
+_PROTOTYPE( int fs_rdlink, (void) );
+_PROTOTYPE( int fs_rename, (void) );
+_PROTOTYPE( int fs_unlink, (void) );
+_PROTOTYPE( int truncate_inode, (struct inode *rip, off_t len) );
+
+/* misc.c */
+_PROTOTYPE( int fs_flush, (void) );
+_PROTOTYPE( int fs_sync, (void) );
+
+/* mount.c */
+_PROTOTYPE( int fs_mountpoint, (void) );
+_PROTOTYPE( int fs_readsuper, (void) );
+_PROTOTYPE( int fs_unmount, (void) );
+
+/* open.c */
+_PROTOTYPE( int fs_create, (void) );
+_PROTOTYPE( int fs_inhibread, (void) );
+_PROTOTYPE( int fs_mkdir, (void) );
+_PROTOTYPE( int fs_mknod, (void) );
+_PROTOTYPE( int fs_slink, (void) );
+
+/* path.c */
+_PROTOTYPE( int fs_lookup, (void) );
+_PROTOTYPE( struct inode *advance, (struct inode *dirp,
+ char string[NAME_MAX + 1], int chk_perm));
+_PROTOTYPE( int search_dir, (struct inode *ldir_ptr,
+ char string [NAME_MAX + 1], ino_t *numb, int flag,
+ int check_permissions, int ftype) );
+
+/* protect.c */
+_PROTOTYPE( int fs_chmod, (void) );
+_PROTOTYPE( int fs_chown, (void) );
+_PROTOTYPE( int fs_getdents, (void) ); /* NOTE(review): defined next to fs_readwrite, not with fs_chmod/fs_chown - confirm and consider listing it under read.c */
+_PROTOTYPE( int forbidden, (struct inode *rip, mode_t access_desired) );
+_PROTOTYPE( int read_only, (struct inode *ip) );
+
+/* read.c */
+_PROTOTYPE( int fs_breadwrite, (void) );
+_PROTOTYPE( int fs_readwrite, (void) );
+_PROTOTYPE( void read_ahead, (void) );
+_PROTOTYPE( block_t read_map, (struct inode *rip, off_t pos) );
+_PROTOTYPE( block_t rd_indir, (struct buf *bp, int index) );
+
+/* stadir.c */
+_PROTOTYPE( int fs_fstatfs, (void) );
+_PROTOTYPE( int fs_stat, (void) );
+_PROTOTYPE( int fs_statvfs, (void) );
+
+/* super.c */
+_PROTOTYPE( unsigned int get_block_size, (dev_t dev) );
+_PROTOTYPE( struct super_block *get_super, (dev_t dev) );
+_PROTOTYPE( int read_super, (struct super_block *sp) );
+_PROTOTYPE( void write_super, (struct super_block *sp) );
+_PROTOTYPE( struct group_desc* get_group_desc, (unsigned int bnum) );
+
+/* time.c */
+_PROTOTYPE( int fs_utime, (void) );
+
+/* utility.c */
+_PROTOTYPE( time_t clock_time, (void) );
+_PROTOTYPE( unsigned conv2, (int norm, int w) );
+_PROTOTYPE( long conv4, (int norm, long x) );
+_PROTOTYPE( void mfs_nul_f, (char *file, int line, char *str,
+ unsigned int len, unsigned int maxlen) );
+_PROTOTYPE( int min, (unsigned int l, unsigned int r) );
+_PROTOTYPE( int no_sys, (void) );
+_PROTOTYPE( void sanitycheck, (char *file, int line) );
+#define SANITYCHECK sanitycheck(__FILE__, __LINE__) /* passes the call site's file and line to sanitycheck() */
+_PROTOTYPE( int ansi_strcmp, (register const char* ansi_s,
+ register const char *s2,
+ register size_t ansi_s_length) );
+_PROTOTYPE( bit_t setbit, (bitchunk_t *bitmap, bit_t max_bits,
+ unsigned int word));
+_PROTOTYPE( bit_t setbyte, (bitchunk_t *bitmap, bit_t max_bits,
+ unsigned int word));
+_PROTOTYPE( int unsetbit, (bitchunk_t *bitmap, bit_t bit) );
+
+/* write.c */
+_PROTOTYPE( struct buf *new_block, (struct inode *rip, off_t position) );
+_PROTOTYPE( void zero_block, (struct buf *bp) );
+_PROTOTYPE( int write_map, (struct inode *, off_t, block_t, int) );
+
+#endif /* EXT2_PROTO_H */
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include <minix/vfsif.h>
+#include <assert.h>
+
+
+FORWARD _PROTOTYPE( struct buf *rahead, (struct inode *rip, block_t baseblock,
+ u64_t position, unsigned bytes_ahead) );
+FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, u64_t position,
+ unsigned off, size_t chunk, unsigned left, int rw_flag,
+ cp_grant_id_t gid, unsigned buf_off, unsigned int block_size,
+ int *completed));
+
+PRIVATE char getdents_buf[GETDENTS_BUFSIZ]; /* staging area in which fs_getdents builds dirents before copying them out */
+
+PRIVATE off_t rdahedpos; /* file position at which read_ahead() is to prefetch */
+PRIVATE struct inode *rdahed_inode; /* inode to read ahead from; NULL when no read-ahead is pending */
+
+/*===========================================================================*
+ * fs_readwrite *
+ *===========================================================================*/
+PUBLIC int fs_readwrite(void)
+{
+ int r, rw_flag, block_spec;
+ int regular;
+ cp_grant_id_t gid;
+ off_t position, f_size, bytes_left;
+ unsigned int off, cum_io, block_size, chunk;
+ mode_t mode_word;
+ int completed;
+ struct inode *rip;
+ size_t nrbytes;
+ /* Serve a read or write request on inode REQ_INODE_NR: move REQ_NBYTES bytes,
+  * starting at REQ_SEEK_POS_LO, between the grant REQ_GRANT and the file (or
+  * the block device it names), one block-bounded chunk at a time. The reply
+  * carries the resulting position (RES_SEEK_POS_LO) and the number of bytes
+  * transferred (RES_NBYTES). Returns EINVAL if the inode is not found and
+  * EFBIG if a write would exceed the file system's maximum file size.
+  */
+ r = OK;
+
+ /* Find the inode referred */
+ if ((rip = find_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR)) == NULL)
+ return(EINVAL);
+
+ mode_word = rip->i_mode & I_TYPE;
+ regular = (mode_word == I_REGULAR || mode_word == I_NAMED_PIPE);
+ block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0);
+
+ /* Determine blocksize */
+ if (block_spec) {
+ block_size = get_block_size( (dev_t) rip->i_block[0]);
+ f_size = MAX_FILE_POS;
+ } else {
+ block_size = rip->i_sp->s_block_size;
+ f_size = rip->i_size;
+ if (f_size < 0) f_size = MAX_FILE_POS;
+ }
+
+ /* Get the values from the request message */
+ rw_flag = (fs_m_in.m_type == REQ_READ ? READING : WRITING);
+ gid = (cp_grant_id_t) fs_m_in.REQ_GRANT;
+ position = (off_t) fs_m_in.REQ_SEEK_POS_LO;
+ nrbytes = (size_t) fs_m_in.REQ_NBYTES;
+
+ rdwt_err = OK; /* set to EIO if disk error occurs */
+
+ if (rw_flag == WRITING && !block_spec) {
+ /* Check in advance to see if file will grow too big. */
+ if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
+ return(EFBIG);
+ }
+
+ cum_io = 0;
+ /* Split the transfer into chunks that don't span two blocks. */
+ while (nrbytes != 0) {
+ off = (unsigned int) (position % block_size);/* offset in blk*/
+ chunk = MIN(nrbytes, block_size - off);
+
+ if (rw_flag == READING) {
+ bytes_left = f_size - position;
+ if (position >= f_size) break; /* we are beyond EOF */
+ if (chunk > bytes_left) chunk = (int) bytes_left;
+ }
+
+ /* Read or write 'chunk' bytes. */
+ r = rw_chunk(rip, cvul64((unsigned long) position), off, chunk,
+ nrbytes, rw_flag, gid, cum_io, block_size, &completed);
+
+ if (r != OK) break; /* rw_chunk failed; stop the transfer */
+ if (rdwt_err < 0) break;
+
+ /* Update counters and pointers. */
+ nrbytes -= chunk; /* bytes yet to be read */
+ cum_io += chunk; /* bytes read so far */
+ position += (off_t) chunk; /* position within the file */
+ }
+
+ fs_m_out.RES_SEEK_POS_LO = position; /* It might change later and the VFS
+ has to know this value */
+
+ /* On write, grow the recorded file size if we wrote past the old end. */
+ if (rw_flag == WRITING) {
+ if (regular || mode_word == I_DIRECTORY) {
+ if (position > f_size) rip->i_size = position;
+ }
+ }
+
+ /* Check to see if read-ahead is called for, and if so, set it up. */
+ if(rw_flag == READING && rip->i_seek == NO_SEEK &&
+ (unsigned int) position % block_size == 0 &&
+ (regular || mode_word == I_DIRECTORY)) {
+ rdahed_inode = rip;
+ rdahedpos = position;
+ }
+
+ rip->i_seek = NO_SEEK;
+
+ if (rdwt_err != OK) r = rdwt_err; /* check for disk error */
+ if (rdwt_err == END_OF_FILE) r = OK;
+
+ if (r == OK) {
+ if (rw_flag == READING) rip->i_update |= ATIME;
+ if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
+ rip->i_dirt = DIRTY; /* inode is thus now dirty */
+ }
+
+ fs_m_out.RES_NBYTES = cum_io;
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * fs_breadwrite *
+ *===========================================================================*/
+PUBLIC int fs_breadwrite(void)
+{
+/* Serve a block-device read or write request (REQ_BREAD reads, anything else
+ * writes): move REQ_NBYTES bytes, starting at the 64-bit position given by
+ * REQ_SEEK_POS_LO/HI, between the grant REQ_GRANT and device REQ_DEV2, one
+ * block-bounded chunk at a time. The reply carries the resulting position
+ * (RES_SEEK_POS_LO/HI) and the number of bytes transferred (RES_NBYTES).
+ */
+
+  int r, rw_flag, completed;
+  cp_grant_id_t gid;
+  u64_t position;
+  unsigned int off, cum_io, chunk, block_size;
+  size_t nrbytes;
+
+  /* Pseudo inode for rw_chunk */
+  struct inode rip;
+
+  r = OK;
+
+  /* Get the values from the request message */
+  rw_flag = (fs_m_in.m_type == REQ_BREAD ? READING : WRITING);
+  gid = (cp_grant_id_t) fs_m_in.REQ_GRANT;
+  position = make64((unsigned long) fs_m_in.REQ_SEEK_POS_LO,
+                    (unsigned long) fs_m_in.REQ_SEEK_POS_HI);
+  nrbytes = (size_t) fs_m_in.REQ_NBYTES;
+
+  block_size = get_block_size( (dev_t) fs_m_in.REQ_DEV2);
+
+  /* The pseudo inode lives on the stack, so start from a known state:
+   * rahead() also looks at i_seek, which used to be left uninitialized.
+   */
+  memset(&rip, 0, sizeof(rip));
+  rip.i_block[0] = (block_t) fs_m_in.REQ_DEV2;
+  rip.i_mode = I_BLOCK_SPECIAL;
+  rip.i_size = 0;
+  rip.i_seek = NO_SEEK;
+
+  rdwt_err = OK;                /* set to EIO if disk error occurs */
+
+  cum_io = 0;
+  /* Split the transfer into chunks that don't span two blocks. */
+  while (nrbytes > 0) {
+      off = rem64u(position, block_size);     /* offset in blk */
+      chunk = min(nrbytes, block_size - off);
+
+      /* Read or write 'chunk' bytes. */
+      r = rw_chunk(&rip, position, off, chunk, nrbytes, rw_flag, gid,
+                   cum_io, block_size, &completed);
+
+      if (r != OK) break;       /* rw_chunk failed; stop the transfer */
+      if (rdwt_err < 0) break;
+
+      /* Update counters and pointers. */
+      nrbytes -= chunk;         /* bytes yet to be read */
+      cum_io += chunk;          /* bytes read so far */
+      position = add64ul(position, chunk);    /* position within the file */
+  }
+
+  fs_m_out.RES_SEEK_POS_LO = ex64lo(position);
+  fs_m_out.RES_SEEK_POS_HI = ex64hi(position);
+
+  if (rdwt_err != OK) r = rdwt_err;     /* check for disk error */
+  if (rdwt_err == END_OF_FILE) r = OK;
+
+  fs_m_out.RES_NBYTES = cum_io;
+
+  return(r);
+}
+
+
+/*===========================================================================*
+ * rw_chunk *
+ *===========================================================================*/
+PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, gid,
+ buf_off, block_size, completed)
+register struct inode *rip; /* pointer to inode for file to be rd/wr */
+u64_t position; /* position within file to read or write */
+unsigned off; /* off within the current block */
+unsigned int chunk; /* number of bytes to read or write */
+unsigned left; /* max number of bytes wanted after position */
+int rw_flag; /* READING or WRITING */
+cp_grant_id_t gid; /* grant */
+unsigned buf_off; /* offset in grant */
+unsigned int block_size; /* block size of FS operating on */
+int *completed; /* set to 0 on entry; not updated any further in here */
+{
+/* Read or write (part of) a block: copy 'chunk' bytes between offset 'off' of
+ * the block that holds 'position' and offset 'buf_off' of grant 'gid'.
+ * Returns the result of the safecopy call, or err_code if a new block could
+ * not be obtained for a write.
+ */
+
+ register struct buf *bp;
+ register int r = OK;
+ int n, block_spec;
+ block_t b;
+ dev_t dev;
+
+ *completed = 0;
+
+ block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
+
+ if (block_spec) {
+ b = div64u(position, block_size);
+ dev = (dev_t) rip->i_block[0];
+ } else {
+ if (ex64hi(position) != 0)
+ panic("rw_chunk: position too high");
+ b = read_map(rip, (off_t) ex64lo(position));
+ dev = rip->i_dev;
+ }
+
+ if (!block_spec && b == NO_BLOCK) {
+ if (rw_flag == READING) {
+ /* Reading from a nonexistent block. Must read as all zeros.*/
+ bp = get_block(NO_DEV, NO_BLOCK, NORMAL); /* get a buffer */
+ zero_block(bp);
+ } else {
+ /* Writing to a nonexistent block. Create and enter in inode.*/
+ if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
+ return(err_code);
+ }
+ } else if (rw_flag == READING) {
+ /* Read and read ahead if convenient. */
+ bp = rahead(rip, b, position, left);
+ } else {
+ /* Normally an existing block to be partially overwritten is first read
+ * in. However, a full block need not be read in. If it is already in
+ * the cache, acquire it, otherwise just acquire a free buffer.
+ */
+ n = (chunk == block_size ? NO_READ : NORMAL);
+ if (!block_spec && off == 0 && (off_t) ex64lo(position) >= rip->i_size)
+ n = NO_READ;
+ bp = get_block(dev, b, n);
+ }
+
+ /* In all cases, bp now points to a valid buffer. */
+ if (bp == NULL)
+ panic("bp not valid in rw_chunk, this can't happen");
+
+ if (rw_flag == WRITING && chunk != block_size && !block_spec &&
+ (off_t) ex64lo(position) >= rip->i_size && off == 0) {
+ zero_block(bp); /* partial write at or past the old end of file: clear the block first */
+ }
+
+ if (rw_flag == READING) {
+ /* Copy a chunk from the block buffer to user space. */
+ r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) buf_off,
+ (vir_bytes) (bp->b_data+off), (size_t) chunk, D);
+ } else {
+ /* Copy a chunk from user space to the block buffer. */
+ r = sys_safecopyfrom(VFS_PROC_NR, gid, (vir_bytes) buf_off,
+ (vir_bytes) (bp->b_data+off), (size_t) chunk, D);
+ bp->b_dirt = DIRTY;
+ }
+
+ n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
+ put_block(bp, n);
+
+ return(r);
+}
+
+
+/*===========================================================================*
+ * read_map *
+ *===========================================================================*/
+PUBLIC block_t read_map(rip, position)
+register struct inode *rip; /* ptr to inode to map from */
+off_t position; /* position in file whose blk wanted */
+{
+/* Given an inode and a position within the corresponding file, locate the
+ * block number in which that position is to be found and return it.
+ * NO_BLOCK is returned when no block is mapped at that position. The
+ * boundaries of the direct, single, double and triple indirect ranges are
+ * computed once, from the block size of the first inode passed in, and are
+ * reused on every later call.
+ */
+
+ struct buf *bp;
+ int index;
+ block_t b;
+ unsigned long excess, block_pos;
+ static char first_time = TRUE;
+ static long addr_in_block; /* block addresses per indirect block */
+ static long addr_in_block2; /* addr_in_block squared */
+ static long doub_ind_s; /* first file block reached via the double indirect block */
+ static long triple_ind_s; /* first file block reached via the triple indirect block */
+ static long out_range_s; /* first file block beyond the triple indirect range */
+
+ if (first_time) {
+ addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES;
+ addr_in_block2 = addr_in_block * addr_in_block;
+ doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block;
+ triple_ind_s = doub_ind_s + addr_in_block2;
+ out_range_s = triple_ind_s + addr_in_block2 * addr_in_block;
+ first_time = FALSE;
+ }
+
+ block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */
+
+ /* Is 'position' to be found in the inode itself? */
+ if (block_pos < EXT2_NDIR_BLOCKS)
+ return(rip->i_block[block_pos]);
+
+ /* It is not in the inode, so it must be single, double or triple indirect */
+ if (block_pos < doub_ind_s) {
+ b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */
+ index = block_pos - EXT2_NDIR_BLOCKS;
+ } else if (block_pos >= out_range_s) { /* TODO: do we need it? */
+ return(NO_BLOCK);
+ } else {
+ /* double or triple indirect block. At first if it's triple,
+ * find double indirect block.
+ */
+ excess = block_pos - doub_ind_s;
+ b = rip->i_block[EXT2_DIND_BLOCK];
+ if (block_pos >= triple_ind_s) {
+ b = rip->i_block[EXT2_TIND_BLOCK];
+ if (b == NO_BLOCK) return(NO_BLOCK);
+ bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */
+ ASSERT(bp->b_dev != NO_DEV);
+ ASSERT(bp->b_dev == rip->i_dev);
+ excess = block_pos - triple_ind_s;
+ index = excess / addr_in_block2;
+ b = rd_indir(bp, index); /* num of double ind block */
+ put_block(bp, INDIRECT_BLOCK); /* release triple ind block */
+ excess = excess % addr_in_block2;
+ }
+ if (b == NO_BLOCK) return(NO_BLOCK);
+ bp = get_block(rip->i_dev, b, NORMAL); /* get double indirect block */
+ ASSERT(bp->b_dev != NO_DEV);
+ ASSERT(bp->b_dev == rip->i_dev);
+ index = excess / addr_in_block;
+ b = rd_indir(bp, index); /* num of single ind block */
+ put_block(bp, INDIRECT_BLOCK); /* release double ind block */
+ index = excess % addr_in_block; /* index into single ind blk */
+ }
+ if (b == NO_BLOCK) return(NO_BLOCK);
+ bp = get_block(rip->i_dev, b, NORMAL); /* get single indirect block */
+ ASSERT(bp->b_dev != NO_DEV);
+ ASSERT(bp->b_dev == rip->i_dev);
+ b = rd_indir(bp, index);
+ put_block(bp, INDIRECT_BLOCK); /* release single ind block */
+
+ return(b);
+}
+
+
+/*===========================================================================*
+ * rd_indir *
+ *===========================================================================*/
+PUBLIC block_t rd_indir(bp, index)
+struct buf *bp;			/* buffer holding the indirect block */
+int index;			/* which entry of *bp is wanted */
+{
+/* Fetch entry 'index' of the indirect block held in 'bp', passed through
+ * conv4(le_CPU, ...). A NULL 'bp' is a fatal error.
+ */
+  block_t entry;
+
+  if (bp == NULL) {
+      panic("rd_indir() on NULL");
+  }
+
+  entry = conv4(le_CPU, bp->b_ind[index]);
+  return(entry);
+}
+
+
+/*===========================================================================*
+ * read_ahead *
+ *===========================================================================*/
+PUBLIC void read_ahead()
+{
+/* Carry out the read-ahead, if any, that was recorded in rdahed_inode and
+ * rdahedpos: bring the block at that position into the cache through
+ * rahead() and release it again straight away. The pending request is
+ * cleared before any block is looked up.
+ */
+  struct inode *ip;
+  struct buf *bp;
+  block_t blk;
+  unsigned int bsize;
+
+  ip = rdahed_inode;            /* inode to read ahead from, if any */
+  if (ip == NULL) return;
+
+  bsize = get_block_size(ip->i_dev);
+  rdahed_inode = NULL;          /* turn off read ahead */
+
+  blk = read_map(ip, rdahedpos);
+  if (blk == NO_BLOCK) return;  /* at EOF */
+
+  assert(rdahedpos > 0);        /* makes the unsigned cast below safe */
+
+  bp = rahead(ip, blk, cvul64((unsigned long) rdahedpos), bsize);
+  put_block(bp, PARTIAL_DATA_BLOCK);
+}
+
+
+/*===========================================================================*
+ * rahead *
+ *===========================================================================*/
+PRIVATE struct buf *rahead(rip, baseblock, position, bytes_ahead)
+register struct inode *rip; /* pointer to inode for file to be read */
+block_t baseblock; /* block at current position */
+u64_t position; /* position within file */
+unsigned bytes_ahead; /* bytes beyond position for immediate use */
+{
+/* Fetch a block from the cache or the device. If a physical read is
+ * required, prefetch as many more blocks as convenient into the cache.
+ * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
+ * The device driver may decide it knows better and stop reading at a
+ * cylinder boundary (or after an error). Rw_scattered() puts an optional
+ * flag on all reads to allow this.
+ *
+ * The buffer for 'baseblock' is returned. The request queue read_q is
+ * (re)allocated whenever nr_bufs differs from the size it was last
+ * allocated for; failing to allocate it is fatal.
+ */
+/* Minimum number of blocks to prefetch. */
+# define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
+ int block_spec, read_q_size;
+ unsigned int blocks_ahead, fragment, block_size;
+ block_t block, blocks_left;
+ off_t ind1_pos;
+ dev_t dev;
+ struct buf *bp;
+ static unsigned int readqsize = 0; /* number of entries read_q was allocated for */
+ static struct buf **read_q; /* buffers queued for one rw_scattered() call */
+
+ if(readqsize != nr_bufs) {
+ if(readqsize > 0) {
+ assert(read_q != NULL);
+ free(read_q);
+ }
+ if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs)))
+ panic("couldn't allocate read_q");
+ readqsize = nr_bufs;
+ }
+
+ block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
+ if (block_spec)
+ dev = (dev_t) rip->i_block[0];
+ else
+ dev = rip->i_dev;
+
+ block_size = get_block_size(dev);
+
+ block = baseblock;
+ bp = get_block(dev, block, PREFETCH);
+ if (bp->b_dev != NO_DEV) return(bp); /* already in the cache: nothing to read */
+
+ /* The best guess for the number of blocks to prefetch: A lot.
+ * It is impossible to tell what the device looks like, so we don't even
+ * try to guess the geometry, but leave it to the driver.
+ *
+ * The floppy driver can read a full track with no rotational delay, and it
+ * avoids reading partial tracks if it can, so handing it enough buffers to
+ * read two tracks is perfect. (Two, because some diskette types have
+ * an odd number of sectors per track, so a block may span tracks.)
+ *
+ * The disk drivers don't try to be smart. With todays disks it is
+ * impossible to tell what the real geometry looks like, so it is best to
+ * read as much as you can. With luck the caching on the drive allows
+ * for a little time to start the next read.
+ *
+ * The current solution below is a bit of a hack, it just reads blocks from
+ * the current file position hoping that more of the file can be found. A
+ * better solution must look at the already available
+ * indirect blocks (but don't call read_map!).
+ */
+
+ fragment = rem64u(position, block_size);
+ position = sub64u(position, fragment);
+ bytes_ahead += fragment;
+
+ blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
+
+ if (block_spec && rip->i_size == 0) {
+ blocks_left = (block_t) NR_IOREQS;
+ } else {
+ blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
+ block_size;
+
+ /* Go for the first indirect block if we are in its neighborhood. */
+ if (!block_spec) {
+ ind1_pos = (EXT2_NDIR_BLOCKS) * block_size;
+ if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
+ blocks_ahead++;
+ blocks_left++;
+ }
+ }
+ }
+
+ /* No more than the maximum request. */
+ if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
+
+ /* Read at least the minimum number of blocks, but not after a seek. */
+ if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
+ blocks_ahead = BLOCKS_MINIMUM;
+
+ /* Can't go past end of file. */
+ if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
+
+ read_q_size = 0;
+
+ /* Acquire block buffers. */
+ for (;;) {
+ read_q[read_q_size++] = bp;
+
+ if (--blocks_ahead == 0) break;
+
+ /* Don't trash the cache, leave 4 free. */
+ if (bufs_in_use >= nr_bufs - 4) break;
+
+ block++;
+
+ bp = get_block(dev, block, PREFETCH);
+ if (bp->b_dev != NO_DEV) {
+ /* Oops, block already in the cache, get out. */
+ put_block(bp, FULL_DATA_BLOCK);
+ break;
+ }
+ }
+ rw_scattered(dev, read_q, read_q_size, READING);
+ return(get_block(dev, baseblock, NORMAL));
+}
+
+
+/*===========================================================================*
+ * fs_getdents *
+ *===========================================================================*/
+PUBLIC int fs_getdents(void)
+{
+/* Serve a getdents request: starting at directory position REQ_SEEK_POS_LO
+ * of inode REQ_INODE_NR, convert on-disk ext2 directory entries into struct
+ * dirent records and copy as many as fit into the REQ_MEM_SIZE bytes behind
+ * grant REQ_GRANT. Records are staged in getdents_buf and flushed to the
+ * caller whenever that buffer fills up.
+ *
+ * On success RES_NBYTES holds the number of bytes copied and RES_SEEK_POS_LO
+ * the position to continue from (the directory size when everything was
+ * returned). Returns ENOENT for a misaligned position, EINVAL when the inode
+ * cannot be obtained or the caller's buffer cannot hold even one record, or
+ * the error of a failed copy.
+ */
+
+  register struct inode *rip;
+  int o, r, done;
+  unsigned int block_size, len, reclen;
+  ino_t ino;
+  block_t b;
+  cp_grant_id_t gid;
+  size_t size, tmpbuf_off, userbuf_off;
+  off_t pos, off, block_pos, new_pos, ent_pos;
+  struct buf *bp;
+  struct ext2_disk_dir_desc *d_desc;
+  struct dirent *dep;
+
+  ino = (ino_t) fs_m_in.REQ_INODE_NR;
+  gid = (cp_grant_id_t) fs_m_in.REQ_GRANT;      /* a grant, not a group id */
+  size = (size_t) fs_m_in.REQ_MEM_SIZE;
+  pos = (off_t) fs_m_in.REQ_SEEK_POS_LO;
+
+  /* Check whether the position is properly aligned */
+  if ((unsigned int) pos % DIR_ENTRY_ALIGN)
+      return(ENOENT);
+
+  if ((rip = get_inode(fs_dev, ino)) == NULL)
+      return(EINVAL);
+
+  block_size = rip->i_sp->s_block_size;
+  off = (pos % block_size);             /* Offset in block */
+  block_pos = pos - off;
+  done = FALSE; /* Stop processing directory blocks when done is set */
+
+  memset(getdents_buf, '\0', GETDENTS_BUFSIZ);  /* Avoid leaking any data */
+  tmpbuf_off = 0;       /* Offset in getdents_buf */
+  userbuf_off = 0;      /* Offset in the user's buffer */
+
+  /* The default position for the next request is EOF. If the user's buffer
+   * fills up before EOF, new_pos will be modified. */
+  new_pos = rip->i_size;
+
+  for (; block_pos < rip->i_size; block_pos += block_size) {
+      off_t temp_pos = block_pos;
+      b = read_map(rip, block_pos);     /* get block number */
+      /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
+      bp = get_block(rip->i_dev, b, NORMAL);    /* get a dir block */
+
+      /* 'bp' is a buffer pointer, so test it against NULL rather than
+       * against the block number constant NO_BLOCK.
+       */
+      if (bp == NULL)
+          panic("get_block returned NO_BLOCK");
+
+      /* Search a directory block. */
+      d_desc = (struct ext2_disk_dir_desc*) &bp->b_data;
+
+      /* we need to seek to entry at off bytes.
+       * when NEXT_DISC_DIR_POS == block_size it's last dentry.
+       */
+      for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos
+             && NEXT_DISC_DIR_POS(d_desc, &bp->b_data) < block_size;
+             d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
+          temp_pos += conv2(le_CPU, d_desc->d_rec_len);
+      }
+
+      for (; CUR_DISC_DIR_POS(d_desc, &bp->b_data) < block_size;
+           d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
+          if (d_desc->d_ino == 0)
+              continue; /* Entry is not in use */
+
+          /* Clamp the name to what both dirent and ext2 allow. */
+          if (d_desc->d_name_len > NAME_MAX ||
+              d_desc->d_name_len > EXT2_NAME_MAX) {
+              len = min(NAME_MAX, EXT2_NAME_MAX);
+          } else {
+              len = d_desc->d_name_len;
+          }
+
+          /* Compute record length, rounded up to a multiple of long. */
+          reclen = offsetof(struct dirent, d_name) + len + 1;
+          o = (reclen % sizeof(long));
+          if (o != 0)
+              reclen += sizeof(long) - o;
+
+          /* Need the position of this entry in the directory */
+          ent_pos = block_pos + ((char *)d_desc - bp->b_data);
+
+          if (tmpbuf_off + reclen > GETDENTS_BUFSIZ) {
+              /* Staging buffer is full: flush it to the caller. */
+              r = sys_safecopyto(VFS_PROC_NR, gid,
+                                 (vir_bytes) userbuf_off,
+                                 (vir_bytes) getdents_buf,
+                                 (size_t) tmpbuf_off, D);
+              if (r != OK) {
+                  /* Release the directory block too; it is still
+                   * held at this point.
+                   */
+                  put_block(bp, DIRECTORY_BLOCK);
+                  put_inode(rip);
+                  return(r);
+              }
+              userbuf_off += tmpbuf_off;
+              tmpbuf_off = 0;
+          }
+
+          if (userbuf_off + tmpbuf_off + reclen > size) {
+              /* The user has no space for one more record */
+              done = TRUE;
+
+              /* Record the position of this entry, it is the
+               * starting point of the next request (unless the
+               * position is modified with lseek).
+               */
+              new_pos = ent_pos;
+              break;
+          }
+
+          dep = (struct dirent *) &getdents_buf[tmpbuf_off];
+          dep->d_ino = conv4(le_CPU, d_desc->d_ino);
+          dep->d_off = ent_pos;
+          dep->d_reclen = (unsigned short) reclen;
+          memcpy(dep->d_name, d_desc->d_name, len);
+          dep->d_name[len] = '\0';
+          tmpbuf_off += reclen;
+      }
+
+      put_block(bp, DIRECTORY_BLOCK);
+      if (done)
+          break;
+  }
+
+  /* Flush whatever is still waiting in the staging buffer. */
+  if (tmpbuf_off != 0) {
+      r = sys_safecopyto(VFS_PROC_NR, gid, (vir_bytes) userbuf_off,
+                         (vir_bytes) getdents_buf, (size_t) tmpbuf_off, D);
+      if (r != OK) {
+          put_inode(rip);
+          return(r);
+      }
+
+      userbuf_off += tmpbuf_off;
+  }
+
+  if (done && userbuf_off == 0)
+      r = EINVAL;       /* The user's buffer is too small */
+  else {
+      fs_m_out.RES_NBYTES = userbuf_off;
+      fs_m_out.RES_SEEK_POS_LO = new_pos;
+      rip->i_update |= ATIME;
+      rip->i_dirt = DIRTY;
+      r = OK;
+  }
+
+  put_inode(rip);       /* release the inode */
+  return(r);
+}
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/statvfs.h>
+#include "inode.h"
+#include "super.h"
+#include <minix/vfsif.h>
+
+
+/*===========================================================================*
+ * stat_inode *
+ *===========================================================================*/
+PRIVATE int stat_inode(
+  register struct inode *rip,	/* inode to report on */
+  endpoint_t who_e,		/* endpoint that owns the grant */
+  cp_grant_id_t gid		/* grant covering the caller's stat buffer */
+)
+{
+/* Build a struct stat from inode 'rip' and copy it to offset 0 of grant
+ * 'gid' of endpoint 'who_e'. Pending time updates on the inode are applied
+ * first. Returns the result of the copy.
+ */
+
+  struct stat sb;
+  mode_t ftype;
+  int is_special;
+
+  /* Bring atime, ctime and mtime up to date if updates are pending. */
+  if (rip->i_update) update_times(rip);
+
+  /* Character and block special files report a device in st_rdev. */
+  ftype = rip->i_mode & I_TYPE;
+  is_special = (ftype == I_CHAR_SPECIAL || ftype == I_BLOCK_SPECIAL);
+
+  sb.st_dev = rip->i_dev;
+  sb.st_ino = rip->i_num;
+  sb.st_mode = rip->i_mode;
+  sb.st_nlink = rip->i_links_count;
+  sb.st_uid = rip->i_uid;
+  sb.st_gid = rip->i_gid;
+  sb.st_rdev = (is_special ? rip->i_block[0] : NO_DEV);
+  sb.st_size = rip->i_size;
+  sb.st_atime = rip->i_atime;
+  sb.st_mtime = rip->i_mtime;
+  sb.st_ctime = rip->i_ctime;
+
+  /* Hand the filled-in structure to the caller. */
+  return(sys_safecopyto(who_e, gid, (vir_bytes) 0, (vir_bytes) &sb,
+                        (size_t) sizeof(sb), D));
+}
+
+
+/*===========================================================================*
+ * fs_fstatfs *
+ *===========================================================================*/
+PUBLIC int fs_fstatfs()
+{
+/* Serve an fstatfs request: report the block size of the file system, taken
+ * from the super block of its root inode, in a struct statfs copied to
+ * offset 0 of grant REQ_GRANT of the requester.
+ * Returns EINVAL if the root inode is not found, else the copy result.
+ */
+  struct statfs fsinfo;
+  struct inode *root_ip;
+
+  root_ip = find_inode(fs_dev, ROOT_INODE);
+  if (root_ip == NULL) return(EINVAL);
+
+  fsinfo.f_bsize = root_ip->i_sp->s_block_size;
+
+  /* Hand the structure to the caller. */
+  return(sys_safecopyto(fs_m_in.m_source, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                        (vir_bytes) 0, (vir_bytes) &fsinfo,
+                        (size_t) sizeof(fsinfo), D));
+}
+
+
+/*===========================================================================*
+ * fs_stat *
+ *===========================================================================*/
+PUBLIC int fs_stat()
+{
+/* Serve a stat request for inode REQ_INODE_NR: let stat_inode() fill the
+ * requester's buffer behind grant REQ_GRANT.
+ * Returns EINVAL if the inode cannot be obtained, else stat_inode()'s result.
+ */
+  struct inode *ip;
+  int status;
+
+  ip = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (ip == NULL) return(EINVAL);
+
+  status = stat_inode(ip, fs_m_in.m_source,
+                      (cp_grant_id_t) fs_m_in.REQ_GRANT);
+
+  put_inode(ip);                /* drop the temporary reference */
+  return(status);
+}
+
+/*===========================================================================*
+ * fs_statvfs *
+ *===========================================================================*/
+PUBLIC int fs_statvfs()
+{
+/* Serve a statvfs request: describe the mounted file system from its super
+ * block and copy the resulting struct statvfs to offset 0 of grant REQ_GRANT
+ * of the requester. Returns the result of the copy.
+ */
+  struct statvfs st;
+  struct super_block *sp;
+  int r;
+
+  sp = get_super(fs_dev);
+
+  st.f_bsize = sp->s_block_size;
+  st.f_frsize = sp->s_block_size;
+  st.f_blocks = sp->s_blocks_count;
+  st.f_bfree = sp->s_free_blocks_count;
+
+  /* Blocks available to unprivileged users exclude the reserved ones. Once
+   * the free count has dropped below the reserve there are none left; a
+   * plain subtraction would wrap around to a huge unsigned value.
+   */
+  if (sp->s_free_blocks_count > sp->s_r_blocks_count)
+      st.f_bavail = sp->s_free_blocks_count - sp->s_r_blocks_count;
+  else
+      st.f_bavail = 0;
+
+  st.f_files = sp->s_inodes_count;
+  st.f_ffree = sp->s_free_inodes_count;
+  st.f_favail = sp->s_free_inodes_count;
+  st.f_fsid = fs_dev;
+  st.f_flag = (sp->s_rd_only == 1 ? ST_RDONLY : 0);
+  st.f_flag |= ST_NOTRUNC;
+  st.f_namemax = NAME_MAX;
+
+  /* Copy the struct to user space. */
+  r = sys_safecopyto(fs_m_in.m_source, (cp_grant_id_t) fs_m_in.REQ_GRANT,
+                     (vir_bytes) 0, (vir_bytes) &st, (size_t) sizeof(st), D);
+
+  return(r);
+}
--- /dev/null
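+/* This file manages the super block structure.
+ *
+ * The entry points into this file are
+ * get_super: return the in-memory superblock for a device
+ * get_block_size: return the block size used for a device
+ * read_super: read a superblock and its group descriptor table
+ * write_super: write a superblock and its group descriptor table
+ * get_group_desc: return the descriptor of a block group
+ *
+ * Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */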
+
+#include "fs.h"
+#include <string.h>
+#include <stdlib.h>
+#include <minix/com.h>
+#include <minix/u64.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+#include "const.h"
+
+/* Forward declarations of file-local helpers. */
+FORWARD _PROTOTYPE( off_t ext2_max_size, (int block_size) );
+FORWARD _PROTOTYPE( u32_t ext2_count_dirs, (struct super_block *sp) );
+
+FORWARD _PROTOTYPE( void super_copy, (register struct super_block *dest,
+ register struct super_block *source));
+FORWARD _PROTOTYPE( void copy_group_descriptors,
+ (register struct group_desc *dest_array,
+ register struct group_desc *source_array,
+ unsigned int ngroups));
+
+/* Byte offset of the superblock on the device. Set by read_super() and
+ * used again by write_super().
+ */
+PRIVATE off_t super_block_offset;
+
+
+/*===========================================================================*
+ * get_super *
+ *===========================================================================*/
+PUBLIC struct super_block *get_super(
+ dev_t dev /* device number whose super_block is sought */
+)
+{
+/* Return the in-memory superblock. Panics if 'dev' is NO_DEV or is not the
+ * device the superblock belongs to.
+ */
+  if (dev == NO_DEV)
+    panic("request for super_block of NO_DEV");
+
+  /* The device number is passed as an argument, so the format needs a
+   * conversion for it; without one the value never reaches the message.
+   */
+  if (superblock->s_dev != dev)
+    panic("wrong superblock: %d", (int) dev);
+
+  return(superblock);
+}
+
+
+/*===========================================================================*
+ * get_block_size *
+ *===========================================================================*/
+PUBLIC unsigned int get_block_size(dev_t dev)
+{
+/* Return the file system block size (fs_block_size). Asking for the block
+ * size of NO_DEV is treated as a fatal error.
+ */
+  if (NO_DEV == dev) {
+    panic("request for block size of NO_DEV");
+  }
+
+  return(fs_block_size);
+}
+
+/* Buffer holding the group descriptor table exactly as it is read from or
+ * written to the device: filled by read_super(), used as the output buffer
+ * by write_super().
+ */
+PRIVATE struct group_desc *ondisk_group_descs;
+
+/*===========================================================================*
+ * read_super *
+ *===========================================================================*/
+PUBLIC int read_super(sp)
+register struct super_block *sp; /* pointer to a superblock */
+{
+/* Read the superblock of device sp->s_dev into 'sp', validate its basic
+ * geometry, compute the in-memory-only fields and load the group descriptor
+ * table. Returns OK on success and EINVAL when the device cannot be read or
+ * the superblock is not usable. After the on-disk superblock has been copied
+ * in, sp->s_dev is kept at NO_DEV until everything has been accepted.
+ */
+  dev_t dev;
+  int r;
+  /* group descriptors, sp->s_group_desc points to this. */
+  static struct group_desc *group_descs;
+  char *buf;
+  block_t gd_size; /* group descriptors table size in bytes */
+  int gdt_position;
+  unsigned long block_size; /* block size before it is stored in 'sp' */
+
+  STATICINIT(ondisk_superblock, sizeof(struct super_block));
+
+  /* Check the pointers before 'sp' is dereferenced for the first time. */
+  if (!sp || !ondisk_superblock)
+    panic("can't allocate memory for super_block buffers");
+
+  dev = sp->s_dev; /* save device (will be overwritten by copy) */
+  if (dev == NO_DEV)
+    panic("request for super_block of NO_DEV");
+
+  if (opt.block_with_super == 0) {
+    super_block_offset = SUPER_BLOCK_BYTES;
+  } else {
+    /* The block number here uses 1k units */
+    super_block_offset = opt.block_with_super * 1024;
+  }
+
+  r = block_dev_io(MFS_DEV_READ, dev, SELF_E,
+    (char*) ondisk_superblock, cvu64(super_block_offset),
+    _MIN_BLOCK_SIZE);
+  if (r != _MIN_BLOCK_SIZE)
+    return(EINVAL);
+
+  super_copy(sp, ondisk_superblock);
+
+  sp->s_dev = NO_DEV; /* restore later */
+
+  if (sp->s_magic != SUPER_MAGIC)
+    return(EINVAL);
+
+  /* s_log_block_size comes straight from the disk. Only shift by amounts
+   * that cannot overflow; anything larger leaves block_size at 0 and is
+   * rejected by the range check below.
+   */
+  block_size = 0;
+  if (sp->s_log_block_size < 16)
+    block_size = 1024UL << sp->s_log_block_size;
+
+  if (block_size < _MIN_BLOCK_SIZE || block_size > _MAX_BLOCK_SIZE) {
+    /* Report before returning; the message used to sit after the return
+     * statement and could never be printed.
+     */
+    printf("data block size is too small or too large\n");
+    return(EINVAL);
+  }
+  sp->s_block_size = block_size;
+
+  if ((sp->s_block_size % 512) != 0)
+    return(EINVAL);
+
+  if (SUPER_SIZE_D > sp->s_block_size)
+    return(EINVAL);
+
+  /* Variable added for convenience (i_blocks counts 512-byte blocks). */
+  sp->s_sectors_in_block = sp->s_block_size / 512;
+
+  /* TODO: this code is for revision 1 (but bw compatible with 0)
+   * inode size must be a non-zero power of 2 and not larger than the block
+   * size. The power-of-2 test needs its own parentheses: '!=' binds tighter
+   * than '&'. A zero size is refused because it is used as a divisor below.
+   */
+  if (EXT2_INODE_SIZE(sp) == 0
+      || (EXT2_INODE_SIZE(sp) & (EXT2_INODE_SIZE(sp) - 1)) != 0
+      || EXT2_INODE_SIZE(sp) > sp->s_block_size) {
+    printf("superblock->s_inode_size is incorrect...\n");
+    return(EINVAL);
+  }
+
+  sp->s_blocksize_bits = sp->s_log_block_size + 10;
+  sp->s_max_size = ext2_max_size(sp->s_block_size);
+  sp->s_inodes_per_block = sp->s_block_size / EXT2_INODE_SIZE(sp);
+  if (sp->s_inodes_per_block == 0 || sp->s_inodes_per_group == 0) {
+    printf("either inodes_per_block or inodes_per_group count is 0\n");
+    return(EINVAL);
+  }
+
+  sp->s_itb_per_group = sp->s_inodes_per_group / sp->s_inodes_per_block;
+  sp->s_desc_per_block = sp->s_block_size / sizeof(struct group_desc);
+
+  /* The group count below divides by s_blocks_per_group and subtracts
+   * s_first_data_block + 1 from the unsigned block count, so refuse values
+   * that would divide by zero or wrap around.
+   */
+  if (sp->s_blocks_per_group == 0
+      || sp->s_blocks_count <= sp->s_first_data_block) {
+    printf("ext2: blocks_per_group or first_data_block is invalid\n");
+    return(EINVAL);
+  }
+
+  sp->s_groups_count = ((sp->s_blocks_count - sp->s_first_data_block - 1)
+    / sp->s_blocks_per_group) + 1;
+
+  /* ceil(groups_count/desc_per_block) */
+  sp->s_gdb_count = (sp->s_groups_count + sp->s_desc_per_block - 1)
+    / sp->s_desc_per_block;
+
+  gd_size = sp->s_gdb_count * sp->s_block_size;
+
+  buf = 0;
+  STATICINIT(buf, gd_size);
+  group_descs = (struct group_desc *) buf;
+
+  buf = 0;
+  STATICINIT(buf, gd_size);
+  ondisk_group_descs = (struct group_desc *) buf;
+
+  if (!group_descs || !ondisk_group_descs)
+    panic("can't allocate memory for gdt buffer");
+
+  /* s_first_data_block (block number, where superblock is stored)
+   * is 1 for 1Kb blocks and 0 for larger blocks.
+   * For fs with 1024-byte blocks first 1024 bytes (block0) used by MBR,
+   * and block1 stores superblock. When block size is larger, block0 stores
+   * both MBR and superblock, but gdt lives in next block anyway.
+   * If sb=N was specified, then gdt is stored in N+1 block, the block number
+   * here uses 1k units.
+   */
+  if (opt.block_with_super == 0) {
+    gdt_position = (sp->s_first_data_block + 1) * sp->s_block_size;
+  } else {
+    gdt_position = (opt.block_with_super + 1) * 1024;
+  }
+
+  r = block_dev_io(MFS_DEV_READ, dev, SELF_E,
+    (char*) ondisk_group_descs, cvu64(gdt_position),
+    gd_size);
+  if (r != gd_size) {
+    printf("Can not read group descriptors\n");
+    return(EINVAL);
+  }
+
+  /* TODO: check descriptors we just read */
+
+  copy_group_descriptors(group_descs, ondisk_group_descs, sp->s_groups_count);
+  sp->s_group_desc = group_descs;
+
+  /* Make a few basic checks to see if super block looks reasonable. */
+  if (sp->s_inodes_count < 1 || sp->s_blocks_count < 1) {
+    printf("not enough inodes or data blocks, \n");
+    return(EINVAL);
+  }
+
+  sp->s_dirs_counter = ext2_count_dirs(sp);
+
+  /* Start block search from this block.
+   * We skip superblock (1 block), group descriptors blocks (sp->s_gdb_count)
+   * block and inode bitmaps (2 blocks) and inode table.
+   */
+  sp->s_bsearch = sp->s_first_data_block + 1 + sp->s_gdb_count + 2
+    + sp->s_itb_per_group;
+
+  sp->s_igsearch = 0;
+
+  sp->s_dev = dev; /* restore device number */
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * write_super *
+ *===========================================================================*/
+PUBLIC void write_super(sp)
+struct super_block *sp; /* pointer to a superblock */
+{
+/* Write the superblock to the device and, when the group descriptors are
+ * marked dirty, the group descriptor table as well. Panics when the file
+ * system is read-only or 'sp' has no device. Failed writes are only
+ * reported with printf().
+ */
+  int r;
+  block_t gd_size; /* group descriptors table size in bytes */
+  int gdt_position;
+
+  if (sp->s_rd_only)
+    panic("can't write superblock on read-only filesys.");
+
+  if (sp->s_dev == NO_DEV)
+    panic("request to write super_block, but NO_DEV");
+
+  /* Build the on-disk representation first... */
+  super_copy(ondisk_superblock, sp);
+
+  /* ...and write that buffer, not the in-core structure: on a CPU where
+   * super_copy() swaps bytes, 'sp' is not in disk byte order.
+   */
+  r = block_dev_io(MFS_DEV_WRITE, sp->s_dev, SELF_E,
+    (char*) ondisk_superblock, cvu64(super_block_offset), SUPER_SIZE_D);
+  if (r != SUPER_SIZE_D)
+    printf("ext2: Warning, failed to write superblock to the disk!\n");
+
+  if (group_descriptors_dirty == DIRTY) {
+    /* Size and position of the table, computed as in read_super(). */
+    gd_size = sp->s_gdb_count * sp->s_block_size;
+
+    if (opt.block_with_super == 0) {
+      gdt_position = (sp->s_first_data_block + 1) * sp->s_block_size;
+    } else {
+      gdt_position = (opt.block_with_super + 1) * 1024;
+    }
+
+    copy_group_descriptors(ondisk_group_descs, sp->s_group_desc,
+      sp->s_groups_count);
+
+    r = block_dev_io(MFS_DEV_WRITE, sp->s_dev, SELF_E,
+      (char*) ondisk_group_descs, cvu64(gdt_position),
+      gd_size);
+    if (r != gd_size) {
+      printf("Can not write group descriptors\n");
+    }
+    group_descriptors_dirty = CLEAN;
+  }
+}
+
+
+/*===========================================================================*
+ * get_group_desc *
+ *===========================================================================*/
+struct group_desc* get_group_desc(unsigned int bnum)
+{
+/* Return a pointer to the in-memory descriptor of block group 'bnum', or
+ * NULL (after printing a diagnostic) when 'bnum' is not a valid group.
+ */
+  if (bnum < superblock->s_groups_count)
+    return &superblock->s_group_desc[bnum];
+
+  printf("ext2, get_group_desc: wrong bnum (%d) requested\n", bnum);
+  return NULL;
+}
+
+
+PRIVATE u32_t ext2_count_dirs(struct super_block *sp)
+{
+/* Sum used_dirs_count over all block groups of 'sp'. Groups for which no
+ * descriptor can be obtained are skipped.
+ */
+  struct group_desc *gd;
+  u32_t total;
+  int group;
+
+  total = 0;
+  for (group = 0; group < sp->s_groups_count; group++) {
+    gd = get_group_desc(group);
+    if (gd) /* TODO: fail when the descriptor is missing? */
+      total += gd->used_dirs_count;
+  }
+
+  return total;
+}
+
+
+/*===========================================================================*
+ * ext2_max_size *
+ *===========================================================================*/
+/* Return the maximum file size, in bytes, for the given block size.
+ *
+ * Several things affect the maximum file size:
+ * - inode.i_blocks (512-byte blocks) is limited to (2^32 - 1).
+ * - number of addressed direct, single, double and triple indirect blocks.
+ * The number of addressed blocks depends on block_size only, thus unlike
+ * linux (ext2_max_size) we do not calculate anything, but use constants
+ * for the different block sizes. The calculation itself is kept below,
+ * disabled with #if 0.
+ * Note: linux ext2_max_size does the calculation with shifts, not
+ * arithmetic.
+ * (!!!)Note: the constants are tightly tied to EXT2_NDIR_BLOCKS, but I doubt
+ * its value will be changed someday. If it is changed, just recalculate the
+ * constants; the function panics in that case instead of returning stale
+ * values.
+ * Note: there is also a limitation from VFS (to LONG_MAX, i.e. 2GB).
+ */
+PRIVATE off_t ext2_max_size(int block_size)
+{
+ /* 12 is EXT2_NDIR_BLOCKS used in calculations. */
+ if (EXT2_NDIR_BLOCKS != 12)
+ panic("ext2_max_size needs modification!");
+ /* Every supported block size returns LONG_MAX; the trailing comments give
+  * the value the calculation would yield.
+  */
+ switch(block_size) {
+ case 1024: return LONG_MAX; /* actually 17247252480 */
+ case 2048: return LONG_MAX; /* 275415851008 */
+ case 4096: return LONG_MAX; /* 2194719883264 */
+ default: {
+ ext2_debug("ext2_max_size: Unsupported block_size! \
+ Assuming bs is 1024 bytes\n");
+ return 67383296L;
+ }
+ }
+ /* Every switch arm returns; the code below is compiled out and kept only
+  * as a record of how the limits are derived.
+  */
+#if 0
+ long addr_in_block = block_size/4; /* 4 bytes per addr */
+ long sectors_in_block = block_size/512;
+ long long meta_blocks; /* single, double and triple indirect blocks */
+ unsigned long long out_range_s; /* max blocks addressed by inode */
+ unsigned long long max_bytes;
+ unsigned long long upper_limit;
+
+ /* 1 indirect block, 1 + addr_in_block dindirect and 1 + addr_in_block +
+ * + addr_in_block*addr_in_block triple indirect blocks */
+ meta_blocks = 2*addr_in_block + addr_in_block*addr_in_block + 3;
+ out_range_s = EXT2_NDIR_BLOCKS + addr_in_block + addr_in_block * addr_in_block
+ + addr_in_block * addr_in_block * addr_in_block;
+ max_bytes = out_range_s * block_size;
+
+ upper_limit = (1LL << 32) - 1; /* max 512-byte blocks by i_blocks */
+ upper_limit /= sectors_in_block; /* total block_size blocks */
+ upper_limit -= meta_blocks; /* total data blocks */
+ upper_limit *= (long long)block_size; /* max size in bytes */
+
+ if (max_bytes > upper_limit)
+ max_bytes = upper_limit;
+
+ /* Limit s_max_size to LONG_MAX */
+ if (max_bytes > LONG_MAX)
+ max_bytes = LONG_MAX;
+
+ return max_bytes;
+#endif
+}
+
+
+/*===========================================================================*
+ * super_copy *
+ *===========================================================================*/
+PRIVATE void super_copy(
+ register struct super_block *dest,
+ register struct super_block *source
+)
+/* Note: we don't convert stuff, used in ext3. */
+{
+/* Copy the on-disk part of a super_block from 'source' to 'dest', swapping
+ * bytes if need be. The same routine is used in both directions (disk to
+ * memory in read_super(), memory to disk in write_super()).
+ */
+ if (le_CPU) {
+ /* Just use memcpy: the first SUPER_SIZE_D bytes are copied unchanged. */
+ memcpy(dest, source, SUPER_SIZE_D);
+ return;
+ }
+ /* Otherwise convert field by field: conv4 for u32_t, conv2 for u16_t. */
+ dest->s_inodes_count = conv4(le_CPU, source->s_inodes_count);
+ dest->s_blocks_count = conv4(le_CPU, source->s_blocks_count);
+ dest->s_r_blocks_count = conv4(le_CPU, source->s_r_blocks_count);
+ dest->s_free_blocks_count = conv4(le_CPU, source->s_free_blocks_count);
+ dest->s_free_inodes_count = conv4(le_CPU, source->s_free_inodes_count);
+ dest->s_first_data_block = conv4(le_CPU, source->s_first_data_block);
+ dest->s_log_block_size = conv4(le_CPU, source->s_log_block_size);
+ dest->s_log_frag_size = conv4(le_CPU, source->s_log_frag_size);
+ dest->s_blocks_per_group = conv4(le_CPU, source->s_blocks_per_group);
+ dest->s_frags_per_group = conv4(le_CPU, source->s_frags_per_group);
+ dest->s_inodes_per_group = conv4(le_CPU, source->s_inodes_per_group);
+ dest->s_mtime = conv4(le_CPU, source->s_mtime);
+ dest->s_wtime = conv4(le_CPU, source->s_wtime);
+ dest->s_mnt_count = conv2(le_CPU, source->s_mnt_count);
+ dest->s_max_mnt_count = conv2(le_CPU, source->s_max_mnt_count);
+ dest->s_magic = conv2(le_CPU, source->s_magic);
+ dest->s_state = conv2(le_CPU, source->s_state);
+ dest->s_errors = conv2(le_CPU, source->s_errors);
+ dest->s_minor_rev_level = conv2(le_CPU, source->s_minor_rev_level);
+ dest->s_lastcheck = conv4(le_CPU, source->s_lastcheck);
+ dest->s_checkinterval = conv4(le_CPU, source->s_checkinterval);
+ dest->s_creator_os = conv4(le_CPU, source->s_creator_os);
+ dest->s_rev_level = conv4(le_CPU, source->s_rev_level);
+ dest->s_def_resuid = conv2(le_CPU, source->s_def_resuid);
+ dest->s_def_resgid = conv2(le_CPU, source->s_def_resgid);
+ dest->s_first_ino = conv4(le_CPU, source->s_first_ino);
+ dest->s_inode_size = conv2(le_CPU, source->s_inode_size);
+ dest->s_block_group_nr = conv2(le_CPU, source->s_block_group_nr);
+ dest->s_feature_compat = conv4(le_CPU, source->s_feature_compat);
+ dest->s_feature_incompat = conv4(le_CPU, source->s_feature_incompat);
+ dest->s_feature_ro_compat = conv4(le_CPU, source->s_feature_ro_compat);
+ /* Byte arrays have no byte order; copy them verbatim. */
+ memcpy(dest->s_uuid, source->s_uuid, sizeof(dest->s_uuid));
+ memcpy(dest->s_volume_name, source->s_volume_name,
+ sizeof(dest->s_volume_name));
+ memcpy(dest->s_last_mounted, source->s_last_mounted,
+ sizeof(dest->s_last_mounted));
+ dest->s_algorithm_usage_bitmap =
+ conv4(le_CPU, source->s_algorithm_usage_bitmap);
+ /* Single-byte fields: plain assignment, nothing to swap. */
+ dest->s_prealloc_blocks = source->s_prealloc_blocks;
+ dest->s_prealloc_dir_blocks = source->s_prealloc_dir_blocks;
+ dest->s_padding1 = conv2(le_CPU, source->s_padding1);
+ /* The fields after s_padding1 (journal/ext3 part) are not touched here. */
+}
+
+
+/*===========================================================================*
+ * gd_copy *
+ *===========================================================================*/
+PRIVATE void gd_copy(
+ register struct group_desc *dest,
+ register struct group_desc *source
+)
+{
+/* Copy one group descriptor from 'source' to 'dest', swapping bytes if need
+ * be.
+ */
+  if (!le_CPU) {
+    /* Convert the fields one at a time. */
+    dest->block_bitmap = conv4(le_CPU, source->block_bitmap);
+    dest->inode_bitmap = conv4(le_CPU, source->inode_bitmap);
+    dest->inode_table = conv4(le_CPU, source->inode_table);
+    dest->free_blocks_count = conv2(le_CPU, source->free_blocks_count);
+    dest->free_inodes_count = conv2(le_CPU, source->free_inodes_count);
+    dest->used_dirs_count = conv2(le_CPU, source->used_dirs_count);
+    return;
+  }
+
+  /* No conversion needed: copy the whole descriptor at once. */
+  memcpy(dest, source, sizeof(struct group_desc));
+}
+
+
+/*===========================================================================*
+ * copy_group_descriptors *
+ *===========================================================================*/
+PRIVATE void copy_group_descriptors(
+ register struct group_desc *dest_array,
+ register struct group_desc *source_array,
+ unsigned int ngroups
+)
+{
+/* Copy the first 'ngroups' descriptors of 'source_array' to 'dest_array',
+ * one gd_copy() per descriptor, in ascending order.
+ */
+  while (ngroups > 0) {
+    gd_copy(dest_array, source_array);
+    dest_array++;
+    source_array++;
+    ngroups--;
+  }
+}
--- /dev/null
+/* Super block table. The root file system and every mounted file system
+ * has an entry here. The entry holds information about the sizes of the bit
+ * maps and inodes.
+ *
+ * A super_block slot is free if s_dev == NO_DEV.
+ *
+ */
+
+#ifndef EXT2_SUPER_H
+#define EXT2_SUPER_H
+
+/* super_block (on-disk part) was taken from linux/include/linux/ext2_fs.h */
+/* The structure has two parts: the leading fields mirror the on-disk
+ * superblock (their order and sizes must not change), the trailing fields
+ * exist only in memory and are filled in by read_super().
+ * 'superblock' is the in-memory copy in use; 'ondisk_superblock' is the
+ * buffer read_super()/write_super() use for the device I/O.
+ */
+EXTERN struct super_block {
+ u32_t s_inodes_count; /* Inodes count */
+ u32_t s_blocks_count; /* Blocks count */
+ u32_t s_r_blocks_count; /* Reserved blocks count */
+ u32_t s_free_blocks_count; /* Free blocks count */
+ u32_t s_free_inodes_count; /* Free inodes count */
+ u32_t s_first_data_block; /* First Data Block */
+ u32_t s_log_block_size; /* Block size */
+ u32_t s_log_frag_size; /* Fragment size */
+ u32_t s_blocks_per_group; /* # Blocks per group */
+ u32_t s_frags_per_group; /* # Fragments per group */
+ u32_t s_inodes_per_group; /* # Inodes per group */
+ u32_t s_mtime; /* Mount time */
+ u32_t s_wtime; /* Write time */
+ u16_t s_mnt_count; /* Mount count */
+ u16_t s_max_mnt_count; /* Maximal mount count */
+ u16_t s_magic; /* Magic signature */
+ u16_t s_state; /* File system state */
+ u16_t s_errors; /* Behaviour when detecting errors */
+ u16_t s_minor_rev_level; /* minor revision level */
+ u32_t s_lastcheck; /* time of last check */
+ u32_t s_checkinterval; /* max. time between checks */
+ u32_t s_creator_os; /* OS */
+ u32_t s_rev_level; /* Revision level */
+ u16_t s_def_resuid; /* Default uid for reserved blocks */
+ u16_t s_def_resgid; /* Default gid for reserved blocks */
+ /*
+ * These fields are for EXT2_DYNAMIC_REV superblocks only.
+ *
+ * Note: the difference between the compatible feature set and
+ * the incompatible feature set is that if there is a bit set
+ * in the incompatible feature set that the kernel doesn't
+ * know about, it should refuse to mount the filesystem.
+ *
+ * e2fsck's requirements are more strict; if it doesn't know
+ * about a feature in either the compatible or incompatible
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+ u32_t s_first_ino; /* First non-reserved inode */
+ u16_t s_inode_size; /* size of inode structure */
+ u16_t s_block_group_nr; /* block group # of this superblock */
+ u32_t s_feature_compat; /* compatible feature set */
+ u32_t s_feature_incompat; /* incompatible feature set */
+ u32_t s_feature_ro_compat; /* readonly-compatible feature set */
+ u8_t s_uuid[16]; /* 128-bit uuid for volume */
+ char s_volume_name[16]; /* volume name */
+ char s_last_mounted[64]; /* directory where last mounted */
+ u32_t s_algorithm_usage_bitmap; /* For compression */
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT2_COMPAT_PREALLOC flag is on.
+ */
+ u8_t s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
+ u8_t s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
+ u16_t s_padding1;
+ /*
+ * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
+ * (super_copy() does not byte-swap the fields from here on.)
+ */
+ u8_t s_journal_uuid[16]; /* uuid of journal superblock */
+ u32_t s_journal_inum; /* inode number of journal file */
+ u32_t s_journal_dev; /* device number of journal file */
+ u32_t s_last_orphan; /* start of list of inodes to delete */
+ u32_t s_hash_seed[4]; /* HTREE hash seed */
+ u8_t s_def_hash_version; /* Default hash version to use */
+ u8_t s_reserved_char_pad;
+ u16_t s_reserved_word_pad;
+ u32_t s_default_mount_opts;
+ u32_t s_first_meta_bg; /* First metablock block group */
+ u32_t s_reserved[190]; /* Padding to the end of the block */
+
+ /* The following items are only used when the super_block is in memory. */
+ u32_t s_inodes_per_block; /* Number of inodes per block */
+ u32_t s_itb_per_group; /* Number of inode table blocks per group */
+ u32_t s_gdb_count; /* Number of group descriptor blocks */
+ u32_t s_desc_per_block; /* Number of group descriptors per block */
+ u32_t s_groups_count; /* Number of groups in the fs */
+ u8_t s_blocksize_bits; /* Used to calculate offsets
+ * (e.g. inode block),
+ * always s_log_block_size+10.
+ */
+ struct group_desc *s_group_desc; /* Group descriptors read into RAM */
+
+ u16_t s_block_size; /* block size in bytes. */
+ u16_t s_sectors_in_block; /* s_block_size / 512 */
+ u32_t s_max_size; /* maximum file size on this device */
+ dev_t s_dev; /* whose super block is this? */
+ int s_rd_only; /* set to 1 if file sys mounted read only */
+ block_t s_bsearch; /* all data blocks below this block are in use*/
+ int s_igsearch; /* all groups below this one have no free inodes */
+ char s_is_root; /* NOTE(review): presumably set for the root fs - confirm */
+ u32_t s_dirs_counter; /* sum of used_dirs_count over all groups,
+ * computed in read_super()
+ */
+
+} *superblock, *ondisk_superblock;
+
+
+/* Structure of a block group descriptor.
+ * On disk it is stored in little endian format; gd_copy() converts the
+ * first six fields when the CPU byte order differs. The field order and
+ * sizes mirror the on-disk layout and must not change.
+ */
+struct group_desc
+{
+ u32_t block_bitmap; /* Blocks bitmap block */
+ u32_t inode_bitmap; /* Inodes bitmap block */
+ u32_t inode_table; /* Inodes table block */
+ u16_t free_blocks_count; /* Free blocks count */
+ u16_t free_inodes_count; /* Free inodes count */
+ u16_t used_dirs_count; /* Directories count */
+ u16_t pad; /* not converted by gd_copy() */
+ u32_t reserved[3]; /* not converted by gd_copy() */
+};
+
+/* Selectors naming the bitmap an operation works on. */
+#define IMAP 0 /* operating on the inode bit map */
+#define BMAP 1 /* operating on the block bit map */
+#define IMAPD 2 /* operating on the inode bit map, inode is dir */
+
+#endif /* EXT2_SUPER_H */
--- /dev/null
+/* This file contains the table used to map system call numbers onto the
+ * routines that perform them.
+ *
+ * Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#define _TABLE
+
+#include "fs.h"
+#include "inode.h"
+#include "buf.h"
+#include "super.h"
+#include "drivers.h"
+
+/* Handler table indexed by request number: the trailing comment on each
+ * line is the index of that slot, so entries must not be reordered. Unused
+ * numbers map to no_sys(). Some handlers occupy two adjacent slots (11/12,
+ * 13/14, 19/20) - presumably they tell the two requests apart themselves;
+ * verify against the handlers.
+ */
+PUBLIC _PROTOTYPE (int (*fs_call_vec[]), (void) ) = {
+ no_sys, /* 0 not used */
+ no_sys, /* 1 */ /* Was: fs_getnode */
+ fs_putnode, /* 2 */
+ fs_slink, /* 3 */
+ fs_ftrunc, /* 4 */
+ fs_chown, /* 5 */
+ fs_chmod, /* 6 */
+ fs_inhibread, /* 7 */
+ fs_stat, /* 8 */
+ fs_utime, /* 9 */
+ fs_fstatfs, /* 10 */
+ fs_breadwrite, /* 11 */
+ fs_breadwrite, /* 12 */
+ fs_unlink, /* 13 */
+ fs_unlink, /* 14 */
+ fs_unmount, /* 15 */
+ fs_sync, /* 16 */
+ fs_new_driver, /* 17 */
+ fs_flush, /* 18 */
+ fs_readwrite, /* 19 */
+ fs_readwrite, /* 20 */
+ fs_mknod, /* 21 */
+ fs_mkdir, /* 22 */
+ fs_create, /* 23 */
+ fs_link, /* 24 */
+ fs_rename, /* 25 */
+ fs_lookup, /* 26 */
+ fs_mountpoint, /* 27 */
+ fs_readsuper, /* 28 */
+ no_sys, /* 29 */ /* Was: fs_newnode */
+ fs_rdlink, /* 30 */
+ fs_getdents, /* 31 */
+ fs_statvfs, /* 32 */
+};
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <minix/callnr.h>
+#include <minix/com.h>
+#include "inode.h"
+#include <minix/vfsif.h>
+
+
+/*===========================================================================*
+ * fs_utime *
+ *===========================================================================*/
+PUBLIC int fs_utime()
+{
+/* Set the access and modification times of the inode named in the request.
+ * Returns EINVAL if the inode cannot be obtained, EROFS if read_only()
+ * refuses the change, and OK otherwise.
+ */
+  struct inode *node;
+  int status;
+
+  /* Temporarily open the file. */
+  node = get_inode(fs_dev, (ino_t) fs_m_in.REQ_INODE_NR);
+  if (node == NULL)
+    return(EINVAL);
+
+  if (read_only(node) != OK) {
+    /* not even su can touch if R/O */
+    status = EROFS;
+  } else {
+    status = OK;
+    node->i_atime = fs_m_in.REQ_ACTIME;
+    node->i_mtime = fs_m_in.REQ_MODTIME;
+    node->i_update = CTIME; /* discard any stale ATIME and MTIME flags */
+    node->i_dirt = DIRTY;
+  }
+
+  put_inode(node);
+  return(status);
+}
--- /dev/null
+#ifndef EXT2_TYPE_H
+#define EXT2_TYPE_H
+
+/* On the disk all attributes are stored in little endian format.
+ * Inode structure was taken from linux/include/linux/ext2_fs.h.
+ */
+/* On-disk inode. The field order and sizes mirror the disk layout and must
+ * not change. The osd1/osd2 unions hold the OS dependent variants of the
+ * same bytes.
+ */
+typedef struct {
+ u16_t i_mode; /* File mode */
+ u16_t i_uid; /* Low 16 bits of Owner Uid */
+ u32_t i_size; /* Size in bytes */
+ u32_t i_atime; /* Access time */
+ u32_t i_ctime; /* Creation time */
+ u32_t i_mtime; /* Modification time */
+ u32_t i_dtime; /* Deletion Time */
+ u16_t i_gid; /* Low 16 bits of Group Id */
+ u16_t i_links_count; /* Links count */
+ u32_t i_blocks; /* Blocks count */
+ u32_t i_flags; /* File flags */
+ union {
+ struct {
+ u32_t l_i_reserved1;
+ } linux1;
+ struct {
+ u32_t h_i_translator;
+ } hurd1;
+ struct {
+ u32_t m_i_reserved1;
+ } masix1;
+ } osd1; /* OS dependent 1 */
+ u32_t i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
+ u32_t i_generation; /* File version (for NFS) */
+ u32_t i_file_acl; /* File ACL */
+ u32_t i_dir_acl; /* Directory ACL */
+ u32_t i_faddr; /* Fragment address */
+ union {
+ struct {
+ u8_t l_i_frag; /* Fragment number */
+ u8_t l_i_fsize; /* Fragment size */
+ u16_t i_pad1;
+ u16_t l_i_uid_high; /* these 2 fields */
+ u16_t l_i_gid_high; /* were reserved2[0] */
+ u32_t l_i_reserved2;
+ } linux2;
+ struct {
+ u8_t h_i_frag; /* Fragment number */
+ u8_t h_i_fsize; /* Fragment size */
+ u16_t h_i_mode_high;
+ u16_t h_i_uid_high;
+ u16_t h_i_gid_high;
+ u32_t h_i_author;
+ } hurd2;
+ struct {
+ u8_t m_i_frag; /* Fragment number */
+ u8_t m_i_fsize; /* Fragment size */
+ u16_t m_pad1;
+ u32_t m_i_reserved2[2];
+ } masix2;
+ } osd2; /* OS dependent 2 */
+} d_inode;
+
+
+/* Fixed part of an on-disk directory entry.
+ * The name has a variable length, so only its first byte is declared here
+ * (d_name[1]); the remaining bytes follow it in the block.
+ * In revision 0 name_len is u16_t (this is the structure of rev >= 0.5,
+ * where name_len was truncated and its upper 8 bits became file_type).
+ * MIN_DIR_ENTRY_SIZE depends on this structure.
+ */
+struct ext2_disk_dir_desc {
+ u32_t d_ino; /* inode number of the entry */
+ u16_t d_rec_len; /* length of this entry in bytes; added to the entry's
+ * address to reach the next one
+ */
+ u8_t d_name_len; /* length of the name in bytes */
+ u8_t d_file_type; /* file type (see the revision note above) */
+ char d_name[1]; /* first byte of the name; not a complete C string */
+};
+
+/* Helpers for walking the directory entries stored in a block. Every macro
+ * argument is parenthesized so that expressions such as 'p + 1' or
+ * 'cond ? a : b' can be passed safely. 'cur_desc' is evaluated more than
+ * once, so it must not have side effects.
+ */
+/* Current position in block: byte offset of cur_desc from base */
+#define CUR_DISC_DIR_POS(cur_desc, base) ((char*)(cur_desc) - (char*)(base))
+/* Return pointer to the next dentry */
+#define NEXT_DISC_DIR_DESC(cur_desc) ((struct ext2_disk_dir_desc*)\
+ ((char*)(cur_desc) + (cur_desc)->d_rec_len))
+/* Return next dentry's position in block */
+#define NEXT_DISC_DIR_POS(cur_desc, base) ((cur_desc)->d_rec_len +\
+ CUR_DISC_DIR_POS(cur_desc, base))
+
+/* One entry of the block cache: a pointer to the block data plus the
+ * bookkeeping header used to find and recycle the buffer.
+ */
+struct buf {
+ /* Data portion of the buffer. */
+ union fsdata_u *bp;
+
+ /* Header portion of the buffer. */
+ struct buf *b_next; /* used to link all free bufs in a chain */
+ struct buf *b_prev; /* used to link all free bufs the other way */
+ struct buf *b_hash; /* used to link bufs on hash chains */
+ block_t b_blocknr; /* block number of its (minor) device */
+ dev_t b_dev; /* major | minor device where block resides */
+ char b_dirt; /* CLEAN or DIRTY */
+ char b_count; /* number of users of this buffer */
+ unsigned int b_bytes; /* Number of bytes allocated in bp */
+};
+
+
+/* Structure with options affecting global behavior. */
+struct opt {
+ int use_orlov; /* Bool: Use Orlov allocator */
+ /* In ext2 there are reserved blocks, which can be used by super user only or
+ * user specified by resuid/resgid. Right now we can't check what user
+ * requested operation (VFS limitation), so it's a small workaround
+ * (this note describes use_reserved_blocks below).
+ */
+ int mfsalloc; /* Bool: use mfslike allocator */
+ int use_reserved_blocks; /* Bool: small workaround */
+ unsigned int block_with_super;/* Int: where to read super block,
+ * uses 1k units. 0 selects the default
+ * location (see read_super()).
+ */
+ int use_prealloc; /* Bool: use preallocation */
+};
+
+
+#endif /* EXT2_TYPE_H */
--- /dev/null
+/* Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+
+
+/*===========================================================================*
+ * no_sys *
+ *===========================================================================*/
+PUBLIC int no_sys()
+{
+/* Handler for request numbers that have no implementation: log the
+ * offending number and reject the request with EINVAL.
+ */
+  printf("no_sys: invalid call %d\n", req_nr);
+
+  return EINVAL;
+}
+
+
+/*===========================================================================*
+ * conv2 *
+ *===========================================================================*/
+PUBLIC unsigned conv2(norm, w)
+int norm; /* TRUE if no swap, FALSE for byte swap */
+int w; /* promotion of 16-bit word to be swapped */
+{
+/* Return the low 16 bits of 'w', with the two bytes exchanged unless 'norm'
+ * is set.
+ */
+  int to_high, to_low;
+
+  if (norm)
+    return( (unsigned) w & 0xFFFF);
+
+  to_high = (w & BYTE) << 8;	/* low byte moves up */
+  to_low = (w >> 8) & BYTE;	/* high byte moves down */
+  return(to_high | to_low);
+}
+
+
+/*===========================================================================*
+ * conv4 *
+ *===========================================================================*/
+PUBLIC long conv4(norm, x)
+int norm; /* TRUE if no swap, FALSE for byte swap */
+long x; /* 32-bit long to be byte swapped */
+{
+/* Return 'x' unchanged when 'norm' is set; otherwise reverse the order of
+ * its four low bytes by swapping each 16-bit half with conv2() and then
+ * exchanging the halves.
+ */
+  unsigned low_half, high_half;
+  long swapped;
+
+  if (!norm) {
+    low_half = conv2(FALSE, (int) x & 0xFFFF);
+    high_half = conv2(FALSE, (int) (x>>16) & 0xFFFF);
+    swapped = ( (long) low_half <<16) | high_half;
+    return(swapped);
+  }
+
+  return(x); /* byte order was already ok */
+}
+
+
+/*===========================================================================*
+ * clock_time *
+ *===========================================================================*/
+PUBLIC time_t clock_time()
+{
+/* Return the current time in seconds since 1.1.1970, computed as the boot
+ * time plus the uptime converted from clock ticks to seconds. Leap seconds
+ * are ignored. Panics if the uptime cannot be obtained.
+ */
+  clock_t ticks;
+  time_t booted;
+  int status;
+
+  status = getuptime2(&ticks, &booted);
+  if (status != OK)
+    panic("clock_time: getuptme2 failed: %d", status);
+
+  return( (time_t) (booted + (ticks/sys_hz())));
+}
+
+
+/*===========================================================================*
+ * min *
+ *===========================================================================*/
+PUBLIC int min(unsigned int l, unsigned int r)
+{
+/* Return the smaller of the two unsigned arguments ('l' when they are
+ * equal), converted to int.
+ */
+  return (r >= l) ? l : r;
+}
+
+
+/*===========================================================================*
+ * mfs_nul *
+ *===========================================================================*/
+PUBLIC void mfs_nul_f(char *file, int line, char *str, unsigned int len,
+ unsigned int maxlen)
+{
+/* Debugging aid: print a warning when the string 'str' of 'len' bytes
+ * (shorter than its 'maxlen'-byte buffer) does not end in a null byte.
+ * 'file' and 'line' identify the caller in the message. Nothing is checked
+ * when len >= maxlen.
+ */
+  if (len >= maxlen)
+    return;
+
+  /* A zero-length string has no last byte: report it without reading
+   * str[len-1], which would index before the start of the buffer.
+   */
+  if (len == 0 || str[len-1] != '\0') {
+    printf("ext2 %s:%d string (length %d, maxlen %d) not null-terminated\n",
+      file, line, len, maxlen);
+  }
+}
+
+#define MYASSERT(c) if(!(c)) { printf("ext2:%s:%d: sanity check: %s failed\n", \
+ file, line, #c); panic("sanity check " #c " failed: %d", __LINE__); }
+
+
+/*===========================================================================*
+ * sanitycheck *
+ *===========================================================================*/
+PUBLIC void sanitycheck(char *file, int line)
+{
+/* Check a few global invariants and panic (via MYASSERT) when one does not
+ * hold. 'file' and 'line' identify the caller and are used by MYASSERT.
+ */
+  MYASSERT(SELF_E > 0);
+
+  if(superblock->s_dev == NO_DEV) {
+    /* No device yet: the block size must still be the minimum one. */
+    MYASSERT(_MIN_BLOCK_SIZE == fs_block_size);
+    return;
+  }
+
+  /* A device is attached: the globals must agree with the superblock. */
+  MYASSERT(superblock->s_dev == fs_dev);
+  MYASSERT(superblock->s_block_size == fs_block_size);
+}
+
+/*===========================================================================*
+ * ansi_strcmp *
+ *===========================================================================*/
+PUBLIC int ansi_strcmp(register const char* ansi_s, register const char *s2,
+ register size_t ansi_s_length)
+{
+/* Compare the counted string ansi_s (ansi_s_length bytes, not
+ * null-terminated) with the C string s2.
+ * Returns 0 when they are equal and -1 otherwise. Note that a zero
+ * ansi_s_length yields 0 whatever s2 contains.
+ */
+  size_t left;
+
+  if (ansi_s_length == 0)
+    return 0;
+
+  for (left = ansi_s_length; left > 0; left--) {
+    /* s2 ended before the counted string did. */
+    if (*s2 == '\0')
+      return -1;
+    if (*ansi_s++ != *s2++)
+      return -1;
+  }
+
+  /* All counted bytes matched; equal only if s2 ends here as well. */
+  return (*s2 == '\0') ? 0 : -1;
+}
+
+
+/*===========================================================================*
+ * setbit *
+ *===========================================================================*/
+PUBLIC bit_t setbit(bitchunk_t *bitmap, bit_t max_bits, unsigned int word)
+{
+  /* Find the first clear bit at or after chunk 'word' of 'bitmap', set it
+   * and return its number counted from the start of the map. Bits numbered
+   * max_bits or higher are never allocated. Returns -1 if nothing was found.
+   */
+  bitchunk_t *chunk, *limit;
+  bitchunk_t value;
+  bit_t pos, found;
+
+  /* TODO: do we need to add 1? I saw a situation, when it was
+   * required, and since we check bit number with max_bits it
+   * should be safe.
+   */
+  limit = &bitmap[FS_BITMAP_CHUNKS(max_bits >> 3)];
+  chunk = &bitmap[word];
+
+  while (chunk < limit) {
+    /* Only look closer at chunks that still have a clear bit. */
+    if (*chunk != (bitchunk_t) ~0) {
+      value = (int) *chunk;
+
+      /* Position of the lowest clear bit in this chunk. */
+      pos = 0;
+      while ((value & (1 << pos)) != 0)
+        ++pos;
+
+      /* Bit number from the start of the bit map. */
+      found = (chunk - &bitmap[0]) * FS_BITCHUNK_BITS + pos;
+
+      /* Don't allocate bits beyond the end of the map. */
+      if (found < max_bits) {
+        value |= 1 << pos;
+        *chunk = (int) value;
+        return found;
+      }
+    }
+    chunk++;
+  }
+
+  return -1;
+}
+
+
+/*===========================================================================*
+ * setbyte *
+ *===========================================================================*/
+PUBLIC bit_t setbyte(bitchunk_t *bitmap, bit_t max_bits, unsigned int word)
+{
+  /* Find the first completely free byte (all CHAR_BIT bits clear) in
+   * 'bitmap', set all of its bits and return the number of its first bit
+   * counted from the start of the bitmap. Return -1 on failure.
+   *
+   * NOTE: 'word' is not used; the scan always starts at the beginning of
+   * the bitmap. The parameter is kept so the signature matches setbit().
+   */
+  unsigned char *wptr, *wlim;
+  bit_t b = -1;
+
+  wptr = (unsigned char*) &bitmap[0];
+  /* TODO: do we need to add 1? I saw a situation, when it was
+   * required, and since we check bit number with max_bits it
+   * should be safe.
+   */
+  wlim = &wptr[(max_bits >> 3)];
+
+  /* Iterate over the bytes in block. */
+  for ( ; wptr < wlim; wptr++) {
+        /* Is it a free byte? */
+        if (*wptr != 0)
+                continue;
+
+        /* Bit number from the start of the bit map. */
+        b = (wptr - (unsigned char*) &bitmap[0]) * CHAR_BIT;
+
+        /* Don't allocate bits beyond the end of the map. The byte covers
+         * bits b .. b + CHAR_BIT - 1, so it fits as long as
+         * b + CHAR_BIT <= max_bits. (The previous '>=' test wrongly
+         * rejected the last whole byte of the map.)
+         */
+        if (b + CHAR_BIT > max_bits) {
+                b = -1;
+                continue;
+        }
+
+        /* Allocate byte number. */
+        *wptr = (unsigned char) ~0;
+        break;
+  }
+  return b;
+}
+
+
+/*===========================================================================*
+ * unsetbit *
+ *===========================================================================*/
+PUBLIC int unsetbit(bitchunk_t *bitmap, bit_t bit)
+{
+  /* Clear bit number 'bit' (counted from the start of 'bitmap').
+   * Return 0 on success, or -1 if the bit was already clear, in which
+   * case the bitmap is left untouched.
+   */
+  unsigned int word;		/* index of the chunk holding the bit */
+  bitchunk_t k, mask;
+
+  word = bit / FS_BITCHUNK_BITS;
+  bit = bit % FS_BITCHUNK_BITS; /* index in word */
+
+  /* Build the mask from a bitchunk_t so that selecting the top bit of the
+   * chunk is not a signed-shift overflow.
+   */
+  mask = (bitchunk_t) 1 << bit;
+
+  k = bitmap[word];
+  if (!(k & mask))
+        return -1;
+
+  bitmap[word] = k & ~mask;
+  return 0;
+}
--- /dev/null
+/* This file is the counterpart of "read.c". It contains the code for writing
+ * insofar as this is not contained in fs_readwrite().
+ *
+ * The entry points into this file are
+ * write_map: write a new block into an inode
+ * new_block: acquire a new block
+ * zero_block: overwrite a block with zeroes
+ *
+ * Created (MFS based):
+ * February 2010 (Evgeniy Ivanov)
+ */
+
+#include "fs.h"
+#include <string.h>
+#include "buf.h"
+#include "inode.h"
+#include "super.h"
+
+FORWARD _PROTOTYPE( void wr_indir, (struct buf *bp, int index, block_t block) );
+FORWARD _PROTOTYPE( int empty_indir, (struct buf *, struct super_block *) );
+
+/*===========================================================================*
+ * write_map *
+ *===========================================================================*/
+PUBLIC int write_map(rip, position, new_block, op)
+struct inode *rip;			/* pointer to inode to be changed */
+off_t position;				/* file address to be mapped */
+block_t new_block;			/* block # to be inserted */
+int op;					/* special actions */
+{
+/* Write a new block into an inode.
+ *
+ * If op includes WMAP_FREE, free the block corresponding to that position
+ * in the inode ('new_block' is ignored then). Also free the indirect block
+ * if that was the last entry in the indirect block.
+ * Also free the double/triple indirect block if that was the last entry in
+ * the double/triple indirect block.
+ * It's the only function which should take care about rip->i_blocks counter.
+ *
+ * Returns OK on success, EFBIG if 'position' lies beyond what triple
+ * indirection can address, or ENOSPC if an indirect block could not be
+ * allocated.
+ *
+ * NOTE(review): the block-size derived limits below are computed once, on
+ * the first call, and cached in statics. This assumes every inode handled
+ * by this process uses the same block size - confirm.
+ */
+  int index1 = 0, index2 = 0, index3 = 0; /* indexes in single..triple
+                                           * indirect blocks */
+  long excess, block_pos;
+  char new_ind = 0, new_dbl = 0, new_triple = 0;
+  int single = 0, triple = 0;
+  register block_t old_block;
+  /* Start out as "no block" so that the clean-up tests at the end never
+   * read an indeterminate value on paths that do not use a given level.
+   */
+  register block_t b1 = NO_BLOCK, b2 = NO_BLOCK, b3 = NO_BLOCK;
+  struct buf *bp = NULL,
+             *bp_dindir = NULL,
+             *bp_tindir = NULL;
+  static char first_time = TRUE;
+  static long addr_in_block;
+  static long addr_in_block2;
+  static long doub_ind_s;
+  static long triple_ind_s;
+  static long out_range_s;
+
+  if (first_time) {
+        addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES;
+        addr_in_block2 = addr_in_block * addr_in_block;
+        doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block;
+        triple_ind_s = doub_ind_s + addr_in_block2;
+        out_range_s = triple_ind_s + addr_in_block2 * addr_in_block;
+        first_time = FALSE;
+  }
+
+  block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */
+  rip->i_dirt = DIRTY;		/* inode will be changed */
+
+  /* Is 'position' to be found in the inode itself? */
+  if (block_pos < EXT2_NDIR_BLOCKS) {
+        if (op & WMAP_FREE) {
+                /* Freeing: 'new_block' is ignored. A hole (NO_BLOCK) has
+                 * nothing to free and must not change i_blocks.
+                 */
+                if (rip->i_block[block_pos] != NO_BLOCK) {
+                        free_block(rip->i_sp, rip->i_block[block_pos]);
+                        rip->i_block[block_pos] = NO_BLOCK;
+                        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
+                }
+        } else {
+                rip->i_block[block_pos] = new_block;
+                rip->i_blocks += rip->i_sp->s_sectors_in_block;
+        }
+        return(OK);
+  }
+
+  /* It is not in the inode, so it must be single, double or triple indirect */
+  if (block_pos < doub_ind_s) {
+        b1 = rip->i_block[EXT2_NDIR_BLOCKS]; /* addr of single indirect block */
+        index1 = block_pos - EXT2_NDIR_BLOCKS;
+        single = TRUE;
+  } else if (block_pos >= out_range_s) { /* TODO: do we need it? */
+        return(EFBIG);
+  } else {
+        /* double or triple indirect block. At first if it's triple,
+         * find double indirect block.
+         */
+        excess = block_pos - doub_ind_s;
+        b2 = rip->i_block[EXT2_DIND_BLOCK];
+        if (block_pos >= triple_ind_s) {
+                b3 = rip->i_block[EXT2_TIND_BLOCK];
+                if (b3 == NO_BLOCK && !(op & WMAP_FREE)) {
+                        /* Create triple indirect block. */
+                        if ( (b3 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) {
+                                ext2_debug("failed to allocate tblock near %d\n", rip->i_block[0]);
+                                return(ENOSPC);
+                        }
+                        rip->i_block[EXT2_TIND_BLOCK] = b3;
+                        rip->i_blocks += rip->i_sp->s_sectors_in_block;
+                        new_triple = TRUE;
+                }
+                /* 'b3' is block number for triple indirect block, either old
+                 * or newly created.
+                 * If there wasn't one and WMAP_FREE is set, 'b3' is NO_BLOCK.
+                 */
+                if (b3 == NO_BLOCK) {
+                        /* WMAP_FREE and no triple indirect block - then no
+                         * double and single indirect blocks either.
+                         */
+                        b1 = b2 = NO_BLOCK;
+                } else {
+                        bp_tindir = get_block(rip->i_dev, b3, (new_triple ? NO_READ : NORMAL));
+                        if (new_triple) {
+                                zero_block(bp_tindir);
+                                bp_tindir->b_dirt = DIRTY;
+                        }
+                        excess = block_pos - triple_ind_s;
+                        index3 = excess / addr_in_block2;
+                        b2 = rd_indir(bp_tindir, index3);
+                        excess = excess % addr_in_block2;
+                }
+                triple = TRUE;
+        }
+
+        if (b2 == NO_BLOCK && !(op & WMAP_FREE)) {
+                /* Create the double indirect block. */
+                if ( (b2 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) {
+                        /* Release triple ind blk. */
+                        put_block(bp_tindir, INDIRECT_BLOCK);
+                        ext2_debug("failed to allocate dblock near %d\n", rip->i_block[0]);
+                        return(ENOSPC);
+                }
+                if (triple) {
+                        wr_indir(bp_tindir, index3, b2);  /* update triple indir */
+                        bp_tindir->b_dirt = DIRTY;
+                } else {
+                        rip->i_block[EXT2_DIND_BLOCK] = b2;
+                }
+                rip->i_blocks += rip->i_sp->s_sectors_in_block;
+                new_dbl = TRUE; /* set flag for later */
+        }
+
+        /* 'b2' is block number for double indirect block, either old
+         * or newly created.
+         * If there wasn't one and WMAP_FREE is set, 'b2' is NO_BLOCK.
+         */
+        if (b2 == NO_BLOCK) {
+                /* WMAP_FREE and no double indirect block - then no
+                 * single indirect block either.
+                 */
+                b1 = NO_BLOCK;
+        } else {
+                bp_dindir = get_block(rip->i_dev, b2, (new_dbl ? NO_READ : NORMAL));
+                if (new_dbl) {
+                        zero_block(bp_dindir);
+                        bp_dindir->b_dirt = DIRTY;
+                }
+                index2 = excess / addr_in_block;
+                b1 = rd_indir(bp_dindir, index2);
+                index1 = excess % addr_in_block;
+        }
+        single = FALSE;
+  }
+
+  /* b1 is now single indirect block or NO_BLOCK; 'index' is index.
+   * We have to create the indirect block if it's NO_BLOCK. Unless
+   * we're freeing (WMAP_FREE).
+   */
+  if (b1 == NO_BLOCK && !(op & WMAP_FREE)) {
+        if ( (b1 = alloc_block(rip, rip->i_bsearch) ) == NO_BLOCK) {
+                /* Release dbl and triple indirect blks. Either pointer may
+                 * still be NULL here; the code relies on put_block()
+                 * tolerating that.
+                 */
+                put_block(bp_dindir, INDIRECT_BLOCK);
+                put_block(bp_tindir, INDIRECT_BLOCK);
+                ext2_debug("failed to allocate dblock near %d\n", rip->i_block[0]);
+                return(ENOSPC);
+        }
+        if (single) {
+                rip->i_block[EXT2_NDIR_BLOCKS] = b1; /* update inode single indirect */
+        } else {
+                wr_indir(bp_dindir, index2, b1);  /* update dbl indir */
+                bp_dindir->b_dirt = DIRTY;
+        }
+        rip->i_blocks += rip->i_sp->s_sectors_in_block;
+        new_ind = TRUE;
+  }
+
+  /* b1 is indirect block's number (unless it's NO_BLOCK when we're
+   * freeing).
+   */
+  if (b1 != NO_BLOCK) {
+        bp = get_block(rip->i_dev, b1, (new_ind ? NO_READ : NORMAL) );
+        if (new_ind)
+                zero_block(bp);
+        if (op & WMAP_FREE) {
+                if ((old_block = rd_indir(bp, index1)) != NO_BLOCK) {
+                        free_block(rip->i_sp, old_block);
+                        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
+                        wr_indir(bp, index1, NO_BLOCK);
+                }
+
+                /* Last reference in the indirect block gone? Then
+                 * free the indirect block.
+                 */
+                if (empty_indir(bp, rip->i_sp)) {
+                        free_block(rip->i_sp, b1);
+                        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
+                        b1 = NO_BLOCK;
+                        /* Update the reference to the indirect block to
+                         * NO_BLOCK - in the double indirect block if there
+                         * is one, otherwise in the inode directly.
+                         */
+                        if (single) {
+                                rip->i_block[EXT2_NDIR_BLOCKS] = b1;
+                        } else {
+                                wr_indir(bp_dindir, index2, b1);
+                                bp_dindir->b_dirt = DIRTY;
+                        }
+                }
+        } else {
+                wr_indir(bp, index1, new_block);
+                rip->i_blocks += rip->i_sp->s_sectors_in_block;
+        }
+        /* b1 equals NO_BLOCK only when we are freeing up the indirect block. */
+        bp->b_dirt = (b1 == NO_BLOCK) ? CLEAN : DIRTY;
+        put_block(bp, INDIRECT_BLOCK);
+  }
+
+  /* If the single indirect block isn't there (or was just freed),
+   * see if we have to keep the double indirect block, if any.
+   * If we don't have to keep it, don't bother writing it out.
+   */
+  if (b1 == NO_BLOCK && !single && b2 != NO_BLOCK &&
+     empty_indir(bp_dindir, rip->i_sp)) {
+        bp_dindir->b_dirt = CLEAN;
+        free_block(rip->i_sp, b2);
+        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
+        b2 = NO_BLOCK;
+        if (triple) {
+                wr_indir(bp_tindir, index3, b2);  /* update triple indir */
+                bp_tindir->b_dirt = DIRTY;
+        } else {
+                rip->i_block[EXT2_DIND_BLOCK] = b2;
+        }
+  }
+  /* If the double indirect block isn't there (or was just freed),
+   * see if we have to keep the triple indirect block, if any.
+   * If we don't have to keep it, don't bother writing it out.
+   */
+  if (triple && b2 == NO_BLOCK && b3 != NO_BLOCK &&
+     empty_indir(bp_tindir, rip->i_sp)) {
+        bp_tindir->b_dirt = CLEAN;
+        free_block(rip->i_sp, b3);
+        rip->i_blocks -= rip->i_sp->s_sectors_in_block;
+        rip->i_block[EXT2_TIND_BLOCK] = NO_BLOCK;
+  }
+
+  /* Either pointer may be NULL if that level was not used on this path. */
+  put_block(bp_dindir, INDIRECT_BLOCK);	/* release double indirect blk */
+  put_block(bp_tindir, INDIRECT_BLOCK);	/* release triple indirect blk */
+
+  return(OK);
+}
+
+
+/*===========================================================================*
+ * wr_indir *
+ *===========================================================================*/
+PRIVATE void wr_indir(bp, index, block)
+struct buf *bp;			/* buffer holding the indirect block */
+int index;			/* entry within the indirect block */
+block_t block;			/* block number to store in that entry */
+{
+/* Store 'block' in entry 'index' of the indirect block held in 'bp'.
+ * The value goes through conv4(le_CPU, ...) before it is stored.
+ * A NULL buffer is a fatal error. This function does not touch b_dirt.
+ */
+  if (!bp) {
+        panic("wr_indir() on NULL");
+  }
+
+  bp->b_ind[index] = conv4(le_CPU, block);
+}
+
+
+/*===========================================================================*
+ * empty_indir *
+ *===========================================================================*/
+PRIVATE int empty_indir(bp, sb)
+struct buf *bp;			/* buffer holding the indirect block */
+struct super_block *sb;		/* superblock supplying the block size */
+{
+/* Tell whether an indirect block is unused: return 1 if every entry of
+ * the indirect block in 'bp' equals NO_BLOCK, 0 as soon as one does not.
+ * The number of entries is the block size divided by 4 bytes per address.
+ */
+  long nr_entries = sb->s_block_size/4;
+  int slot = 0;
+
+  while (slot < nr_entries) {
+        if (bp->b_ind[slot] != NO_BLOCK) {
+                return(0);	/* found a live entry */
+        }
+        slot++;
+  }
+
+  return(1);
+}
+
+/*===========================================================================*
+ * new_block *
+ *===========================================================================*/
+PUBLIC struct buf *new_block(rip, position)
+register struct inode *rip;	/* inode that needs the block */
+off_t position;			/* file offset the block must cover */
+{
+/* Return a zeroed buffer for the block that maps 'position' in 'rip'.
+ *
+ * If read_map() finds no block at that position, one is allocated and
+ * entered into the inode with write_map(). On failure NULL is returned
+ * and err_code is set (ENOSPC when allocation fails, otherwise the
+ * write_map() error).
+ *
+ * rip->i_last_pos_bl_alloc records the position of the last allocation;
+ * zero means "nothing allocated yet", so position 0 is recorded as 1.
+ */
+  register struct buf *bp;
+  int rc;
+  block_t blk;
+  block_t goal;
+  off_t position_diff;
+
+  blk = read_map(rip, position);
+
+  if (blk == NO_BLOCK) {
+        /* No block there yet. Pick an allocation goal: if this position
+         * follows the last allocated one, aim right after i_bsearch.
+         */
+        goal = NO_BLOCK;
+        if (rip->i_last_pos_bl_alloc != 0) {
+                position_diff = position - rip->i_last_pos_bl_alloc;
+                if (rip->i_bsearch == 0) {
+                        /* Should never happen, but not critical */
+                        ext2_debug("warning, i_bsearch is 0, while\
+ i_last_pos_bl_alloc is not!");
+                }
+                if (position_diff <= rip->i_sp->s_block_size) {
+                        goal = rip->i_bsearch + 1;
+                } else {
+                        /* Non-sequential write: turn preallocation off
+                         * for this inode and drop what was reserved.
+                         */
+                        rip->i_preallocation = 0;
+                        discard_preallocated_blocks(rip);
+                }
+        }
+
+        blk = alloc_block(rip, goal);
+        if (blk == NO_BLOCK) {
+                err_code = ENOSPC;
+                return(NULL);
+        }
+
+        rc = write_map(rip, position, blk, 0);
+        if (rc != OK) {
+                /* Could not enter the block into the inode; give it back. */
+                free_block(rip->i_sp, blk);
+                err_code = rc;
+                ext2_debug("write_map failed\n");
+                return(NULL);
+        }
+
+        /* Zero is reserved for "first usage", so bump position 0 to 1. */
+        rip->i_last_pos_bl_alloc = position;
+        if (position == 0)
+                rip->i_last_pos_bl_alloc++;
+  }
+
+  /* The old contents are irrelevant: fetch without reading and clear. */
+  bp = get_block(rip->i_dev, blk, NO_READ);
+  zero_block(bp);
+  return(bp);
+}
+
+/*===========================================================================*
+ * zero_block *
+ *===========================================================================*/
+PUBLIC void zero_block(bp)
+register struct buf *bp;	/* buffer whose data is to be cleared */
+{
+/* Fill the data area of 'bp' (b_bytes bytes) with zeroes and mark the
+ * buffer DIRTY. The buffer must have a positive size and a non-NULL
+ * bp->bp; both are asserted.
+ */
+  size_t nbytes;
+
+  ASSERT(bp->b_bytes > 0);
+  ASSERT(bp->bp);
+
+  nbytes = (size_t) bp->b_bytes;
+  memset(bp->b_data, 0, nbytes);
+  bp->b_dirt = DIRTY;
+}