Commit 59616489 authored by Siddharth Aggarwal

Implemented the following for the checkpointer on BSD 4.10

- Crash recovery: saves in-memory information to disk during a crash, and
  checks the disk for previously stored checkpoint information on boot-up.

- Read-only mounted checkpoints: once mounted, checkpoints #1, #2, etc. are
  accessible as /dev/chk1, /dev/chk2, etc.
parent 726c49bd
#include "trie.h"
#include "block_alloc.h"
#include "shd.h"
int bPrintBlocks;
/*
 * Store `size` in the global shadow_size, which the allocator's
 * wrap-around arithmetic reads.
 */
void SetShadowSize (long size)
{
    shadow_size = size;
}
/*
 * Store `start` in the global shadow_start, which is used as the base
 * when a freed range wraps past the end of the shadow area.
 */
void SetShadowStart (long start)
{
    shadow_start = start;
}
/*
 * Initialise the block allocator.
 *
 *   method      - reclaim policy, stored in reclaim_method
 *   range_start - first block of the range handed to the allocator
 *   range_size  - length of that range
 *
 * The current free range is set to start at range_start and end at
 * range_start + range_size, with the allocation pointer at range_start.
 * The same start/size pair is recorded as the shadow start and size.
 */
void InitBlockAllocator (int method, long range_start, long range_size)
{
    const long range_end = range_start + range_size;

    reclaim_method = method;
    bPrintBlocks = 0;

    block_range.start = range_start;
    block_range.end = range_end;
    block_range.ptr = range_start;
    printf ("Initialized block allocator to start = %ld, end = %ld\n", block_range.ptr, block_range.end);

    SetShadowStart (range_start);
    SetShadowSize (range_size);
}
/*
 * Placeholder for checkpoint deletion. Currently does nothing.
 *
 *   version - checkpoint to delete (ignored for now)
 */
void DeleteCheckpoint (int version)
{
    /* TODO: write code to merge checkpoints */
    (void) version;
}
/*
 * Placeholder. The intended behaviour is to iterate through the trie,
 * adjust each block number by `size`, and copy the blocks to their new
 * locations. None of that is implemented yet, so the function is a no-op.
 *
 *   trie - trie to relocate (ignored for now; may be 0)
 *   size - block-number adjustment (ignored for now)
 */
void CopyTree (Trie * trie, long size)
{
    /* The old body only declared an unused iterator inside a null check. */
    (void) trie;
    (void) size;
}
/*
 * Placeholder. Intended to return the largest block number allocated to
 * a given checkpoint; for now it always returns 0.
 */
long GetLastBlock ()
{
    const long last_block = 0;

    return last_block;
}
/*
 * Return the amount of space left in the current free range, i.e. the
 * distance from the allocation pointer (block_range.ptr) forward to
 * block_range.end.
 *
 * When end is numerically below ptr the range is treated as wrapping
 * around the shadow area, so shadow_size is added to keep the distance
 * non-negative. The earlier expression had ptr and end swapped in this
 * branch, which yielded a value larger than shadow_size itself.
 *
 * The unreachable error branch and duplicate return that used to follow
 * the if/else have been removed.
 */
long CurrentFreeBlockSize ()
{
    if (block_range.end < block_range.ptr)
        return (block_range.end + shadow_size - block_range.ptr);

    return (block_range.end - block_range.ptr);
}
int BlockFree (long start, long size)
......@@ -56,31 +43,42 @@ int BlockFree (long start, long size)
/* called only for EXPLICIT_CKPT_DELETE. Shouldn't be called for
LAST_CKPT_AUTO_DELETE at all */
struct FreeSpaceQueue* temp = head;
long end = (start + size) % shadow_size;
/*printf ("Free space queue before deleting block = \n");
PrintFreeSpaceQueue ();
printf ("Freeing blocks %ld to %ld\n", start, end);*/
struct FreeSpaceQueue* temp = shd_fs_head;
long end;
long next_to_end;
long prev_to_start;
if ((start + size) > shadow_size)
end = (start + size) - shadow_size + shadow_start;
else
end = start + size;
if ((end == shadow_size) || (start == shadow_start))
{
AddFreeSpaceToQueue (start, end);
return 0;
}
next_to_end = end + 1;
prev_to_start = start - 1;
while (temp != 0)
{
if ((temp->start == (end + 1) % shadow_size)
|| (temp->start == end))
if (temp->start == next_to_end)
{
temp->start = start;
/*printf ("Merging ranges (%ld, %ld) and (%ld, %ld)\n", start, end, temp->start, temp->end);*/
temp->size += size;
return 0;
}
else
if (((temp->end + 1) % shadow_size == start)
|| (temp->end == start))
if (temp->end == prev_to_start)
{
temp->end = end;
/*printf ("Merging ranges (%ld, %ld) and (%ld, %ld)\n", start, end, temp->start, temp->end);*/
temp->size += size;
return 0;
}
temp = temp->next;
}
AddFreeSpaceToQueue (start, end);
/*printf ("Free space queue after deleting block = \n");
PrintFreeSpaceQueue ();*/
return 0;
}
......@@ -88,97 +86,99 @@ long BlockAlloc (int size)
{
struct FreeSpaceQueue* temp = 0;
long retVal;
/*printf ("Free space queue before allocating block = \n");
PrintFreeSpaceQueue ();*/
switch (reclaim_method)
{
case LAST_CKPT_AUTO_DELETE:
while (CurrentFreeBlockSize () < size)
{
DeleteCheckpoint (first_checkpoint);
block_range.end = GetLastBlock (first_checkpoint);
/* Delete the corresponding trie and merge changes with
next checkpoint */
first_checkpoint++;
}
break;
case EXPLICIT_CKPT_DELETE:
while (CurrentFreeBlockSize () < size)
{
if (-1 == MergeWithNextFreeBlockRange ( CurrentFreeBlockSize() ))
/*if (bPrintBlocks)
{
printf ("Free space queue before allocating block = \n");
PrintFreeSpaceQueue ();
}
printf ("CurrentFreeBlockSize = %ld, Requested size = %ld\n", CurrentFreeBlockSize(), size);*/
if (CurrentFreeBlockSize () < size) {
switch (reclaim_method)
{
case LAST_CKPT_AUTO_DELETE:
/* Not implemented yet */
break;
case EXPLICIT_CKPT_DELETE:
if (SHD_DISK_FULL == MergeWithNextFreeBlockRange (size))
{
printf ("Error! No more free space on disk\n");
return -1;
return SHD_DISK_FULL;
}
}
break;
break;
}
}
retVal = block_range.ptr;
block_range.ptr += size;
/*printf ("Allocating %d blocks starting %d\n", size, retVal);
printf ("Free space queue after allocating block = \n");
PrintFreeSpaceQueue ();*/
/*if (bPrintBlocks)
{
printf ("Allocating %d blocks starting %d\n", size, retVal);
PrintFreeSpaceQueue ();
}*/
return retVal;
}
long MergeWithNextFreeBlockRange (long size)
{
int done = 0;
Trie * trie;
struct FreeSpaceQueue* temp = GetNextFreeSpaceFromQueue ();
if (0 == temp)
return -1;
while (1)
struct FreeSpaceQueue* shd_temp, *shd_prev;
shd_temp = shd_prev = shd_fs_head;
while (shd_temp != 0)
{
CopyTree (trie, size);
if ((GetLastBlock() + size) % shadow_size >= temp->start)
if (shd_temp->size >= size)
{
block_range.start = block_range.ptr = GetLastBlock();
block_range.end = temp->end;
DeleteFreeSpace (temp);
break;
if (CurrentFreeBlockSize() > 0)
AddFreeSpaceToQueue (block_range.ptr, block_range.end);
/*printf ("Old range = (%ld, %ld) New range = (%ld, %ld)\n", block_range.ptr, block_range.end, shd_temp->start, shd_temp->end);*/
block_range.start = block_range.ptr = shd_temp->start;
block_range.end = shd_temp->end;
/* Delete shd_temp from queue */
if (shd_temp == shd_fs_head)
{
shd_fs_head = shd_fs_head->next;
free (shd_temp, M_DEVBUF);
}
else
{
shd_prev->next = shd_temp->next;
free (shd_temp, M_DEVBUF);
}
return 0;
}
shd_prev = shd_temp;
shd_temp = shd_temp->next;
}
return (size + temp->end - temp->start);
return SHD_DISK_FULL;
}
/*
 * Reset the free-space bookkeeping to "empty" by clearing the head/tail
 * pointers and shd_fs_head.
 *
 * Returns 0, matching the other queue helpers. Previously control fell
 * off the end of this int function, leaving the result undefined for any
 * caller that reads it.
 */
int initFreeSpaceQueue (void)
{
    head = tail = 0;
    shd_fs_head = 0;
    return 0;
}
/*
 * Allocate a FreeSpaceQueue entry describing the range (start, end) and
 * link it into the free-space bookkeeping. Always returns 0.
 *
 * NOTE(review): the entry is allocated with M_NOWAIT and dereferenced
 * without a check; presumably the allocation can fail and return 0 -
 * confirm and handle (SHD_NO_MEM looks like the matching error code).
 * NOTE(review): the body updates both the head/tail list and the
 * shd_fs_head list. The two insertion schemes look like they come from
 * different revisions; verify which one is intended before relying on
 * the resulting link structure.
 */
int AddFreeSpaceToQueue (long start, long end)
{
struct FreeSpaceQueue* new = malloc (sizeof (struct FreeSpaceQueue), M_DEVBUF, M_NOWAIT);
struct FreeSpaceQueue* temp;
new->start = start;
new->end = end;
/* First entry ever added: it becomes both head and tail. */
if (0 == head)
head = tail = new;
new->next = 0;
/* NOTE(review): negative when end < start (a wrapped range) - confirm callers never pass one. */
new->size = end - start;
/* Empty shd_fs_head list: the new entry becomes its head. */
if (0 == shd_fs_head)
shd_fs_head = new;
else
{
/* Push the entry in front of the current head ... */
new->next = head;
head->prev = new;
head = new;
/* ... then walk shd_fs_head to its last entry and append there too. */
temp = shd_fs_head;
while (temp->next != 0)
temp = temp->next;
temp->next = new;
}
return 0;
}
struct FreeSpaceQueue* GetNextFreeSpaceFromQueue (void)
{
struct FreeSpaceQueue* temp = 0;
if (0 == tail)
return 0;
else
{
temp = tail;
tail = tail->prev;
}
return temp;
}
int PrintFreeSpaceQueue()
{
struct FreeSpaceQueue* current = head;
struct FreeSpaceQueue* current = shd_fs_head;
while (current != 0)
{
printf ("%ld<->%ld ", current->start, current->end);
......@@ -188,16 +188,7 @@ int PrintFreeSpaceQueue()
return 0;
}
/*
 * Release a free-space queue entry.
 *
 *   temp - entry to release; a null pointer is ignored
 *
 * If the entry is the current head, head is cleared. The entry's links
 * are scrubbed before the memory is handed back to the allocator. The
 * previous code wrote to temp->next / temp->prev after free(), which is
 * a use-after-free.
 *
 * Always returns 0.
 */
int DeleteFreeSpace (struct FreeSpaceQueue* temp)
{
    if (temp == 0)
        return 0;

    if (head == temp)
        head = 0;

    /* Clear the links while the entry is still owned by us. */
    temp->next = temp->prev = 0;
    free (temp, M_DEVBUF);
    return 0;
}
int main ()
/*int main ()
{
struct FreeSpaceQueue* temp;
AddFreeSpaceToQueue (10, 15);
......@@ -212,4 +203,4 @@ int main ()
PrintFreeSpaceQueue();
return 0;
}
*/
/* Change to correct value */
#ifndef INCLUDE_BLOCK_ALLOC_H
#define INCLUDE_BLOCK_ALLOC_H
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
......@@ -21,9 +24,9 @@
#define SRC_TO_SHADOW 1
#define SHADOW_TO_SRC 2
int first_checkpoint;
int reclaim_method;
long shadow_size;
long shadow_start;
struct CurrentFreeBlockRange
{
......@@ -36,15 +39,12 @@ struct FreeSpaceQueue
{
long start;
long end;
long size;
struct FreeSpaceQueue *next;
struct FreeSpaceQueue *prev;
} *head, *tail;
} *shd_fs_head;
void SetShadowSize (long size);
void InitBlockAllocator (int method, long range_start, long range_size);
void DeleteCheckpoint (int version);
void CopyTree (Trie * trie, long size);
long GetLastBlock ();
long CurrentFreeBlockSize ();
int BlockFree (long start, long end);
long BlockAlloc (int size);
......@@ -54,8 +54,10 @@ int AddFreeSpaceToQueue (long start, long end);
struct FreeSpaceQueue* GetNextFreeSpaceFromQueue (void);
int PrintFreeSpaceQueue();
int DeleteFreeSpace (struct FreeSpaceQueue* temp);
void SetShadowSize (long size);
void SetShadowStart (long start);
#endif
......
This diff is collapsed.
......@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)mount.h 8.21 (Berkeley) 5/20/95
* $FreeBSD: src/sys/sys/mount.h,v 1.89.2.6 2002/04/26 00:46:00 iedowse Exp $
* $FreeBSD: src/sys/sys/mount.h,v 1.89.2.7 2003/04/04 20:35:57 tegge Exp $
*/
#ifndef _SYS_MOUNT_H_
......@@ -132,8 +132,9 @@ struct mount {
time_t mnt_time; /* last time written*/
u_int mnt_iosize_max; /* max IO request size */
struct vnodelst mnt_reservedvnlist; /* (future) dirty vnode list */
int mnt_nvnodelistsize; /* # of vnodes on this mount */
void *mnt_prison; /* associated prison */
int mnt_ops_in_progress;
int mnt_ops_in_progress; /* # of operations in progress on this mount */
};
#endif /* _KERNEL */
......
This diff is collapsed.
/* NOTE(review): presumably the number of shd device instances - confirm. */
#define NSHD 1
/* Error code assigned when a memory allocation fails. */
#define SHD_NO_MEM -1
/* Error code returned when no free block range can satisfy a request. */
#define SHD_DISK_FULL -2
This diff is collapsed.
......@@ -147,4 +147,5 @@ struct shd_softc {
#define SHDREADBLOCK _IOWR('S', 27, struct shd_readbuf)
#define SHDSETREBOOTVERSION _IOWR('S', 28, struct shd_ioctl)
#define SHDGETMODIFIEDRANGES _IOWR('S', 29, struct shd_modinfo)
#define SHDCRASH _IOWR('S', 30, int)
#include "trie.h"
/* Error code assigned when a trie allocation fails. */
#define SHD_NO_MEM -1
/* Error code assigned when an insert cannot obtain disk blocks. */
#define SHD_DISK_FULL -2
static void TrieNodeInit(TrieNode * node);
static void TrieNodeCleanup(TrieNode * node);
static int extract(char inDepth, TrieKey key);
......@@ -107,7 +110,7 @@ int TrieInit(Trie ** trieOutPtr, BlockAllocateFunction blockAlloc,
if (trie == 0)
{
printf ("Error allocating memory for TrieInit\n");
result = 0;
result = SHD_NO_MEM;
}
else
{
......@@ -150,7 +153,7 @@ void TrieCleanup(Trie * triePtr)
{
if (triePtr != 0)
{
TrieNodeCleanup(&(triePtr->root));
freeChildren(triePtr, &(triePtr->root), DEFAULT_OVERLAP);
free(triePtr
#ifdef KERNEL
, M_DEVBUF
......@@ -266,7 +269,7 @@ int TrieInsertWeak(Trie * triePtr, TrieKey key, int size, FirstPtrT first,
sizeToDepth(blockSize));
if (temp < 0)
{
copyCount = -1;
copyCount = temp;
break;
}
else
......@@ -365,7 +368,7 @@ static int insertWeak(Trie * triePtr, TrieNode * node, TrieKey key,
}
else
{
total = -1;
total = SHD_DISK_FULL;
}
done = 1;
}
......@@ -447,7 +450,7 @@ static int addChild(Trie * triePtr, TrieNode * parent, TrieKey key,
else
{
printf ("Error allocating memory for addChild\n");
result = -1;
result = SHD_NO_MEM;
}
return result;
}
......@@ -602,7 +605,7 @@ static TrieNode * pushDown(Trie * triePtr, TrieNode * node)
else
{
printf ("Error allocating memory for pushDown\n");
middle = 0;
middle = SHD_NO_MEM;
}
return middle;
}
......
......@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
* $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.7 2002/03/09 05:22:23 dd Exp $
* $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
*/
#include <sys/param.h>
......@@ -212,6 +212,14 @@ uipc_peeraddr(struct socket *so, struct sockaddr **nam)
if (unp->unp_conn && unp->unp_conn->unp_addr)
*nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
1);
else {
/*
* XXX: It seems that this test always fails even when
* connection is established. So, this else clause is
* added as workaround to return PF_LOCAL sockaddr.
*/
*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
}
return 0;
}
......@@ -395,8 +403,8 @@ uipc_sense(struct socket *so, struct stat *sb)
sb->st_blksize += so2->so_rcv.sb_cc;
}
sb->st_dev = NOUDEV;
if (unp->unp_ino == 0)
unp->unp_ino = unp_ino++;
if (unp->unp_ino == 0) /* make up a non-zero inode number */
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
sb->st_ino = unp->unp_ino;
return (0);
}
......@@ -1055,6 +1063,8 @@ unp_internalize(control, p)
if ((unsigned)fd >= fdescp->fd_nfiles ||
fdescp->fd_ofiles[fd] == NULL)
return (EBADF);
if (fdescp->fd_ofiles[fd]->f_type == DTYPE_KQUEUE)
return (EOPNOTSUPP);
}
/*
* Now replace the integer FDs with pointers to
......
......@@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: src/sys/kern/vfs_conf.c,v 1.49.2.4 2002/02/04 13:08:12 sobomax Exp $
* $FreeBSD: src/sys/kern/vfs_conf.c,v 1.49.2.5 2003/01/07 11:56:53 joerg Exp $
*/
/*
......@@ -128,7 +128,6 @@ vfs_mountroot(void *junk)
return;
}
}
(*shdinitp)();
/*
* Try to use the value read by the loader from /etc/fstab, or
......@@ -329,22 +328,19 @@ getdiskbyname(char *name) {
slice = 0;
part = 0;
cp = rindex(name, '/');
if (cp != NULL) {
name = cp + 1;
}
if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
name += sizeof(_PATH_DEV) - 1;
cp = name;
while (cp != '\0' && (*cp < '0' || *cp > '9'))
while (*cp != '\0' && (*cp < '0' || *cp > '9') && *cp != '/')
cp++;
if (cp == name) {
printf("missing device name\n");
return (NODEV);
}
if (*cp == '\0') {
printf("missing unit number\n");
return (NODEV);
}
unit = *cp - '0';
if (*cp == '\0' || *cp == '/')
unit = -1;
else
unit = *cp - '0';
*cp++ = '\0';
for (cd = 0; cd < NUMCDEVSW; cd++) {
dev = makedev(cd, 0);
......@@ -355,6 +351,13 @@ getdiskbyname(char *name) {
printf("no such device '%s'\n", name);
return (NODEV);
gotit:
if (devsw(dev)->d_maj == major(rootdev))
/* driver has already configured rootdev, e. g. vinum */
return (rootdev);
if (unit == -1) {
printf("missing unit number\n");
return (NODEV);
}
while (*cp >= '0' && *cp <= '9')
unit = 10 * unit + *cp++ - '0';
if (*cp == 's' && cp[1] >= '0' && cp[1] <= '9') {
......
......@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
* $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.16 2002/04/26 00:46:04 iedowse Exp $
* $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.19 2004/02/19 14:02:30 pjd Exp $
*/
/* For 4.3 integer FS ID compatibility */
......@@ -126,6 +126,8 @@ mount(p, uap)
struct nameidata nd;
char fstypename[MFSNAMELEN];
if (p->p_prison != NULL)
return (EPERM);
if (usermount == 0 && (error = suser(p)))
return (error);
/*
......@@ -290,6 +292,7 @@ mount(p, uap)
bzero((char *)mp, (u_long)sizeof(struct mount));
TAILQ_INIT(&mp->mnt_nvnodelist);
TAILQ_INIT(&mp->mnt_reservedvnlist);
mp->mnt_nvnodelistsize = 0;
lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
(void)vfs_busy(mp, LK_NOWAIT, 0, p);
mp->mnt_op = vfsp->vfc_vfsops;
......@@ -446,6 +449,10 @@ unmount(p, uap)
int error;
struct nameidata nd;
if (p->p_prison != NULL)
return (EPERM);
if (usermount == 0 && (error = suser(p)))
return (error);
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
......@@ -1220,14 +1227,15 @@ ocreat(p, uap)
}
vp = nd.ni_dvp;
shd_write_entry (vp, "create");
retVal = open(p, &nuap);
retVal = open(p, &nuap);
if (retVal)
{
printf ("open returned error %d in ocreat\n", retVal);
}
shd_write_exit (vp, "create");
NDFREE(&nd, NDF_ONLY_PNBUF);
return retVal;
return retVal;
}
#endif /* COMPAT_43 */
......@@ -1302,7 +1310,7 @@ mknod(p, uap)
if (!error) {
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
if (whiteout)
{
{
shd_write_entry (nd.ni_dvp, "mknod");
error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
shd_write_exit (nd.ni_dvp, "mknod");
......@@ -1422,7 +1430,7 @@ link(p, uap)
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
shd_write_entry (vp, "link");
error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
shd_write_entry (vp, "link");
shd_write_exit (vp, "link");
}
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == nd.ni_vp)
......@@ -2786,14 +2794,10 @@ rename(p, uap)
if (fvp == tdvp)
error = EINVAL;
/*
* If source is the same as the destination (that is the
* same inode number with the same name in the same directory),
* then there is nothing to do.
* If the source is the same as the destination (that is, if they
* are links to the same vnode), then there is nothing to do.
*/
if (fvp == tvp && fromnd.ni_dvp == tdvp &&
fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
!bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
fromnd.ni_cnd.cn_namelen))
if (fvp == tvp)
error = -1;
out:
if (!error) {
......
......@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
* $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.12 2002/04/14 16:38:38 fjoe Exp $
* $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.13 2002/12/29 18:19:53 dillon Exp $
*/
#include <sys/param.h>
......@@ -261,10 +261,10 @@ sequential_heuristic(struct uio *uio, struct file *fp)
* are.
*/
tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
if (tmpseq >= 127)
tmpseq = 127;
if (tmpseq > IO_SEQMAX)
tmpseq = IO_SEQMAX;
fp->f_seqcount = tmpseq;
return(fp->f_seqcount << 16);
return(fp->f_seqcount << IO_SEQSHIFT);
}
/*
......@@ -408,7 +408,7 @@ extern int checkpt_process_sleep_variable;