NFS, hung processes

Richard Tobin richard at aiai.ed.ac.uk
Wed Aug 2 03:21:38 AEST 1989


In article <13134 at bloom-beacon.MIT.EDU> jik at athena.mit.edu (Jonathan I. Kamens) writes:
>  The most common way of referencing a dead NFS path even if you don't
>realize you're doing it is if you have said path in your search path
>and try to execute a program and/or start a new shell.  Both will
>cause the search path to be scanned, and they could encounter the dead
>path and hang on it.
>
>  One solution, which is what we use, is not to hard mount anything
>but the most important NFS filesystems.  

Another solution is to mount the filesystems in, say, /nfs, and have symbolic
links to them from the places people actually refer to.  Then you can
remove the symbolic links if the server is down.

Even better, you can have a program do it.  Here's one I wrote recently.
We've only just started using it, so it may not be bug-free.

-- Richard

/*
 * nfslink [-i interval] [-t timeout] host name mountpt [name mountpt ...]
 *
 * maintain links to mounted file systems, removing them if the
 * remote machine isn't responding.
 *
 * Copyright Richard Tobin / AIAI 1989
 * 
 * May be freely redistributed if this whole notice remains intact.
 */

#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <sys/time.h>
#include <rpc/rpc.h>
#include <rpc/clnt.h>
#include <nfs/nfs.h>
#include <setjmp.h>
#include <sys/stat.h>

/*
 * Entry point.  Parses the -i (polling interval) and -t (probe timeout)
 * options, checks that the remaining arguments are a host followed by one
 * or more complete "name mountpt" pairs, and then loops forever: each pass
 * probes the host with nfscheck() and either creates or removes the links.
 *
 * The first pass runs the link routines in verbose mode so that links which
 * are already in the desired state get reported once; later passes only
 * report changes.
 *
 * Never returns normally; the program ends through usage() or fatal().
 */
int main(argc, argv)
int argc;
char **argv;
{
    int c, interval = 20, timeout = 5, firsttime = 1;
    extern char *optarg;
    extern int optind;

    while((c = getopt(argc, argv, "i:t:")) != EOF)
    {
        switch(c)
        {
          case 'i':
            interval = atoi(optarg);
            break;

          case 't':
            timeout = atoi(optarg);
            break;

          case '?':
            usage();
            break;
        }
    }

    /*
     * atoi gives 0 for non-numeric text and happily returns negatives.
     * The timeout is handed to alarm(), where 0 means "no alarm", so the
     * probe could block forever; a negative interval turns into a huge
     * unsigned value inside sleep().  Reject both instead of misbehaving.
     */
    if(interval <= 0 || timeout <= 0)
        usage();

    /* Need host + at least one pair, and an odd count overall. */
    if((argc - optind) < 3 || ((argc - optind) & 1) == 0)
        usage();

    while(1)
    {
        if(nfscheck(argv[optind], timeout) == 0)
            makelinks(&argv[optind+1], firsttime);
        else
            removelinks(&argv[optind+1], firsttime);

        firsttime = 0;
        sleep(interval);
    }
}

/*
 * Make sure every name in the list is a symbolic link to its mount point.
 *
 * links   - NULL-terminated list of strings taken in pairs: name, mountpt
 * verbose - when non-zero, also report links that are already in place
 *
 * A name that does not exist is created with symlink() and reported.
 * A name that is already a link to mountpt is left alone.  Any other state
 * (lstat/symlink failure, a link to somewhere else, or a non-link object)
 * ends the program through fatal().
 */
makelinks(links, verbose)
char **links;
int verbose;
{
    struct stat namestat;

    while(*links)
    {
        char *name = *links++;
        char *mountpt = *links++;

        if(lstat(name, &namestat) == -1)
        {
            if(errno != ENOENT)
            {
                perror("nfslink: lstat");
                /*
                 * fatal() has no prototype in scope, so the unused third
                 * argument must already have pointer type; a bare 0 would
                 * be passed as an int.
                 */
                fatal("can't lstat %s\n", name, (char *)0);
            }

            /* Nothing there yet: create the link. */
            if(symlink(mountpt, name) == -1)
            {
                perror("nfslink: symlink");
                fatal("can't link %s to %s\n", name, mountpt);
            }
            printf("nfslink: linked %s to %s\n", name, mountpt);
            fflush(stdout);
            continue;
        }

        if((namestat.st_mode & S_IFMT) != S_IFLNK)
            fatal("%s exists, but is not a symbolic link\n", name, (char *)0);

        if(!pointsto(name, mountpt))
            fatal("%s is a link, but not to %s\n", name, mountpt);

        if(verbose)
        {
            printf("nfslink: %s is already linked to %s\n",
                   name, mountpt);
            fflush(stdout);
        }
    }
}

/*
 * Remove every symbolic link in the list that points at its mount point.
 *
 * links   - NULL-terminated list of strings taken in pairs: name, mountpt
 * verbose - when non-zero, also report links that are already absent
 *
 * A name that does not exist is skipped.  A name that is a link to mountpt
 * is unlinked and reported.  Any other state (lstat/unlink failure, a link
 * to somewhere else, or a non-link object) ends the program through
 * fatal(), so nothing that this program did not create is ever removed.
 */
removelinks(links, verbose)
char **links;
int verbose;
{
    struct stat namestat;

    while(*links)
    {
        char *name = *links++;
        char *mountpt = *links++;

        if(lstat(name, &namestat) == -1)
        {
            if(errno != ENOENT)
            {
                perror("nfslink: lstat");
                /*
                 * fatal() has no prototype in scope, so the unused third
                 * argument must already have pointer type; a bare 0 would
                 * be passed as an int.
                 */
                fatal("can't lstat %s\n", name, (char *)0);
            }

            /* Already gone: nothing to do. */
            if(verbose)
            {
                printf("nfslink: link from %s to %s is already removed\n",
                       name, mountpt);
                fflush(stdout);
            }
            continue;
        }

        if((namestat.st_mode & S_IFMT) != S_IFLNK)
            fatal("%s exists, but is not a symbolic link\n", name, (char *)0);

        if(!pointsto(name, mountpt))
            fatal("%s is a link, but not to %s\n", name, mountpt);

        if(unlink(name) == -1)
        {
            perror("nfslink: unlink");
            fatal("can't remove link from %s to %s\n",
                  name, mountpt);
        }
        printf("nfslink: removed link from %s to %s\n",
               name, mountpt);
        fflush(stdout);
    }
}

/*
 * Return non-zero if the symbolic link `name' contains exactly the text
 * `target', zero otherwise.  A readlink() failure ends the program through
 * fatal().
 *
 * We don't use stat lest it hang, so it's not quite right: the link text
 * is compared literally, so a different spelling of the same path does not
 * count as a match.
 */
int pointsto(name, target)
char *name, *target;
{
    /* Room for 1024 bytes of link text plus the terminator. */
    char buf[1024 + 1];
    int len;

    /*
     * Ask for one byte more than we are prepared to accept, so that a
     * link too long for the buffer can be told apart from one that fits.
     */
    len = readlink(name, buf, sizeof(buf));
    if(len == -1)
    {
        perror("nfslink: readlink");
        fatal("can't read link %s\n", name, (char *)0);
    }

    /*
     * readlink truncates silently.  If the buffer came back completely
     * full we cannot know the whole link text, so never claim a match:
     * a false "yes" here would let removelinks() delete someone else's
     * link.
     */
    if(len >= (int)sizeof(buf))
        return 0;

    buf[len] = '\0';
    return strcmp(buf, target) == 0;
}

/*
 * Report an unrecoverable error on stderr and terminate with status 1.
 *
 * fmt is a printf-style format that may consume up to two string
 * arguments, arg1 and arg2; the message is prefixed with
 * "nfslink: fatal error: ".  Does not return.
 */
fatal(fmt, arg1, arg2)
char *fmt;
char *arg1;
char *arg2;
{
    fputs("nfslink: fatal error: ", stderr);
    fprintf(stderr, fmt, arg1, arg2);
    exit(1);
}

/* Print the command-line synopsis on stderr and terminate with status 2. */
usage()
{
    static char synopsis[] =
        "usage: nfslink [-i interval] [-t timeout] host name mountpt [name mountpt ...]\n";

    fputs(synopsis, stderr);
    exit(2);
}

static jmp_buf env;
void timedout();

int nfscheck(host, timeout)
char *host;
int timeout;
{
    int stat;
    signal(SIGALRM, timedout);

    if(setjmp(env) == 0)
    {
	alarm(timeout);
	stat = callrpc(host, NFS_PROGRAM, NFS_VERSION, RFS_NULL,
		       xdr_void, 0, xdr_void, 0);
	alarm(0);
	if(stat == 0)
	    return 0;
    }
    return -1;
}

void timedout()
{
    longjmp(env, 1);
}
-- 
Richard Tobin,                       JANET: R.Tobin at uk.ac.ed             
AI Applications Institute,           ARPA:  R.Tobin%uk.ac.ed at nsfnet-relay.ac.uk
Edinburgh University.                UUCP:  ...!ukc!ed.ac.uk!R.Tobin



More information about the Comp.unix.wizards mailing list