Subject: Re: symlink to null string and realpath()
To: None <tech-userlevel@NetBSD.org>
From: enami tsugutomo <enami@sm.sony.co.jp>
List: tech-userlevel
Date: 01/28/2005 09:47:23
enami tsugutomo <enami@but-b.or.jp> writes:

> If a path ends with a symlink to a null string and it is passed to
> realpath(), realpath() succeeds, but the kernel doesn't allow
> traversal of such a symlink.  So the result returned by realpath()
> isn't suitable as a real path.

I'll change realpath() to return ENOENT in some cases, including the
one above, to match the kernel's behaviour.

BTW, attached is a realpath() modified to use the strategy of FreeBSD's
realpath() to address PR#28986.  I'll commit this this weekend.

enami.

Index: getcwd.c
===================================================================
RCS file: /cvsroot/src/lib/libc/gen/getcwd.c,v
retrieving revision 1.35
diff -c -r1.35 getcwd.c
*** getcwd.c	23 Jan 2005 01:00:51 -0000	1.35
--- getcwd.c	27 Jan 2005 00:53:25 -0000
***************
*** 46,55 ****
  #include <sys/stat.h>
  
  #include <assert.h>
- #include <dirent.h>
  #include <errno.h>
- #include <fcntl.h>
- #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
--- 46,52 ----
***************
*** 62,198 ****
  #endif
  
  /*
!  * char *realpath(const char *path, char resolved_path[MAXPATHLEN]);
   *
   * Find the real name of path, by removing all ".", ".." and symlink
   * components.  Returns (resolved) on success, or (NULL) on failure,
   * in which case the path which caused trouble is left in (resolved).
   */
  char *
! realpath(path, resolved)
! 	const char *path;
! 	char *resolved;
  {
  	struct stat sb;
! 	int fd, n, rootd, serrno, nlnk = 0;
! 	char *p, *q, wbuf[MAXPATHLEN];
  
  	_DIAGASSERT(path != NULL);
  	_DIAGASSERT(resolved != NULL);
  
! 	/* Save the starting point. */
! 	if ((fd = open(".", O_RDONLY)) < 0) {
! 		(void)strlcpy(resolved, ".", MAXPATHLEN);
! 		return (NULL);
! 	}
  
  	/*
! 	 * Find the dirname and basename from the path to be resolved.
! 	 * Change directory to the dirname component.
! 	 * lstat the basename part.
! 	 *     if it is a symlink, read in the value and loop.
! 	 *     if it is a directory, then change to that directory.
! 	 * get the current directory name and append the basename.
  	 */
! 	if (strlcpy(resolved, path, MAXPATHLEN) >= MAXPATHLEN) {
! 		errno = ENAMETOOLONG;
! 		goto err1;
  	}
  loop:
! 	q = strrchr(resolved, '/');
! 	if (q != NULL) {
! 		p = q + 1;
! 		if (q == resolved)
! 			q = "/";
! 		else {
! 			do {
! 				--q;
! 			} while (q > resolved && *q == '/');
! 			q[1] = '\0';
! 			q = resolved;
! 		}
! 		if (chdir(q) < 0)
! 			goto err1;
! 	} else
! 		p = resolved;
! 
! 	/* Deal with the last component. */
! 	if (lstat(p, &sb) == 0) {
! 		if (S_ISLNK(sb.st_mode)) {
! 			if (nlnk++ >= MAXSYMLINKS) {
! 				errno = ELOOP;
! 				goto err1;
! 			}
! 			n = readlink(p, resolved, MAXPATHLEN-1);
! 			if (n < 0)
! 				goto err1;
! 			resolved[n] = '\0';
  			goto loop;
  		}
! 		if (S_ISDIR(sb.st_mode)) {
! 			if (chdir(p) < 0)
! 				goto err1;
! 			p = "";
  		}
  	}
  
! 	/*
! 	 * Save the last component name and get the full pathname of
! 	 * the current directory.
! 	 */
! 	if (strlcpy(wbuf, p, sizeof(wbuf)) >= sizeof(wbuf)) {
  		errno = ENAMETOOLONG;
! 		goto err1;
  	}
  
  	/*
! 	 * Call the inernal internal version of getcwd which
! 	 * does a physical search rather than using the $PWD short-cut
! 	 */
! 	if (getcwd(resolved, MAXPATHLEN) == 0)
! 		goto err1;
! 
! 	/*
! 	 * Join the two strings together, ensuring that the right thing
! 	 * happens if the last component is empty, or the dirname is root.
! 	 */
! 	if (resolved[0] == '/' && resolved[1] == '\0')
! 		rootd = 1;
! 	else
! 		rootd = 0;
! 
! 	if (*wbuf) {
! 		if (strlen(resolved) + strlen(wbuf) + (rootd ? 0 : 1) + 1 >
! 		    MAXPATHLEN) {
! 			errno = ENAMETOOLONG;
! 			goto err1;
! 		}
! 		if (rootd == 0)
! 			if (strlcat(resolved, "/", MAXPATHLEN) >= MAXPATHLEN) {
! 				errno = ENAMETOOLONG;
! 				goto err1;
! 			}
! 		if (strlcat(resolved, wbuf, MAXPATHLEN) >= MAXPATHLEN) {
! 			errno = ENAMETOOLONG;
! 			goto err1;
  		}
  	}
  
! 	/* Go back to where we came from. */
! 	if (fchdir(fd) < 0) {
! 		serrno = errno;
! 		goto err2;
  	}
  
! 	/* It's okay if the close fails, what's an fd more or less? */
! 	(void)close(fd);
! 	return (resolved);
! 
! err1:	serrno = errno;
! 	(void)fchdir(fd);
! err2:	(void)close(fd);
! 	errno = serrno;
! 	return (NULL);
  }
  
  char *
--- 59,200 ----
  #endif
  
  /*
!  * char *realpath(const char *path, char resolved[MAXPATHLEN]);
   *
   * Find the real name of path, by removing all ".", ".." and symlink
   * components.  Returns (resolved) on success, or (NULL) on failure,
   * in which case the path which caused trouble is left in (resolved).
   */
  char *
! realpath(const char *path, char *resolved)
  {
  	struct stat sb;
! 	int idx = 0, len, n, nlnk = 0, serrno = errno;
! 	const char *q;
! 	char *p, wbuf[2][MAXPATHLEN];
  
  	_DIAGASSERT(path != NULL);
  	_DIAGASSERT(resolved != NULL);
  
! 	/*
! 	 * Build real path one by one with paying an attention to .,
! 	 * .. and symbolic link.
! 	 */
  
  	/*
! 	 * `p' is where we'll put a new component with prepending
! 	 * a delimiter.
  	 */
! 	p = resolved;
! 
! 	if (*path == 0) {
! 		*p = 0;
! 		errno = ENOENT;
! 		return (NULL);
  	}
+ 
+ 	/* If relative path, start from current working directory. */
+ 	if (*path != '/') {
+ 		if (getcwd(resolved, MAXPATHLEN) == NULL) {
+ 			p[0] = '.';
+ 			p[1] = 0;
+ 			return (NULL);
+ 		}
+ 		len = strlen(resolved);
+ 		if (len > 1)
+ 			p += len;
+ 	}
+ 
  loop:
! 	/* Skip any slash. */
! 	while (*path == '/')
! 		path++;
! 
! 	if (*path == 0) {
! 		if (p == resolved)
! 			*p++ = '/';
! 		*p = 0;
! 		return (resolved);
! 	}
! 
! 	/* Find the end of this component. */
! 	q = path;
! 	do
! 		q++;
! 	while (*q != '/' && *q != 0);
! 
! 	/* Test . or .. */
! 	if (path[0] == '.') {
! 		if (q - path == 1) {
! 			path = q;
  			goto loop;
  		}
! 		if (path[1] == '.' && q - path == 2) {
! 			/* Trim the last component. */
! 			if (p != resolved)
! 				while (*--p != '/')
! 					;
! 			path = q;
! 			goto loop;
  		}
  	}
  
! 	/* Append this component. */
! 	if (p - resolved + 1 + q - path + 1 > MAXPATHLEN) {
  		errno = ENAMETOOLONG;
! 		if (p == resolved)
! 			*p++ = '/';
! 		*p = 0;
! 		return (NULL);
  	}
+ 	p[0] = '/';
+ 	memcpy(&p[1], path, q - path);
+ 	p[1 + q - path] = 0;
  
  	/*
! 	 * If this component is a symlink, toss it and prepend link
! 	 * target to unresolved path.
! 	 */
! 	if (lstat(resolved, &sb) == -1) {
! 		/* Allow nonexistent component if this is the last one. */
! 		if (*q == 0 && errno == ENOENT) {
! 			errno = serrno;
! 			return (resolved);
  		}
+ 		return (NULL);
  	}
+ 	if (S_ISLNK(sb.st_mode)) {
+ 		if (nlnk++ >= MAXSYMLINKS) {
+ 			errno = ELOOP;
+ 			return (NULL);
+ 		}
+ 		n = readlink(resolved, wbuf[idx], sizeof(wbuf[0]) - 1);
+ 		if (n < 0)
+ 			return (NULL);
+ 		if (n == 0) {
+ 			errno = ENOENT;
+ 			return (NULL);
+ 		}
  
! 		/* Append unresolved path to link target and switch to it. */
! 		if (n + (len = strlen(q)) + 1 > sizeof(wbuf[0])) {
! 			errno = ENAMETOOLONG;
! 			return (NULL);
! 		}
! 		memcpy(&wbuf[idx][n], q, len + 1);
! 		path = wbuf[idx];
! 		idx ^= 1;
! 
! 		/* If absolute symlink, start from root. */
! 		if (*path == '/')
! 			p = resolved;
! 		goto loop;
  	}
  
! 	/* Advance both resolved and unresolved path. */
! 	p += 1 + q - path;
! 	path = q;
! 	goto loop;
  }
  
  char *