Subject: Re: kern/36169: 1sec+ delays using msync(2) with flags MS_ASYNC |
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: Brian de Alwis <bsd@cs.ubc.ca>
List: netbsd-bugs
Date: 08/29/2007 17:10:12
The following reply was made to PR kern/36169; it has been noted by GNATS.

From: Brian de Alwis <bsd@cs.ubc.ca>
To: gnats-bugs@NetBSD.org
Cc: 
Subject: Re: kern/36169: 1sec+ delays using msync(2) with flags MS_ASYNC |
	MS_INVALIDATE
Date: Wed, 29 Aug 2007 11:09:25 -0600

 I've looked into this some more, and it appears the major slowdowns
 I reported previously result from the use of MS_INVALIDATE.
 
 crm uses mmap to map in two 12M files and does random seeking within
 them (the files are hash tables).  Because these files may be in use
 by multiple processes at once, crm msync()s each file with
 MS_INVALIDATE.
 
 However in practice, there is only one process mmaping a file at
 a time.  But using MS_INVALIDATE on NetBSD seems to wipe out any
 caching of the file, regardless of whether it's actually necessary.
 
 I think the NetBSD implementation is invalidating all cached copies
 of the mapped data, regardless of whether it's inconsistent or not.
 POSIX 1003.1 says:
 
     When MS_INVALIDATE is specified, msync() shall invalidate all
     cached copies of mapped data that are inconsistent with the
     permanent storage locations such that subsequent references
     shall obtain data that was consistent with the permanent storage
     locations sometime between the call to msync() and the first
     subsequent memory reference to the data.
     <www.opengroup.org/onlinepubs/009695399/functions/msync.html>
 
 I've attached a simple test program that demonstrates this.  It
 does the following steps:
 
  1. mmaps a file,
  2. performs 1024 random reads in the resulting memory region,
  3. calls msync with MS_SYNC (-s), MS_ASYNC (-a), MS_INVALIDATE (-i), or
     some combination, as provided on the command line,
  4. munmaps the file
 
 If you run this repeatedly with `-s', the first run will take a
 bit of time, as the file is read in, but subsequent runs are
 extremely quick.
 
     $ ls -sh /mp3/misc/Joi--Show_Me_Love.mp3
     7.6M /mp3/misc/Joi--Show_Me_Love.mp3
 
     $ /usr/bin/time ./mmap-invalidate-test -s /mp3/misc/Joi--Show_Me_Love.mp3 
 	    2.11 real         0.00 user         0.01 sys
 
     $ /usr/bin/time ./mmap-invalidate-test -s /mp3/misc/Joi--Show_Me_Love.mp3 
 	    0.23 real         0.00 user         0.00 sys
 
     $ /usr/bin/time ./mmap-invalidate-test -s /mp3/misc/Joi--Show_Me_Love.mp3 
 	    0.05 real         0.00 user         0.00 sys
 
 If you run with `-i' (MS_INVALIDATE), each run takes a long time:
 
     $ /usr/bin/time ./mmap-invalidate-test -i /mp3/misc/Joi--Show_Me_Love.mp3 
 	    1.98 real         0.00 user         0.01 sys
 
     $ /usr/bin/time ./mmap-invalidate-test -i /mp3/misc/Joi--Show_Me_Love.mp3 
 	    2.09 real         0.00 user         0.02 sys
 
     $ /usr/bin/time ./mmap-invalidate-test -i /mp3/misc/Joi--Show_Me_Love.mp3 
 	    1.81 real         0.00 user         0.01 sys
 
 This is on 4.99.28 from Aug 11.
 
 Brian.
 
 ----------------------------------------------------------------------
 #include <stdio.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <time.h>
 
 /* Name the program was invoked as (set from argv[0] in main); printed by usage(). */
 char *progname;
 /* Non-zero when verbose() should emit its messages; toggled by the -v option. */
 int beVerbose = 0;
 
 /*
  * Print a printf-style progress message on stderr.
  * Does nothing unless the global beVerbose flag is non-zero.
  */
 void verbose(const char *fmt, ...) {
     va_list args;
 
     if(!beVerbose)
         return;
 
     va_start(args, fmt);
     vfprintf(stderr, fmt, args);
     va_end(args);
 }
 
 /*
  * Print the command-line synopsis on stderr and terminate the process
  * with exit status -1.  Never returns.
  *
  * The option list mirrors the getopt() string used in main(), including
  * the -v (verbose) toggle.
  */
 void usage(void) {
     fprintf(stderr, "use: %s [-sairv] file\n", progname);
     fprintf(stderr, "  -s    specify MS_SYNC\n");
     fprintf(stderr, "  -a    specify MS_ASYNC\n");
     fprintf(stderr, "  -i    specify MS_INVALIDATE\n");
     fprintf(stderr, "  -r    use file in read-only\n");
     fprintf(stderr, "  -v    toggle verbose progress messages\n");
     exit(-1);
     /*NOTREACHED*/
 }
 
 /*
  * Test driver: map the named file, read 1024 int-sized words from random
  * offsets in the mapping, msync() the whole mapping with the flags chosen
  * on the command line (-s, -a, -i, or a combination), then unmap and close.
  *
  * Options:
  *   -s / -a / -i   add MS_SYNC / MS_ASYNC / MS_INVALIDATE to the msync flags
  *   -r             open the file read-only (and map it without PROT_WRITE)
  *   -v             toggle verbose progress messages
  *
  * Returns 0 on success, -1 after printing a diagnostic on any failure.
  */
 int main(int argc, char **argv) {
     int ch;
     int fd;
     int openFlags = O_RDWR;
     int msyncFlags = 0;
     struct stat st;
     void *region;
     const volatile int *words;
     size_t len;
     size_t nwords;
     int i;
 
     progname = argv[0];
 
     while((ch = getopt(argc, argv, "sairv")) != -1) {
         switch(ch) {
         case 's':
             msyncFlags |= MS_SYNC;
             break;
 
         case 'a':
             msyncFlags |= MS_ASYNC;
             break;
 
         case 'i':
             msyncFlags |= MS_INVALIDATE;
             break;
 
         case 'r':
             /*
              * Assign rather than OR: the access mode is not a bit flag
              * (O_RDONLY is commonly 0), so OR-ing it into O_RDWR would
              * leave the file opened read-write.
              */
             openFlags = O_RDONLY;
             break;
 
         case 'v':
             beVerbose = !beVerbose;
             break;
 
         default:
             usage();
             /*NOTREACHED*/
         }
     }
     argc -= optind; argv += optind;
 
     if(argc != 1) {
         fprintf(stderr, "error: missing filename\n\n");
         usage();
     }
     if(msyncFlags == 0) {
         fprintf(stderr, "error: no msyncFlags specified\n\n");
         usage();
     }
 
     verbose("opening %s...\n", argv[0]);
     if((fd = open(argv[0], openFlags)) < 0) { perror(argv[0]); return -1; }
     verbose("fstat of %s...\n", argv[0]);
     if(fstat(fd, &st) < 0) { perror("fstat"); return -1; }
 
     /*
      * The sampling loop below picks a random int-sized word, so the file
      * must hold at least one; otherwise the modulo would divide by zero.
      */
     len = (size_t)st.st_size;
     nwords = len / sizeof(int);
     if(st.st_size < 0 || nwords == 0) {
         fprintf(stderr, "error: %s is too small to sample\n", argv[0]);
         return -1;
     }
 
     /* off_t may be wider than long, so print it through long long. */
     verbose("mmaping %lld bytes...\n", (long long)st.st_size);
     if((region = mmap(NULL, len,
             PROT_READ | (openFlags == O_RDWR ? PROT_WRITE : 0),
             MAP_FILE | MAP_SHARED, fd, 0)) == MAP_FAILED) {
         perror("mmap");
         return -1;
     }
 
     /* now read a bunch of words from random locations */
     verbose("reading 1024 bytes from random locations\n");
     srandom((unsigned int)time(NULL));
     /*
      * Read through a volatile pointer so the compiler cannot discard the
      * loads; touching the mapped pages is the whole point of the test.
      */
     words = region;
     for(i = 0; i < 1024; i++) {
         size_t index = (size_t)random() % nwords;
         (void)words[index];
     }
 
     verbose("msync'ing file\n");
     if(msync(region, len, msyncFlags) < 0) {
         perror("msync");
         return -1;
     }
     verbose("munmap'ing file\n");
     if(munmap(region, len) < 0) { perror("munmap"); return -1; }
     verbose("closing file\n");
     close(fd);
     verbose("finished\n");
     return 0;
 }
 ----------------------------------------------------------------------
 
 -- 
   Brian de Alwis | Software Practices Lab | UBC | http://www.cs.ubc.ca/~bsd/
       "Amusement to an observing mind is study." - Benjamin Disraeli