Subject: Re: lock bug in getnewvnode, or uvm_km_kmemalloc/uvm_map ?
To: Frank van der Linden <fvdl@wasabisystems.com>
From: Manuel Bouyer <bouyer@antioche.eu.org>
List: tech-kern
Date: 11/20/2002 22:22:59
--RnlQjJ0d97Da+TV1
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hi,
so here is my proposed fix to this problem.
There are a few interface changes:
- amap_extend() gains a flags parameter, can be either M_NOWAIT or M_WAITOK
  (amap_extend() uses malloc)
- uvm_km_kmemalloc() will pass UVM_KMF_NOWAIT in addition to
  UVM_KMF_TRYLOCK to uvm_map()
- uvm_mapent_alloc() gains a flags parameter (PR_NOWAIT or PR_WAITOK)

I changed uvm_map() to pass the right flag to uvm_mapent_alloc() and
amap_extend() based on the UVM_KMF_NOWAIT flag, and deal with errors.
uvm_km_alloc_poolpage1() will call uvm_km_kmemalloc() with
UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK if !waitok instead of just UVM_KMF_NOWAIT, so
that we won't sleep in vm_map_lock().

I minimally tested this on a UP i386 with LOCKDEBUG. I'll do more tests
tomorrow on an MP i386. I'll also back-port it to 1.6 to see if it fixes the
getnewvnode problem, which doesn't seem to happen with current (possibly
because the raidframe autoconfig code is called in a different way now).

Please review these changes, especially the error cases (do I unlock and free
everything that should be?). I'm also not sure I didn't miss cases where we
could sleep.

-- 
Manuel Bouyer <bouyer@antioche.eu.org>
     NetBSD: 23 ans d'experience feront toujours la difference
--

--RnlQjJ0d97Da+TV1
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff

Index: uvm_amap.c
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_amap.c,v
retrieving revision 1.47
diff -u -r1.47 uvm_amap.c
--- uvm_amap.c	2002/11/15 17:30:35	1.47
+++ uvm_amap.c	2002/11/20 21:11:26
@@ -284,10 +284,11 @@
  *    one (thus it can't be shared)
  */
 int
-amap_extend(entry, addsize, forwards)
+amap_extend(entry, addsize, forwards, flags)
 	struct vm_map_entry *entry;
 	vsize_t addsize;
 	int forwards;
+	int flags;
 {
 	struct vm_amap *amap = entry->aref.ar_amap;
 	int slotoff = entry->aref.ar_pageoff;
@@ -462,14 +463,14 @@
 	newppref = NULL;
 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
-		    M_WAITOK | M_CANFAIL);
+		    flags | M_CANFAIL);
 #endif
 	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP,
-	    M_WAITOK | M_CANFAIL);
+	    flags | M_CANFAIL);
 	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP,
-	    M_WAITOK | M_CANFAIL);
+	    flags | M_CANFAIL);
 	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
-	    M_WAITOK | M_CANFAIL);
+	    flags | M_CANFAIL);
 	if (newsl == NULL || newbck == NULL || newover == NULL) {
 #ifdef UVM_AMAP_PPREF
 		if (newppref != NULL) {
Index: uvm_amap.h
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_amap.h,v
retrieving revision 1.19
diff -u -r1.19 uvm_amap.h
--- uvm_amap.h	2002/11/14 17:58:48	1.19
+++ uvm_amap.h	2002/11/20 21:11:26
@@ -92,7 +92,7 @@
 void		amap_cow_now	/* resolve all COW faults now */
 			__P((struct vm_map *, struct vm_map_entry *));
 int		amap_extend	/* make amap larger */
-			__P((struct vm_map_entry *, vsize_t, int));
+			__P((struct vm_map_entry *, vsize_t, int, int));
 int		amap_flags	/* get amap's flags */
 			__P((struct vm_amap *));
 void		amap_free	/* free amap */
Index: uvm_km.c
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_km.c,v
retrieving revision 1.59
diff -u -r1.59 uvm_km.c
--- uvm_km.c	2002/10/05 17:26:06	1.59
+++ uvm_km.c	2002/11/20 21:11:28
@@ -394,7 +394,8 @@
 
 	if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
 	      0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
-			  UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK)))
+			  UVM_ADV_RANDOM,
+			  (flags & (UVM_KMF_TRYLOCK | UVM_KMF_NOWAIT))))
 			!= 0)) {
 		UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0);
 		return(0);
@@ -745,7 +746,8 @@
 	 */
 
 	s = splvm();
-	va = uvm_km_kmemalloc(map, obj, PAGE_SIZE, waitok ? 0 : UVM_KMF_NOWAIT);
+	va = uvm_km_kmemalloc(map, obj, PAGE_SIZE,
+	    waitok ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK);
 	splx(s);
 	return (va);
 #endif /* PMAP_MAP_POOLPAGE */
Index: uvm_map.c
===================================================================
RCS file: /cvsroot/syssrc/sys/uvm/uvm_map.c,v
retrieving revision 1.125
diff -u -r1.125 uvm_map.c
--- uvm_map.c	2002/11/14 17:58:48	1.125
+++ uvm_map.c	2002/11/20 21:11:34
@@ -190,7 +190,7 @@
  * local prototypes
  */
 
-static struct vm_map_entry *uvm_mapent_alloc __P((struct vm_map *));
+static struct vm_map_entry *uvm_mapent_alloc __P((struct vm_map *, int));
 static void uvm_mapent_copy __P((struct vm_map_entry *, struct vm_map_entry *));
 static void uvm_mapent_free __P((struct vm_map_entry *));
 static void uvm_map_entry_unwire __P((struct vm_map *, struct vm_map_entry *));
@@ -206,8 +206,9 @@
  */
 
 static __inline struct vm_map_entry *
-uvm_mapent_alloc(map)
+uvm_mapent_alloc(map, flags)
 	struct vm_map *map;
+	int flags;
 {
 	struct vm_map_entry *me;
 	int s;
@@ -220,17 +221,21 @@
 		if (me) uvm.kentry_free = me->next;
 		simple_unlock(&uvm.kentry_lock);
 		splx(s);
-		if (me == NULL) {
+		if (__predict_false(me == NULL)) {
 			panic("uvm_mapent_alloc: out of static map entries, "
 			      "check MAX_KMAPENT (currently %d)",
 			      MAX_KMAPENT);
 		}
 		me->flags = UVM_MAP_STATIC;
 	} else if (map == kernel_map) {
-		me = pool_get(&uvm_map_entry_kmem_pool, PR_WAITOK);
+		me = pool_get(&uvm_map_entry_kmem_pool, flags);
+		if (__predict_false(me == NULL))
+			return NULL;
 		me->flags = UVM_MAP_KMEM;
 	} else {
-		me = pool_get(&uvm_map_entry_pool, PR_WAITOK);
+		me = pool_get(&uvm_map_entry_pool, flags);
+		if (__predict_false(me == NULL))
+			return NULL;
 		me->flags = 0;
 	}
 
@@ -421,7 +426,7 @@
 	 * starting address.
 	 */
 
-	new_entry = uvm_mapent_alloc(map);
+	new_entry = uvm_mapent_alloc(map, PR_WAITOK);
 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
 
 	new_entry->end = start;
@@ -471,7 +476,7 @@
 	 *	AFTER the specified entry
 	 */
 
-	new_entry = uvm_mapent_alloc(map);
+	new_entry = uvm_mapent_alloc(map, PR_WAITOK);
 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
 
 	new_entry->start = entry->end = end;
@@ -572,7 +577,10 @@
 
 	new_entry = NULL;
 	if (map == pager_map) {
-		new_entry = uvm_mapent_alloc(map);
+		new_entry = uvm_mapent_alloc(map,
+		    (flags & UVM_KMF_NOWAIT) ? PR_NOWAIT : PR_WAITOK);
+		 if (__predict_false(new_entry == NULL))
+			return ENOMEM;
 	}
 
 	/*
@@ -681,7 +689,8 @@
 
 		if (prev_entry->aref.ar_amap) {
 			error = amap_extend(prev_entry, size,
-			    AMAP_EXTEND_FORWARDS);
+			    AMAP_EXTEND_FORWARDS,
+			    (flags & UVM_KMF_NOWAIT) ? M_NOWAIT : M_WAITOK);
 			if (error) {
 				vm_map_unlock(map);
 				if (new_entry) {
@@ -779,7 +788,9 @@
 				if (amap_extend(prev_entry,
 				    prev_entry->next->end -
 				    prev_entry->next->start,
-				    AMAP_EXTEND_FORWARDS))
+				    AMAP_EXTEND_FORWARDS,
+				    (flags & UVM_KMF_NOWAIT) ?
+				    M_NOWAIT : M_WAITOK))
 				goto nomerge;
 			}
 
@@ -797,7 +808,9 @@
 				if (amap_extend(prev_entry->next,
 				    prev_entry->end -
 				    prev_entry->start + size,
-				    AMAP_EXTEND_BACKWARDS))
+				    AMAP_EXTEND_BACKWARDS,
+				    (flags & UVM_KMF_NOWAIT) ?
+				    M_NOWAIT : M_WAITOK))
 				goto nomerge;
 			}
 		} else {
@@ -807,7 +820,9 @@
 			 */
 			if (prev_entry->next->aref.ar_amap) {
 				error = amap_extend(prev_entry->next, size,
-				    AMAP_EXTEND_BACKWARDS);
+				    AMAP_EXTEND_BACKWARDS,
+				    (flags & UVM_KMF_NOWAIT) ?
+				    M_NOWAIT : M_WAITOK);
 				if (error) {
 					vm_map_unlock(map);
 					if (new_entry) {
@@ -879,7 +894,12 @@
 		 */
 
 		if (new_entry == NULL) {
-			new_entry = uvm_mapent_alloc(map);
+			new_entry = uvm_mapent_alloc(map,
+			    (flags & UVM_KMF_NOWAIT) ? PR_NOWAIT : PR_WAITOK);
+			if (__predict_false(new_entry == NULL)) {
+				vm_map_unlock(map);
+				return ENOMEM;
+			}
 		}
 		new_entry->start = *startp;
 		new_entry->end = new_entry->start + size;
@@ -911,7 +931,14 @@
 
 			vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
 				UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
-			struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK);
+			struct vm_amap *amap = amap_alloc(size, to_add,
+			    (flags & UVM_KMF_NOWAIT) ? M_NOWAIT : M_WAITOK);
+			if (__predict_false(amap == NULL)) {
+				vm_map_unlock(map);
+				if (new_entry)
+					uvm_mapent_free(new_entry);
+				return ENOMEM;
+			}
 			new_entry->aref.ar_pageoff = 0;
 			new_entry->aref.ar_amap = amap;
 		} else {
@@ -1697,7 +1724,7 @@
 		oldoffset = (entry->start + fudge) - start;
 
 		/* allocate a new map entry */
-		newentry = uvm_mapent_alloc(dstmap);
+		newentry = uvm_mapent_alloc(dstmap, PR_WAITOK);
 		if (newentry == NULL) {
 			error = ENOMEM;
 			goto bad;
@@ -3178,7 +3205,7 @@
 				/* XXXCDC: WAITOK??? */
 			}
 
-			new_entry = uvm_mapent_alloc(new_map);
+			new_entry = uvm_mapent_alloc(new_map, PR_WAITOK);
 			/* old_entry -> new_entry */
 			uvm_mapent_copy(old_entry, new_entry);
 
@@ -3215,7 +3242,7 @@
 			 * (note that new references are read-only).
 			 */
 
-			new_entry = uvm_mapent_alloc(new_map);
+			new_entry = uvm_mapent_alloc(new_map, PR_WAITOK);
 			/* old_entry -> new_entry */
 			uvm_mapent_copy(old_entry, new_entry);
 

--RnlQjJ0d97Da+TV1--