Subject: Re: "panic: extent_free: region not found" on AS4100, 1.6.x with mlx(4)
To: NetBSD port-alpha List <port-alpha@NetBSD.org>
From: Greg A. Woods <woods@weird.com>
List: port-alpha
Date: 10/24/2005 19:20:34
--Multipart_Mon_Oct_24_19:20:33_2005-1
Content-Type: text/plain; charset=US-ASCII

At Mon, 24 Oct 2005 10:53:28 -0600 (MDT),
Michael L. Hitch wrote:
> 
> No, this problem has already been identified, analyzed, and fixed.
> 
>  Revision 1.34 / (download) - annotate - [select for diffs], Sat Jul 30
> 17:37:11 2005 UTC (2 months, 3 weeks ago) by mhitch
> Branch: MAIN
> Changes since 1.33: +3 -2 lines
> Diff to previous 1.33 (colored)
> 
> The handler for the periodic ENQUIRY command was not unmapping the ccb
> data buffer, which left stale flags which caused Alpha ES40 crashes.  Add
> the mlx_ccb_unmap() call and the ES40 now runs with > 1GB memory.
 
Hmmm.... but I pulled that patch up long ago into the local 1.6.x tree
that my running kernel was built from.

Also FYI, my old machine had 1.5GB of RAM and never saw this problem
(with the patch installed, of course); the new one currently has 2GB.


@@ -1107,6 +1109,7 @@
 	u_int lsn;
 
 	mlx = (struct mlx_softc *)mc->mc_mx.mx_dv;
+	mlx_ccb_unmap(mlx, mc);		/* XXX revision 1.34 */
 
 	/*
 	 * Command completed OK?


The full diff of the mlx.c I'm running with is attached.

-- 
						Greg A. Woods

H:+1 416 218-0098  W:+1 416 489-5852 x122  VE3TCP  RoboHack <woods@robohack.ca>
Planix, Inc. <woods@planix.com>          Secrets of the Weird <woods@weird.com>


--Multipart_Mon_Oct_24_19:20:33_2005-1
Content-Type: text/plain; charset=US-ASCII


I'm not sure where/why I came up with the NULL deref part of this patch,
and I'm not sure setting mu_status to zero is the right default, but
until I started running on this new machine everything seemed to work OK....

Index: sys/dev/ic/mlx.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/dev/ic/mlx.c,v
retrieving revision 1.16.4.1
diff -u -r1.16.4.1 mlx.c
--- sys/dev/ic/mlx.c	28 Jul 2003 18:08:37 -0000	1.16.4.1
+++ sys/dev/ic/mlx.c	24 Oct 2005 23:11:31 -0000
@@ -422,7 +422,7 @@
 		meo = mlx_enquire(mlx, MLX_CMD_ENQUIRY_OLD,
 		    sizeof(struct mlx_enquiry_old), NULL, 0);
 		if (meo == NULL) {
-			printf("%s: ENQUIRY_OLD failed\n", mlx->mlx_dv.dv_xname);
+			printf("%s: ENQUIRY_OLD failed during init\n", mlx->mlx_dv.dv_xname);
 			return;
 		}
 		ci->ci_firmware_id[0] = meo->me_fwmajor;
@@ -543,11 +543,13 @@
 		model = buf;
 	}
 
-	printf("%s: DAC%s, %d channel%s, firmware %d.%02d-%c-%02d",
-	    mlx->mlx_dv.dv_xname, model, ci->ci_nchan,
-	    ci->ci_nchan > 1 ? "s" : "",
-	    ci->ci_firmware_id[0], ci->ci_firmware_id[1],
-	    ci->ci_firmware_id[3], ci->ci_firmware_id[2]);
+	printf("%s: DAC%s, %d channel%s, firmware %d.%02d-%c-%02d, interface V%d",
+	       mlx->mlx_dv.dv_xname,
+	       model,
+	       ci->ci_nchan, ci->ci_nchan > 1 ? "s" : "",
+	       ci->ci_firmware_id[0], ci->ci_firmware_id[1],
+	       ci->ci_firmware_id[3], ci->ci_firmware_id[2],
+	       ci->ci_iftype);
 	if (ci->ci_mem_size != 0)
 		printf(", %dMB RAM", ci->ci_mem_size >> 20);
 	printf("\n");
@@ -573,7 +575,7 @@
 		meo = mlx_enquire(mlx, MLX_CMD_ENQUIRY_OLD,
 		    sizeof(struct mlx_enquiry_old), NULL, waitok);
 		if (meo == NULL) {
-			printf("%s: ENQUIRY_OLD failed\n",
+			printf("%s: ENQUIRY_OLD failed during configure\n",
 			    mlx->mlx_dv.dv_xname);
 			goto out;
 		}
@@ -583,7 +585,7 @@
 		me = mlx_enquire(mlx, MLX_CMD_ENQUIRY,
 		    sizeof(struct mlx_enquiry), NULL, waitok);
 		if (me == NULL) {
-			printf("%s: ENQUIRY failed\n", mlx->mlx_dv.dv_xname);
+			printf("%s: ENQUIRY failed during configure\n", mlx->mlx_dv.dv_xname);
 			goto out;
 		}
 		mlx->mlx_numsysdrives = me->me_num_sys_drvs;
@@ -595,7 +597,7 @@
 	if (mes == NULL) {
 		printf("%s: error fetching drive status\n",
 		    mlx->mlx_dv.dv_xname);
-		free(me, M_DEVBUF);
+		/* free() removed as per rev. 1.23 */
 		goto out;
 	}
 
@@ -1107,6 +1109,7 @@
 	u_int lsn;
 
 	mlx = (struct mlx_softc *)mc->mc_mx.mx_dv;
+	mlx_ccb_unmap(mlx, mc);		/* XXX revision 1.34 */
 
 	/*
 	 * Command completed OK?
@@ -1852,15 +1855,17 @@
 
  out:
 	if (mc != NULL) {
+		/* Copy out status */
+		mu->mu_status = mc->mc_status;
+
 		if (mapped)
 			mlx_ccb_unmap(mlx, mc);
 		mlx_ccb_free(mlx, mc);
-	}
-
-	/* Copy out status and data */
-	mu->mu_status = mc->mc_status;
+	} else
+		mu->mu_status = 0;	/* don't de-ref a NULL! */
 
 	if (kbuf != NULL) {
+		/* Copy out data */
 		if (mu->mu_datasize > 0 && (mu->mu_bufdir & MU_XFER_IN) != 0) {
 			rv = copyout(kbuf, mu->mu_buf, mu->mu_datasize);
 #ifdef DIAGNOSTIC
@@ -1885,6 +1890,7 @@
 	int s;
 
 	s = splbio();
+	mc = SLIST_FIRST(&mlx->mlx_ccb_freelist); /* moved here as per rev. 1.24 */
 	if (control) {
 		if (mlx->mlx_nccbs_ctrl >= MLX_NCCBS_CONTROL) {
 			splx(s);
@@ -1894,7 +1900,6 @@
 		mc->mc_flags |= MC_CONTROL;
 		mlx->mlx_nccbs_ctrl++;
 	}
-	mc = SLIST_FIRST(&mlx->mlx_ccb_freelist);
 	SLIST_REMOVE_HEAD(&mlx->mlx_ccb_freelist, mc_chain.slist);
 	splx(s);
 
@@ -2212,7 +2217,7 @@
 			    mlx->mlx_dv.dv_xname);
 			mlx->mlx_flags |= MLXF_SPINUP_REPORTED;
 		}
-		break;
+		return (0);			/* from rev. 1.23 */
 
 	case 0x30:
 		fmt = "configuration checksum error";
@@ -2243,7 +2248,8 @@
 		break;
 
 	case 0xf0:
-		fmt = "FATAL MEMORY PARITY ERROR";
+		printf("%s: FATAL MEMORY PARITY ERROR\n",
+		       mlx->mlx_dv.dv_xname);	/* from rev. 1.23 */
 		return (1);
 
 	default:

--Multipart_Mon_Oct_24_19:20:33_2005-1--