Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/38636: ld(4) is now totally broken with ataraid(4) (ld.c 1.57 botch?)



Greg Oster suggested in private mail that there might be one instance
where device_private() should be used (in ld.c:ld_config_interrupts()).
However adding the call didn't fix anything for me yet (or indeed make
any visible difference I can see so far).

I was poking a bit deeper in ld_ataraid_attach() and found that it seems
to be called with the device_t pointer which ldattach() should also be
called with, but instead it's dissecting a new value from its own private
sc->sc_ld.  However that struct doesn't seem to ever have been properly
initialized by config_devalloc().

See how the device_t initialized for "ld0" is at 0xc3194f00, but the
value being passed to ldattach() is at 0xc3193e00.

ataraid0: config_devalloc: setup completed for ataraid unit 0 as ataraid0 (dev 
= 0xc30d8000)
ataraid0: found 1 RAID volume
ld0: config_devalloc: setup completed for ld unit 0 as ld0 (dev = 0xc3194f00)
ld0 at ataraid0 vendtype 1 unit 0: Adaptec ATA RAID-1 array
ld0: ld_ataraid_attach(): ataraid unit 0 on ld0
: ld_ataraid_attach(): ld unit 0 (ld->sc_dv = 0xc3193e00)
: ldattach(): unit 0
: 186 GB, 24321 cyl, 255 head, 63 sec, 512 bytes/sect x 390721536 sectors
rnd:  attached as an entropy source (collecting)
uvm_fault(0xc0bc7cc0, 0, 1) -> 0xe
kernel: supervisor trap page fault, code=0
Stopped in pid 0.1 (system) at  netbsd:dkwedge_discover+0x35:   movl    
0x8(%esi),%eax
db{1}> trace
dkwedge_discover(84,c01f4dd0,1,c01f4d70,ff,3f,200,1749f000,0,3) at 
netbsd:dkwedge_discover+0x35
ldattach(c3193e00,0,ffffffff,c3193e00,0,c0a9c6d7,0,6,8,c3193e00) at 
netbsd:ldattach+0x325
ld_ataraid_attach(c30d8000,c3194f00,c30d8800,c0cfcc64,c30d8800,c30ce9a0,c3194f00,c30d8800,c0cfcc64,c30d8000)
 at netbsd:ld_ataraid_attach+0x2ab
config_attach_loc(c30d8000,c0af5e10,c0cfcc64,c30d8800,c05bf330,c04c98e0,0,615755e7,c30d8000,1)
 at netbsd:config_attach_loc+0x173
ataraid_attach(0,c30d8000,0,c30ceae0,0,c30ceae0,c0cfccb8,c05bf59c,c0b0de4c,c0b0de20)
 at netbsd:ataraid_attach+0x87
config_attach_pseudo(c0b0de4c,c0b0de20,2,26,27,0,c0cfcce8,c04c9fc9,0,0) at 
netbsd:config_attach_pseudo+0x35
ata_raid_finalize(0,0,c0a82110,64,0,c0486db9,0,14,14,0) at 
netbsd:ata_raid_finalize+0x4c
config_finalize(0,0,14,0,0,c0486730,0,0,c0bc6c64,0) at 
netbsd:config_finalize+0xa9
main(0,c01002cd,0,0,0,0,0,0,0,0) at netbsd:main+0x273
db{1}> 


I haven't been keeping abreast of all the changes going on to device_t
and the private softc handling so I'm not sure what's going on here.  It
almost looks as if someone tried to add an unnecessary extra level in
the attachments somewhere.

Either that or there's a call to config_devalloc() missing, or the one
done for ld_ataraid_attach() should be being copied into the right
places for the value passed to ldattach() since ldattach() is apparently
not called directly by the autoconf machinery but only by
ld_ataraid_attach().

I'm rather stunned that nobody else running -current has hit upon
problems with ld(4) yet though.  What's going on here?  Are other users
of ld(4) (eg. mlx(4)) having any problems?  Apparently not.

Further it would appear that none of the other parent devices for ld(4)
are yet using device_t and none call device_private() in the way that
ld_ataraid_attach() was changed in ld_ataraid.c:1.27.

Something seems very half-baked here now with ataraid(4).

I tried partly reverting ld_ataraid.c:1.27 but that just made things
worse.  Perhaps I should try reverting the entire change since it's the
lone ld(4) user now using both device_t and device_private() for
ldattach()?

Ah, yes, that works perfectly!  (exact changes for running code below)

ataraid0: config_devalloc: setup completed for ataraid unit 0 as ataraid0 (dev 
= 0xc30d8000)
ataraid0: found 1 RAID volume
ld0: config_devalloc: setup completed for ld unit 0 as ld0 (dev = 0xc3193e00)
ld0 at ataraid0 vendtype 1 unit 0: Adaptec ATA RAID-1 array
ld0: ld_ataraid_attach(): ld unit 0 (ld->sc_dv = 0xc3193e00)
ld0: 186 GB, 24321 cyl, 255 head, 63 sec, 512 bytes/sect x 390721536 sectors
rnd: ld0 attached as an entropy source (collecting)


So, can we please revert ld_ataraid.c:1.27 now?   Thanks!

-- 
                                                Greg A. Woods
                                                Planix, Inc.

<woods%planix.com@localhost>     +1 416 489-5852 x122     http://www.planix.com/


Index: sys/dev/ata/ld_ataraid.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/dev/ata/ld_ataraid.c,v
retrieving revision 1.27
diff -u -r1.27 ld_ataraid.c
--- sys/dev/ata/ld_ataraid.c    4 May 2008 13:59:41 -0000       1.27
+++ sys/dev/ata/ld_ataraid.c    27 May 2008 19:42:55 -0000
@@ -92,7 +92,7 @@
 static int     ld_ataraid_start_raid0(struct ld_softc *, struct buf *);
 static void    ld_ataraid_iodone_raid0(struct buf *);
 
-CFATTACH_DECL_NEW(ld_ataraid, sizeof(struct ld_ataraid_softc),
+CFATTACH_DECL(ld_ataraid, sizeof(struct ld_ataraid_softc),
     ld_ataraid_match, ld_ataraid_attach, NULL, NULL);
 
 static int ld_ataraid_initialized;
@@ -120,9 +120,9 @@
 }
 
 static void
-ld_ataraid_attach(device_t parent, device_t self, void *aux)
+ld_ataraid_attach(struct device *parent, struct device *self, void *aux)
 {
-       struct ld_ataraid_softc *sc = device_private(self);
+       struct ld_ataraid_softc *sc = (void *) self;
        struct ld_softc *ld = &sc->sc_ld;
        struct ataraid_array_info *aai = aux;
        const char *level;
@@ -178,6 +178,9 @@
        aprint_naive(": ATA %s array\n", level);
        aprint_normal(": %s ATA %s array\n",
            ata_raid_type_name(aai->aai_type), level);
+#ifdef DIAGNOSTIC
+       aprint_normal_dev(&ld->sc_dv, "ld_ataraid_attach(): ld unit %d 
(ld->sc_dv = %p)\n", ld->sc_dv.dv_unit, &ld->sc_dv);
+#endif
 
        if (ld->sc_start == NULL) {
                aprint_error_dev(&ld->sc_dv, "unsupported array type\n");
@@ -471,9 +474,9 @@
                adi = &aai->aai_disks[cbp->cb_comp];
                adi->adi_status &= ~ADI_S_ONLINE;
 
-               printf("%s: error %d on component %d (%s)\n",
-                   device_xname(&sc->sc_ld.sc_dv), bp->b_error, cbp->cb_comp,
-                   device_xname(adi->adi_dev));
+               aprint_error_dev(&sc->sc_ld.sc_dv, "error %d on component %d 
(%s)\n",
+                                bp->b_error, cbp->cb_comp,
+                                device_xname(adi->adi_dev));
 
                /*
                 * If we didn't see an error yet and we are reading
Index: sys/kern/subr_autoconf.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/subr_autoconf.c,v
retrieving revision 1.150
diff -u -r1.150 subr_autoconf.c
--- sys/kern/subr_autoconf.c    25 May 2008 15:03:01 -0000      1.150
+++ sys/kern/subr_autoconf.c    27 May 2008 18:18:17 -0000
@@ -1214,7 +1214,8 @@
        xunit = number(&num[sizeof(num)], myunit);
        lunit = &num[sizeof(num)] - xunit;
        if (lname + lunit > sizeof(dev->dv_xname))
-               panic("config_devalloc: device name too long");
+               panic("config_devalloc: device name '%s' too long for dv_xname 
with unit number '%s'",
+                     cd->cd_name, xunit);
 
        /* get memory for all device vars */
        KASSERT((ca->ca_flags & DVF_PRIV_ALLOC) || ca->ca_devsize >= 
sizeof(struct device));
@@ -1222,7 +1223,8 @@
                dev_private = malloc(ca->ca_devsize, M_DEVBUF,
                                     M_ZERO | (cold ? M_NOWAIT : M_WAITOK));
                if (dev_private == NULL)
-                       panic("config_devalloc: memory allocation for device 
softc failed");
+                       panic("config_devalloc: memory allocation for device 
softc for %s%s failed",
+                             cd->cd_name, xunit);
        } else {
                KASSERT(ca->ca_flags & DVF_PRIV_ALLOC);
                dev_private = NULL;
@@ -1235,7 +1237,8 @@
                dev = dev_private;
        }
        if (dev == NULL)
-               panic("config_devalloc: memory allocation for device_t failed");
+               panic("config_devalloc: memory allocation for device_t for %s%s 
failed",
+                     cd->cd_name, xunit);
 
        dev->dv_class = cd->cd_class;
        dev->dv_cfdata = cf;
@@ -1270,6 +1273,14 @@
        prop_dictionary_set_uint16(dev->dv_properties,
            "device-unit", dev->dv_unit);
 
+#ifdef DEBUG
+       aprint_debug_dev(dev, "config_devalloc: setup completed for %s unit %d 
as %s (dev = %p)\n",
+                        dev->dv_cfdriver->cd_name,
+                        dev->dv_unit,
+                        dev->dv_xname,
+                        dev);
+#endif
+
        return (dev);
 }
 


Home | Main Index | Thread Index | Old Index