NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/38636: ld(4) is now totally broken with ataraid(4) (ld.c 1.57 botch?)



The following reply was made to PR kern/38636; it has been noted by GNATS.

From: "Greg A. Woods" <woods%planix.com@localhost>
To: NetBSD GNATS <gnats-bugs%NetBSD.org@localhost>
Cc: Juan Romero Pardines <xtraeme%netbsd.org@localhost>,
        NetBSD Kernel Bug People <kern-bug-people%netbsd.org@localhost>,
        NetBSD-current Users's Discussion List 
<current-users%netbsd.org@localhost>
Subject: Re: kern/38636: ld(4) is now totally broken with ataraid(4) (ld.c 1.57 
botch?)
Date: Tue, 27 May 2008 15:46:29 -0400

 Greg Oster suggested in private mail that there might be one instance
 where device_private() should be used (in ld.c:ld_config_interrupts()).
 However adding the call didn't fix anything for me yet (or indeed make
 any visible difference I can see so far).
 
 I was poking a bit deeper in ld_ataraid_attach() and found that it seems
 to be called with the device_t pointer which ldattach() should also be
 called with, but instead it's dissecting a new value from its own private
 sc->sc_ld.  However that struct doesn't seem to ever have been properly
 initialized by config_devalloc().
 
 See how the device_t initialized for "ld0" is at 0xc3194f00, but the
 value being passed to ldattach() is at 0xc3193e00.
 
 ataraid0: config_devalloc: setup completed for ataraid unit 0 as ataraid0 (dev 
= 0xc30d8000)
 ataraid0: found 1 RAID volume
 ld0: config_devalloc: setup completed for ld unit 0 as ld0 (dev = 0xc3194f00)
 ld0 at ataraid0 vendtype 1 unit 0: Adaptec ATA RAID-1 array
 ld0: ld_ataraid_attach(): ataraid unit 0 on ld0
 : ld_ataraid_attach(): ld unit 0 (ld->sc_dv = 0xc3193e00)
 : ldattach(): unit 0
 : 186 GB, 24321 cyl, 255 head, 63 sec, 512 bytes/sect x 390721536 sectors
 rnd:  attached as an entropy source (collecting)
 uvm_fault(0xc0bc7cc0, 0, 1) -> 0xe
 kernel: supervisor trap page fault, code=0
 Stopped in pid 0.1 (system) at  netbsd:dkwedge_discover+0x35:   movl    
0x8(%esi),%eax
 db{1}> trace
 dkwedge_discover(84,c01f4dd0,1,c01f4d70,ff,3f,200,1749f000,0,3) at 
netbsd:dkwedge_discover+0x35
 ldattach(c3193e00,0,ffffffff,c3193e00,0,c0a9c6d7,0,6,8,c3193e00) at 
netbsd:ldattach+0x325
 
ld_ataraid_attach(c30d8000,c3194f00,c30d8800,c0cfcc64,c30d8800,c30ce9a0,c3194f00,c30d8800,c0cfcc64,c30d8000)
 at netbsd:ld_ataraid_attach+0x2ab
 
config_attach_loc(c30d8000,c0af5e10,c0cfcc64,c30d8800,c05bf330,c04c98e0,0,615755e7,c30d8000,1)
 at netbsd:config_attach_loc+0x173
 
ataraid_attach(0,c30d8000,0,c30ceae0,0,c30ceae0,c0cfccb8,c05bf59c,c0b0de4c,c0b0de20)
 at netbsd:ataraid_attach+0x87
 config_attach_pseudo(c0b0de4c,c0b0de20,2,26,27,0,c0cfcce8,c04c9fc9,0,0) at 
netbsd:config_attach_pseudo+0x35
 ata_raid_finalize(0,0,c0a82110,64,0,c0486db9,0,14,14,0) at 
netbsd:ata_raid_finalize+0x4c
 config_finalize(0,0,14,0,0,c0486730,0,0,c0bc6c64,0) at 
netbsd:config_finalize+0xa9
 main(0,c01002cd,0,0,0,0,0,0,0,0) at netbsd:main+0x273
 db{1}> 
 
 
 I haven't been keeping abreast of all the changes going on to device_t
 and the private softc handling so I'm not sure what's going on here.  It
 almost looks as if someone tried to add an unnecessary extra level in
 the attachments somewhere.
 
 Either that or there's a call to config_devalloc() missing, or the one
 done for ld_ataraid_attach() should be being copied into the right
 places for the value passed to ldattach() since ldattach() is apparently
 not called directly by the autoconf machinery but only by
 ld_ataraid_attach().
 
 I'm rather stunned that nobody else running -current has hit upon
 problems with ld(4) yet though.  What's going on here?  Are other users
 of ld(4) (eg. mlx(4)) having any problems?  Apparently not.
 
 Further it would appear that none of the other parent devices for ld(4)
 are yet using device_t and none call device_private() in the way that
 ld_ataraid_attach() was changed in ld_ataraid.c:1.27.
 
 Something seems very half-baked here now with ataraid(4).
 
 I tried partly reverting ld_ataraid.c:1.27 but that just made things
 worse.  Perhaps I should try reverting the entire change since it's the
 lone ld(4) user now using both device_t and device_private() for
 ldattach()?
 
 Ah, yes, that works perfectly!  (exact changes for running code below)
 
 ataraid0: config_devalloc: setup completed for ataraid unit 0 as ataraid0 (dev 
= 0xc30d8000)
 ataraid0: found 1 RAID volume
 ld0: config_devalloc: setup completed for ld unit 0 as ld0 (dev = 0xc3193e00)
 ld0 at ataraid0 vendtype 1 unit 0: Adaptec ATA RAID-1 array
 ld0: ld_ataraid_attach(): ld unit 0 (ld->sc_dv = 0xc3193e00)
 ld0: 186 GB, 24321 cyl, 255 head, 63 sec, 512 bytes/sect x 390721536 sectors
 rnd: ld0 attached as an entropy source (collecting)
 
 
 So, can we please revert ld_ataraid.c:1.27 now?   Thanks!
 
 -- 
                                                Greg A. Woods
                                                Planix, Inc.
 
 <woods%planix.com@localhost>     +1 416 489-5852 x122     
http://www.planix.com/
 
 
 Index: sys/dev/ata/ld_ataraid.c
 ===================================================================
 RCS file: /cvs/master/m-NetBSD/main/src/sys/dev/ata/ld_ataraid.c,v
 retrieving revision 1.27
 diff -u -r1.27 ld_ataraid.c
 --- sys/dev/ata/ld_ataraid.c   4 May 2008 13:59:41 -0000       1.27
 +++ sys/dev/ata/ld_ataraid.c   27 May 2008 19:42:55 -0000
 @@ -92,7 +92,7 @@
  static int    ld_ataraid_start_raid0(struct ld_softc *, struct buf *);
  static void   ld_ataraid_iodone_raid0(struct buf *);
  
 -CFATTACH_DECL_NEW(ld_ataraid, sizeof(struct ld_ataraid_softc),
 +CFATTACH_DECL(ld_ataraid, sizeof(struct ld_ataraid_softc),
      ld_ataraid_match, ld_ataraid_attach, NULL, NULL);
  
  static int ld_ataraid_initialized;
 @@ -120,9 +120,9 @@
  }
  
  static void
 -ld_ataraid_attach(device_t parent, device_t self, void *aux)
 +ld_ataraid_attach(struct device *parent, struct device *self, void *aux)
  {
 -      struct ld_ataraid_softc *sc = device_private(self);
 +      struct ld_ataraid_softc *sc = (void *) self;
        struct ld_softc *ld = &sc->sc_ld;
        struct ataraid_array_info *aai = aux;
        const char *level;
 @@ -178,6 +178,9 @@
        aprint_naive(": ATA %s array\n", level);
        aprint_normal(": %s ATA %s array\n",
            ata_raid_type_name(aai->aai_type), level);
 +#ifdef DIAGNOSTIC
 +      aprint_normal_dev(&ld->sc_dv, "ld_ataraid_attach(): ld unit %d 
(ld->sc_dv = %p)\n", ld->sc_dv.dv_unit, &ld->sc_dv);
 +#endif
  
        if (ld->sc_start == NULL) {
                aprint_error_dev(&ld->sc_dv, "unsupported array type\n");
 @@ -471,9 +474,9 @@
                adi = &aai->aai_disks[cbp->cb_comp];
                adi->adi_status &= ~ADI_S_ONLINE;
  
 -              printf("%s: error %d on component %d (%s)\n",
 -                  device_xname(&sc->sc_ld.sc_dv), bp->b_error, cbp->cb_comp,
 -                  device_xname(adi->adi_dev));
 +              aprint_error_dev(&sc->sc_ld.sc_dv, "error %d on component %d 
(%s)\n",
 +                               bp->b_error, cbp->cb_comp,
 +                               device_xname(adi->adi_dev));
  
                /*
                 * If we didn't see an error yet and we are reading
 Index: sys/kern/subr_autoconf.c
 ===================================================================
 RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/subr_autoconf.c,v
 retrieving revision 1.150
 diff -u -r1.150 subr_autoconf.c
 --- sys/kern/subr_autoconf.c   25 May 2008 15:03:01 -0000      1.150
 +++ sys/kern/subr_autoconf.c   27 May 2008 18:18:17 -0000
 @@ -1214,7 +1214,8 @@
        xunit = number(&num[sizeof(num)], myunit);
        lunit = &num[sizeof(num)] - xunit;
        if (lname + lunit > sizeof(dev->dv_xname))
 -              panic("config_devalloc: device name too long");
 +              panic("config_devalloc: device name '%s' too long for dv_xname 
with unit number '%s'",
 +                    cd->cd_name, xunit);
  
        /* get memory for all device vars */
        KASSERT((ca->ca_flags & DVF_PRIV_ALLOC) || ca->ca_devsize >= 
sizeof(struct device));
 @@ -1222,7 +1223,8 @@
                dev_private = malloc(ca->ca_devsize, M_DEVBUF,
                                     M_ZERO | (cold ? M_NOWAIT : M_WAITOK));
                if (dev_private == NULL)
 -                      panic("config_devalloc: memory allocation for device 
softc failed");
 +                      panic("config_devalloc: memory allocation for device 
softc for %s%s failed",
 +                            cd->cd_name, xunit);
        } else {
                KASSERT(ca->ca_flags & DVF_PRIV_ALLOC);
                dev_private = NULL;
 @@ -1235,7 +1237,8 @@
                dev = dev_private;
        }
        if (dev == NULL)
 -              panic("config_devalloc: memory allocation for device_t failed");
 +              panic("config_devalloc: memory allocation for device_t for %s%s 
failed",
 +                    cd->cd_name, xunit);
  
        dev->dv_class = cd->cd_class;
        dev->dv_cfdata = cf;
 @@ -1270,6 +1273,14 @@
        prop_dictionary_set_uint16(dev->dv_properties,
            "device-unit", dev->dv_unit);
  
 +#ifdef DEBUG
 +      aprint_debug_dev(dev, "config_devalloc: setup completed for %s unit %d 
as %s (dev = %p)\n",
 +                       dev->dv_cfdriver->cd_name,
 +                       dev->dv_unit,
 +                       dev->dv_xname,
 +                       dev);
 +#endif
 +
        return (dev);
  }
  
 


Home | Main Index | Thread Index | Old Index