NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: kern/59912: running rm -rf on LFS hangs the rm process forever on prelock state



The following reply was made to PR kern/59912; it has been noted by GNATS.

From: Shinichi Doyashiki <clare%csel.org@localhost>
To: gnats-bugs%netbsd.org@localhost
Cc: 
Subject: Re: kern/59912: running rm -rf on LFS hangs the rm process forever on
 prelock state
Date: Sat, 17 Jan 2026 23:26:07 +0900

 I don't understand the LFS code yet.
 
 After providing the necessary information to an LLM and investigating,
 I was able to come up with a starting patch for the lfs_cleanerd
 process, as shown below. This patch removes the "clean1" sleep in
 lfs_strategy() and instead adds a counter-based wait (the
 lfs_seg_epoch counter plus a condition variable) to control the
 progress of lfs_cleanerd. However, even with this patch, fcntl
 still causes a panic (see the backtrace below).  According to the
 LLM, this may be due to stack corruption or structure corruption
 around fcntl.
 
 $ cvs diff -u
 cvs diff: Diffing .
 Index: lfs.h
 ===================================================================
 RCS file: /cvsroot/src/sys/ufs/lfs/lfs.h,v
 retrieving revision 1.219
 diff -u -r1.219 lfs.h
 --- lfs.h       5 Jan 2026 05:02:47 -0000       1.219
 +++ lfs.h       17 Jan 2026 13:57:31 -0000
 @@ -1050,6 +1050,8 @@
          uint32_t  lfs_seglock;          /* segment lock counter */
 
          uint32_t lfs_iocount;           /* number of ios pending */
 +       uint64_t lfs_seg_epoch;
 +       kcondvar_t lfs_seg_epoch_cv;
          uint32_t lfs_writer;            /* don't allow any dirops to start */
          uint32_t lfs_dirops;            /* count of active directory ops */
          kcondvar_t lfs_diropscv;        /* condvar of active directory ops */
 Index: lfs_subr.c
 ===================================================================
 RCS file: /cvsroot/src/sys/ufs/lfs/lfs_subr.c,v
 retrieving revision 1.110
 diff -u -r1.110 lfs_subr.c
 --- lfs_subr.c  5 Jan 2026 05:02:47 -0000       1.110
 +++ lfs_subr.c  17 Jan 2026 13:57:32 -0000
 @@ -546,6 +546,15 @@
                          LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);
 
                          --fs->lfs_seglock;
 +
 +                       /*
 +                        * Segment writer reached a stable point.
 +                        * Wake up readers waiting for cleaner progress.
 +                        */
 +                       mutex_enter(&lfs_lock);
 +                       fs->lfs_seg_epoch++;
 +                       cv_broadcast(&fs->lfs_seg_epoch_cv);
 +                       mutex_exit(&lfs_lock);
                  }
                  /*
                   * We let checkpoints happen asynchronously.  That means
 @@ -588,6 +597,14 @@
                          LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);
 
                          --fs->lfs_seglock;
 +
 +                       /*
 +                        * Checkpoint completed: filesystem state is stable.
 +                        */
 +                       mutex_enter(&lfs_lock);
 +                       fs->lfs_seg_epoch++;
 +                       cv_broadcast(&fs->lfs_seg_epoch_cv);
 +                       mutex_exit(&lfs_lock);
                  }
                  if (do_unmark_dirop)
                          lfs_unmark_dirop(fs);
 Index: lfs_vfsops.c
 ===================================================================
 RCS file: /cvsroot/src/sys/ufs/lfs/lfs_vfsops.c,v
 retrieving revision 1.399
 diff -u -r1.399 lfs_vfsops.c
 --- lfs_vfsops.c        5 Jan 2026 05:02:47 -0000       1.399
 +++ lfs_vfsops.c        17 Jan 2026 13:57:33 -0000
 @@ -1147,6 +1147,8 @@
          cv_init(&fs->lfs_cleanercv, "cleancv");
          cv_init(&fs->lfs_prelockcv, "prelockcv");
          cv_init(&fs->lfs_cleanquitcv, "cleanquit");
 +       cv_init(&fs->lfs_seg_epoch_cv, "segepoch");
 +       fs->lfs_seg_epoch = 0;
 
          /* Set the file system readonly/modify bits. */
          fs->lfs_ronly = ronly;
 @@ -1488,6 +1490,7 @@
          cv_destroy(&fs->lfs_diropscv);
          cv_destroy(&fs->lfs_stopcv);
          cv_destroy(&fs->lfs_nextsegsleep);
 +       cv_destroy(&fs->lfs_seg_epoch_cv);
 
          rw_destroy(&fs->lfs_fraglock);
          rw_destroy(&fs->lfs_iflock);
 Index: lfs_vnops.c
 ===================================================================
 RCS file: /cvsroot/src/sys/ufs/lfs/lfs_vnops.c,v
 retrieving revision 1.352
 diff -u -r1.352 lfs_vnops.c
 --- lfs_vnops.c 5 Jan 2026 05:02:47 -0000       1.352
 +++ lfs_vnops.c 17 Jan 2026 13:57:34 -0000
 @@ -1436,7 +1436,8 @@
    * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
    * the active cleaner test.
    *
 - * XXX This code assumes that lfs_markv makes synchronous checkpoints.
 + * XXX This code assumes that cleaner progress is observable via
 + *     lfs_seg_epoch updates (i.e., no silent segment reuse).
    */
   int
   lfs_strategy(void *v)
 @@ -1484,9 +1485,17 @@
 
          slept = 1;
          loopcount = 0;
 -       mutex_enter(&lfs_lock);
 -       while (slept && fs->lfs_seglock) {
 +       for (;;) {
 +               uint64_t epoch;
 +
 +               mutex_enter(&lfs_lock);
 +               if (!slept || !fs->lfs_seglock) {
 +                       mutex_exit(&lfs_lock);
 +                       break;
 +               }
 +               epoch = fs->lfs_seg_epoch;
                  mutex_exit(&lfs_lock);
 +
                  /*
                   * Look through list of intervals.
                   * There will only be intervals to look through
 @@ -1529,25 +1538,26 @@
                                          slept = 1;
                                          ++loopcount;
                                          break;
 -                               } else if (fs->lfs_seglock) {
 -                                       mtsleep(&fs->lfs_seglock,
 -                                               (PRIBIO + 1) | PNORELOCK,
 -                                               "clean1", 0,
 -                                               &lfs_lock);
 +                               } else if (fs->lfs_seglock &&
 +                                          curlwp != fs->lfs_cleaner_thread) {
 +                                       /*
 +                                        * Do not block the cleaner thread itself:
 +                                        * it would deadlock waiting for progress
 +                                        * that only it can make.
 +                                        */
 +                                       while (epoch == fs->lfs_seg_epoch && fs->lfs_seglock)
 +                                               cv_wait(&fs->lfs_seg_epoch_cv, &lfs_lock);
                                          slept = 1;
                                          break;
                                  }
                                  mutex_exit(&lfs_lock);
                          }
                  }
 -               mutex_enter(&lfs_lock);
                  if (loopcount > MAXLOOP) {
                          printf("lfs_strategy: breaking out of clean2 loop\n");
                          break;
                  }
          }
 -       mutex_exit(&lfs_lock);
 -
          vp = ip->i_devvp;
          return VOP_STRATEGY(vp, bp);
   }
 
 (gdb) bt
 #0  0xffffffff80239db5 in cpu_reboot (howto=howto@entry=260,
      bootstr=bootstr@entry=0x0) at ../../../../arch/amd64/amd64/machdep.c:709
 #1  0xffffffff8094f3cf in kern_reboot (howto=howto@entry=260,
      bootstr=bootstr@entry=0x0) at ../../../../kern/kern_reboot.c:91
 #2  0xffffffff8099be5d in vpanic (fmt=fmt@entry=0xffffffff80dca941 "trap",
      ap=ap@entry=0xffffc489472ebb38) at ../../../../kern/subr_prf.c:288
 #3  0xffffffff8099bf32 in panic (fmt=fmt@entry=0xffffffff80dca941 "trap")
      at ../../../../kern/subr_prf.c:209
 #4  0xffffffff8023cb7e in trap (frame=0xffffc489472ebc80)
      at ../../../../arch/amd64/amd64/trap.c:325
 #5  0xffffffff80234ad4 in alltraps ()
 #6  0xffffffff808b7836 in lfs_fcntl (v=0xffffc489472ebe40)
      at ../../../../ufs/lfs/lfs_vnops.c:2047
 #7  lfs_fcntl (v=0xffffc489472ebe40) at ../../../../ufs/lfs/lfs_vnops.c:1861
 #8  0xffffffff80a246fe in VOP_FCNTL (vp=0xffff8a098b7faa40,
      command=<optimized out>, data=<optimized out>, fflag=<optimized out>,
      cred=<optimized out>) at ../../../../kern/vnode_if.c:972
 #9  0xffffffff809adcc6 in fcntl_forfs (fd=<optimized out>, fp=<optimized out>,
      cmd=-1610036220, arg=0x0) at ../../../../kern/sys_descrip.c:226
 #10 sys_fcntl (l=<optimized out>, uap=<optimized out>, retval=<optimized out>)
      at ../../../../kern/sys_descrip.c:385
 #11 0xffffffff80526a0b in sy_call (sy=0xffffffff8105a640 <sysent+2208>,
      l=0xffff8a097f3fec00, uap=0xffffc489472ec000, rval=0xffffc489472ebfb0)
      at ../../../../sys/syscallvar.h:65
 #12 sy_invoke (sy=0xffffffff8105a640 <sysent+2208>, l=0xffff8a097f3fec00,
      uap=0xffffc489472ec000, rval=0xffffc489472ebfb0, code=92)
      at ../../../../sys/syscallvar.h:94
 #13 syscall (frame=0xffffc489472ec000)
      at ../../../../arch/x86/x86/syscall.c:137
 #14 0xffffffff8021025d in handle_syscall ()
 


Home | Main Index | Thread Index | Old Index