Subject: auto-spl simple locks
To: None <tech-kern@netbsd.org>
From: Jason Thorpe <thorpej@nas.nasa.gov>
List: tech-smp
Date: 07/29/1999 17:10:43
[ Sorry for the cross-post, but this SMP issue affects every platform
  because it requires glue in the non-multiprocessor case, too, so
  is more appropriate for tech-kern. ]

Hi folks...

The following is the machine-independent portion of auto-spl simple locks.
These simple locks take care of blocking interrupts when necessary, and
also perform the interprocessor synchronization in the multiprocessor
case.

In the multiprocessor case, the interrupt blocking feature is especially
important for locks which will be asserted from interrupt context; when
asserted in non-interrupt context, if the interrupt isn't blocked, and
an interrupt on the same CPU occurs, you get deadlock.

In *all* cases, it makes programming less error prone; you grab a lock,
and doing so provides the appropriate synchronization.

As an example, the UVM "free page queue lock" is now initialized like
so:

	simple_lock_init(&uvm.fpageqlock, LK_SPL_IMP);

...and calls like this:

	s = splimp();
	simple_lock(&uvm.fpageqlock);
	.
	.
	.
	simple_unlock(&uvm.fpageqlock);
	splx(s);

...now just look like this:

	simple_lock(&uvm.fpageqlock);
	.
	.
	.
	simple_unlock(&uvm.fpageqlock);

However, it comes with some cost:

	(1) Code size: the simple lock calls are no longer noops in the
	    non-MULTIPROCESSOR case, so the object code is bigger.  On
	    my laptop:

text    data    bss     dec     hex     filename
1741660 115088  200492  2057240 1f6418  /netbsd.bak
1785056 115308  201132  2101496 2010f8  /netbsd

	(2) Run time cost: there is a slight cost due to additional
	    instructions executed on every simple lock call.  In
	    the common case, there are three instructions executed
	    for simple_lock() and three for simple_unlock() on the
	    i386: load, compare, branch.  On a simple fork/exit
	    benchmark, vforks were about 5-8 usec slower.  These
	    results may be improved eventually, since I haven't
	    removed all of the redundant spl*() calls that I can
	    eliminate now.

Now, it is easy to define a new lock type, like "spllock" or something
like that.

Unfortunately, in most cases in the kernel, of a given object type, some
instances will need to be interrupt-protected, and many won't.  So, in
those cases, you need to perform the "do I need to lock?" check.

Anyhow, I'm interested in what people think about this, or if people
have any other ideas.  I'll state now that auto-spl locks are the
only sane way I can fix a locking protocol error that exists in the
i386 pmap, even in the non-MULTIPROCESSOR case.

        -- Jason R. Thorpe <thorpej@nas.nasa.gov>

Index: sys/lock.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/lock.h,v
retrieving revision 1.22
diff -c -r1.22 lock.h
*** lock.h	1999/07/28 19:29:39	1.22
--- lock.h	1999/07/29 23:45:46
***************
*** 96,101 ****
--- 96,103 ----
   */
  struct simplelock {
  	int lock_data __attribute__((__aligned__));
+ 	int lock_level;
+ 	int lock_saved_level;
  #ifdef LOCKDEBUG
  	const char *lock_file;
  	int lock_line;
***************
*** 117,126 ****
  #endif
  
  #ifdef LOCKDEBUG
! #define	SIMPLELOCK_INITIALIZER	{ SIMPLELOCK_UNLOCKED, NULL, 0, NULL, 0, \
! 				  { NULL, NULL }, 0 }
  #else
! #define	SIMPLELOCK_INITIALIZER	{ SIMPLELOCK_UNLOCKED }
  #endif
  
  /* XXXCDC: kill typedefs later? */
--- 119,129 ----
  #endif
  
  #ifdef LOCKDEBUG
! #define	SIMPLELOCK_INITIALIZER(x)	{ SIMPLELOCK_UNLOCKED, (x), 0,	\
! 					  NULL, 0, NULL, 0,		\
! 					  { NULL, NULL }, 0 }
  #else
! #define	SIMPLELOCK_INITIALIZER(x)	{ SIMPLELOCK_UNLOCKED, (x), 0 }
  #endif
  
  /* XXXCDC: kill typedefs later? */
***************
*** 303,321 ****
  #define	simple_lock_try(alp)	_simple_lock_try((alp), __FILE__, __LINE__)
  #define	simple_unlock(alp)	_simple_unlock((alp), __FILE__, __LINE__)
  
! void	simple_lock_init __P((struct simplelock *));
  void	simple_lock_dump __P((void));
  void	simple_lock_freecheck __P((void *, void *));
- #elif defined(MULTIPROCESSOR)
- #define	simple_lock_init(alp)	cpu_simple_lock_init((alp))
- #define	simple_lock(alp)	cpu_simple_lock((alp))
- #define	simple_lock_try(alp)	cpu_simple_lock_try((alp))
- #define	simple_unlock(alp)	cpu_simple_unlock((alp))
  #else
! #define	simple_lock_init(alp)	(alp)->lock_data = SIMPLELOCK_UNLOCKED
! #define	simple_lock(alp)		/* nothing */
! #define	simple_lock_try(alp)	(1)	/* always succeeds */
! #define	simple_unlock(alp)		/* nothing */
  #endif
  
  #endif /* _SYS_LOCK_H_ */
--- 306,388 ----
  #define	simple_lock_try(alp)	_simple_lock_try((alp), __FILE__, __LINE__)
  #define	simple_unlock(alp)	_simple_unlock((alp), __FILE__, __LINE__)
  
! void	simple_lock_init __P((__volatile struct simplelock *, int));
  void	simple_lock_dump __P((void));
  void	simple_lock_freecheck __P((void *, void *));
  #else
! static	__inline void simple_lock_init __P((__volatile struct simplelock *,
! 	    int)) __attribute__((__unused__));
! static	__inline void simple_lock __P((__volatile struct simplelock *))
! 	    __attribute__((__unused__));
! static	__inline int simple_lock_try __P((__volatile struct simplelock *))
! 	    __attribute__((__unused__));
! static	__inline void simple_unlock __P((__volatile struct simplelock *))
! 	    __attribute__((__unused__));
! 
! static __inline void
! simple_lock_init(alp, level)
! 	__volatile struct simplelock *alp;
! 	int level;
! {
! 
! #if defined(MULTIPROCESSOR)
! 	cpu_simple_lock_init(alp);
! #else
! 	alp->lock_data = SIMPLELOCK_UNLOCKED;
! #endif
! 	alp->lock_level = level;
! 	alp->lock_saved_level = 0;
! }
! 
! static __inline void
! simple_lock(alp)
! 	__volatile struct simplelock *alp;
! {
! #if defined(MULTIPROCESSOR)
! 	if (alp->lock_level != LK_SPL_NONE) {
! 		int s = cpu_lock_spl(alp->lock_level);
! 		cpu_simple_lock(alp);
! 		alp->lock_saved_level = s;
! 	} else
! 		cpu_simple_lock(alp);
! #else
! 	if (alp->lock_level != LK_SPL_NONE)
! 		alp->lock_saved_level = cpu_lock_spl(alp->lock_level);
! #endif
! }
! 
! static __inline int
! simple_lock_try(alp)
! 	__volatile struct simplelock *alp;
! {
! #if defined(MULTIPROCESSOR)
! 	if (alp->lock_level != LK_SPL_NONE) {
! 		int rv, s = cpu_lock_spl(alp->lock_level);
! 		rv = cpu_simple_lock_try(alp);
! 		if (rv)
! 			alp->lock_saved_level = s;
! 		else
! 			splx(s);
! 		return (rv);
! 	} else
! 		return (cpu_simple_lock_try(alp));
! #else
! 	if (alp->lock_level != LK_SPL_NONE)
! 		alp->lock_saved_level = cpu_lock_spl(alp->lock_level);
! 	return (1);	/* always succeeds */
! #endif
! }
! 
! static __inline void
! simple_unlock(alp)
! 	__volatile struct simplelock *alp;
! {
! #if defined(MULTIPROCESSOR)
! 	cpu_simple_unlock(alp);
  #endif
+ 	if (alp->lock_level != LK_SPL_NONE)
+ 		splx(alp->lock_saved_level);
+ }
+ #endif /* LOCKDEBUG */
  
  #endif /* _SYS_LOCK_H_ */