Subject: port-i386/2785: microtime.s prevents changing HZ/TIMER_FREQ
To: None <gnats-bugs@gnats.netbsd.org>
From: None <dennis@jnx.com>
List: netbsd-bugs
Date: 09/28/1996 15:25:57
>Number:         2785
>Category:       port-i386
>Synopsis:       microtime.s prevents changing HZ/TIMER_FREQ
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    gnats-admin (GNATS administrator)
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Sat Sep 28 15:35:01 1996
>Last-Modified:
>Originator:     Dennis Ferguson
>Organization:
	Juniper Networks, Inc.
>Release:        1.2_ALPHA
>Environment:
	133 MHz Pentium, NetBSD-current from about July
System: NetBSD skank.jnx.com 1.2_ALPHA NetBSD 1.2_ALPHA (SKANKLIKE) #5: Sat Sep 28 14:34:02 PDT 1996 dennis@skank.jnx.com:/usr/src/sys/arch/i386/compile/SKANKLIKE i386


>Description:
	The implementation of microtime() in microtime.s is not compiled
	in if HZ is set and produces bogus results if TIMER_FREQ is
	altered in configuration to more closely match the actual frequency
	of a machine's clock.  This can be annoying if one is attempting
	to use the machine for time service.
>How-To-Repeat:
	Look at microtime.s.  Observe that it is surrounded by a #ifndef HZ.
	Note that the code compiles in constants which are only correct if
	HZ=100 and TIMER_FREQ=1193182
>Fix:
	The following patch causes a conversion table from timer ticks
	to microseconds to be computed at clock initialization time based
	on the actual configured values of HZ and TIMER_FREQ, and alters
	microtime.s to use the table when computing the microseconds since
	the last timer interrupt.  It also rearranges the assembler
	microtime() to minimize the time spent with interrupts blocked
	(not a big deal).  The table lookup is more precise at the default
	HZ/TIMER_FREQ values than the computation in the current code and
	runs about as fast, at the cost of a 768 byte lookup table.

*** arch/i386/i386/microtime.s	1996/09/26 02:37:54	1.1
--- arch/i386/i386/microtime.s	1996/09/28 21:33:47
***************
*** 40,129 ****
  #define	IRQ_BIT(irq_num)	(1 << ((irq_num) % 8))
  #define	IRQ_BYTE(irq_num)	((irq_num) / 8)
  
- /*
-  * Use a higher resolution version of microtime if HZ is not
-  * overridden (i.e. it is 100Hz).
-  */
- #ifndef HZ
  ENTRY(microtime)
  	cli				# disable interrupts
  
! 	movb	$(TIMER_SEL0|TIMER_LATCH),%al
! 	outb	%al,$TIMER_MODE		# latch timer 0's counter
  
! 	# Read counter value into ecx, LSB first
! 	xorl	%ecx,%ecx
! 	inb	$TIMER_CNTR0,%al
! 	movb	%al,%cl
  	inb	$TIMER_CNTR0,%al
! 	movb	%al,%ch
! 
! 	# Now check for counter overflow.  This is tricky because the
! 	# timer chip doesn't let us atomically read the current counter
! 	# value and the output state (i.e., overflow state).  We have
! 	# to read the ICU interrupt request register (IRR) to see if the
! 	# overflow has occured.  Because we lack atomicity, we use
! 	# the (very accurate) heuristic that we do not check for
! 	# overflow if the value read is close to 0.
! 	# E.g., if we just checked the IRR, we might read a non-overflowing
! 	# value close to 0, experience overflow, then read this overflow
! 	# from the IRR, and mistakenly add a correction to the "close
! 	# to zero" value.
! 	#
! 	# We compare the counter value to the heuristic constant 12.
! 	# If the counter value is less than this, we assume the counter
! 	# didn't overflow between disabling clock interrupts and latching
! 	# the counter value above.  For example, we assume that the first 3
! 	# instructions take less than 12 microseconds to execute.
! 	#
! 	# (We used to check for overflow only if the value read was close to
! 	# the timer limit, but this doesn't work very well if we're at the
! 	# clock's ipl or higher.)
! 	#
! 	# Otherwise, the counter might have overflowed.  We check for this
! 	# condition by reading the interrupt request register out of the ICU.
! 	# If it overflowed, we add in one clock period.
! 
! 	movl	$11932,%edx	# counter limit
! 
! 	testb	$IRQ_BIT(0),_ipending + IRQ_BYTE(0)
! 	jnz	1f
! 
! 	cmpl	$12,%ecx	# check for potential overflow
! 	jbe	2f
! 	
! 	inb	$IO_ICU1,%al	# read IRR in ICU
! 	testb	$IRQ_BIT(0),%al	# is a timer interrupt pending?
! 	jz	2f
! 
! 1:	subl	%edx,%ecx	# add another tick
! 	
! 2:	subl	%ecx,%edx	# subtract counter value from counter limit
! 
! 	# Divide by 1193280/1000000.  We use a fast approximation of 4096/3433.
! 	# For values of hz more than 100, this has a maximum error of 2us.
! 
! 	leal	(%edx,%edx,2),%eax	# a = 3d
! 	leal	(%edx,%eax,4),%eax	# a = 4a + d = 13d
! 	movl	%eax,%ecx
! 	shll	$5,%ecx
! 	addl	%ecx,%eax		# a = 33a    = 429d
! 	leal	(%edx,%eax,8),%eax	# a = 8a + d = 3433d
! 	shrl	$12,%eax		# a = a/4096 = 3433d/4096
! 
! 	movl	_time,%edx	# get time.tv_sec
! 	addl	_time+4,%eax	# add time.tv_usec
  
! 	sti			# enable interrupts
  	
! 	cmpl	$1000000,%eax	# carry in timeval?
  	jb	3f
! 	subl	$1000000,%eax	# adjust usec
! 	incl	%edx		# bump sec
  	
! 3:	movl	4(%esp),%ecx	# load timeval pointer arg
! 	movl	%edx,(%ecx)	# tvp->tv_sec = sec
! 	movl	%eax,4(%ecx)	# tvp->tv_usec = usec
  
  	ret
- #endif
--- 40,112 ----
  #define	IRQ_BIT(irq_num)	(1 << ((irq_num) % 8))
  #define	IRQ_BYTE(irq_num)	((irq_num) / 8)
  
  ENTRY(microtime)
+ 	# clear registers and do whatever we can up front
+ 	pushl	%edi
+ 	pushl	%ebx
+ 	xorl	%edx,%edx
+ 	movl	$(TIMER_SEL0|TIMER_LATCH),%eax
+ 
  	cli				# disable interrupts
  
! 	# select timer 0 and latch its counter
! 	outb	%al,$TIMER_MODE
! 	inb	$IO_ICU1,%al		# as close to timer latch as possible
! 	movb	%al,%ch			# %ch is current ICU mask
  
! 	# Read counter value into [%al %dl], LSB first
  	inb	$TIMER_CNTR0,%al
! 	movb	%al,%dl			# %dl has LSB
! 	inb	$TIMER_CNTR0,%al	# %al has MSB
  
! 	# save state of IIR in ICU, and of ipending, for later perusal
! 	movb	_ipending + IRQ_BYTE(0),%cl	# %cl is interrupt pending
  
! 	# save the current value of _time
! 	movl	_time,%edi		# get time.tv_sec
! 	movl	_time+4,%ebx		#  and time.tv_usec
! 
! 	sti				# enable interrupts, we're done
! 
! 	# At this point we've collected all the state we need to
! 	# compute the time.  First figure out if we've got a pending
! 	# interrupt.  If the IRQ0 bit is set in ipending we've taken
! 	# a clock interrupt without incrementing time, so we bump
! 	# time.tv_usec by a tick.  Otherwise if the ICU shows a pending
! 	# interrupt for IRQ0 we (or the caller) may have blocked an interrupt
! 	# with the cli.  If the counter is not a very small value (3 as
! 	# a heuristic), i.e. in pre-interrupt state, we add a tick to
! 	# time.tv_usec
! 
! 	testb	$IRQ_BIT(0),%cl		# pending interrupt?
! 	jnz	1f			# yes, increment count
! 
! 	testb	$IRQ_BIT(0),%ch		# hardware interrupt pending?
! 	jz	2f			# no, continue
! 	testb	%al,%al			# MSB zero?
! 	jnz	1f			# no, add a tick
! 	cmpb	$3,%dl			# is this small number?
! 	jbe	2f			# yes, continue
! 1:	addl	_isa_timer_tick,%ebx	# add a tick
! 
! 	# We've corrected for pending interrupts.  Now do a table lookup
! 	# based on each of the high and low order counter bytes to increment
! 	# time.tv_usec
! 2:	movw	_isa_timer_msb_table(,%eax,2),%ax
! 	subw	_isa_timer_lsb_table(,%edx,2),%ax
! 	addl	%eax,%ebx		# add msb increment
! 
! 	# Normalize the struct timeval.  We know the previous increments
! 	# will be less than a second, so we'll only need to adjust accordingly
! 	cmpl	$1000000,%ebx	# carry in timeval?
  	jb	3f
! 	subl	$1000000,%ebx	# adjust usec
! 	incl	%edi		# bump sec
  	
! 3:	movl	12(%esp),%ecx	# load timeval pointer arg
! 	movl	%edi,(%ecx)	# tvp->tv_sec = sec
! 	movl	%ebx,4(%ecx)	# tvp->tv_usec = usec
  
+ 	popl	%ebx
+ 	popl	%edi
  	ret
*** arch/i386/i386/machdep.c	1996/09/28 21:22:03	1.1
--- arch/i386/i386/machdep.c	1996/09/28 21:23:34
***************
*** 920,930 ****
  	delay(5000000);		/* 5 seconds */
  }
  
! #ifdef HZ
  /*
!  * If HZ is defined we use this code, otherwise the code in
!  * /sys/i386/i386/microtime.s is used.  The other code only works
!  * for HZ=100.
   */
  void
  microtime(tvp)
--- 920,928 ----
  	delay(5000000);		/* 5 seconds */
  }
  
! #ifdef notdef
  /*
!  * The version of microtime() in microtime.s is now used always.
   */
  void
  microtime(tvp)
***************
*** 940,946 ****
  		tvp->tv_usec -= 1000000;
  	}
  }
! #endif /* HZ */
  
  /*
   * Clear registers on exec
--- 938,944 ----
  		tvp->tv_usec -= 1000000;
  	}
  }
! #endif /* notdef */
  
  /*
   * Clear registers on exec
*** arch/i386/isa/clock.c	1996/09/26 02:36:47	1.1
--- arch/i386/isa/clock.c	1996/09/28 21:25:18
***************
*** 148,166 ****
  	outb(IO_RTC+1, datum);
  }
  
  void
  startrtclock()
  {
  	int s;
  
  	findcpuspeed();		/* use the clock (while it's free)
  					to find the cpu speed */
  	/* initialize 8253 clock */
  	outb(TIMER_MODE, TIMER_SEL0|TIMER_RATEGEN|TIMER_16BIT);
  
  	/* Correct rounding will buy us a better precision in timekeeping */
! 	outb(IO_TIMER1, TIMER_DIV(hz) % 256);
! 	outb(IO_TIMER1, TIMER_DIV(hz) / 256);
  
  	/* Check diagnostic status */
  	if ((s = mc146818_read(NULL, NVRAM_DIAG)) != 0)	/* XXX softc */
--- 148,254 ----
  	outb(IO_RTC+1, datum);
  }
  
+ /*
+  * microtime() makes use of the following globals.  Note that isa_timer_tick
+  * may be redundant to the `tick' variable, but is kept here for stability.
+  * isa_timer_count is the countdown count for the timer.  timer_msb_table[]
+  * and timer_lsb_table[] are used to compute the microsecond increment
+  * for time.tv_usec in the follow fashion:
+  *
+  * time.tv_usec += isa_timer_msb_table[cnt_msb] - isa_timer_lsb_table[cnt_lsb];
+  */
+ #define	ISA_TIMER_MSB_TABLE_SIZE	128
+ 
+ u_long	isa_timer_tick;		/* the number of microseconds in a tick */
+ u_short	isa_timer_count;	/* the countdown count for the timer */
+ u_short	isa_timer_msb_table[ISA_TIMER_MSB_TABLE_SIZE];	/* timer->usec MSB */
+ u_short	isa_timer_lsb_table[256];	/* timer->usec conversion for LSB */
+ 
  void
  startrtclock()
  {
  	int s;
+ 	u_long tval;
+ 	u_long t, msb, lsb, quotient, remainder;
  
  	findcpuspeed();		/* use the clock (while it's free)
  					to find the cpu speed */
+ 
+ 	/*
+ 	 * Compute timer_tick from hz.  We truncate this value (i.e.
+ 	 * round down) to minimize the possibility of a backward clock
+ 	 * step if hz is not a nice number.
+ 	 */
+ 	isa_timer_tick = 1000000 / (u_long) hz;
+ 
+ 	/*
+ 	 * Compute timer_count, the count-down count the timer will be
+ 	 * set to.  We can't stand any number with an MSB larger than
+ 	 * TIMER_MSB_TABLE_SIZE will accomodate.  Also, correctly round
+ 	 * this by carrying an extra bit through the division.
+ 	 */
+ 	tval = (TIMER_FREQ * 2) / (u_long) hz;
+ 	tval = (tval / 2) + (tval & 0x1);
+ 	if ((tval / 256) >= ISA_TIMER_MSB_TABLE_SIZE
+ 	    || TIMER_FREQ > (8*1024*1024)) {
+ 		panic("startrtclock: TIMER_FREQ/HZ unsupportable");
+ 	}
+ 	isa_timer_count = (u_short) tval;
+ 
+ 	/*
+ 	 * Now compute the translation tables from timer ticks to
+ 	 * microseconds.  We go to some length to ensure all values
+ 	 * are rounded-to-nearest (i.e. +-0.5 of the exact values)
+ 	 * as this will ensure the computation
+ 	 *
+ 	 * isa_timer_msb_table[msb] - isa_timer_lsb_table[lsb]
+ 	 *
+ 	 * will produce a result which is +-1 usec away from the
+ 	 * correctly rounded conversion (in fact, it'll be exact about
+ 	 * 75% of the time, 1 too large 12.5% of the time, and 1 too
+ 	 * small 12.5% of the time).
+ 	 */
+ 	for (s = 0; s < 256; s++) {
+ 		/* LSB table is easy, just divide and round */
+ 		t = ((u_long) s * 1000000 * 2) / TIMER_FREQ;
+ 		isa_timer_lsb_table[s] = (u_short) ((t / 2) + (t & 0x1));
+ 
+ 		/* MSB table is zero unless the MSB is <= isa_timer_count */
+ 		if (s < ISA_TIMER_MSB_TABLE_SIZE) {
+ 			msb = ((u_long) s) * 256;
+ 			if (msb > tval) {
+ 				isa_timer_msb_table[s] = 0;
+ 			} else {
+ 				/*
+ 				 * Harder computation here, since multiplying
+ 				 * the value by 1000000 can overflow a long.
+ 				 * To avoid 64-bit computations we divide
+ 				 * the high order byte and the low order
+ 				 * byte of the numerator separately, adding
+ 				 * the remainder of the first computation
+ 				 * into the second.  The constraint on
+ 				 * TIMER_FREQ above should prevent overflow
+ 				 * here.
+ 				 */
+ 				msb = tval - msb;
+ 				lsb = msb % 256;
+ 				msb = (msb / 256) * 1000000;
+ 				quotient = msb / TIMER_FREQ;
+ 				remainder = msb % TIMER_FREQ;
+ 				t = ((remainder * 256 * 2)
+ 				    + (lsb * 1000000 * 2)) / TIMER_FREQ;
+ 				isa_timer_msb_table[s] = (u_short)((t / 2)
+ 				    + (t & 0x1) + (quotient * 256));
+ 			}
+ 		}
+ 	}
+ 
  	/* initialize 8253 clock */
  	outb(TIMER_MODE, TIMER_SEL0|TIMER_RATEGEN|TIMER_16BIT);
  
  	/* Correct rounding will buy us a better precision in timekeeping */
! 	outb(IO_TIMER1, isa_timer_count % 256);
! 	outb(IO_TIMER1, isa_timer_count / 256);
  
  	/* Check diagnostic status */
  	if ((s = mc146818_read(NULL, NVRAM_DIAG)) != 0)	/* XXX softc */
>Audit-Trail:
>Unformatted: