tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Time accounting on statclock()
"Cherry G. Mathew" <cherry%NetBSD.org@localhost> writes:
> Hello,
>
> So recently I've been looking at the interrupt subsystem for NetBSD/Xen,
> which is a little unusual in that several interrupts can batch
> together before a virtual cpu that is scheduled can be woken up to
> handle them.
>
> When this happens, a single callback handler is invoked, which
> demultiplexes all the pending interrupts, and passes on to every one of
> them the same trapframe that was passed to the callback. This is
> particularly interesting for the clock handler, because what would have
> been multiple ticks during any dormant period for the guest OS would be
> only called once, and so the handler invokes hardclock() a number of
> times to make up for lost time. Once again, it accounts for it using the
> exact same trapframe that it got from the demultiplexing callback
> handler - which if it were from userland, would account every tick to
> that particular userland process (which was presumably dormant for the
> duration lost).
>
> This got me thinking about deferred interrupts using spl(9), and how
> time is accounted for those. (I will confess that I have an insidious
> interest, but I won't divulge what that is right now).
>
> For eg: if a clock interrupt from userland got deferred as pending, even
> if it came in from userland (is this possible ?), because the current
> spl level was at, say, SPL_HIGH, it now seems to be the case that the
> system accounts for the delayed execution by charging the *entire* time
> (from the last hardclock(), inclusive of whatever was executing at
> SPL_HIGH) to the system and not the userland process, thus charging the
> time interval between when the last hardclock() was called, and when it
> was actually serviced, to the system instead of the user process that
> was originally interrupted.
>
> To emphasise my point, I've added a patch below that I think should
> reflect the accounting correctly.
>
> I'm not sure I've understood this correctly, so I'd appreciate it if
> someone who has a good understanding of this would be able to comment.
>
> Many Thanks,
>
> Cherry
>
>
>
> --- kern_clock.c.~1.138.~ 2018-09-21 11:28:02.792675611 +0000
> +++ kern_clock.c 2018-10-16 12:06:38.753987323 +0000
> @@ -352,6 +352,10 @@
> }
>
> if (CLKF_USERMODE(frame)) {
> + if (p == NULL) { /* Account deferred clock ticks to
> current user. */
> + p = l->l_proc;
> + mutex_spin_enter(&p->p_stmutex);
> + }
> KASSERT(p != NULL);
> if ((p->p_stflag & PST_PROFIL) && profsrc ==
> PROFSRC_CLOCK)
> addupc_intr(l, CLKF_PC(frame));
Here's a full patch implementing what I have in mind for native and
port-xen on arch/amd64
--
~~cherry
diff -r 5a1052452d40 sys/arch/amd64/amd64/amd64_trap.S
--- a/sys/arch/amd64/amd64/amd64_trap.S Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/amd64/amd64/amd64_trap.S Tue Oct 16 14:35:48 2018 +0000
@@ -628,6 +628,7 @@
*/
ENTRY(alltraps)
INTRENTRY
+ movq %rsp,CPUVAR(TF)
.Lalltraps_noentry:
STI(si)
diff -r 5a1052452d40 sys/arch/amd64/amd64/genassym.cf
--- a/sys/arch/amd64/amd64/genassym.cf Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/amd64/amd64/genassym.cf Tue Oct 16 14:35:48 2018 +0000
@@ -259,6 +259,7 @@
define CPU_INFO_CPUID offsetof(struct cpu_info, ci_cpuid)
define CPU_INFO_ISTATE offsetof(struct cpu_info, ci_istate)
define CPU_INFO_CC_SKEW offsetof(struct cpu_info, ci_data.cpu_cc_skew)
+define CPU_INFO_TF offsetof(struct cpu_info, ci_tf)
define ACPI_SUSPEND_GDT offsetof(struct cpu_info, ci_suspend_gdt)
define ACPI_SUSPEND_IDT offsetof(struct cpu_info, ci_suspend_idt)
diff -r 5a1052452d40 sys/arch/amd64/amd64/vector.S
--- a/sys/arch/amd64/amd64/vector.S Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/amd64/amd64/vector.S Tue Oct 16 14:35:48 2018 +0000
@@ -238,7 +238,6 @@
movl $IPL_CLOCK,CPUVAR(ILEVEL)
sti
pushq %rbx
- movq %rsp,%rsi
xorq %rdi,%rdi
call _C_LABEL(lapic_clockintr)
jmp _C_LABEL(Xdoreti)
@@ -252,12 +251,14 @@
pushq $0
pushq $T_ASTFLT
INTRENTRY
+ movq %rsp,CPUVAR(TF)
jmp _C_LABEL(Xhandle_x2apic_ltimer)
IDTVEC_END(intr_x2apic_ltimer)
IDTVEC(intr_lapic_ltimer)
pushq $0
pushq $T_ASTFLT
INTRENTRY
+ movq %rsp,CPUVAR(TF)
jmp _C_LABEL(Xhandle_lapic_ltimer)
IDTVEC_END(intr_lapic_ltimer)
TEXT_USER_END
@@ -346,7 +347,6 @@
movl IH_LEVEL(%rbx),%r12d ;\
cmpl %r13d,%r12d ;\
jle 7f ;\
- movq %rsp,%rsi ;\
movq IH_ARG(%rbx),%rdi ;\
movl %r12d,CPUVAR(ILEVEL) ;\
call *IH_FUN(%rbx) /* call it */ ;\
@@ -382,6 +382,7 @@
pushq $0 /* dummy error code */ ;\
pushq $T_ASTFLT /* trap # for doing ASTs */ ;\
INTRENTRY ;\
+ movq %rsp,CPUVAR(TF) ;\
jmp _C_LABEL(Xhandle_ ## name ## num) ;\
IDTVEC_END(intr_ ## name ## num) ;\
TEXT_USER_END
@@ -665,7 +666,6 @@
movq IS_HANDLERS(%r14),%rbx ;\
6: \
movq IH_ARG(%rbx),%rdi ;\
- movq %rsp,%rsi ;\
call *IH_FUN(%rbx) /* call it */ ;\
movq IH_NEXT(%rbx),%rbx /* next handler in chain */ ;\
testq %rbx,%rbx ;\
@@ -765,6 +765,8 @@
/* sti?? */
movq %rsp,%rdi
subq $8,%rdi; /* don't forget if_ppl */
+ movq %rdi,CPUVAR(TF)
+ xorq %rdi, %rdi /* We don't want to pass on as arg */
call do_hypervisor_callback
testb $SEL_RPL,TF_CS(%rsp)
jnz doreti_checkast
@@ -782,6 +784,8 @@
INTRENTRY
movq %rsp,%rdi
subq $8,%rdi; /* don't forget if_ppl */
+ movq %rdi,CPUVAR(TF)
+ xorq %rdi, %rdi /* We don't want to pass on as arg */
call xen_failsafe_handler
INTRFASTEXIT
/* jmp HYPERVISOR_iret */
diff -r 5a1052452d40 sys/arch/i386/i386/genassym.cf
--- a/sys/arch/i386/i386/genassym.cf Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/i386/i386/genassym.cf Tue Oct 16 14:35:48 2018 +0000
@@ -262,6 +262,7 @@
define CPU_INFO_NINTR offsetof(struct cpu_info, ci_data.cpu_nintr)
define CPU_INFO_CURPRIORITY offsetof(struct cpu_info, ci_schedstate.spc_curpriority)
define CPU_INFO_CC_SKEW offsetof(struct cpu_info, ci_data.cpu_cc_skew)
+define CPU_INFO_TF offsetof(struct cpu_info, ci_tf)
define CPU_INFO_VENDOR offsetof(struct cpu_info, ci_vendor[0])
diff -r 5a1052452d40 sys/arch/i386/i386/vector.S
--- a/sys/arch/i386/i386/vector.S Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/i386/i386/vector.S Tue Oct 16 14:35:48 2018 +0000
@@ -1027,9 +1027,9 @@
cmpl $ecrit,%eax
jb critical_region_fixup
11: pushl CPUVAR(ILEVEL)
- push %esp
+ movq %esp, CPUVAR(TF)
call do_hypervisor_callback
- add $8,%esp
+ add $4,%esp
xorl %eax,%eax
movb TF_CS(%esp),%cl
test $CHK_UPL,%cl /* slow return to ring 2 or 3 */
diff -r 5a1052452d40 sys/arch/x86/include/cpu.h
--- a/sys/arch/x86/include/cpu.h Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/x86/include/cpu.h Tue Oct 16 14:35:48 2018 +0000
@@ -167,7 +167,7 @@
const struct cpu_functions *ci_func; /* start/stop functions */
struct trapframe *ci_ddb_regs;
-
+ struct trapframe *ci_tf; /* Saved for clock handlers */
u_int ci_cflush_lsize; /* CLFLUSH insn line size */
struct x86_cache_info ci_cinfo[CAI_COUNT];
diff -r 5a1052452d40 sys/arch/x86/isa/clock.c
--- a/sys/arch/x86/isa/clock.c Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/x86/isa/clock.c Tue Oct 16 14:35:48 2018 +0000
@@ -188,7 +188,7 @@
void sysbeep(int, int);
static void tickle_tc(void);
-static int clockintr(void *, struct intrframe *);
+static int clockintr(void *);
int sysbeepdetach(device_t, int);
@@ -371,11 +371,11 @@
}
static int
-clockintr(void *arg, struct intrframe *frame)
+clockintr(void *arg)
{
tickle_tc();
- hardclock((struct clockframe *)frame);
+ hardclock((struct clockframe *)curcpu()->ci_tf);
#if NMCA > 0
if (MCA_system) {
diff -r 5a1052452d40 sys/arch/x86/x86/lapic.c
--- a/sys/arch/x86/x86/lapic.c Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/x86/x86/lapic.c Tue Oct 16 14:35:48 2018 +0000
@@ -90,7 +90,7 @@
#include <x86/x86/vmtvar.h> /* for vmt_hvcall() */
/* Referenced from vector.S */
-void lapic_clockintr(void *, struct intrframe *);
+void lapic_clockintr(void *);
static void lapic_delay(unsigned int);
static uint32_t lapic_gettick(void);
@@ -561,13 +561,13 @@
extern u_int i8254_get_timecount(struct timecounter *);
void
-lapic_clockintr(void *arg, struct intrframe *frame)
+lapic_clockintr(void *arg)
{
struct cpu_info *ci = curcpu();
ci->ci_lapic_counter += lapic_tval;
ci->ci_isources[LIR_TIMER]->is_evcnt.ev_count++;
- hardclock((struct clockframe *)frame);
+ hardclock((struct clockframe *)ci->ci_tf);
}
void
diff -r 5a1052452d40 sys/arch/xen/include/evtchn.h
--- a/sys/arch/xen/include/evtchn.h Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/xen/include/evtchn.h Tue Oct 16 14:35:48 2018 +0000
@@ -38,9 +38,7 @@
bool events_suspend(void);
bool events_resume(void);
-unsigned int evtchn_do_event(int, struct intrframe *);
-void call_evtchn_do_event(int, struct intrframe *);
-void call_xenevt_event(int);
+unsigned int evtchn_do_event(int);
int event_set_handler(int, int (*func)(void *), void *, int, const char *,
const char *);
int event_remove_handler(int, int (*func)(void *), void *);
diff -r 5a1052452d40 sys/arch/xen/include/hypervisor.h
--- a/sys/arch/xen/include/hypervisor.h Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/xen/include/hypervisor.h Tue Oct 16 14:35:48 2018 +0000
@@ -129,7 +129,7 @@
/* hypervisor.c */
struct intrframe;
struct cpu_info;
-void do_hypervisor_callback(struct intrframe *regs);
+void do_hypervisor_callback(void);
void hypervisor_prime_pirq_event(int, unsigned int);
void hypervisor_enable_event(unsigned int);
diff -r 5a1052452d40 sys/arch/xen/x86/hypervisor_machdep.c
--- a/sys/arch/xen/x86/hypervisor_machdep.c Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/xen/x86/hypervisor_machdep.c Tue Oct 16 14:35:48 2018 +0000
@@ -215,10 +215,9 @@
evt_do_hypervisor_callback(unsigned int port, unsigned int l1i,
unsigned int l2i, void *args)
{
- KASSERT(args != NULL);
+ KASSERT(args == NULL);
struct cpu_info *ci = curcpu();
- struct intrframe *regs = args;
#ifdef PORT_DEBUG
if (port == PORT_DEBUG)
@@ -226,7 +225,7 @@
#endif
if (evtsource[port]) {
ci->ci_idepth++;
- evtchn_do_event(port, regs);
+ evtchn_do_event(port);
ci->ci_idepth--;
}
#ifdef DOM0OPS
@@ -248,7 +247,7 @@
}
void
-do_hypervisor_callback(struct intrframe *regs)
+do_hypervisor_callback(void)
{
volatile shared_info_t *s = HYPERVISOR_shared_info;
struct cpu_info *ci;
@@ -273,7 +272,7 @@
evt_iterate_bits(&vci->evtchn_pending_sel,
s->evtchn_pending, s->evtchn_mask,
- evt_do_hypervisor_callback, regs);
+ evt_do_hypervisor_callback, NULL);
}
#ifdef DIAGNOSTIC
diff -r 5a1052452d40 sys/arch/xen/xen/clock.c
--- a/sys/arch/xen/xen/clock.c Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/xen/xen/clock.c Tue Oct 16 14:35:48 2018 +0000
@@ -76,7 +76,7 @@
static unsigned xen_get_timecount(struct timecounter *);
static int xen_rtc_get(struct todr_chip_handle *, struct timeval *);
static int xen_rtc_set(struct todr_chip_handle *, struct timeval *);
-static int xen_timer_handler(void *, struct clockframe *);
+static int xen_timer_handler(void *);
/*
* xen timecounter:
@@ -802,7 +802,7 @@
* The cookie is the pointer to struct cpu_info.
*/
static int
-xen_timer_handler(void *cookie, struct clockframe *frame)
+xen_timer_handler(void *cookie)
{
struct cpu_info *ci = curcpu();
uint64_t last, now, delta, next;
@@ -836,7 +836,7 @@
while (delta >= NS_PER_TICK) {
ci->ci_xen_hardclock_systime_ns += NS_PER_TICK;
delta -= NS_PER_TICK;
- hardclock(frame);
+ hardclock((struct clockframe *)ci->ci_tf);
if (__predict_false(delta >= NS_PER_TICK))
ci->ci_xen_missed_hardclock_evcnt.ev_count++;
}
diff -r 5a1052452d40 sys/arch/xen/xen/evtchn.c
--- a/sys/arch/xen/xen/evtchn.c Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/arch/xen/xen/evtchn.c Tue Oct 16 14:35:48 2018 +0000
@@ -305,12 +305,12 @@
unsigned int
-evtchn_do_event(int evtch, struct intrframe *regs)
+evtchn_do_event(int evtch)
{
struct cpu_info *ci;
int ilevel;
struct intrhand *ih;
- int (*ih_fun)(void *, void *);
+ int (*ih_fun)(void *);
uint32_t iplmask;
int i;
uint32_t iplbit;
@@ -386,7 +386,7 @@
iplmask &= ~IUNMASK(ci, ih->ih_level);
ci->ci_ilevel = ih->ih_level;
ih_fun = (void *)ih->ih_fun;
- ih_fun(ih->ih_arg, regs);
+ ih_fun(ih->ih_arg);
ih = ih->ih_evt_next;
}
mutex_spin_exit(&evtlock[evtch]);
@@ -410,7 +410,7 @@
KASSERT(ih->ih_cpu == ci);
sti();
ih_fun = (void *)ih->ih_fun;
- ih_fun(ih->ih_arg, regs);
+ ih_fun(ih->ih_arg);
cli();
}
hypervisor_enable_ipl(i);
diff -r 5a1052452d40 sys/kern/kern_clock.c
--- a/sys/kern/kern_clock.c Tue Oct 16 09:33:04 2018 +0000
+++ b/sys/kern/kern_clock.c Tue Oct 16 14:35:48 2018 +0000
@@ -352,6 +352,10 @@
}
if (CLKF_USERMODE(frame)) {
+ if (p == NULL) { /* Account deferred clock ticks to current user. */
+ p = l->l_proc;
+ mutex_spin_enter(&p->p_stmutex);
+ }
KASSERT(p != NULL);
if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
addupc_intr(l, CLKF_PC(frame));
Home |
Main Index |
Thread Index |
Old Index