Port-xen archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

xen-4.5 and NetBSD/amd64-7.99.5 can't start netbsd-5-XEN3PAE_DOMU



So, I've been running a couple of Dell PE2950s with xen-4.5 and
NetBSD/amd64-7.99.5 dom0+domUs, and NetBSD/amd64-5.x domUs, so far quite
happily since the end of February.

Today I tried to start a NetBSD/i386-5.x domU with the INSTALL_XEN3PAE
kernel.  This failed badly, but not fatally, causing Xen to go into a
loop trying to re-start the new domain after each time it crashed.
(presumably because the config specifies on_crash="coredump-restart",
which I guess isn't so wise a default to use when first testing
something!)

Shouldn't this have worked though, i.e. booting a XEN3PAE i386 kernel on
an amd64 xen box???  Should I re-compose this into a send-pr?

Here's what happened:

# xl create -c /usr/pkg/etc/xen/pkg-i386.conf
Parsing config from /usr/pkg/etc/xen/pkg-i386.conf
xenconsole: Could not read tty from store: No such file or directory
libxl: error: libxl_exec.c:118:libxl_report_child_exitstatus: console child [29130] exited with error status 2

On the main system console the following output repeated for each
restart attempt.

xbd backend: attach device vg1-lv30 (size 12582912) for domain 64
xbd backend: attach device vg1-lv32 (size 4194304) for domain 64
xbd backend: attach device vg1-lv33 (size 104857600) for domain 64
xbd backend: attach device vg1-lv34 (size 104857600) for domain 64
xbd backend: attach device vnd2d (size 1385068) for domain 64
xbd backend: attach device vg1-lv31 (size 8388608) for domain 64
(XEN) d64v0: unhandled page fault (ec=0000)
(XEN) Pagetable walk from 00000000bf800000:
(XEN)  L4[0x000] = 0000000601f4f027 0000000000000894
(XEN)  L3[0x002] = 0000000601f4c027 0000000000000897
(XEN)  L2[0x1fc] = 0000000601f4e001 0000000000000895 
(XEN)  L1[0x000] = 0000000000000000 ffffffffffffffff
(XEN) domain_crash_sync called from entry.S: fault at ffff82d080214560 compat_create_bounce_frame+0xc6/0xde
(XEN) Domain 64 (vcpu#0) crashed on cpu#5:
(XEN) ----[ Xen-4.5.0  x86_64  debug=n  Not tainted ]----
(XEN) CPU:    5
(XEN) RIP:    0009:[<00000000c03bcfce>]
(XEN) RFLAGS: 0000000000000297   EM: 1   CONTEXT: pv guest
(XEN) rax: 0000000000000000   rbx: 0000000000000000   rcx: 0000000000000003
(XEN) rdx: 00000000bf800000   rsi: 0000000000000001   rdi: 0000000000000000
(XEN) rbp: 00000000c09a2cbc   rsp: 00000000c09a2c64   r8:  00000037fedf3f00
(XEN) r9:  00000037fedf3f00   r10: 5000000000000000   r11: 0000000000000000
(XEN) r12: ffff83007f8fa000   r13: 0000000000000005   r14: ffff82d0802e2620
(XEN) r15: ffff830076019000   cr0: 000000008005003b   cr4: 00000000000026f4
(XEN) cr3: 000000084f916000   cr2: 00000000bf800000
(XEN) ds: 0011   es: 0011   fs: 0031   gs: 0011   ss: 0011   cs: 0009
(XEN) Guest stack trace from esp=c09a2c64:
(XEN)   00000000 c03bcfce 00010009 00010097 00000006 20202000 45202020 30343435
(XEN)   000fa100 00000000 000009a4 00000003 00000000 bf800000 000009a4 00000000
(XEN)   005657f0 00000000 00000000 00000009 c0102c10 00000001 c09a2d3c c03b94b5
(XEN)   00000000 00000000 00000000 00000003 000009a4 00000000 000fa100 00000000
(XEN)   00000000 00000000 00000000 00000000 00000010 00000000 000f975c 00000000
(XEN)   00010000 00000000 000009a4 00000000 00000000 00000001 00010000 00000000
(XEN)   00000000 00000000 00000000 756e6547 009a3000 c04de200 00000000 c0100063
(XEN)   009a3000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00995000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
(XEN)   00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
xbd backend: detach device vg1-lv30 for domain 64
xbd backend: detach device vg1-lv31 for domain 64
xbd backend: detach device vg1-lv32 for domain 64
xbd backend: detach device vg1-lv33 for domain 64
xbd backend: detach device vg1-lv34 for domain 64
xbd backend: detach device vnd2d for domain 64


At first I tried to "xl pause" a domain as it tried to start, but either
I missed catching it at the right time, or this didn't work.

Then I tried "xl destroy" -- eventually when I typed the right domain
number at the right time it worked, or at least the Xen kernel stopped
trying to restart the crashing domain.

However "xenstored" crashed:

xenful# gdb /usr/pkg/sbin/xenstored /xenstored.core
GNU gdb (GDB) 7.7.1
Copyright (C) 2014 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64--netbsd".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from /usr/pkg/sbin/xenstored...done.
[New process 1]

warning: Can't read pathname for load map: Unknown error: 4294967295.
Core was generated by `xenstored'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x0000000000403d94 in is_child (child=child@entry=0x40e02f "@releaseDomain", 
    parent=0x7f7ff7b162c0 <error: Cannot access memory at address 0x7f7ff7b162c0>) at xenstored_core.c:393
393             unsigned int len = strlen(parent);
(gdb) bt
#0  0x0000000000403d94 in is_child (child=child@entry=0x40e02f "@releaseDomain", 
    parent=0x7f7ff7b162c0 <error: Cannot access memory at address 0x7f7ff7b162c0>) at xenstored_core.c:393
#1  0x0000000000405880 in fire_watches (conn=conn@entry=0x0, name=name@entry=0x40e02f "@releaseDomain", 
    recurse=recurse@entry=false) at xenstored_watch.c:101
#2  0x0000000000405e6b in destroy_domain (_domain=_domain@entry=0x7f7ff7b1f710) at xenstored_domain.c:207
#3  0x0000000000407bc4 in talloc_free (ptr=ptr@entry=0x7f7ff7b1f710) at talloc.c:574
#4  0x0000000000407c0b in talloc_free_children (ptr=0x7f7ff7b12dd0) at talloc.c:525
#5  talloc_free (ptr=0x7f7ff7b12dd0) at talloc.c:583
#6  0x0000000000404c3b in consider_message (conn=0x7f7ff7b39cb0) at xenstored_core.c:1312
#7  handle_input (conn=conn@entry=0x7f7ff7b39cb0) at xenstored_core.c:1356
#8  0x0000000000405635 in main (argc=<optimized out>, argv=<optimized out>) at xenstored_core.c:2127
(gdb)


The existing domains are all still running, but I'm guessing from what
I've read I will have to reboot the whole system in order to get
xenstored back into the right state.


Of course without xenstored running I've got a terminal with a stuck
"xl".  It is really extremely annoying when a process cannot be killed
or even stopped.

# xl list
Name                                        ID   Mem VCPUs      State   Time(s)
load: 1.04  cmd: xl 3296 [rplq] 0.00u 0.00s 0% 2764k
^?^C^?^Zload: 0.00  cmd: xl 3296 [rplq] 0.00u 0.00s 0% 2764k

Starting xenstored manually caused it to print:

	 WARNING: Failed to open connection to gnttab

and to syslog:

Apr 21 23:30:07 xenful xenstored: Checking store ...
Apr 21 23:30:07 xenful xenstored: Checking store complete.
Apr 21 23:30:07 xenful xenstored: Checking store ...
Apr 21 23:30:07 xenful xenstored: Checking store complete.

and now "xl list" sort of works:

# xl list
Name                                        ID   Mem VCPUs      State   Time(s)
(null)                                       0  2048     1     r-----   30004.3
(null)                                       5  8000     1     -b----  101736.8
(null)                                      12  2000     1     -b----   38989.3


FYI, here's the config file:

# -*- mode: python; -*-

# Guest name
name = "pkg-i386"

# 128-bit UUID for the domain as a hexadecimal number.
# Use "uuidgen" to generate one if required.
# The default behavior is to generate a new UUID each time the guest is started.
uuid = "b6b307f0-c760-42be-82b9-c359b462b3df"

# Kernel image to boot
#kernel = "/netbsd-5.2_STABLE-i386-XEN3PAE_DOMU"
kernel = "/netbsd-5.2_STABLE-i386-INSTALL_XEN3PAE_DOMU"

# Kernel command line options
# can specify "bootdev=xbd0" instead
# (When cmdline="STRING" is set, root="STRING" and extra="STRING" will be ignored.)
# xxx should be able to specify "root=xbd0a"
extra = "root=xbd0a"
#extra = "-s root=xbd0"

#on_watchdog="coredump-restart"
on_watchdog="preserve"
on_crash="coredump-restart"

# Initial memory allocation (MB)
memory = 4000

# Maximum memory (MB)
# If this is greater than `memory' then the slack will start ballooned
# (this assumes guest kernel support for ballooning)
maxmem = 4000

# Number of VCPUS
# xxx will only have effect with netbsd-6 or newer
vcpus = 2

# Network devices
# A list of 'vifspec' entries as described in
# docs/misc/xl-network-configuration.markdown
vif = [ 'bridge=bridge0' ]

# Disk Devices
# A list of `diskspec' entries as described in
# docs/misc/xl-disk-configuration.txt
#
# lvm lvcreate -L 6G -n lv30 vg1	# /
# lvm lvcreate -L 4G -n lv31 vg1	# swap
# lvm lvcreate -L 2G -n lv32 vg1	# /var
# lvm lvcreate -L 50G -n lv33 vg1	# /usr/pkg
# lvm lvcreate -L 50G -n lv34 vg1	# /build
#
disk = [
	 'format=raw, vdev=0x0, access=rw, target=/dev/mapper/vg1-lv30',
	 'format=raw, vdev=0x1, access=rw, target=/dev/mapper/vg1-lv31',
	 'format=raw, vdev=0x2, access=rw, target=/dev/mapper/vg1-lv32',
	 'format=raw, vdev=0x3, access=rw, target=/dev/mapper/vg1-lv33',
	 'format=raw, vdev=0x4, access=rw, target=/dev/mapper/vg1-lv34',
	 'format=raw, vdev=0x5, access=ro, devtype=cdrom, target=/future/build/images/NetBSD-5.2_STABLE-i386.iso'
	]


-- 
						Greg A. Woods
						Planix, Inc.

<woods%planix.com@localhost>       +1 250 762-7675        http://www.planix.com/

Attachment: pgp80NojrUKng.pgp
Description: PGP signature



Home | Main Index | Thread Index | Old Index