Port-xen archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[dump-core] "No VCPU context could be grabbed" (was: Re: memory_op hypercall failure: XENMEM_maximum_gpfn)



Jean-Yves Migeon wrote:
So, here is what the code looks like, after some tryouts. See the diff.

Just for information; this is not for a commit. Its current place is totally unsuitable (hypervisor_attach), but as I am using memory allocators, I put it late in the boot-up process.

After applying it, it looks like the error with xm dump-core disappeared; now I am getting a VCPU error:

current# xm dump-core 1 /root/core
Dumping core of domain: 1 ...
Error: Failed to dump core: (1, 'Internal error', 'No VCPU context could be grabbed (14 = Bad address)')
Usage: xm dump-core [-L|--live] [-C|--crash] <Domain> [Filename]
...

I do not presently understand what such an error means (failed to access vcpu_info[], huh?); I will investigate a bit. Debugging xend to see what raises this error ends in a panic() (see kern/36183).
So this error is erratic. There are two chances in three that the dump-core operation ends successfully. The remaining third of the time, it ends with a "context couldn't be grabbed" error.

It looks like the lock_pages() code in sysutils/xentools/work/xen-3.1.4/tools/libxc/xc_private.c is at fault there (see patch-ad):

int lock_pages(void *addr, size_t len)
{
      int e = 0;
+      void *laddr = (void *)((u_long)addr & ~0xfffUL);
+      size_t llen = (len + 0xfffUL) & ~0xfffUL;
#ifndef __sun__
-      e = mlock(addr, len);
+      e = mlock(laddr, llen);
#endif
      return (e);
}

(and its counterpart unlock_pages() ).

As I said in a previous mail, the roundings do not exactly match the ones used by NetBSD (this one rounds down, while NetBSD rounds up for mlock(2)).

Removing them in patch-ad for lock_pages and unlock_pages makes the xm dump-core operation work fine (well, it didn't crash during a whole 20 min loop, whereas before I didn't have to wait more than 10s for it to crash):

int lock_pages(void *addr, size_t len)
{
      int e = 0;
#ifndef __sun__
     e = mlock(addr, len);
#endif
      return (e);
}

void unlock_pages(void *addr, size_t len)
{
#ifndef __sun__
   safe_munlock(addr, len);
#endif
}


So why was it needed? Well, the xm dump-core shares operations with save/restore, so if xm dump-core does not work properly, save/restore won't either.

Small remark: the core generated by dump-core is not the same (content-wise) as one generated by savecore(8), so there is no chance that you could use it for debugging like a traditional core file with gdb. To use it, we need a patched gdbserver version for Xen dumps (look for it in the xentools3 pkg). However, I did not manage to check the correctness of the dump, as gdbserver-xen looks Linux-centric and does not (currently) work with NetBSD. It could work after porting some other stuff like crash(8), but that is a whole other story.


Please find enclosed patch-ad and diff (for arch.p2m_table).

Questions remaining:
- is there any other possibility than using memory allocators for the diff? xpmap bootstrap happens early during boot up, and I don't think that using malloc/kmem_alloc stuff before mm init is possible (I know that putting that stuff in hypervisor is the ugliest thing I could have done, but that is a try :o )
- regarding debugging of domU (for my own education): what are you using? Serial lines with options KGDB?


Thanks for your attention :)

Cheers,

--
Jean-Yves Migeon
jean-yves.migeon%espci.fr@localhost

Index: hypervisor.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/hypervisor.c,v
retrieving revision 1.36
diff -u -r1.36 hypervisor.c
--- hypervisor.c        16 Apr 2008 18:41:48 -0000      1.36
+++ hypervisor.c        25 May 2008 17:35:25 -0000
@@ -382,6 +382,56 @@
                ctrl_if_register_receiver(CMSG_SHUTDOWN,
                    hypervisor_shutdown_handler, CALLBACK_IN_BLOCKING_CONTEXT);
 #endif
+
+#ifdef XEN3
+
+#define vtomfn(va) (vtomach(va) >> PAGE_SHIFT)
+
+       /*
+        * pfn_to_mfn_frame_list_list initialization
+        * required by Xen tools for dump-core/save/restore
+        * These lists consist of 3 layers of page frames, each level
+        * referencing its lower ones through their mfn, and providing
+        * a physical to machine mapping
+        */ 
+        int i, j;
+        int fpp;
+        unsigned long cur_pfn, max_pfn;
+        unsigned long * l3_p2m_page;
+        unsigned long * l2_p2m_page;
+
+        max_pfn = xen_start_info.nr_pages;
+        /* number of frames referenced in a page */
+        fpp = PAGE_SIZE / sizeof(unsigned long);
+        l3_p2m_page = kmem_alloc(PAGE_SIZE, KM_NOSLEEP);
+        if (l3_p2m_page == NULL)
+                panic("Could not allocate memory for l3_p2m_page");
+
+        for (i = 0; i < fpp; i++) {
+                l2_p2m_page = kmem_alloc(PAGE_SIZE, KM_NOSLEEP);
+                if (l2_p2m_page == NULL)
+                        panic("Could not allocate memory for l2_p2m_page");
+                l3_p2m_page[i] = vtomfn((vaddr_t)l2_p2m_page);
+
+                for (j = 0; j < fpp; j++) {
+                       /*
+                        * index of the pseudo L1 page we are referencing
+                        * in L2 page
+                        */
+                        cur_pfn = (i + j) * fpp;
+                        if (cur_pfn >= max_pfn)
+                               goto exit_p2m;
+                        l2_p2m_page[j] = 
vtomfn((vaddr_t)&xpmap_phys_to_machine_mapping[cur_pfn]);
+                }
+        }
+
+exit_p2m:
+        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = 
vtomfn((vaddr_t)l3_p2m_page);
+        HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+
+#endif
 }
 
 static int
$NetBSD: patch-ad,v 1.1.1.1 2007/06/14 19:39:46 bouyer Exp $

--- libxc/xc_private.c.orig     2007-05-18 16:45:21.000000000 +0200
+++ libxc/xc_private.c  2007-05-27 13:43:06.000000000 +0200
@@ -10,7 +10,12 @@
 #include <stdarg.h>
 #include <pthread.h>
 
-static __thread xc_error last_error = { XC_ERROR_NONE, ""};
+static pthread_key_t last_error_pkey;
+static pthread_once_t last_error_pkey_once = PTHREAD_ONCE_INIT;
+
+static pthread_key_t errbuf_pkey;
+static pthread_once_t errbuf_pkey_once = PTHREAD_ONCE_INIT;
+
 #if DEBUG
 static xc_error_handler error_handler = xc_default_error_handler;
 #else
@@ -23,15 +28,44 @@
     fprintf(stderr, "ERROR %s: %s\n", desc, err->message);
 }
 
+static void
+_xc_clean_last_error(void *m)
+{
+       if (m)
+               free(m);
+       pthread_setspecific(last_error_pkey, NULL);
+}
+
+static void
+_xc_init_last_error(void)
+{
+               pthread_key_create(&last_error_pkey, _xc_clean_last_error);
+}
+static xc_error *
+_xc_get_last_error(void) {
+       xc_error *last_error;
+
+       pthread_once(&last_error_pkey_once, _xc_init_last_error);
+
+       last_error = pthread_getspecific(last_error_pkey);
+       if (last_error == NULL) {
+               last_error = malloc(sizeof(xc_error));
+               pthread_setspecific(last_error_pkey, last_error);
+       }
+       return last_error;
+}
+
+
 const xc_error *xc_get_last_error(void)
 {
-    return &last_error;
+    return _xc_get_last_error();
 }
 
 void xc_clear_last_error(void)
 {
-    last_error.code = XC_ERROR_NONE;
-    last_error.message[0] = '\0';
+    xc_error *last_error = _xc_get_last_error();
+    last_error->code = XC_ERROR_NONE;
+    last_error->message[0] = '\0';
 }
 
 const char *xc_error_code_to_desc(int code)
@@ -64,9 +98,10 @@
 
 static void _xc_set_error(int code, const char *msg)
 {
-    last_error.code = code;
-    strncpy(last_error.message, msg, XC_MAX_ERROR_MSG_LEN - 1);
-    last_error.message[XC_MAX_ERROR_MSG_LEN-1] = '\0';
+    xc_error *last_error = _xc_get_last_error();
+    last_error->code = code;
+    strncpy(last_error->message, msg, XC_MAX_ERROR_MSG_LEN - 1);
+    last_error->message[XC_MAX_ERROR_MSG_LEN-1] = '\0';
 }
 
 void xc_set_error(int code, const char *fmt, ...)
@@ -84,23 +119,29 @@
 
     errno = saved_errno;
 
-    if ( error_handler != NULL )
-        error_handler(&last_error);
+    if ( error_handler != NULL ) {
+       xc_error *last_error = _xc_get_last_error();
+        error_handler(last_error);
+    }
 }
 
 int lock_pages(void *addr, size_t len)
 {
       int e = 0;
+      void *laddr = (void *)((u_long)addr & ~0xfffUL);
+      size_t llen = (len + 0xfffUL) & ~0xfffUL;
 #ifndef __sun__
-      e = mlock(addr, len);
+      e = mlock(laddr, llen);
 #endif
       return (e);
 }
 
 void unlock_pages(void *addr, size_t len)
 {
+    void *laddr = (void *)((u_long)addr & ~0xfffUL);
+    size_t llen = (len + 0xfffUL) & ~0xfffUL;
 #ifndef __sun__
-    safe_munlock(addr, len);
+    safe_munlock(laddr, llen);
 #endif
 }
 
@@ -466,20 +507,43 @@
     return new_mfn;
 }
 
+static void
+_xc_clean_errbuf(void * m)
+{
+       if (m)
+               free(m);
+       pthread_setspecific(errbuf_pkey, NULL);
+}
+       
+static void
+_xc_init_errbuf(void)
+{
+       pthread_key_create(&errbuf_pkey, _xc_clean_errbuf);
+}
+
 char *safe_strerror(int errcode)
 {
-    static __thread char errbuf[32];
+#define XS_BUFSIZE 32
+    char *errbuf;
     static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
     char *strerror_str;
 
+    pthread_once(&errbuf_pkey_once, _xc_init_errbuf);
+
+    errbuf = pthread_getspecific(errbuf_pkey);
+    if (errbuf == NULL) {
+       errbuf = malloc(XS_BUFSIZE);
+       pthread_setspecific(errbuf_pkey, errbuf);
+    }
+    
     /*
      * Thread-unsafe strerror() is protected by a local mutex. We copy
      * the string to a thread-private buffer before releasing the mutex.
      */
     pthread_mutex_lock(&mutex);
     strerror_str = strerror(errcode);
-    strncpy(errbuf, strerror_str, sizeof(errbuf));
-    errbuf[sizeof(errbuf)-1] = '\0';
+    strncpy(errbuf, strerror_str, XS_BUFSIZE);
+    errbuf[XS_BUFSIZE-1] = '\0';
     pthread_mutex_unlock(&mutex);
 
     return errbuf;


Home | Main Index | Thread Index | Old Index