Subject: Re: replacing page mappings behind uvm
To: None <bouyer@antioche.eu.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 12/01/2007 19:04:59
> I played with this, it seems to work. See the attached privcmd.c
> There is a wired entry accounting issue I still need to solve,
> which I think is related to MMAPBATCH, but it looks promising.

i wonder how many of these objects get created with xentools' usage pattern.

> 	case IOCTL_PRIVCMD_MMAP:

> 		for (i = 0; i < mcmd->num; i++) {

> 			va = mentry.va & ~PAGE_MASK;
> 			ma = mentry.mfn <<  PGSHIFT; /* XXX ??? */
> 			error  = privcmd_map_obj(vmm, va,
> 			    (mentry.npages << PGSHIFT), &obj);

do you need an object per mentry?
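
eg. a single object per ioctl might be enough.  a rough sketch, where
privcmd_alloc_obj(), the "off" bookkeeping, and the extra offset argument
to privcmd_map_obj() are hypothetical and "totpages" would be the sum of
mentry.npages:

	obj = privcmd_alloc_obj(totpages);	/* hypothetical helper */
	obj->domid = mcmd->dom;
	for (i = 0, off = 0; i < mcmd->num; i++) {
		/* ... copyin of mentry as in your patch ... */
		for (j = 0; j < mentry.npages; j++)
			obj->maddr[(off >> PGSHIFT) + j] =
			    ((paddr_t)mentry.mfn + j) << PGSHIFT;
		error = privcmd_map_obj(vmm, mentry.va & ~PAGE_MASK,
		    mentry.npages << PGSHIFT, obj, off);
		if (error)
			return error;
		off += (off_t)mentry.npages << PGSHIFT;
	}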

> 			if (error)
> 				return error;
> 
> 			for (j = 0; j < mentry.npages; j++) {
> 				//printf("remap va 0x%lx to 0x%lx\n", va, ma);
> 				maddr[j] = ma;
> 				ma += PAGE_SIZE;
> 			}
> 			simple_lock(&obj->uobj.vmobjlock);
> 			obj->maddr = maddr;
> 			obj->npages = mentry.npages;
> 			obj->domid = mcmd->dom;
> 			simple_unlock(&obj->uobj.vmobjlock);

are you assuming callers are serialized and bug-free?
otherwise the object should be set up before being mapped.
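
eg. something like this, assuming privcmd_map_obj() is changed to take an
already-initialized object (privcmd_alloc_obj() is a hypothetical helper
that would do the allocation and UVM_OBJ_INIT):

	obj = privcmd_alloc_obj();	/* hypothetical */
	obj->maddr = maddr;
	obj->npages = mentry.npages;
	obj->domid = mcmd->dom;
	/* the object is complete; only now enter it into the map */
	error = privcmd_map_obj(vmm, va, mentry.npages << PGSHIFT, obj);
	if (error) {
		obj->uobj.pgops->pgo_detach(&obj->uobj);
		return error;
	}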

> 		/* force mapping to be loaded */
> 		if ((error = uvm_map_pageable(vmm, va0,
> 		    va0 + (pmb->num << PGSHIFT), false, 0)) != 0) {
> 			printf("uvm_map_pageable error %d\n", error);
> 			return(error);
> 		}

is this uvm_map_pageable only for debug?

> 			/* XXX for proper ptp accountings */
> 			pmap_remove(ufi->orig_map->pmap, vaddr, 
> 			    vaddr + PAGE_SIZE);
> 			pobj->maddr[maddr_i] |= 0xF0000000000;

does this error happen during normal operation?

> 			error = 0;
> 		}
> 	}
> 	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL);
> 	pmap_update(ufi->orig_map->pmap);
> 	return (error);
> }
> 
> static int
> privcmd_map_obj(struct vm_map *map, vaddr_t start, off_t size,
> 	struct privcmd_object **nobj)
> {
> 	struct vm_map_entry *dead_entries;
> 	struct uvm_object *uobj;
> 	int error;
> 	uvm_flag_t uvmflag;
> 	vaddr_t newstart = start;
> 	vm_prot_t prot;
> 
> 	vm_map_lock(map);
> 	/* get protections. This also check for validity of mapping */
> 	if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE))
> 		prot = VM_PROT_READ | VM_PROT_WRITE;
> 	else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ))
> 		prot = VM_PROT_READ;
> 	else {
> 		printf("uvm_map_checkprot 0x%lx -> 0x%lx "
> 		    "failed\n",
> 		    start, start + size - 1);
> 		vm_map_unlock(map);
> 		return EINVAL;
> 	}
> 	/* remove current entries */
> 	uvm_unmap_remove(map, start, start + size, &dead_entries, NULL, 0);
> 	if (dead_entries != NULL)
> 		uvm_unmap_detach(dead_entries, 0);
> 
> 	vm_map_unlock(map);

why do the checkprot checks here?

please don't use uvm_unmap_remove and uvm_unmap_detach here.
they are uvm-internal.
besides, normally uvm_unmap_detach should not be used with the map locked.
(it's the main reason why it's separated from uvm_unmap_remove.)
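
ie. if you open-code it at all, the usual pattern drops the lock first
(uvm_unmap() does essentially this for you):

	vm_map_lock(map);
	uvm_unmap_remove(map, start, start + size, &dead_entries, NULL, 0);
	vm_map_unlock(map);
	/* detach only after the map lock has been dropped */
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);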

if the process is multithreaded (is xentools multithreaded?),
you need to replace the mapping atomically.
otherwise the va range can be taken by another thread between the
unmap and the new map.
uvm_map_replace does something similar, but it doesn't deal with
arbitrary existing mappings.
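
ie. the window is between your vm_map_unlock() and the later uvm_map():

	vm_map_unlock(map);
	/*
	 * another thread of the same process can mmap(2) here and be
	 * given the now-free range [start, start + size) ...
	 */
	error = uvm_map(map, &newstart, size, uobj, 0, 0, uvmflag);
	/* ... and this UVM_FLAG_FIXED mapping can then fail. */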

> 	uobj = malloc(sizeof(struct privcmd_object), M_TEMP, M_WAITOK);
> 	if (uobj == NULL)
> 		return ENOMEM;

please use kmem_alloc(9) unless you have a reason not to.
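
ie. with <sys/kmem.h> (KM_SLEEP can't fail, so the NULL check can go
away as well):

	uobj = kmem_alloc(sizeof(struct privcmd_object), KM_SLEEP);
	...
	/* and in the detach path */
	kmem_free(uobj, sizeof(struct privcmd_object));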

> 	UVM_OBJ_INIT(uobj, &privpgops, 1);
> 	uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL,
> 	    UVM_FLAG_FIXED | UVM_FLAG_NOMERGE);
> 	error = uvm_map(map, &newstart, size, uobj, 0, 0, uvmflag);
> 
> 	if (error) {
> 		if (uobj)
> 			uobj->pgops->pgo_detach(uobj);
> 		return error;
> 	}
> 	if (newstart != start) {
> 		printf("uvm_map didn't give us back our vm space\n");
> 		return EINVAL;
> 	}

it should be a KASSERT, given UVM_FLAG_FIXED.
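
ie.

	KASSERT(newstart == start);

instead of the printf + return EINVAL.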

YAMAMOTO Takashi