tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

uvm_swap v2



Dear folks,

After some feedback, here is a revised version.

With regards,
Reinoud

? sys/uvm/uvm_swap.c.1st
Index: sys/uvm/uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.136
diff -u -p -r1.136 uvm_pager.c
--- sys/uvm/uvm_pager.c	3 May 2026 16:02:37 -0000	1.136
+++ sys/uvm/uvm_pager.c	1 Jul 2026 20:43:46 -0000
@@ -547,16 +547,6 @@ uvm_aio_aiodone(struct buf *bp)
 		    (uintptr_t)pgs[i], 0, 0);
 	}
 
-#if defined(VMSWAP)
-	if (__predict_false(error != 0) &&
-	    ((pgs[0]->flags & PG_SWAPBACKED) != 0)) {
-		int swslot = uvm_page_swapslot(pgs[0]);
-
-		KASSERT(swslot > 0);
-		uvm_swap_decrypt_pages(swslot, bp->b_data, npages);
-	}
-#endif
-
 	uvm_pagermapout((vaddr_t)bp->b_data, npages);
 
 	uvm_aio_aiodone_pages(pgs, npages, write, error);
Index: sys/uvm/uvm_swap.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.233
diff -u -p -r1.233 uvm_swap.c
--- sys/uvm/uvm_swap.c	3 Jun 2026 15:00:06 -0000	1.233
+++ sys/uvm/uvm_swap.c	1 Jul 2026 20:43:46 -0000
@@ -218,6 +218,7 @@ static int uvm_swap_io(struct vm_page **
 static void uvm_swap_genkey(struct swapdev *);
 static void uvm_swap_encryptpage(struct swapdev *, void *, int);
 static void uvm_swap_decryptpage(struct swapdev *, void *, int);
+static void uvm_swap_encrypt_pages(int startslot, void *p, int npages);
 
 /*
  * uvm_swap_init: init the swap system data structures and locks
@@ -1335,40 +1336,47 @@ iobuf_redirect(struct buf *bp, struct vn
 	bp->b_objlock = vp->v_interlock;
 }
 
-struct sw_physio_decrypt_context {
-	void *orig_buf;
-	void *orig_private;
-	void (*orig_iodone)(struct buf *);
-	int swslot;
-};
 
+/* handle the nestio buffer */
 static void
-sw_physio_decrypt_iodone(struct buf *bp)
+sw_physio_decrypt_iodone(struct buf *nbp)
 {
-	struct sw_physio_decrypt_context *ctx = bp->b_private;
-	void (*cb)(struct buf *bp) = ctx->orig_iodone;
-	size_t npages = bp->b_bcount >> PAGE_SHIFT;
-
-	KASSERT(ctx->swslot > 0);
-	KASSERT(npages << PAGE_SHIFT == bp->b_bcount);
-	if (bp->b_error == 0) {
-		if (bp->b_resid == 0) {
-			uvm_swap_decrypt_pages(ctx->swslot, bp->b_data,
-					       npages);
-			memcpy(ctx->orig_buf, (uint8_t *)bp->b_data,
-			       bp->b_bcount);
+	struct buf *bp = nbp->b_private;	/* parent buffer */
+	int swslot = (intptr_t) nbp->b_private2;
+	size_t npages = nbp->b_bcount >> PAGE_SHIFT;
+
+	KASSERT(swslot > 0);
+	KASSERT(npages << PAGE_SHIFT == nbp->b_bcount);
+
+	/*
+	 * always decrypt, even after a write, as the backing page(s) could
+	 * still be in use after the write.
+	 */
+	uvm_swap_decrypt_pages(swslot, nbp->b_data, npages);
+
+	/* copy data if we used a bounce buffer */
+	if (nbp->b_error == 0) {
+		if (nbp->b_resid == 0) {
+			if (nbp->b_data != bp->b_data)
+				memcpy(bp->b_data, (uint8_t *)nbp->b_data,
+				       nbp->b_bcount);
 		} else {
-			bp->b_error = EIO;
+			/* XXX we could zero the buffer */
+			nbp->b_error = EIO;
 		}
 	}
-	kmem_intr_free(bp->b_data, bp->b_bcount);
-	bp->b_data = ctx->orig_buf;
-	if (bp->b_error != 0) {
-		bp->b_resid = bp->b_bcount;
+	/* free optional bounce buffer */
+	if (nbp->b_data != bp->b_data)
+		kmem_intr_free(nbp->b_data, nbp->b_bcount);
+
+	bp->b_resid = 0;
+	if (nbp->b_error != 0) {
+		bp->b_error = nbp->b_error;
+		bp->b_resid = nbp->b_bcount;
 	}
-	bp->b_private = ctx->orig_private;
-	kmem_intr_free(ctx, sizeof(*ctx));
-	(cb)(bp); /* call the original b_iodone callback */
+
+	putiobuf(nbp);
+	biodone(bp);
 }
 
 /*
@@ -1381,6 +1389,8 @@ swstrategy(struct buf *bp)
 {
 	struct swapdev *sdp;
 	struct vnode *vp;
+	struct buf *nbp = bp;
+	int npages = bp->b_bufsize >> PAGE_SHIFT;
 	int pageno, bn;
 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);
 
@@ -1399,10 +1409,9 @@ swstrategy(struct buf *bp)
 	}
 
 	/*
-	 * convert block number to swapdev.   note that swapdev can't
-	 * be yanked out from under us because we are holding resources
-	 * in it (i.e. the blocks we are doing I/O on) or read lock on
-	 * swap_syscall_lock.
+	 * convert block number to swapdev. note that swapdev can't be yanked
+	 * out from under us because we are holding resources in it (i.e. the
+	 * blocks we are doing I/O on) or read lock on swap_syscall_lock.
 	 */
 	pageno = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
 	mutex_enter(&uvm_swap_data_lock);
@@ -1419,40 +1428,50 @@ swstrategy(struct buf *bp)
 	}
 
 	/*
-	 * B_RAW here implies user i/o on /dev/drum, for which we need
-	 * to handle encryption/decryption here.
-	 * for swap in/out, it's handled by the caller.
+	 * if the swap is encrypted, we encrypt before we write. in the
+	 * callback we ensure it's always decrypted (again).
 	 */
-	if ((bp->b_flags & B_RAW) != 0 &&
-	    atomic_load_relaxed(&uvm_swap_encrypt)) {
-		struct sw_physio_decrypt_context *ctx;
+	if (atomic_load_relaxed(&uvm_swap_encrypt)) {
+		int swslot;
 
-		/*
-		 * we only implement B_READ for now.
-		 *
-		 * REVISIT: what kind of apps needs to write to /dev/drum?
-		 */
-		if ((bp->b_flags & B_READ) == 0) {
-			bp->b_error = ENOTSUP;
+		/* get iobuf for our decryption nestio setup */
+		nbp = getiobuf(bp->b_vp, !uvm_lwp_is_pagedaemon(curlwp));
+		if (nbp == NULL) {
+			bp->b_error = ENOMEM;
 			bp->b_resid = bp->b_bcount;
 			biodone(bp);
+			UVMHIST_LOG(pdhist, "  failed to get iobuf",
+				0, 0, 0, 0);
 			return;
 		}
+		nestiobuf_setup(bp, nbp, 0, bp->b_bcount);
+		nbp->b_blkno = bp->b_blkno;
+
+		/* pass the swslot along to the callback */
+		swslot = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
+		KASSERT(swslot > 0);
+		nbp->b_private2 = (void *) (intptr_t) swslot;
+
+		/* redirect our iodone to the decrypt iodone */
+		nbp->b_iodone = sw_physio_decrypt_iodone;
 
 		/*
 		 * in-place decryption in the userland buffer might
 		 * have non-trivial implications. for simplicity,
-		 * we use a bounce buffer.
+		 * we use a bounce buffer. B_RAW here implies user i/o on
+		 * /dev/drum. It gets freed in the callback.
 		 */
-		ctx = kmem_intr_alloc(sizeof(*ctx), KM_SLEEP);
-		ctx->swslot = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
-		KASSERT(ctx->swslot > 0);
-		ctx->orig_buf = bp->b_data;
-		ctx->orig_private = bp->b_private;
-		ctx->orig_iodone = bp->b_iodone;
-		bp->b_data = kmem_intr_alloc(bp->b_bcount, KM_SLEEP);
-		bp->b_private = ctx;
-		bp->b_iodone = sw_physio_decrypt_iodone;
+		if (BUF_ISREAD(bp) && (bp->b_flags & B_RAW))
+			nbp->b_data = kmem_intr_alloc(nbp->b_bcount, KM_SLEEP);
+
+		/*
+		 * encrypt in-place when writing as we can't claim a bounce
+		 * buffer as it could be a writeout due to memory pressure.
+		 * it's later decrypted in the callback as the pages could
+		 * still be in use.
+		 */
+		if (BUF_ISWRITE(bp))
+			uvm_swap_encrypt_pages(pageno, bp->b_data, npages);
 	}
 
 	/*
@@ -1486,16 +1505,16 @@ swstrategy(struct buf *bp)
 		 * if we are doing a write, we have to redirect the i/o on
 		 * drum's v_numoutput counter to the swapdev's.
 		 */
-		iobuf_redirect(bp, vp);
-		bp->b_blkno = bn;		/* swapdev block number */
-		VOP_STRATEGY(vp, bp);
+		iobuf_redirect(nbp, vp);
+		nbp->b_blkno = bn;		/* swapdev block number */
+		VOP_STRATEGY(vp, nbp);
 		return;
 
 	case VREG:
 		/*
 		 * delegate to sw_reg_strategy function.
 		 */
-		sw_reg_strategy(sdp, bp, bn);
+		sw_reg_strategy(sdp, nbp, bn);
 		return;
 	}
 	/* NOTREACHED */
@@ -2109,14 +2128,6 @@ uvm_swap_io(struct vm_page **pps, int st
 	kva = uvm_pagermapin(pps, npages, mapinflags);
 
 	/*
-	 * encrypt writes in place if requested
-	 */
-
-	if (write) {
-		uvm_swap_encrypt_pages(startslot, (void *)kva, npages);
-	}
-
-	/*
 	 * fill in the bp/sbp.   we currently route our i/o through
 	 * /dev/drum's vnode [swapdev_vp].
 	 */
@@ -2161,7 +2172,6 @@ uvm_swap_io(struct vm_page **pps, int st
 	/*
 	 * now we start the I/O, and if async, return.
 	 */
-
 	VOP_STRATEGY(swapdev_vp, bp);
 	if (async) {
 		/*
@@ -2180,13 +2190,6 @@ uvm_swap_io(struct vm_page **pps, int st
 	if (error)
 		goto out;
 
-	/*
-	 * decrypt reads in place if needed
-	 */
-
-	if (!write) {
-		uvm_swap_decrypt_pages(startslot, (void *)kva, npages);
-	}
 out:
 	/*
 	 * kill the pager mapping


Home | Main Index | Thread Index | Old Index