tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
uvm_swap compression patch
Dear folks,
as part of a compressed RAM experiment I'd like to try, I've first cleaned up
the current UVM swap support, because I noticed that the support for encrypted
swap had become scattered and no longer logically organised. In the attached
patch I've consolidated the encryption support into the /dev/drum device and
removed it from the specific swap support and callbacks.
I've tested it using read/write on /dev/drum and tested it for swapping on
amd64 using plain and encrypted forms. One point of discussion could be whether
writing to /dev/drum from userland should be allowed or not. In the current
patch it's allowed.
Any thoughts? I'd like to commit it.
With regards,
Reinoud
Index: sys/uvm/uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.136
diff -u -p -r1.136 uvm_pager.c
--- sys/uvm/uvm_pager.c 3 May 2026 16:02:37 -0000 1.136
+++ sys/uvm/uvm_pager.c 1 Jul 2026 18:45:55 -0000
@@ -547,16 +547,6 @@ uvm_aio_aiodone(struct buf *bp)
(uintptr_t)pgs[i], 0, 0);
}
-#if defined(VMSWAP)
- if (__predict_false(error != 0) &&
- ((pgs[0]->flags & PG_SWAPBACKED) != 0)) {
- int swslot = uvm_page_swapslot(pgs[0]);
-
- KASSERT(swslot > 0);
- uvm_swap_decrypt_pages(swslot, bp->b_data, npages);
- }
-#endif
-
uvm_pagermapout((vaddr_t)bp->b_data, npages);
uvm_aio_aiodone_pages(pgs, npages, write, error);
Index: sys/uvm/uvm_swap.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.233
diff -u -p -r1.233 uvm_swap.c
--- sys/uvm/uvm_swap.c 3 Jun 2026 15:00:06 -0000 1.233
+++ sys/uvm/uvm_swap.c 1 Jul 2026 18:45:55 -0000
@@ -218,6 +218,7 @@ static int uvm_swap_io(struct vm_page **
static void uvm_swap_genkey(struct swapdev *);
static void uvm_swap_encryptpage(struct swapdev *, void *, int);
static void uvm_swap_decryptpage(struct swapdev *, void *, int);
+static void uvm_swap_encrypt_pages(int startslot, void *p, int npages);
/*
* uvm_swap_init: init the swap system data structures and locks
@@ -1337,38 +1338,47 @@ iobuf_redirect(struct buf *bp, struct vn
struct sw_physio_decrypt_context {
void *orig_buf;
- void *orig_private;
- void (*orig_iodone)(struct buf *);
int swslot;
};
static void
sw_physio_decrypt_iodone(struct buf *bp)
{
- struct sw_physio_decrypt_context *ctx = bp->b_private;
- void (*cb)(struct buf *bp) = ctx->orig_iodone;
+ struct sw_physio_decrypt_context *ctx = bp->b_private2;
+ struct buf *mbp = bp->b_private;
size_t npages = bp->b_bcount >> PAGE_SHIFT;
KASSERT(ctx->swslot > 0);
KASSERT(npages << PAGE_SHIFT == bp->b_bcount);
+
+ /* always decrypt, we might have written */
+ uvm_swap_decrypt_pages(ctx->swslot, bp->b_data, npages);
+
+ /* copy data if we used a bounce buffer */
if (bp->b_error == 0) {
if (bp->b_resid == 0) {
- uvm_swap_decrypt_pages(ctx->swslot, bp->b_data,
- npages);
- memcpy(ctx->orig_buf, (uint8_t *)bp->b_data,
- bp->b_bcount);
+ if (bp->b_data != ctx->orig_buf)
+ memcpy(ctx->orig_buf, (uint8_t *)bp->b_data,
+ bp->b_bcount);
} else {
+ /* XXX we could zero the buffer */
bp->b_error = EIO;
}
}
- kmem_intr_free(bp->b_data, bp->b_bcount);
- bp->b_data = ctx->orig_buf;
+ /* free optional bounce buffer */
+ if (bp->b_data != ctx->orig_buf)
+ kmem_intr_free(bp->b_data, bp->b_bcount);
+
+ mbp->b_resid = 0;
if (bp->b_error != 0) {
- bp->b_resid = bp->b_bcount;
+ mbp->b_error = bp->b_error;
+ mbp->b_resid = bp->b_bcount;
}
- bp->b_private = ctx->orig_private;
+
kmem_intr_free(ctx, sizeof(*ctx));
- (cb)(bp); /* call the original b_iodone callback */
+ putiobuf(bp);
+
+ biodone(mbp);
}
/*
@@ -1381,6 +1391,8 @@ swstrategy(struct buf *bp)
{
struct swapdev *sdp;
struct vnode *vp;
+ struct buf *nbp = bp;
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
int pageno, bn;
UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);
@@ -1399,10 +1411,9 @@ swstrategy(struct buf *bp)
}
/*
- * convert block number to swapdev. note that swapdev can't
- * be yanked out from under us because we are holding resources
- * in it (i.e. the blocks we are doing I/O on) or read lock on
- * swap_syscall_lock.
+ * convert block number to swapdev. note that swapdev can't be yanked
+ * out from under us because we are holding resources in it (i.e. the
+ * blocks we are doing I/O on) or read lock on swap_syscall_lock.
*/
pageno = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
mutex_enter(&uvm_swap_data_lock);
@@ -1419,40 +1430,46 @@ swstrategy(struct buf *bp)
}
/*
- * B_RAW here implies user i/o on /dev/drum, for which we need
- * to handle encryption/decryption here.
- * for swap in/out, it's handled by the caller.
+ * if the swap is encrypted, we encrypt before we write. in the
+ * callback we ensure its always decrypted (again).
*/
- if ((bp->b_flags & B_RAW) != 0 &&
- atomic_load_relaxed(&uvm_swap_encrypt)) {
+ if (atomic_load_relaxed(&uvm_swap_encrypt)) {
struct sw_physio_decrypt_context *ctx;
- /*
- * we only implement B_READ for now.
- *
- * REVISIT: what kind of apps needs to write to /dev/drum?
- */
- if ((bp->b_flags & B_READ) == 0) {
- bp->b_error = ENOTSUP;
+ /* get iobuf for our decryption nestio setup */
+ nbp = getiobuf(bp->b_vp, !uvm_lwp_is_pagedaemon(curlwp));
+ if (nbp == NULL) {
+ bp->b_error = ENOMEM;
bp->b_resid = bp->b_bcount;
biodone(bp);
+ UVMHIST_LOG(pdhist, " failed to get iobuf",
+ 0, 0, 0, 0);
return;
}
+ nestiobuf_setup(bp, nbp, 0, bp->b_bcount);
+ nbp->b_blkno = bp->b_blkno;
- /*
- * in-place decryption in the userland buffer might
- * have non-trivial implications. for simplicity,
- * we use a bounce buffer.
- */
+ /* keep some context around for callback */
ctx = kmem_intr_alloc(sizeof(*ctx), KM_SLEEP);
ctx->swslot = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
KASSERT(ctx->swslot > 0);
ctx->orig_buf = bp->b_data;
- ctx->orig_private = bp->b_private;
- ctx->orig_iodone = bp->b_iodone;
- bp->b_data = kmem_intr_alloc(bp->b_bcount, KM_SLEEP);
- bp->b_private = ctx;
- bp->b_iodone = sw_physio_decrypt_iodone;
+ nbp->b_private2 = ctx;
+ /* redirect our iodone to the decrypt iodone */
+ nbp->b_iodone = sw_physio_decrypt_iodone;
+
+ /*
+ * in-place decryption in the userland buffer might
+ * have non-trivial implications. for simplicity,
+ * we use a bounce buffer. B_RAW here implies user i/o on
+ * /dev/drum. It gets freed in the callback.
+ */
+ if (BUF_ISREAD(bp) && (bp->b_flags & B_RAW))
+ nbp->b_data = kmem_intr_alloc(nbp->b_bcount, KM_SLEEP);
+
+ /* encrypt in place when writing */
+ if (BUF_ISWRITE(bp))
+ uvm_swap_encrypt_pages(pageno, bp->b_data, npages);
}
/*
@@ -1486,16 +1503,16 @@ swstrategy(struct buf *bp)
* if we are doing a write, we have to redirect the i/o on
* drum's v_numoutput counter to the swapdev's.
*/
- iobuf_redirect(bp, vp);
- bp->b_blkno = bn; /* swapdev block number */
- VOP_STRATEGY(vp, bp);
+ iobuf_redirect(nbp, vp);
+ nbp->b_blkno = bn; /* swapdev block number */
+ VOP_STRATEGY(vp, nbp);
return;
case VREG:
/*
* delegate to sw_reg_strategy function.
*/
- sw_reg_strategy(sdp, bp, bn);
+ sw_reg_strategy(sdp, nbp, bn);
return;
}
/* NOTREACHED */
@@ -2109,14 +2126,6 @@ uvm_swap_io(struct vm_page **pps, int st
kva = uvm_pagermapin(pps, npages, mapinflags);
/*
- * encrypt writes in place if requested
- */
-
- if (write) {
- uvm_swap_encrypt_pages(startslot, (void *)kva, npages);
- }
-
- /*
* fill in the bp/sbp. we currently route our i/o through
* /dev/drum's vnode [swapdev_vp].
*/
@@ -2161,7 +2170,6 @@ uvm_swap_io(struct vm_page **pps, int st
/*
* now we start the I/O, and if async, return.
*/
-
VOP_STRATEGY(swapdev_vp, bp);
if (async) {
/*
@@ -2180,13 +2188,6 @@ uvm_swap_io(struct vm_page **pps, int st
if (error)
goto out;
- /*
- * decrypt reads in place if needed
- */
-
- if (!write) {
- uvm_swap_decrypt_pages(startslot, (void *)kva, npages);
- }
out:
/*
* kill the pager mapping
Home |
Main Index |
Thread Index |
Old Index