Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/luna68k/dev Pull LUNA's framebuffer improvements by...



details:   https://anonhg.NetBSD.org/src/rev/f75b17b43727
branches:  trunk
changeset: 332703:f75b17b43727
user:      tsutsui <tsutsui%NetBSD.org@localhost>
date:      Sat Oct 04 16:58:17 2014 +0000

description:
Pull LUNA's framebuffer improvements by Kenji Aoyama from OpenBSD/luna88k.

http://marc.info/?l=openbsd-cvs&m=141199909120631&w=2
>> Use raster(logic) operation, or ROP, function on LUNA frame buffer.
>> It makes 4bpp wscons putchar ~20% faster.

This makes 4bpp wscons putchar ~30% faster on LUNA-II.

Also use a similar ROP in the 1bpp putchar and cursor functions;
the 1bpp putchar is also ~5% faster.
While here, reduce diffs from OpenBSD a bit.

Tested on all 1bpp/4bpp/8bpp framebuffers.

diffstat:

 sys/arch/luna68k/dev/lunafb.c      |    9 +-
 sys/arch/luna68k/dev/omrasops.c    |  282 +++++++++++++++++++++---------------
 sys/arch/luna68k/dev/omrasopsvar.h |   87 ++++++++---
 3 files changed, 230 insertions(+), 148 deletions(-)

diffs (truncated from 711 to 300 lines):

diff -r 93fedcec6370 -r f75b17b43727 sys/arch/luna68k/dev/lunafb.c
--- a/sys/arch/luna68k/dev/lunafb.c     Sat Oct 04 15:54:53 2014 +0000
+++ b/sys/arch/luna68k/dev/lunafb.c     Sat Oct 04 16:58:17 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: lunafb.c,v 1.35 2014/07/25 16:40:12 tsutsui Exp $ */
+/* $NetBSD: lunafb.c,v 1.36 2014/10/04 16:58:17 tsutsui Exp $ */
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>                 /* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: lunafb.c,v 1.35 2014/07/25 16:40:12 tsutsui Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lunafb.c,v 1.36 2014/10/04 16:58:17 tsutsui Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -75,11 +75,8 @@
 };
 
 #define        OMFB_RFCNT      0xB1000000      /* video h-origin/v-origin */
-#define        OMFB_PLANEMASK  0xB1040000      /* planemask register */
-#define        OMFB_FB_WADDR   0xB1080008      /* common plane */
-#define        OMFB_FB_RADDR   0xB10C0008      /* plane #0 */
-#define        OMFB_ROPFUNC    0xB12C0000      /* ROP function code */
 #define        OMFB_RAMDAC     0xC1100000      /* Bt454/Bt458 RAMDAC */
+
 #define        OMFB_SIZE       (0xB1300000 - 0xB1080000 + PAGE_SIZE)
 
 struct hwcmap {
diff -r 93fedcec6370 -r f75b17b43727 sys/arch/luna68k/dev/omrasops.c
--- a/sys/arch/luna68k/dev/omrasops.c   Sat Oct 04 15:54:53 2014 +0000
+++ b/sys/arch/luna68k/dev/omrasops.c   Sat Oct 04 16:58:17 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: omrasops.c,v 1.18 2014/09/28 05:00:56 tsutsui Exp $ */
+/* $NetBSD: omrasops.c,v 1.19 2014/10/04 16:58:17 tsutsui Exp $ */
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>                 /* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: omrasops.c,v 1.18 2014/09/28 05:00:56 tsutsui Exp $");
+__KERNEL_RCSID(0, "$NetBSD: omrasops.c,v 1.19 2014/10/04 16:58:17 tsutsui Exp $");
 
 /*
  * Designed speficically for 'm68k bitorder';
@@ -78,13 +78,6 @@
 #define        ALIGNMASK       (0x1f)
 #define        BYTESDONE       (4)
 
-#define        W(p) (*(uint32_t *)(p))
-#define        R(p) (*(uint32_t *)((uint8_t *)(p) + 0x40000))
-#define        P0(p) (*(uint32_t *)((uint8_t *)(p) + 0x40000))
-#define        P1(p) (*(uint32_t *)((uint8_t *)(p) + 0x80000))
-#define        P2(p) (*(uint32_t *)((uint8_t *)(p) + 0xc0000))
-#define        P3(p) (*(uint32_t *)((uint8_t *)(p) + 0x100000))
-
 /*
  * macros to handle unaligned bit copy ops.
  * See src/sys/dev/rasops/rasops_mask.h for MI version.
@@ -96,13 +89,13 @@
 #define        FASTGETBITS(psrc, x, w, dst)                                    \
        asm("bfextu %3{%1:%2},%0"                                       \
            : "=d" (dst)                                                \
-           : "di" (x), "di" (w), "o" ((uint32_t *)(psrc)))
+           : "di" (x), "di" (w), "o" (*(uint32_t *)(psrc)))
 
 /* luna68k version PUTBITS() that puts w bits from bit x at pdst memory */
 /* XXX this macro assumes (x + w) <= 32 to handle unaligned residual bits */
 #define        FASTPUTBITS(src, x, w, pdst)                                    \
        asm("bfins %3,%0{%1:%2}"                                        \
-           : "+o" ((uint32_t *)(pdst))                                 \
+           : "+o" (*(uint32_t *)(pdst))                                \
            : "di" (x), "di" (w), "d" (src)                             \
            : "memory" );
 
@@ -137,16 +130,23 @@
        rmask = ALL1BITS << (-width & ALIGNMASK);
        if (width <= BLITWIDTH) {
                lmask &= rmask;
+               /* set lmask as ROP mask value, with THROUGH mode */
+               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = lmask;
+
                while (height > 0) {
                        glyph = 0;
                        for (i = ri->ri_font->stride; i != 0; i--)
                                glyph = (glyph << 8) | *fb++;
                        glyph <<= (4 - ri->ri_font->stride) * NBBY;
                        glyph = (glyph >> align) ^ inverse;
-                       P0(p) = (P0(p) & ~lmask) | (glyph & lmask);
+
+                       *W(p) = glyph;
+
                        p += scanspan;
                        height--;
                }
+               /* reset mask value */
+               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = ALL1BITS;
        } else {
                uint8_t *q = p;
                uint32_t lhalf, rhalf;
@@ -157,14 +157,26 @@
                                glyph = (glyph << 8) | *fb++;
                        glyph <<= (4 - ri->ri_font->stride) * NBBY;
                        lhalf = (glyph >> align) ^ inverse;
-                       P0(p) = (P0(p) & ~lmask) | (lhalf & lmask);
+                       /* set lmask as ROP mask value, with THROUGH mode */
+                       ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] =
+                           lmask;
+                       
+                       *W(p) = lhalf;
+
                        p += BYTESDONE;
+
                        rhalf = (glyph << (BLITWIDTH - align)) ^ inverse;
-                       P0(p) = (rhalf & rmask) | (P0(p) & ~rmask);
+                       /* set rmask as ROP mask value, with THROUGH mode */
+                       ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] =
+                           rmask;
+
+                       *W(p) = rhalf;
 
                        p = (q += scanspan);
                        height--;
                }
+               /* reset mask value */
+               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = ALL1BITS;
        }
 }
 
@@ -201,8 +213,15 @@
        width = ri->ri_font->fontwidth + align;
        lmask = ALL1BITS >> align;
        rmask = ALL1BITS << (-width & ALIGNMASK);
+
+       /* select all planes for later ROP function target */
+       *(volatile uint32_t *)OMFB_PLANEMASK = 0xff;
+
        if (width <= BLITWIDTH) {
                lmask &= rmask;
+               /* set lmask as ROP mask value, with THROUGH mode */
+               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = lmask;
+
                while (height > 0) {
                        glyph = 0;
                        for (i = ri->ri_font->stride; i != 0; i--)
@@ -210,21 +229,25 @@
                        glyph <<= (4 - ri->ri_font->stride) * NBBY;
                        glyph = (glyph >> align);
                        glyphbg = glyph ^ ALL1BITS;
+
                        fgpat = glyph   & fgmask0;
                        bgpat = glyphbg & bgmask0;
-                       P0(p) = (P0(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P0(p) = (fgpat | bgpat);
                        fgpat = glyph   & fgmask1;
                        bgpat = glyphbg & bgmask1;
-                       P1(p) = (P1(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P1(p) = (fgpat | bgpat);
                        fgpat = glyph   & fgmask2;
                        bgpat = glyphbg & bgmask2;
-                       P2(p) = (P2(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P2(p) = (fgpat | bgpat);
                        fgpat = glyph   & fgmask3;
                        bgpat = glyphbg & bgmask3;
-                       P3(p) = (P3(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P3(p) = (fgpat | bgpat);
+
                        p += scanspan;
                        height--;
                }
+               /* reset mask value */
+               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = ALL1BITS;
        } else {
                uint8_t *q = p;
                uint32_t lhalf, rhalf;
@@ -237,38 +260,52 @@
                        glyph <<= (4 - ri->ri_font->stride) * NBBY;
                        lhalf = (glyph >> align);
                        lhalfbg = lhalf ^ ALL1BITS;
+                       /* set lmask as ROP mask value, with THROUGH mode */
+                       ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] =
+                           lmask;
+
                        fgpat = lhalf   & fgmask0;
                        bgpat = lhalfbg & bgmask0;
-                       P0(p) = (P0(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P0(p) = (fgpat | bgpat);
                        fgpat = lhalf   & fgmask1;
                        bgpat = lhalfbg & bgmask1;
-                       P1(p) = (P1(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P1(p) = (fgpat | bgpat);
                        fgpat = lhalf   & fgmask2;
                        bgpat = lhalfbg & bgmask2;
-                       P2(p) = (P2(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P2(p) = (fgpat | bgpat);
                        fgpat = lhalf   & fgmask3;
                        bgpat = lhalfbg & bgmask3;
-                       P3(p) = (P3(p) & ~lmask) | ((fgpat | bgpat) & lmask);
+                       *P3(p) = (fgpat | bgpat);
+
                        p += BYTESDONE;
+
                        rhalf = (glyph << (BLITWIDTH - align));
                        rhalfbg = rhalf ^ ALL1BITS;
+                       /* set rmask as ROP mask value, with THROUGH mode */
+                       ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] =
+                           rmask;
+
                        fgpat = rhalf   & fgmask0;
                        bgpat = rhalfbg & bgmask0;
-                       P0(p) = ((fgpat | bgpat) & rmask) | (P0(p) & ~rmask);
+                       *P0(p) = (fgpat | bgpat);
                        fgpat = rhalf   & fgmask1;
                        bgpat = rhalfbg & bgmask1;
-                       P1(p) = ((fgpat | bgpat) & rmask) | (P1(p) & ~rmask);
+                       *P1(p) = (fgpat | bgpat);
                        fgpat = rhalf   & fgmask2;
                        bgpat = rhalfbg & bgmask2;
-                       P2(p) = ((fgpat | bgpat) & rmask) | (P2(p) & ~rmask);
+                       *P2(p) = (fgpat | bgpat);
                        fgpat = rhalf   & fgmask3;
                        bgpat = rhalfbg & bgmask3;
-                       P3(p) = ((fgpat | bgpat) & rmask) | (P3(p) & ~rmask);
+                       *P3(p) = (fgpat | bgpat);
 
                        p = (q += scanspan);
                        height--;
                }
+               /* reset mask value */
+               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = ALL1BITS;
        }
+       /* select plane #0 only; XXX need this ? */
+       *(volatile uint32_t *)OMFB_PLANEMASK = 0x01;
 }
 
 static void
@@ -295,22 +332,22 @@
                lmask &= rmask;
                fill  &= lmask;
                while (height > 0) {
-                       P0(p) = (P0(p) & ~lmask) | fill;
+                       *P0(p) = (*P0(p) & ~lmask) | fill;
                        p += scanspan;
                        height--;
                }
        } else {
                uint8_t *q = p;
                while (height > 0) {
-                       P0(p) = (P0(p) & ~lmask) | (fill & lmask);
+                       *P0(p) = (*P0(p) & ~lmask) | (fill & lmask);
                        width -= 2 * BLITWIDTH;
                        while (width > 0) {
                                p += BYTESDONE;
-                               P0(p) = fill;
+                               *P0(p) = fill;
                                width -= BLITWIDTH;
                        }
                        p += BYTESDONE;
-                       P0(p) = (fill & rmask) | (P0(p) & ~rmask);
+                       *P0(p) = (fill & rmask) | (*P0(p) & ~rmask);
 
                        p = (q += scanspan);
                        width = w + align;
@@ -350,34 +387,34 @@
                fill2 &= lmask;
                fill3 &= lmask;
                while (height > 0) {
-                       P0(p) = (P0(p) & ~lmask) | fill0;
-                       P1(p) = (P1(p) & ~lmask) | fill1;
-                       P2(p) = (P2(p) & ~lmask) | fill2;
-                       P3(p) = (P3(p) & ~lmask) | fill3;
+                       *P0(p) = (*P0(p) & ~lmask) | fill0;
+                       *P1(p) = (*P1(p) & ~lmask) | fill1;
+                       *P2(p) = (*P2(p) & ~lmask) | fill2;
+                       *P3(p) = (*P3(p) & ~lmask) | fill3;
                        p += scanspan;
                        height--;
                }
        } else {
                uint8_t *q = p;
                while (height > 0) {
-                       P0(p) = (P0(p) & ~lmask) | (fill0 & lmask);
-                       P1(p) = (P1(p) & ~lmask) | (fill1 & lmask);
-                       P2(p) = (P2(p) & ~lmask) | (fill2 & lmask);
-                       P3(p) = (P3(p) & ~lmask) | (fill3 & lmask);
+                       *P0(p) = (*P0(p) & ~lmask) | (fill0 & lmask);
+                       *P1(p) = (*P1(p) & ~lmask) | (fill1 & lmask);
+                       *P2(p) = (*P2(p) & ~lmask) | (fill2 & lmask);
+                       *P3(p) = (*P3(p) & ~lmask) | (fill3 & lmask);
                        width -= 2 * BLITWIDTH;
                        while (width > 0) {
                                p += BYTESDONE;
-                               P0(p) = fill0;
-                               P1(p) = fill1;
-                               P2(p) = fill2;
-                               P3(p) = fill3;



Home | Main Index | Thread Index | Old Index