Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/luna68k/dev lunafb: Improve drawing performance usi...



details:   https://anonhg.NetBSD.org/src/rev/b3abf6db90ba
branches:  trunk
changeset: 370717:b3abf6db90ba
user:      isaki <isaki%NetBSD.org@localhost>
date:      Sun Sep 25 11:28:40 2022 +0000

description:
lunafb: Improve drawing performance using VRAM ROP features.
- Drawing a character on 4bpp normally needs 4 writes, but by actively using
  the VRAM ROP, it can be reduced to a single write.
  The same goes for copyrows.  If the whole row consists of only two colors
  (one foreground and one background), it can be copied by reading once and
  writing once, regardless of the number of planes.  Only if the row consists
  of more than two colors is it copied plane by plane.
- On an 8bpp board, it acts as 4bpp (16 colors).
- On a 4bpp board on the real LUNA-I (68030/20MHz), monochrome scroll is about
  4 times faster even without asm.  Using asm improves it by an additional 5%
  (asm is enabled by default).
- According to tsutsui@-san's report, even color scroll is about 2 times faster
  on his 8bpp board on the real LUNA-II (68040).
This was first developed by Y.Sugahara back in late 2019, and was modified
a lot by me in 2022.
http://mail-index.netbsd.org/port-luna68k/2022/09/23/msg000072.html

diffstat:

 sys/arch/luna68k/dev/lunafb.c      |    22 +-
 sys/arch/luna68k/dev/omrasops.c    |  1738 ++++++++++++++++++++++++-----------
 sys/arch/luna68k/dev/omrasopsvar.h |    31 +-
 3 files changed, 1217 insertions(+), 574 deletions(-)

diffs (truncated from 2079 to 300 lines):

diff -r 97bcba7eaf44 -r b3abf6db90ba sys/arch/luna68k/dev/lunafb.c
--- a/sys/arch/luna68k/dev/lunafb.c     Sun Sep 25 11:22:36 2022 +0000
+++ b/sys/arch/luna68k/dev/lunafb.c     Sun Sep 25 11:28:40 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: lunafb.c,v 1.46 2022/07/14 20:13:21 tsutsui Exp $ */
+/* $NetBSD: lunafb.c,v 1.47 2022/09/25 11:28:40 isaki Exp $ */
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>                 /* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: lunafb.c,v 1.46 2022/07/14 20:13:21 tsutsui Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lunafb.c,v 1.47 2022/09/25 11:28:40 isaki Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -76,6 +76,8 @@
 
 #define        OMFB_RFCNT      BMAP_RFCNT      /* video h-origin/v-origin */
 #define        OMFB_RAMDAC     BMAP_PALLET2    /* Bt454/Bt458 RAMDAC */
+#define        OMFB_FB_WADDR   (BMAP_BMP + 8)  /* common bitmap plane */
+#define        OMFB_FB_RADDR   (BMAP_BMAP0 + 8)/* bitmap plane #0 */
 
 #define        OMFB_SIZE       (BMAP_FN0 - BMAP_BMP + PAGE_SIZE)
 
@@ -173,6 +175,8 @@
 
 extern int hwplanemask;        /* hardware planemask; retrieved at boot */
 
+int hwplanecount;      /* for omrasops */
+
 static int omfb_console;
 int  omfb_cnattach(void);
 
@@ -456,7 +460,7 @@
 static void
 omfb_getdevconfig(paddr_t paddr, struct om_hwdevconfig *dc)
 {
-       int bpp, i;
+       int i;
        struct rasops_info *ri;
        union {
                struct { short h, v; } p;
@@ -465,21 +469,21 @@
 
        switch (hwplanemask) {
        case 0xff:
-               bpp = 8;        /* XXX check monochrome bit in DIPSW */
+               hwplanecount = 8;       /* XXX check monochrome bit in DIPSW */
                break;
        default:
        case 0x0f:
-               bpp = 4;        /* XXX check monochrome bit in DIPSW */
+               hwplanecount = 4;       /* XXX check monochrome bit in DIPSW */
                break;
        case 1:
-               bpp = 1;
+               hwplanecount = 1;
                break;
        }
        dc->dc_wid = 1280;
        dc->dc_ht = 1024;
-       dc->dc_depth = bpp;
+       dc->dc_depth = hwplanecount;
        dc->dc_rowbytes = 2048 / 8;
-       dc->dc_cmsize = (bpp == 1) ? 0 : 1 << bpp;
+       dc->dc_cmsize = (hwplanecount == 1) ? 0 : 1 << hwplanecount;
        dc->dc_videobase = paddr;
 
        omfb_resetcmap(dc);
@@ -509,7 +513,7 @@
                ri->ri_flg |= RI_NO_AUTO;
        ri->ri_hw = dc;
 
-       if (bpp == 4 || bpp == 8)
+       if (hwplanecount == 4 || hwplanecount == 8)
                omrasops4_init(ri, 34, 80);
        else
                omrasops1_init(ri, 34, 80);
diff -r 97bcba7eaf44 -r b3abf6db90ba sys/arch/luna68k/dev/omrasops.c
--- a/sys/arch/luna68k/dev/omrasops.c   Sun Sep 25 11:22:36 2022 +0000
+++ b/sys/arch/luna68k/dev/omrasops.c   Sun Sep 25 11:28:40 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: omrasops.c,v 1.22 2022/09/25 11:22:36 isaki Exp $ */
+/* $NetBSD: omrasops.c,v 1.23 2022/09/25 11:28:40 isaki Exp $ */
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>                 /* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: omrasops.c,v 1.22 2022/09/25 11:22:36 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: omrasops.c,v 1.23 2022/09/25 11:28:40 isaki Exp $");
 
 /*
  * Designed speficically for 'm68k bitorder';
@@ -41,6 +41,15 @@
  *     - first column is at 32bit aligned address,
  *     - font glyphs are stored in 32bit padded.
  */
+/*
+ * BMSEL affects both of
+ * 1) which plane a write to the common bitmap plane is reflected on and
+ * 2) which plane's ROP a write to the common ROP is reflected on.
+ *
+ * The common ROP is not a ROP applied to write to the common bitmap plane.
+ * It's equivalent to set ROPs of the plane selected in the plane mask one
+ * by one.
+ */
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -52,32 +61,84 @@
 
 #include <arch/luna68k/dev/omrasopsvar.h>
 
+#ifdef luna68k
+#define USE_M68K_ASM   1
+#endif
+
+/* To provide optimization conditions to compilers */
+#if defined(__GNUC__)
+#define ASSUME(cond)   if (!(cond)) __unreachable()
+#elif defined(__clang__) && __has_builtin(__builtin_assume)
+#define ASSUME(cond)   __builtin_assume(cond)
+#else
+#define ASSUME(cond)   (void)(cond)
+#endif
+
+/* XXX it should be redesigned, including making the attributes support 8bpp */
+typedef struct {
+       union {
+               int32_t all;
+               struct {
+                       int8_t ismulti; /* is multi color used */
+                       uint8_t fg;
+                       uint8_t bg;
+                       uint8_t reserved;
+               };
+       };
+} rowattr_t;
+
 /* wscons emulator operations */
-static void    om1_cursor(void *, int, int, int);
-static void    om4_cursor(void *, int, int, int);
-static int     om_mapchar(void *, int, unsigned int *);
-static void    om1_putchar(void *, int, int, u_int, long);
-static void    om4_putchar(void *, int, int, u_int, long);
+static void    om_cursor(void *, int, int, int);
+static int     om_mapchar(void *, int, u_int *);
+static void    om_putchar(void *, int, int, u_int, long);
 static void    om1_copycols(void *, int, int, int, int);
 static void    om4_copycols(void *, int, int, int, int);
 static void    om1_copyrows(void *, int, int, int num);
 static void    om4_copyrows(void *, int, int, int num);
-static void    om1_erasecols(void *, int, int, int, long);
-static void    om4_erasecols(void *, int, int, int, long);
-static void    om1_eraserows(void *, int, int, long);
-static void    om4_eraserows(void *, int, int, long);
-static int     om1_allocattr(void *, int, int, int, long *);
-static int     om4_allocattr(void *, int, int, int, long *);
-static void    om4_unpack_attr(long, int *, int *, int *);
+static void    om_erasecols(void *, int, int, int, long);
+static void    om_eraserows(void *, int, int, long);
+static int     om_allocattr(void *, int, int, int, long *);
+
+static void    om_fill(int, int, uint8_t *, int, int, uint32_t, int, int);
+static void    om_fill_color(int, uint8_t *, int, int, int, int);
+static void    om_rascopy_single(uint8_t *, uint8_t *, int16_t, int16_t,
+    uint8_t[]);
+static void    om4_rascopy_multi(uint8_t *, uint8_t *, int16_t, int16_t);
+static void    om_unpack_attr(long, uint8_t *, uint8_t *, int *);
 
 static int     omrasops_init(struct rasops_info *, int, int);
 
+/*
+ * XXX should be fixed...
+ * This number of elements is derived from howmany(1024, fontheight = 24).
+ * But it is currently initialized with row = 34, so it is used only up to 34.
+ */
+static rowattr_t rowattr[43];
+
 #define        ALL1BITS        (~0U)
 #define        ALL0BITS        (0U)
 #define        BLITWIDTH       (32)
 #define        ALIGNMASK       (0x1f)
 #define        BYTESDONE       (4)
 
+#if 0 /* XXX not used yet */
+/*
+ * internal attributes. see om_allocattr().
+ */
+#define OMFB_ATTR_MULTICOLOR           (1U << 31)
+#define OMFB_ATTR_UNDERLINE            (1U << 17)
+#define OMFB_ATTR_BOLD                 (1U << 16)
+#endif
+
+/*
+ * XXX deprecated.
+ * This way cannot be extended to 8bpp, so don't use it in new code.
+ */
+#define P0(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 1))
+#define P1(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 2))
+#define P2(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 3))
+#define P3(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 4))
+
 /*
  * macros to handle unaligned bit copy ops.
  * See src/sys/dev/rasops/rasops_masks.h for MI version.
@@ -103,405 +164,934 @@
 #define        PUTBITS(src, x, w, pdst)        FASTPUTBITS(src, x, w, pdst)
 
 /*
- * Blit a character at the specified co-ordinates.
+ * Clear lower w bits from x.
+ * x must be filled with 1 at least lower w bits.
  */
-static void
-om1_putchar(void *cookie, int row, int startcol, u_int uc, long attr)
-{
-       struct rasops_info *ri = cookie;
-       uint8_t *p;
-       int scanspan, startx, height, width, align, y;
-       uint32_t lmask, rmask, glyph, inverse;
-       int i;
-       uint8_t *fb;
+#if USE_M68K_ASM
+#define CLEAR_LOWER_BITS(x, w)                                         \
+       asm volatile(                                                   \
+       "       bclr    %[width],%[data]        ;\n"                    \
+       "       addq.l  #1,%[data]              ;\n"                    \
+           : [data] "+&d" (x)                                          \
+           : [width] "d" (w)                                           \
+           :                                                           \
+       )
+#else
+#define CLEAR_LOWER_BITS(x, w) x = ((x) & ~(1U << (w))) + 1
+#endif
 
-       scanspan = ri->ri_stride;
-       y = ri->ri_font->fontheight * row;
-       startx = ri->ri_font->fontwidth * startcol;
-       height = ri->ri_font->fontheight;
-       fb = (uint8_t *)ri->ri_font->data +
-           (uc - ri->ri_font->firstchar) * ri->ri_fontscale;
-       inverse = ((attr & 0x00000001) != 0) ? ALL1BITS : ALL0BITS;
+/* Set planemask for the common plane and the common ROP */
+static inline void
+om_set_planemask(int planemask)
+{
+
+       *(volatile uint32_t *)OMFB_PLANEMASK = planemask;
+}
 
-       p = (uint8_t *)ri->ri_bits + y * scanspan + ((startx / 32) * 4);
-       align = startx & ALIGNMASK;
-       width = ri->ri_font->fontwidth + align;
-       lmask = ALL1BITS >> align;
-       rmask = ALL1BITS << (-width & ALIGNMASK);
-       if (width <= BLITWIDTH) {
-               lmask &= rmask;
-               /* set lmask as ROP mask value, with THROUGH mode */
-               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = lmask;
+/* Get a ROP address */
+static inline volatile uint32_t *
+om_rop_addr(int plane, int rop)
+{
 
-               while (height > 0) {
-                       glyph = 0;
-                       for (i = ri->ri_font->stride; i != 0; i--)
-                               glyph = (glyph << 8) | *fb++;
-                       glyph <<= (4 - ri->ri_font->stride) * NBBY;
-                       glyph = (glyph >> align) ^ inverse;
+       return (volatile uint32_t *)
+           (OMFB_ROP_P0 + OMFB_PLANEOFFS * plane + rop * 4);
+}
+
+/* Set ROP and ROP's mask for individual plane */
+static inline void
+om_set_rop(int plane, int rop, uint32_t mask)
+{
 
-                       *W(p) = glyph;
+       *om_rop_addr(plane, rop) = mask;
+}
+
+/* Set ROP and ROP's mask for current setplanemask-ed plane(s) */
+static inline void
+om_set_rop_curplane(int rop, uint32_t mask)
+{
 
-                       p += scanspan;
-                       height--;
-               }
-               /* reset mask value */
-               ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = ALL1BITS;



Home | Main Index | Thread Index | Old Index