Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/rasops Replace manually unrolled loops with memcpy/m...



details:   https://anonhg.NetBSD.org/src/rev/225cc249fc2a
branches:  trunk
changeset: 1000481:225cc249fc2a
user:      rin <rin%NetBSD.org@localhost>
date:      Fri Jul 26 05:24:04 2019 +0000

description:
Replace manually unrolled loops with memcpy/memmove or simple loops.
Modern compilers are smart enough; there are no measurable changes in
performance, even on MC68040 with optimization level -Os.

Also, convert the byte-wise fill loop into memset.

diffstat:

 sys/dev/rasops/rasops.c |  162 +++++++++--------------------------------------
 1 files changed, 33 insertions(+), 129 deletions(-)

diffs (267 lines):

diff -r 726e5d41e821 -r 225cc249fc2a sys/dev/rasops/rasops.c
--- a/sys/dev/rasops/rasops.c   Fri Jul 26 05:15:47 2019 +0000
+++ b/sys/dev/rasops/rasops.c   Fri Jul 26 05:24:04 2019 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: rasops.c,v 1.89 2019/07/26 05:15:47 rin Exp $ */
+/*      $NetBSD: rasops.c,v 1.90 2019/07/26 05:24:04 rin Exp $ */
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.89 2019/07/26 05:15:47 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.90 2019/07/26 05:24:04 rin Exp $");
 
 #include "opt_rasops.h"
 #include "rasops_glue.h"
@@ -617,12 +617,11 @@
 static void
 rasops_copyrows(void *cookie, int src, int dst, int num)
 {
-       uint32_t *sp, *dp, *hp, *srp, *drp, *hrp;
-       struct rasops_info *ri;
-       int n8, n1, cnt, delta;
+       struct rasops_info *ri = (struct rasops_info *)cookie;
+       uint8_t *sp, *dp, *hp;
+       int n;
 
-       ri = (struct rasops_info *)cookie;
-       hp = hrp = NULL;
+       hp = NULL;      /* XXX GCC */
 
 #ifdef RASOPS_CLIPPING
        if (dst == src)
@@ -649,72 +648,21 @@
 #endif
 
        num *= ri->ri_font->fontheight;
-       n8 = ri->ri_emustride >> 5;
-       n1 = (ri->ri_emustride >> 2) & 7;
+       n = ri->ri_emustride;
 
-       if (dst < src) {
-               srp = (uint32_t *)(ri->ri_bits + src * ri->ri_yscale);
-               drp = (uint32_t *)(ri->ri_bits + dst * ri->ri_yscale);
-               if (ri->ri_hwbits)
-                       hrp = (uint32_t *)(ri->ri_hwbits + dst *
-                           ri->ri_yscale);
-               delta = ri->ri_stride;
-       } else {
-               src = ri->ri_font->fontheight * src + num - 1;
-               dst = ri->ri_font->fontheight * dst + num - 1;
-               srp = (uint32_t *)(ri->ri_bits + src * ri->ri_stride);
-               drp = (uint32_t *)(ri->ri_bits + dst * ri->ri_stride);
-               if (ri->ri_hwbits)
-                       hrp = (uint32_t *)(ri->ri_hwbits + dst *
-                           ri->ri_stride);
-               
-               delta = -ri->ri_stride;
-       }
+       sp = ri->ri_bits + src * ri->ri_yscale;
+       dp = ri->ri_bits + dst * ri->ri_yscale;
+       if (ri->ri_hwbits)
+               hp = ri->ri_hwbits + dst * ri->ri_yscale;
 
        while (num--) {
-               dp = drp;
-               sp = srp;
-               if (ri->ri_hwbits)
-                       hp = hrp;
-
-               DELTA(drp, delta, uint32_t *);
-               DELTA(srp, delta, uint32_t *);
-               if (ri->ri_hwbits)
-                       DELTA(hrp, delta, uint32_t *);
-
-               for (cnt = n8; cnt; cnt--) {
-                       dp[0] = sp[0];
-                       dp[1] = sp[1];
-                       dp[2] = sp[2];
-                       dp[3] = sp[3];
-                       dp[4] = sp[4];
-                       dp[5] = sp[5];
-                       dp[6] = sp[6];
-                       dp[7] = sp[7];
-                       dp += 8;
-                       sp += 8;
+               memmove(dp, sp, n);
+               dp += n;
+               if (ri->ri_hwbits) {
+                       memcpy(hp, sp, n);
+                       hp += n;
                }
-               if (ri->ri_hwbits) {
-                       sp -= (8 * n8);
-                       for (cnt = n8; cnt; cnt--) {
-                               hp[0] = sp[0];
-                               hp[1] = sp[1];
-                               hp[2] = sp[2];
-                               hp[3] = sp[3];
-                               hp[4] = sp[4];
-                               hp[5] = sp[5];
-                               hp[6] = sp[6];
-                               hp[7] = sp[7];
-                               hp += 8;
-                               sp += 8;
-                       }
-               }
-
-               for (cnt = n1; cnt; cnt--) {
-                       *dp++ = *sp++;
-                       if (ri->ri_hwbits)
-                               *hp++ = *(sp - 1);
-               }
+               sp += n;
        }
 }
 
@@ -916,13 +864,11 @@
 void
 rasops_eraserows(void *cookie, int row, int num, long attr)
 {
-       struct rasops_info *ri;
-       int np, nw, cnt, delta;
+       struct rasops_info *ri = (struct rasops_info *)cookie;
        uint32_t *dp, *hp, clr;
-       int i;
+       int n, cnt, delta;
 
-       ri = (struct rasops_info *)cookie;
-       hp = NULL;
+       hp = NULL;      /* XXX GCC */
 
 #ifdef RASOPS_CLIPPING
        if (row < 0) {
@@ -946,16 +892,14 @@
         * the RI_FULLCLEAR flag is set, clear the entire display.
         */
        if (num == ri->ri_rows && (ri->ri_flg & RI_FULLCLEAR) != 0) {
-               np = ri->ri_stride >> 5;
-               nw = (ri->ri_stride >> 2) & 7;
+               n = ri->ri_stride >> 2;
                num = ri->ri_height;
                dp = (uint32_t *)ri->ri_origbits;
                if (ri->ri_hwbits)
                        hp = (uint32_t *)ri->ri_hworigbits;
                delta = 0;
        } else {
-               np = ri->ri_emustride >> 5;
-               nw = (ri->ri_emustride >> 2) & 7;
+               n = ri->ri_emustride >> 2;
                num *= ri->ri_font->fontheight;
                dp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale);
                if (ri->ri_hwbits)
@@ -965,26 +909,11 @@
        }
 
        while (num--) {
-               for (cnt = np; cnt; cnt--) {
-                       for (i = 0; i < 8; i++) {
-                               dp[i] = clr;
-                               if (ri->ri_hwbits)
-                                       hp[i] = clr;
-                       }
-                       dp += 8;
+               for (cnt = n; cnt; cnt--) {
+                       *dp++ = clr;
                        if (ri->ri_hwbits)
-                               hp += 8;
+                               *hp++ = clr;
                }
-
-               for (cnt = nw; cnt; cnt--) {
-                       *(uint32_t *)dp = clr;
-                       DELTA(dp, 4, uint32_t *);
-                       if (ri->ri_hwbits) {
-                               *(uint32_t *)hp = clr;
-                               DELTA(hp, 4, uint32_t *);
-                       }
-               }
-
                DELTA(dp, delta, uint32_t *);
                if (ri->ri_hwbits)
                        DELTA(hp, delta, uint32_t *);
@@ -1112,13 +1041,11 @@
 void
 rasops_erasecols(void *cookie, int row, int col, int num, long attr)
 {
-       int n8, height, cnt, slop1, slop2, clr;
-       struct rasops_info *ri;
+       struct rasops_info *ri = (struct rasops_info *)cookie;
+       int height, cnt, slop1, slop2, clr;
        uint32_t *rp, *dp, *hrp, *hp;
-       int i;
 
-       ri = (struct rasops_info *)cookie;
-       hrp = hp = NULL;
+       hrp = hp = NULL;        /* XXX GCC */
 
 #ifdef RASOPS_CLIPPING
        if ((unsigned)row >= (unsigned)ri->ri_rows)
@@ -1136,7 +1063,7 @@
                return;
 #endif
 
-       num = num * ri->ri_xscale;
+       num *= ri->ri_xscale;
        rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
        if (ri->ri_hwbits)
                hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
@@ -1190,32 +1117,21 @@
                        }
                } else {
                        while (height--) {
-                               dp = rp;
+                               memset(rp, clr, num);
                                DELTA(rp, ri->ri_stride, uint32_t *);
                                if (ri->ri_hwbits) {
-                                       hp = hrp;
+                                       memset(hrp, clr, num);
                                        DELTA(hrp, ri->ri_stride, uint32_t *);
                                }
-
-                               for (cnt = num; cnt; cnt--) {
-                                       *(uint8_t *)dp = clr;
-                                       DELTA(dp, 1, uint32_t *);
-                                       if (ri->ri_hwbits) {
-                                               *(uint8_t *)hp = clr;
-                                               DELTA(hp, 1, uint32_t *);
-                                       }
-                               }
                        }
                }
 
                return;
        }
 
-       slop1 = (4 - ((long)rp & 3)) & 3;
+       slop1 = (4 - ((uintptr_t)rp & 3)) & 3;
        slop2 = (num - slop1) & 3;
-       num -= slop1 + slop2;
-       n8 = num >> 5;
-       num = (num >> 2) & 7;
+       num = (num - slop1 /* - slop2 */) >> 2;
 
        while (height--) {
                dp = rp;
@@ -1244,18 +1160,6 @@
                        }
                }
 
-               /* Write 32 bytes per loop */
-               for (cnt = n8; cnt; cnt--) {
-                       for (i = 0; i < 8; i++) {
-                               dp[i] = clr;
-                               if (ri->ri_hwbits)
-                                       hp[i] = clr;
-                       }
-                       dp += 8;
-                       if (ri->ri_hwbits)
-                               hp += 8;
-               }
-
                /* Write 4 bytes per loop */
                for (cnt = num; cnt; cnt--) {
                        *dp++ = clr;



Home | Main Index | Thread Index | Old Index