Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[xsrc/trunk]: xsrc/external/mit/xf86-video-suncg14/dist/src CG14Copy8_short_rop(): skip the funnel shifter if source and destination are aligned



details:   https://anonhg.NetBSD.org/xsrc/rev/6dfd4ad9f737
branches:  trunk
changeset: 10829:6dfd4ad9f737
user:      macallan <macallan%NetBSD.org@localhost>
date:      Fri Dec 10 19:09:56 2021 +0000

description:
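CG14Copy8_short_rop(): skip the funnel shifter when source and destination
have the same alignment (dist == 0). In that case the ROP reads the source
starting at register 8 instead of the shifted copy at register 40.
Small but measurable speedup.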

diffstat:

 external/mit/xf86-video-suncg14/dist/src/cg14_accel.c |  34 ++++++++++--------
 1 files changed, 19 insertions(+), 15 deletions(-)

diffs (74 lines):

diff -r c53a9b59fd2c -r 6dfd4ad9f737 external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
--- a/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c     Fri Dec 10 18:25:43 2021 +0000
+++ b/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c     Fri Dec 10 19:09:56 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_accel.c,v 1.22 2021/12/10 18:25:43 macallan Exp $ */
+/* $NetBSD: cg14_accel.c,v 1.23 2021/12/10 19:09:56 macallan Exp $ */
 /*
  * Copyright (c) 2013 Michael Lorenz
  * All rights reserved.
@@ -410,6 +410,7 @@
 CG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
 {
        int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post;
+       int ssreg;
 #ifdef DEBUG
        int taddr = 4 + dstpitch * 50;
 #endif
@@ -453,8 +454,6 @@
        daddr = dststart & ~3;
        
        /* TODO:
-        * - special case dist == 0 where we can skip the funnel shifter
-        *   and only need to deal with leading / trailing garbage
         * - skip reading the fb where we can get away with it, for example
         *   GXcopy, where we only need to read the destination for partials,
         *   everything in between is straight copy
@@ -463,30 +462,35 @@
                write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7));
                write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7));
                if (wrds > 15) {
-                       write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
-                       write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
-                       /* shifted source pixels are now at register 40+ */
+                       if (dist != 0) {
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
+                               /* shifted source pixels are now at register 40+ */
+                               ssreg = 40;
+                       } else ssreg = 8;
                        if (pre != 0) {
                                /* mask out leading junk */
                                write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
-                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, 0));
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
                                write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
-                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(41, 81, 9, 14));       
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14));        
                        } else {
-                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, 15));
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15));
                        }
-                       write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(56, 96, 24, wrds - 16));
+                       write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16));
                } else {
-                       write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
-
+                       if (dist != 0) {
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
+                               ssreg = 40;
+                       } else ssreg = 8;
                        if (pre != 0) {
                                /* mask out leading junk */
                                write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
-                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, 0));
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
                                write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
-                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(41, 81, 9, wrds));
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds));
                        } else {
-                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, wrds));
+                               write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds));
                        }
                }
                if (post != 0) {



Home | Main Index | Thread Index | Old Index