Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/vax/vax VAX version of cpu_in_cksum...



details:   https://anonhg.NetBSD.org/src/rev/dfa4905d792f
branches:  trunk
changeset: 318465:dfa4905d792f
user:      ragge <ragge%NetBSD.org@localhost>
date:      Wed Apr 25 11:06:49 2018 +0000
description:
VAX version of cpu_in_cksum().  Increases network performance significantly.

diffstat:

 sys/arch/vax/vax/cpu_in_cksum.S |  222 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 222 insertions(+), 0 deletions(-)

diffs (226 lines):

diff -r cf01c67d4fba -r dfa4905d792f sys/arch/vax/vax/cpu_in_cksum.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/vax/vax/cpu_in_cksum.S   Wed Apr 25 11:06:49 2018 +0000
@@ -0,0 +1,222 @@
+/*     $NetBSD: cpu_in_cksum.S,v 1.1 2018/04/25 11:06:49 ragge Exp $   */
+
+/*-
+ * Copyright (c) 2017 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Anders Magnusson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1988, 1992, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)in_cksum.c  8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Assembly version of cpu_in_cksum() for vax, following the structure
+ * in the C version of the file but using vax instructions for speed.
+ * Increases network traffic speed by almost 50% (NFS tests).
+ */
+
+#include <machine/asm.h>
+__KERNEL_RCSID(0, "$NetBSD: cpu_in_cksum.S,v 1.1 2018/04/25 11:06:49 ragge Exp $");
+
+#include "assym.h"
+
+#define off    %r0             /* bytes still to skip before summing starts */
+#define mlen   %r1             /* bytes to sum from the current mbuf */
+#define m      %r2             /* current mbuf in the chain */
+#define data   %r3             /* read pointer into the current mbuf */
+#define sum    %r4             /* 32-bit accumulator, carries are added back in */
+#define len    %r5             /* bytes still to be summed in total */
+#define byte_swapped   %r6     /* toggled each time sum is rotated by 8 for an odd byte */
+#define tmp    %r7             /* scratch for zero-extended byte/word loads */
+# Sum len bytes of the mbuf chain m, starting off bytes in, onto initial_sum with end-around carry.
+# int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
+# Returns the folded 16-bit sum xor 0xffff, or -1 (after a printf) when m becomes NULL too early.
+ENTRY(cpu_in_cksum, R7|R6)
+
+       subl2 $4,%sp                    # NOTE(review): this 4-byte slot is never referenced below
+
+       movl 4(%ap),m                   # m = first argument
+       movl 8(%ap),len                 # len = second argument
+       movl 12(%ap),off                # off = third argument
+       movl 16(%ap),sum                # sum = initial_sum (fourth argument)
+
+       clrl byte_swapped               # byte_swapped = 0;
+
+.Lfirstloop:                           # for (;;) {    (skip the first off bytes)
+       tstl m                          # if (__predict_false(m == NULL)) {
+       jeql .Lout_of_data              #       goto out_of_data; }
+
+       movl M_LEN(m),mlen              # mlen = m->m_len;
+       cmpl off,mlen                   # if (mlen > off) {
+       jgeq 1f                         #   (signed compare, taken when off >= mlen)
+       subl2 off,mlen                  #       mlen -= off;
+       addl3 M_DATA(m),off,data        #       data = mtod(m, uint8_t *) + off;
+       jbr .Lpost_initial_offset       #       goto post_initial_offset;
+1:                                     # }
+       subl2 mlen,off                  # off -= mlen;
+       tstl len                        # if (len == 0)
+       jeql .Lsecondloop               #       break;
+       movl M_NEXT(m),m                # m = m->m_next;
+       jbr .Lfirstloop                 # }
+
+.Lthirdstmt:                           # step expression of the second for loop:
+       movl M_NEXT(m),m                # m = m->m_next) {
+.Lsecondloop:                          # for (;
+       tstl len                        # len > 0;
+       jeql .Lendsecond                #   (the loop is left only when len == 0)
+       tstl m                          # if (__predict_false(m == NULL)) {
+       jeql .Lout_of_data              #       goto out_of_data; }
+
+       movl M_LEN(m),mlen              # mlen = m->m_len;
+       movl M_DATA(m),data             # data = mtod(m, uint8_t *);
+.Lpost_initial_offset:
+       tstl mlen                       # if (mlen == 0)
+       jeql .Lthirdstmt                #       continue;
+       cmpl len,mlen                   # if (mlen > len)
+       jgeq 1f                         #   (signed compare, clamp skipped when len >= mlen)
+       movl len,mlen                   #       mlen = len;
+1:     subl2 mlen,len                  # len -= mlen
+       cmpl mlen,$16                   # if (mlen < 16)
+       jlss .Lshort_mbuf               #       goto short_mbuf;
+# mlen >= 16 here: consume a leading byte and/or word so that
+# data is on a longword boundary for the unrolled adds below.
+#
+       blbc data,1f                    # if ((uintptr_t)data & 1) {
+       movzbl (data)+,tmp              #       tmp = *data++;
+       addl2 tmp,sum                   #       sum += tmp;
+       adwc $0,sum                     #       sum += carry;
+       rotl $8,sum,sum                 #       sum = (sum << 8 | sum >> 24);
+       xorl2 $1,byte_swapped           #       byte_swapped ^= 1;
+       decl mlen                       #       mlen--;
+1:                                     # }
+       bbc $1,data,1f                  # if ((uintptr_t)data & 2) {
+       movzwl (data)+,tmp              #       tmp = *(uint16_t *)data; data += 2;
+       addl2 tmp,sum                   #       sum += tmp;
+       adwc $0,sum                     #       sum += carry;
+       subl2 $2,mlen                   #       mlen -= 2;
+1:                                     # }
+#
+# Add 32 bytes (eight longwords, i.e. 16 words) per iteration
+#
+2:     subl2 $32,mlen                  # while ((mlen -= 32) >= 0) {
+       jlss 1f                         #   (exit once fewer than 32 bytes were left)
+       addl2 (data)+,sum               #       sum += *(uint32_t *)data; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc $0,sum                     #       sum += carry; (from the last add)
+       jbr 2b                          # }
+
+1:     addl2 $32,mlen                  # mlen += 32; (undo the last subtract, mlen is 0..31)
+       bbc $4,mlen,1f                  # if (mlen & 16) {
+       addl2 (data)+,sum               #       sum += *(uint32_t *)data; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc $0,sum                     #       sum += carry;
+       subl2 $16,mlen                  #       mlen -= 16;
+1:                                     # }
+
+.Lshort_mbuf:                          # short_mbuf: (mlen < 16, data alignment not adjusted)
+       bbc $3,mlen,1f                  # if (mlen & 8) {
+       addl2 (data)+,sum               #       sum += *(uint32_t *)data; data += 4;
+       adwc (data)+,sum                #       sum += *(uint32_t *)data + carry; data += 4;
+       adwc $0,sum                     #       sum += carry;
+       subl2 $8,mlen                   #       mlen -= 8;
+1:                                     # }
+       bbc $2,mlen,1f                  # if (mlen & 4) {
+       addl2 (data)+,sum               #       sum += *(uint32_t *)data; data += 4;
+       adwc $0,sum                     #       sum += carry;
+       subl2 $4,mlen                   #       mlen -= 4;
+1:                                     # }
+
+       bbc $1,mlen,1f                  # if (mlen & 2) {
+       movzwl (data)+,tmp              #       tmp = *(uint16_t *)data; data += 2;
+       addl2 tmp,sum                   #       sum += tmp;
+       adwc $0,sum                     #       sum += carry;
+1:                                     # }
+       blbc mlen,1f                    # if (mlen & 1) {
+       movzbl (data)+,tmp              #       tmp = *data++;
+       addl2 tmp,sum                   #       sum += tmp;
+       adwc $0,sum                     #       sum += carry;
+       rotl $8,sum,sum                 #       sum = (sum << 8 | sum >> 24);
+       xorl2 $1,byte_swapped           #       byte_swapped ^= 1;
+1:                                     # }
+       jbr .Lthirdstmt                 # advance to the next mbuf and re-test len
+
+.Lendsecond:                           # } end of the second for loop
+       tstl len                        # if (len != 0)   NOTE(review): only reached with len == 0
+       jneq .Lout_of_data              #       goto out_of_data;
+       tstl byte_swapped               # if (byte_swapped) {
+       jeql 1f                         #   (skip the rotate when the flag is clear)
+       rotl $8,sum,sum                 # sum = (sum << 8 | sum >> 24);
+1:     rotl $16,sum,tmp                # tmp = sum rotated by 16; low half = old high half
+       addw2 tmp,sum                   # sum(16) += tmp; (16-bit add, carry-out used below)
+       bicl2 $0xffff0000,sum           # sum &= ~0xffff0000; (adwc below relies on the carry surviving this)
+       adwc $0,sum                     # sum += carry from the 16-bit add;
+       xorl3 $0xffff,sum,%r0           # return (sum ^ 0xffff);
+       ret
+.Lout_of_data:                         # out_of_data: the chain ended before len was consumed
+       pushab .Lin_cksum               # push the message address as the only argument
+       calls $1,printf                 # printf(message);
+       mnegl $1,%r0                    # return -1;
+       ret
+
+       .section        .rodata
+.Lin_cksum:                            # message printed on the out_of_data path
+       .asciz "in_cksum: out of data\n"
+



Home | Main Index | Thread Index | Old Index