Subject: bcopy oops
To: None <port-alpha@NetBSD.ORG>
From: Trevor Blackwell <tlb@eecs.harvard.edu>
List: port-alpha
Date: 08/13/1995 20:00:40
I forgot to make bcopy return 0. This was causing copyin and friends
to return garbage values, indicating errors.

Here's the fixed bcopy, and also a fast bzero, as diffs from Chris's
last release.

*** locore.s    Sun Aug 13 17:39:52 1995
--- locore.s-orig       Fri Mar 24 12:11:54 1995
***************
*** 1039,1270 ****
   *
   * int bcopy(char *from, char *to, u_int len);
   */
- #if 1
  LEAF(bcopy,3)
- 
-       /* Check for negative length */ 
-       ble     a2,bcopy_done
- 
- /* Check for overlap */
-         subq    a1,a0,t5
-         cmpult  t5,a2,t5
-         bne     t5,bcopy_overlap
- 
- /* a3 = end address */
-         addq    a0,a2,a3
- 
- /* Get the first word */
-         ldq_u   t2,0(a0)
- 
- /* Do they have the same alignment? */
-         xor     a0,a1,t0
-         and     t0,7,t0       
-         and     a1,7,t1
-         bne     t0,bcopy_different_alignment
- 
- /* src & dst have same alignment */
-         beq     t1,bcopy_all_aligned
-       
-         ldq_u   t3,0(a1)
-         addq    a2,t1,a2
-         mskqh   t2,a0,t2
-         mskql   t3,a0,t3
-         or      t2,t3,t2
- 
- /* Dst is 8-byte aligned */
- 
- /* If less than 8 bytes,skip loop */
- bcopy_all_aligned:
-         subq    a2,1,t0
-         and     a2,7,a2
-         bic     t0,7,t0
-         beq     t0,bcopy_samealign_lp_end
- 
- bcopy_samealign_lp:   
-         stq_u   t2,0(a1)
-         addq    a1,8,a1
-         ldq_u   t2,8(a0)
-         subq    t0,8,t0
-         addq    a0,8,a0
-         bne     t0,bcopy_samealign_lp
- 
- /* If we're done,exit */
- bcopy_samealign_lp_end:
-         bne     a2,bcopy_small_left
-         stq_u   t2,0(a1)
-         mov     zero, v0              /* return 0. */
-         RET
- 
- bcopy_small_left:
-         mskql   t2,a2,t4
-         ldq_u   t3,0(a1)
-         mskqh   t3,a2,t3
-         or      t4,t3,t4
-         stq_u   t4,0(a1)
-         mov     zero, v0              /* return 0. */
-         RET
- 
- /* this is the fun part */
- bcopy_different_alignment:    
-         addq    a0,a2,a3
-         cmpule  a2,8,t0
-         bne     t0,bcopy_da_finish
-       
-         beq     t1,bcopy_da_noentry
- 
- /* Do the initial partial word */
-         subq    zero,a1,t0
-         and     t0,7,t0
-         ldq_u   t3,7(a0)
-         extql   t2,a0,t2
-         extqh   t3,a0,t3
-         or      t2,t3,t5
-         insql   t5,a1,t5
-         ldq_u   t6,0(a1)
-         mskql   t6,a1,t6
-         or      t5,t6,t5
-         stq_u   t5,0(a1)
-         addq    a0,t0,a0
-         addq    a1,t0,a1
-         subq    a2,t0,a2
-         ldq_u   t2,0(a0)
-       
- bcopy_da_noentry:     
-         subq    a2,1,t0
-         bic     t0,7,t0
-         and     a2,7,a2
-         beq     t0,bcopy_da_finish2
- 
- bcopy_da_lp:  
-         ldq_u   t3,7(a0)
-         addq    a0,8,a0
-         extql   t2,a0,t4
-         extqh   t3,a0,t5
-         subq    t0,8,t0
-         or      t4,t5,t5
-         stq     t5,0(a1)
-         addq    a1,8,a1
-         beq     t0,bcopy_da_finish1
-         ldq_u   t2,7(a0)
-         addq    a0,8,a0
-         extql   t3,a0,t4
-         extqh   t2,a0,t5
-         subq    t0,8,t0
-         or      t4,t5,t5
-         stq     t5,0(a1)
-         addq    a1,8,a1
-         bne     t0,bcopy_da_lp
- 
- /* Do the last new word */
- bcopy_da_finish2:     
-         mov     t2,t3
- 
- /* Do the last partial word */
- bcopy_da_finish1:     
-         ldq_u   t2,-1(a3)
-         extql   t3,a0,t3
-         extqh   t2,a0,t2
-         or      t2,t3,t2
-         br      zero,bcopy_samealign_lp_end
- 
- /* Do the last word in the next source word */
- bcopy_da_finish:      
-         ldq_u   t3,-1(a3)
-         extql   t2,a0,t2
-         extqh   t3,a0,t3
-         or      t2,t3,t2
-         insqh   t2,a1,t3
-         insql   t2,a1,t2
-         lda     t4,-1(zero)
-         mskql   t4,a2,t5
-         cmovne  t5,t5,t4
-         insqh   t4,a1,t5
-         insql   t4,a1,t4
-         addq    a1,a2,a4
-         ldq_u   t6,0(a1)
-         ldq_u   t7,-1(a4)
-         bic     t6,t4,t6
-         bic     t7,t5,t7
-         and     t2,t4,t2
-         and     t3,t5,t3
-         or      t2,t6,t2
-         or      t3,t7,t3
-         stq_u   t3,-1(a4)
-         stq_u   t2,0(a1)
-         mov     zero, v0              /* return 0. */
-         RET
- 
- /* Basically equivalent to previous case, only backwards.
-    Not quite as highly optimized */
- bcopy_overlap:                
-         addq    a0,a2,a3
-         addq    a1,a2,a4
- 
- /* less than 8 bytes - don't worry about overlap */
-         cmpule  a2,8,t0
-         bne     t0,bcopy_ov_short
- 
- /* Possibly do a partial first word */
-         and     a4,7,t4
-         beq     t4,bcopy_ov_nostart2
-         subq    a3,t4,a3
-         subq    a4,t4,a4
-         ldq_u   t1,0(a3)
-         subq    a2,t4,a2
-         ldq_u   t2,7(a3)
-         ldq     t3,0(a4)
-         extql   t1,a3,t1
-         extqh   t2,a3,t2
-         or      t1,t2,t1
-       mskqh   t3,t4,t3
-       mskql   t1,t4,t1
-       or      t1,t3,t1
-       stq     t1,0(a4)
- 
- bcopy_ov_nostart2:
-       bic     a2,7,t4
-       and     a2,7,a2
-       beq     t4,bcopy_ov_lp_end
- 
- /* This could be more pipelined, but it doesn't seem worth it */
- bcopy_ov_lp:  
-       ldq_u   t0,-8(a3)
-       subq    a4,8,a4
-       ldq_u   t1,-1(a3)
-       subq    a3,8,a3
-       extql   t0,a3,t0
-       extqh   t1,a3,t1
-       subq    t4,8,t4
-       or      t0,t1,t0
-       stq     t0,0(a4)
-       bne     t4,bcopy_ov_lp
- 
- bcopy_ov_lp_end:      
-       beq     a2,bcopy_done
-       
-       ldq_u   t0,0(a0)
-       ldq_u   t1,7(a0)
-       ldq_u   t2,0(a1)
-       extql   t0,a0,t0
-       extqh   t1,a0,t1
-       or      t0,t1,t0
-       insql   t0,a1,t0
-       mskql   t2,a1,t2
-       or      t2,t0,t2
-       stq_u   t2,0(a1)
-       
- bcopy_done:   
-         mov     zero, v0              /* return 0. */
-       RET
- 
- bcopy_ov_short:       
-         ldq_u   t2,0(a0)
-         br      zero,bcopy_da_finish
-       
-       END(bcopy)
- 
- #else
- LEAF(bcopy, 3)
        SETGP(pv)
        mov     a2, t0                  /* t0 = i = len */
        beq     a2, 2f                  /* if (len == 0), bail out */
--- 1039,1045 ----
***************
*** 1287,1370 ****
          mov     zero, v0              /* return 0. */
        RET
        END(bcopy)
- #endif
- 
- #if 1
- LEAF(bzero,2)
-       ble     a1,bzero_done
-       bic     a1,63,t3           /* t3 is # bytes to do 64 bytes at a time */
- 
- /* If nothing in first word, ignore it */
-       subq    zero,a0,t0              
-       and     t0,7,t0         /* t0 = (0-size)%8 */
-       beq     t0,bzero_nostart1
-       
-       cmpult  a1,t0,t1                /* if size > size%8 goto noshort */
-       beq     t1,bzero_noshort
- 
- /* The whole thing is less than a word. Mask off 1..7 bytes, and finish */
-       ldq_u   t2,0(a0)                
-       lda     t0,-1(zero)     /* t0=-1 */
-       mskql   t0,a1,t0                /* Get ff in bytes (a0%8)..((a0+a1-1)%8) */
-       insql   t0,a0,t0
-       bic     t2,t0,t2                /* zero those bytes in word */
-       stq_u   t2,0(a0)
-       mov     zero, v0                /* return 0. */
-       RET
- 
- bzero_noshort:
- 
- /* Handle the first partial word */
-       ldq_u   t2,0(a0)                
-       subq    a1,t0,a1                
-       mskql   t2,a0,t2                /* zero bytes (a0%8)..7 in word */
-       stq_u   t2,0(a0)
-       
-       addq    a0,t0,a0                /* round a0 up to next word */
-       bic     a1,63,t3                /* recalc t3 (# bytes to do 64 bytes at a time)*/
- bzero_nostart1:
- 
- /* Loop, zeroing 64 bytes at a time */
-       beq     t3,bzero_lp_done
- bzero_lp:     
-       stq     zero,0(a0)
-       stq     zero,8(a0)
-       stq     zero,16(a0)
-       stq     zero,24(a0)
-       subq    t3,64,t3
-       stq     zero,32(a0)
-       stq     zero,40(a0)
-       stq     zero,48(a0)
-       stq     zero,56(a0)
-       addq    a0,64,a0
-       bne     t3,bzero_lp
- 
- bzero_lp_done:        
- /* Handle the last 0..7 words */
- /* We mask off the low bits, so we don't need an extra compare
-    instruction for the loop (just a bne. heh-heh) */
-       and     a1,0x38,t4
-       beq     t4,bzero_finish_lp_done
- bzero_finish_lp:      
-       stq     zero,0(a0)
-       subq    t4,8,t4
-       addq    a0,8,a0
-       bne     t4,bzero_finish_lp
-       
- /* Do the last partial word */
- bzero_finish_lp_done: 
-       and     a1,7,t5         /* 0..7 bytes left */
-         beq     t5,bzero_done /* mskqh won't change t0 if t5==0, but I 
-                                  don't want to touch, say, a new VM page */
-       ldq     t0,0(a0)
-       mskqh   t0,t5,t0
-       stq     t0,0(a0)
- bzero_done:
-       mov     zero, v0                /* return 0. */
-       RET
- 
-       END(bzero)
- #endif
        
  NESTED(copyin, 3, 16, ra, 0, 0)
        SETGP(pv)
--- 1062,1067 ----

--
Trevor Blackwell         tlb@eecs.harvard.edu          (617) 495-8912