Subject: Compiler changes for XScale, round 2
To: None <port-arm@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: port-arm
Date: 08/20/2002 18:27:03
--uZ3hkaAS1mZxFaxD
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

This adds support for the ARMv5 instruction set extensions, lifted from
GCC 3.2.  In practice this amounts to new FL_ARCH5/FL_ARCH5E CPU flags
(exposed as arm_arch5/arm_arch5e), the "clz" (count leading zeros)
instruction, and an expander for ffssi2, so that __builtin_ffs() can use
"clz".

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--uZ3hkaAS1mZxFaxD
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=gcc-xscale-2

Index: arm.c
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/toolchain/gcc/config/arm/arm.c,v
retrieving revision 1.7
diff -c -r1.7 arm.c
*** arm.c	2002/08/20 23:46:44	1.7
--- arm.c	2002/08/21 01:13:31
***************
*** 100,109 ****
  #define FL_MODE26     0x04            /* 26-bit mode support */
  #define FL_MODE32     0x08            /* 32-bit mode support */
  #define FL_ARCH4      0x10            /* Architecture rel 4 */
! #define FL_THUMB      0x20            /* Thumb aware */
! #define FL_LDSCHED    0x40	      /* Load scheduling necessary */
! #define FL_STRONG     0x80	      /* StrongARM */
! #define FL_XSCALE     0x100           /* XScale */
  
  /* The bits in this mask specify which instructions we are allowed to generate.  */
  static int insn_flags = 0;
--- 100,111 ----
  #define FL_MODE26     0x04            /* 26-bit mode support */
  #define FL_MODE32     0x08            /* 32-bit mode support */
  #define FL_ARCH4      0x10            /* Architecture rel 4 */
! #define FL_ARCH5      0x20            /* Architecture rel 5 */
! #define FL_THUMB      0x40            /* Thumb aware */
! #define FL_LDSCHED    0x80	      /* Load scheduling necessary */
! #define FL_STRONG     0x100	      /* StrongARM */
! #define FL_ARCH5E     0x200           /* DSP extensions to v5 */
! #define FL_XSCALE     0x400           /* XScale */
  
  /* The bits in this mask specify which instructions we are allowed to generate.  */
  static int insn_flags = 0;
***************
*** 122,127 ****
--- 124,135 ----
  /* Nonzero if this chip supports the ARM Architecture 4 extensions */
  int arm_arch4 = 0;
  
+ /* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
+ int arm_arch5 = 0;
+ 
+ /* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
+ int arm_arch5e = 0;
+ 
  /* Nonzero if this chip can benefit from load scheduling.  */
  int arm_ld_sched = 0;
  
***************
*** 224,245 ****
    {"strongarm",	             FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
    {"strongarm110",           FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
    {"strongarm1100",          FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
! 
!   /* Local NetBSD additions.  These switches also appear in gcc 3.x.  They
!      have been added to the NetBSD in-tree 2.95.3 in order to provide for
!      command-line compatibility with gcc 3.x.
! 
!      We treat them like other CPUs that 2.95.3 already supports:
! 
!      arm10tdmi -> arm9tdmi
!      arm1020t -> arm9tdmi
! 
!      xscame -> strongarm
! 
!      --thorpej@netbsd.org  */
!   {"arm10tdmi",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"arm1020t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"xscale",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG | FL_XSCALE },
    
    {NULL, 0}
  };
--- 232,240 ----
    {"strongarm",	             FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
    {"strongarm110",           FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
    {"strongarm1100",          FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
!   {"arm10tdmi",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5 },
!   {"arm1020t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5 },
!   {"xscale",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE },
    
    {NULL, 0}
  };
***************
*** 256,275 ****
    /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
       implementations that support it, so we will leave it out for now.  */
    {"armv4t",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   /* Local NetBSD additions.  These switches also appear in gcc 3.x.  They
!      have been added to the NetBSD in-tree 2.95.3 in order to provide for
!      command-line compatibility with gcc 3.x.
! 
!      We treat them like other ARCHs that 2.95.3 already supports:
! 
!      armv5 -> armv4t
!      armv5t -> armv4t
!      armv5te -> armv4t
! 
!      --thorpej@netbsd.org  */
!   {"armv5",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   {"armv5t",   FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   {"armv5te",  FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
    {NULL, 0}
  };
  
--- 251,259 ----
    /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
       implementations that support it, so we will leave it out for now.  */
    {"armv4t",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   {"armv5",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 },
!   {"armv5t",   FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 },
!   {"armv5te",  FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 | FL_ARCH5E },
    {NULL, 0}
  };
  
***************
*** 527,532 ****
--- 511,518 ----
    /* Initialise boolean versions of the flags, for use in the arm.md file.  */
    arm_fast_multiply = (insn_flags & FL_FAST_MULT) != 0;
    arm_arch4         = (insn_flags & FL_ARCH4) != 0;
+   arm_arch5         = (insn_flags & FL_ARCH5) != 0;
+   arm_arch5e        = (insn_flags & FL_ARCH5E) != 0;
    arm_is_xscale     = (insn_flags & FL_XSCALE) != 0;
    
    arm_ld_sched      = (tune_flags & FL_LDSCHED) != 0;
Index: arm.h
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/toolchain/gcc/config/arm/arm.h,v
retrieving revision 1.6
diff -c -r1.6 arm.h
*** arm.h	2002/08/20 23:46:45	1.6
--- arm.h	2002/08/21 01:13:34
***************
*** 471,476 ****
--- 471,482 ----
  /* Nonzero if this chip supports the ARM Architecture 4 extensions */
  extern int arm_arch4;
  
+ /* Nonzero if this chip supports the ARM Architecture 5 extensions */
+ extern int arm_arch5;
+ 
+ /* Nonzero if this chip supports the ARM Architecture 5E extensions */
+ extern int arm_arch5e;
+ 
  /* Nonzero if this chip can benefit from load scheduling.  */
  extern int arm_ld_sched;
  
Index: arm.md
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/toolchain/gcc/config/arm/arm.md,v
retrieving revision 1.5
diff -c -r1.5 arm.md
*** arm.md	2002/08/20 23:46:45	1.5
--- arm.md	2002/08/21 01:13:40
***************
*** 6379,6381 ****
--- 6379,6409 ----
    assemble_align (32);
    return \"\";
  ")
+ 
+ ;; V5 instructions
+ 
+ (define_insn "clz"
+   [(set (match_operand:SI             0 "s_register_operand" "=r")
+         (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] 128))]
+   "arm_arch5"
+   "clz\\t%0, %1")
+ 
+ (define_expand "ffssi2"
+   [(set (match_operand:SI 0 "s_register_operand" "")
+ 	(ffs:SI (match_operand:SI 1 "s_register_operand" "")))]
+   "arm_arch5"
+   "
+   {
+     rtx t1, t2, t3;
+ 
+     t1 = gen_reg_rtx (SImode);
+     t2 = gen_reg_rtx (SImode);
+     t3 = gen_reg_rtx (SImode);
+ 
+     emit_insn (gen_negsi2 (t1, operands[1]));
+     emit_insn (gen_andsi3 (t2, operands[1], t1));
+     emit_insn (gen_clz (t3, t2));
+     emit_insn (gen_subsi3 (operands[0], GEN_INT (32), t3));
+     DONE;
+   }"
+ )

--uZ3hkaAS1mZxFaxD--