Subject: Compiler changes for XScale, round 1
To: None <port-arm@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: port-arm
Date: 08/20/2002 16:23:58
--45Z9DzgjV8m4Oswq
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Folks...

GCC 3.x has several XScale-specific optimizations, mostly related to
instruction scheduling.  I've back-ported the scheduling-related bits
to our GCC 2.95.3, resulting in the attached patch.

I will check this into -current, and after letting it shake out a bit
more, request a pullup to the NetBSD 1.6 branch for 1.6.1.

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--45Z9DzgjV8m4Oswq
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=gcc-xscale-1

Index: arm.c
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/toolchain/gcc/config/arm/arm.c,v
retrieving revision 1.6
diff -c -r1.6 arm.c
*** arm.c	2002/08/07 03:27:39	1.6
--- arm.c	2002/08/20 22:07:51
***************
*** 103,108 ****
--- 103,109 ----
  #define FL_THUMB      0x20            /* Thumb aware */
  #define FL_LDSCHED    0x40	      /* Load scheduling necessary */
  #define FL_STRONG     0x80	      /* StrongARM */
+ #define FL_XSCALE     0x100           /* XScale */
  
  /* The bits in this mask specify which instructions we are allowed to generate.  */
  static int insn_flags = 0;
***************
*** 127,132 ****
--- 128,136 ----
  /* Nonzero if this chip is a StrongARM.  */
  int arm_is_strong = 0;
  
+ /* Nonzero if this chip is an XScale.  */
+ int arm_is_xscale = 0;
+ 
  /* Nonzero if this chip is a an ARM6 or an ARM7.  */
  int arm_is_6_or_7 = 0;
  
***************
*** 235,241 ****
       --thorpej@netbsd.org  */
    {"arm10tdmi",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
    {"arm1020t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"xscale",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
    
    {NULL, 0}
  };
--- 239,245 ----
       --thorpej@netbsd.org  */
    {"arm10tdmi",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
    {"arm1020t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"xscale",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG | FL_XSCALE },
    
    {NULL, 0}
  };
***************
*** 523,528 ****
--- 527,533 ----
    /* Initialise boolean versions of the flags, for use in the arm.md file.  */
    arm_fast_multiply = (insn_flags & FL_FAST_MULT) != 0;
    arm_arch4         = (insn_flags & FL_ARCH4) != 0;
+   arm_is_xscale     = (insn_flags & FL_XSCALE) != 0;
    
    arm_ld_sched      = (tune_flags & FL_LDSCHED) != 0;
    arm_is_strong     = (tune_flags & FL_STRONG) != 0;
***************
*** 574,579 ****
--- 579,587 ----
       to load a constant, and the load scheduler may well reduce that to 1.  */
    if (optimize_size || (tune_flags & FL_LDSCHED))
      arm_constant_limit = 1;
+ 
+   if (arm_is_xscale)
+     arm_constant_limit = 2;
    
    /* If optimizing for size, bump the number of instructions that we
       are prepared to conditionally execute (even on a StrongARM). 
***************
*** 1867,1872 ****
--- 1875,1921 ----
  {
    rtx i_pat, d_pat;
  
+   /* Some true dependencies can have a higher cost depending
+      on precisely how certain input operands are used.  */
+   if (arm_is_xscale
+       && REG_NOTE_KIND (link) == 0
+       && recog_memoized (insn) < 0
+       && recog_memoized (dep) < 0)
+     {
+       int shift_opnum = get_attr_shift (insn);
+       enum attr_type attr_type = get_attr_type (dep);
+ 
+       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
+ 	 operand for INSN.  If we have a shifted input operand and the
+ 	 instruction we depend on is another ALU instruction, then we may
+ 	 have to account for an additional stall.  */
+       if (shift_opnum != 0 && attr_type == TYPE_NORMAL)
+ 	{
+ 	  rtx shifted_operand;
+ 	  int opno;
+ 
+ 	  /* Get the shifted operand.  */
+ 	  extract_insn (insn);
+ 	  shifted_operand = recog_operand[shift_opnum];
+ 
+ 	  /* Iterate over all the operands in DEP.  If we write an operand
+ 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
+ 	     cost of this dependency.  */
+ 	  extract_insn (dep);
+ 	  preprocess_constraints ();
+ 	  for (opno = 0; opno < recog_n_operands; opno++)
+ 	    {
+ 	      /* We can ignore strict inputs.  */
+ 	      if (recog_op_type[opno] == OP_IN)
+ 		continue;
+ 
+ 	      if (reg_overlap_mentioned_p (recog_operand[opno],
+ 					   shifted_operand))
+ 		return 2;
+ 	    }
+ 	}
+     }
+ 
    /* XXX This is not strictly true for the FPA. */
    if (REG_NOTE_KIND(link) == REG_DEP_ANTI
        || REG_NOTE_KIND(link) == REG_DEP_OUTPUT)
***************
*** 3164,3169 ****
--- 3213,3270 ----
    int sign = up ? 1 : -1;
    rtx mem;
  
+   /* XScale has load-store double instructions, but they have stricter
+      alignment requirements than load-store multiple, so we can not
+      use them.
+ 
+      For XScale ldm requires 2 + NREGS cycles to complete and blocks
+      the pipeline until completion.
+ 
+ 	NREGS		CYCLES
+ 	  1		  3
+ 	  2		  4
+ 	  3		  5
+ 	  4		  6
+      
+      An ldr instruction takes 1-3 cycles, but does not block the
+      pipeline.
+ 
+ 	NREGS		CYCLES
+ 	  1		 1-3
+ 	  2		 2-6
+ 	  3		 3-9
+ 	  4		 4-12
+ 
+      Best case ldr will always win.  However, the more ldr instructions
+      we issue, the less likely we are to be able to schedule them well.
+      Using ldr instructions also increases code size.
+ 
+      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
+      for counts of 3 or 4 regs.  */
+   if (arm_is_xscale && count <= 2 && ! optimize_size)
+     {
+       rtx seq;
+ 
+       start_sequence ();
+ 
+       for (i = 0; i < count; i++)
+ 	{
+ 	  mem = gen_rtx_MEM (SImode, plus_constant (from, i * 4 * sign));
+ 	  RTX_UNCHANGING_P (mem) = unchanging_p;
+ 	  MEM_IN_STRUCT_P (mem) = in_struct_p;
+ 	  MEM_SCALAR_P (mem) = scalar_p;
+ 	  emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
+ 	}
+ 
+       if (write_back)
+ 	emit_move_insn (from, plus_constant (from, count * 4 * sign));
+ 
+       seq = gen_sequence ();
+       end_sequence ();
+ 
+       return seq;
+     }
+ 
    result = gen_rtx_PARALLEL (VOIDmode,
  			     rtvec_alloc (count + (write_back ? 2 : 0)));
    if (write_back)
***************
*** 3207,3212 ****
--- 3308,3339 ----
    rtx result;
    int sign = up ? 1 : -1;
    rtx mem;
+ 
+   /* See arm_gen_load_multiple for discussion of
+      the pros/cons of ldm/stm usage for XScale.  */
+   if (arm_is_xscale && count <= 2 && ! optimize_size)
+     {
+       rtx seq;
+ 
+       start_sequence ();
+ 
+       for (i = 0; i < count; i++)
+ 	{
+ 	  mem = gen_rtx_MEM (SImode, plus_constant (to, i * 4 * sign));
+ 	  RTX_UNCHANGING_P (mem) = unchanging_p;
+ 	  MEM_IN_STRUCT_P (mem) = in_struct_p;
+ 	  MEM_SCALAR_P (mem) = scalar_p;
+ 	  emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
+ 	}
+ 
+       if (write_back)
+ 	emit_move_insn (to, plus_constant (to, count * 4 * sign));
+ 
+       seq = gen_sequence ();
+       end_sequence ();
+ 
+       return seq;
+     }
  
    result = gen_rtx_PARALLEL (VOIDmode,
  			     rtvec_alloc (count + (write_back ? 2 : 0)));
Index: arm.h
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/toolchain/gcc/config/arm/arm.h,v
retrieving revision 1.5
diff -c -r1.5 arm.h
*** arm.h	2002/08/07 03:27:39	1.5
--- arm.h	2002/08/20 22:07:54
***************
*** 477,482 ****
--- 477,485 ----
  /* Nonzero if this chip is a StrongARM.  */
  extern int arm_is_strong;
  
+ /* Nonzero if this chip is an XScale.  */
+ extern int arm_is_xscale;
+ 
  /* Nonzero if this chip is a an ARM6 or an ARM7.  */
  extern int arm_is_6_or_7;
  
***************
*** 614,622 ****
  #define BIGGEST_ALIGNMENT  32
  
  /* Make strings word-aligned so strcpy from constants will be faster.  */
! #define CONSTANT_ALIGNMENT(EXP, ALIGN)  \
!   (TREE_CODE (EXP) == STRING_CST        \
!    && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
  
  /* Every structures size must be a multiple of 32 bits.  */
  /* This is for compatibility with ARMCC.  ARM SDT Reference Manual
--- 617,628 ----
  #define BIGGEST_ALIGNMENT  32
  
  /* Make strings word-aligned so strcpy from constants will be faster.  */
! #define CONSTANT_ALIGNMENT_FACTOR (! arm_is_xscale ? 1 : 2)
! 
! #define CONSTANT_ALIGNMENT(EXP, ALIGN)				\
!   ((TREE_CODE (EXP) == STRING_CST				\
!     && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR)	\
!    ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN))
  
  /* Every structures size must be a multiple of 32 bits.  */
  /* This is for compatibility with ARMCC.  ARM SDT Reference Manual
***************
*** 1702,1707 ****
--- 1708,1716 ----
  /* Max number of bytes we can move from memory to memory
     in one reasonably fast instruction.  */
  #define MOVE_MAX 4
+ 
+ #undef  MOVE_RATIO
+ #define MOVE_RATIO (arm_is_xscale ? 4 : 2)
  
  /* Define if operations between registers always perform the operation
     on the full register even if a narrower mode is specified.  */
Index: arm.md
===================================================================
RCS file: /cvsroot/gnusrc/gnu/dist/toolchain/gcc/config/arm/arm.md,v
retrieving revision 1.4
diff -c -r1.4 arm.md
*** arm.md	2002/05/03 20:32:22	1.4
--- arm.md	2002/08/20 22:08:00
***************
*** 48,53 ****
--- 48,58 ----
  
  (define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_is_strong")))
  
+ ;; "shift": operand number of an input operand that is shifted.  Zero if
+ ;; the given instruction does not shift one of its input operands.
+ (define_attr "is_xscale" "no,yes" (const (symbol_ref "arm_is_xscale")))
+ (define_attr "shift" "" (const_int 0))
+ 
  ; Floating Point Unit.  If we only have floating point emulation, then there
  ; is no point in scheduling the floating point insns.  (Well, for best
  ; performance we should try and group them together).
***************
*** 238,250 ****
--- 243,269 ----
  ;; Core unit
  ;;--------------------------------------------------------------------
  ;; Everything must spend at least one cycle in the core unit
+ (define_function_unit "core" 1 0 (eq_attr "core_cycles" "single") 1 1)
+ 
  (define_function_unit "core" 1 0
    (and (eq_attr "ldsched" "yes") (eq_attr "type" "store1")) 1 1)
  
  (define_function_unit "core" 1 0
    (and (eq_attr "ldsched" "yes") (eq_attr "type" "load")) 2 1)
  
+ ;; We do not need to conditionalize the define_function_unit immediately
+ ;; above.  This one will be ignored for anything other than xscale
+ ;; compiles and for xscale compiles it provides a larger delay
+ ;; and the scheduler will DTRT.
+ ;; FIXME: this test needs to be revamped so it does not depend on this feature
+ ;; of the scheduler.
+ 
  (define_function_unit "core" 1 0
+   (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
+        (eq_attr "is_xscale" "yes"))
+    3 1)
+ 
+ (define_function_unit "core" 1 0
    (and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1")) 2 2)
  
  (define_function_unit "core" 1 0
***************
*** 275,280 ****
--- 294,303 ----
  (define_function_unit "core" 1 0 (eq_attr "type" "store3") 4 4)
  
  (define_function_unit "core" 1 0 (eq_attr "type" "store4") 5 5)
+ 
+ (define_function_unit "core" 1 0
+   (and (eq_attr "core_cycles" "multi")
+        (eq_attr "type" "!mult,load,store1,store2,store3,store4")) 32 32)
  
  ;; Note: For DImode insns, there is normally no reason why operands should
  ;; not be in the same register, what we don't want is for something being
***************
*** 1410,1416 ****
  			  (match_operand:SI 3 "arm_rhs_operand" "rM")]))
  		(match_operand:SI 1 "s_register_operand" "r")))]
    ""
!   "bic%?\\t%0, %1, %2%S4")
  
  (define_insn "*andsi_notsi_si_compare0"
    [(set (reg:CC_NOOV 24)
--- 1433,1441 ----
  			  (match_operand:SI 3 "arm_rhs_operand" "rM")]))
  		(match_operand:SI 1 "s_register_operand" "r")))]
    ""
!   "bic%?\\t%0, %1, %2%S4"
!   [(set_attr "shift" "2")]
! )
  
  (define_insn "*andsi_notsi_si_compare0"
    [(set (reg:CC_NOOV 24)
***************
*** 1783,1789 ****
  	 [(match_operand:SI 1 "s_register_operand" "r")
  	  (match_operand:SI 2 "reg_or_int_operand" "rM")]))]
    ""
!   "mov%?\\t%0, %1%S3")
  
  (define_insn "*shiftsi3_compare0"
    [(set (reg:CC_NOOV 24)
--- 1808,1816 ----
  	 [(match_operand:SI 1 "s_register_operand" "r")
  	  (match_operand:SI 2 "reg_or_int_operand" "rM")]))]
    ""
!   "mov%?\\t%0, %1%S3"
!   [(set_attr "shift" "1")]
! )
  
  (define_insn "*shiftsi3_compare0"
    [(set (reg:CC_NOOV 24)
***************
*** 1795,1801 ****
  	(match_op_dup 3 [(match_dup 1) (match_dup 2)]))]
    ""
    "mov%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*shiftsi3_compare0_scratch"
    [(set (reg:CC_NOOV 24)
--- 1822,1831 ----
  	(match_op_dup 3 [(match_dup 1) (match_dup 2)]))]
    ""
    "mov%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "1")
!  ]
! )
  
  (define_insn "*shiftsi3_compare0_scratch"
    [(set (reg:CC_NOOV 24)
***************
*** 1806,1812 ****
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "mov%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*notsi_shiftsi"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
--- 1836,1845 ----
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "mov%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "1")
!  ]
! )
  
  (define_insn "*notsi_shiftsi"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
***************
*** 1814,1820 ****
  		 [(match_operand:SI 1 "s_register_operand" "r")
  		  (match_operand:SI 2 "arm_rhs_operand" "rM")])))]
    ""
!   "mvn%?\\t%0, %1%S3")
  
  (define_insn "*notsi_shiftsi_compare0"
    [(set (reg:CC_NOOV 24)
--- 1847,1855 ----
  		 [(match_operand:SI 1 "s_register_operand" "r")
  		  (match_operand:SI 2 "arm_rhs_operand" "rM")])))]
    ""
!   "mvn%?\\t%0, %1%S3"
!   [(set_attr "shift" "1")]
! )
  
  (define_insn "*notsi_shiftsi_compare0"
    [(set (reg:CC_NOOV 24)
***************
*** 1826,1832 ****
  	(not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))]
    ""
    "mvn%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*not_shiftsi_compare0_scratch"
    [(set (reg:CC_NOOV 24)
--- 1861,1870 ----
  	(not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))]
    ""
    "mvn%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "1")
!  ]
! )
  
  (define_insn "*not_shiftsi_compare0_scratch"
    [(set (reg:CC_NOOV 24)
***************
*** 1837,1843 ****
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "mvn%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")])
  
  
  ;; Unary arithmetic insns
--- 1875,1884 ----
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "mvn%?s\\t%0, %1%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "1")
!  ]
! )
  
  
  ;; Unary arithmetic insns
***************
*** 1900,1905 ****
--- 1941,1947 ----
     cmp\\t%0, #0\;rsblt\\t%0, %0, #0
     eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
  [(set_attr "conds" "clob,*")
+  (set_attr "shift" "1")
   (set_attr "length" "8")])
  
  (define_insn "*neg_abssi2"
***************
*** 1911,1916 ****
--- 1953,1959 ----
     cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
     eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
  [(set_attr "conds" "clob,*")
+  (set_attr "shift" "1")
   (set_attr "length" "8")])
  
  (define_insn "abssf2"
***************
*** 2163,2169 ****
      output_asm_insn (\"mov%?\\t%Q0, %1\", operands);
    return \"mov%?\\t%R0, %Q0, asr #31\";
  "
! [(set_attr "length" "8")])
  
  (define_expand "zero_extendhisi2"
    [(set (match_dup 2) (ashift:SI (match_operand:HI 1 "nonimmediate_operand" "")
--- 2206,2215 ----
      output_asm_insn (\"mov%?\\t%Q0, %1\", operands);
    return \"mov%?\\t%R0, %Q0, asr #31\";
  "
! [(set_attr "length" "8")
!  (set_attr "shift" "1")
!  ]
! )
  
  (define_expand "zero_extendhisi2"
    [(set (match_dup 2) (ashift:SI (match_operand:HI 1 "nonimmediate_operand" "")
***************
*** 3597,3603 ****
  		      (match_operand:SI 2 "arm_rhs_operand" "rM")])))]
    ""
    "cmp%?\\t%0, %1%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*cmpsi_shiftsi_swp"
    [(set (reg:CC_SWP 24)
--- 3643,3652 ----
  		      (match_operand:SI 2 "arm_rhs_operand" "rM")])))]
    ""
    "cmp%?\\t%0, %1%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "1")
!  ]
! )
  
  (define_insn "*cmpsi_shiftsi_swp"
    [(set (reg:CC_SWP 24)
***************
*** 3607,3613 ****
  			(match_operand:SI 0 "s_register_operand" "r")))]
    ""
    "cmp%?\\t%0, %1%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*cmpsi_neg_shiftsi"
    [(set (reg:CC 24)
--- 3656,3665 ----
  			(match_operand:SI 0 "s_register_operand" "r")))]
    ""
    "cmp%?\\t%0, %1%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "1")
!  ]
! )
  
  (define_insn "*cmpsi_neg_shiftsi"
    [(set (reg:CC 24)
***************
*** 3617,3623 ****
  			      (match_operand:SI 2 "arm_rhs_operand" "rM")]))))]
    ""
    "cmn%?\\t%0, %1%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*cmpsf_insn"
    [(set (reg:CCFP 24)
--- 3669,3678 ----
  			      (match_operand:SI 2 "arm_rhs_operand" "rM")]))))]
    ""
    "cmn%?\\t%0, %1%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "1")
!  ]
! )
  
  (define_insn "*cmpsf_insn"
    [(set (reg:CCFP 24)
***************
*** 4467,4473 ****
                (match_operand:SI 5 "reg_or_int_operand" "rI")])
             (match_operand:SI 2 "s_register_operand" "r")]))]
    ""
!   "%i1%?\\t%0, %2, %4%S3")
  
  (define_insn "*arith_shiftsi_compare0"
    [(set (reg:CC_NOOV 24)
--- 4522,4530 ----
                (match_operand:SI 5 "reg_or_int_operand" "rI")])
             (match_operand:SI 2 "s_register_operand" "r")]))]
    ""
!   "%i1%?\\t%0, %2, %4%S3"
!   [(set_attr "shift" "4")]
! )
  
  (define_insn "*arith_shiftsi_compare0"
    [(set (reg:CC_NOOV 24)
***************
*** 4482,4488 ****
  			 (match_dup 2)]))]
    ""
    "%i1%?s\\t%0, %2, %4%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*arith_shiftsi_compare0_scratch"
    [(set (reg:CC_NOOV 24)
--- 4539,4548 ----
  			 (match_dup 2)]))]
    ""
    "%i1%?s\\t%0, %2, %4%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "4")
!  ]
! )
  
  (define_insn "*arith_shiftsi_compare0_scratch"
    [(set (reg:CC_NOOV 24)
***************
*** 4495,4501 ****
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "%i1%?s\\t%0, %2, %4%S3"
! [(set_attr "conds" "set")])
  
  (define_insn "*sub_shiftsi"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
--- 4555,4564 ----
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "%i1%?s\\t%0, %2, %4%S3"
! [(set_attr "conds" "set")
!  (set_attr "shift" "4")
!  ]
! )
  
  (define_insn "*sub_shiftsi"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
***************
*** 4504,4510 ****
  		   [(match_operand:SI 3 "s_register_operand" "r")
  		    (match_operand:SI 4 "reg_or_int_operand" "rM")])))]
    ""
!   "sub%?\\t%0, %1, %3%S2")
  
  (define_insn "*sub_shiftsi_compare0"
    [(set (reg:CC_NOOV 24)
--- 4567,4575 ----
  		   [(match_operand:SI 3 "s_register_operand" "r")
  		    (match_operand:SI 4 "reg_or_int_operand" "rM")])))]
    ""
!   "sub%?\\t%0, %1, %3%S2"
!   [(set_attr "shift" "3")]
! )
  
  (define_insn "*sub_shiftsi_compare0"
    [(set (reg:CC_NOOV 24)
***************
*** 4519,4525 ****
  						 (match_dup 4)])))]
    ""
    "sub%?s\\t%0, %1, %3%S2"
! [(set_attr "conds" "set")])
  
  (define_insn "*sub_shiftsi_compare0_scratch"
    [(set (reg:CC_NOOV 24)
--- 4584,4593 ----
  						 (match_dup 4)])))]
    ""
    "sub%?s\\t%0, %1, %3%S2"
! [(set_attr "conds" "set")
!  (set_attr "shift" "3")
!  ]
! )
  
  (define_insn "*sub_shiftsi_compare0_scratch"
    [(set (reg:CC_NOOV 24)
***************
*** 4532,4538 ****
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "sub%?s\\t%0, %1, %3%S2"
! [(set_attr "conds" "set")])
  
  ;; These variants of the above insns can occur if the first operand is the
  ;; frame pointer and we eliminate that.  This is a kludge, but there doesn't
--- 4600,4609 ----
     (clobber (match_scratch:SI 0 "=r"))]
    ""
    "sub%?s\\t%0, %1, %3%S2"
! [(set_attr "conds" "set")
!  (set_attr "shift" "3")
!  ]
! )
  
  ;; These variants of the above insns can occur if the first operand is the
  ;; frame pointer and we eliminate that.  This is a kludge, but there doesn't
***************
*** 5236,5241 ****
--- 5307,5313 ----
     mov%D5\\t%0, %1\;mov%d5\\t%0, %2%S4
     mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
  [(set_attr "conds" "use")
+  (set_attr "shift" "2")
   (set_attr "length" "4,8,8")])
  
  (define_insn "*ifcompare_move_shift"
***************
*** 5269,5274 ****
--- 5341,5347 ----
     mov%d5\\t%0, %1\;mov%D5\\t%0, %2%S4
     mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
  [(set_attr "conds" "use")
+  (set_attr "shift" "2")
   (set_attr "length" "4,8,8")])
  
  (define_insn "*ifcompare_shift_shift"
***************
*** 5303,5308 ****
--- 5376,5382 ----
    ""
    "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
  [(set_attr "conds" "use")
+  (set_attr "shift" "1")
   (set_attr "length" "8")])
  
  (define_insn "*ifcompare_not_arith"

--45Z9DzgjV8m4Oswq--