Port-mips archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

binutil hacks for loongson2



Hello,
the loongson2f CPU has hardware bugs that must be worked around in
software. Fortunately the workaround is only required for kernel mode,
so userland can be shared with other MIPS platforms.

binutils has 2 options for this: -mfix-loongson2f-nop and
-mfix-loongson2f-jump.
The first one causes nop instructions to be replaced with 'or at,at,zero'.
No problem with this (although my CPU doesn't seem to need it).
But from what I've read, this could be required for both kernel and
userland :(

The second one is to work around an issue in kernel mode with
jump register instructions and branch prediction logic, as explained here:
https://groups.google.com/group/loongson-dev/msg/e0d2e220958f10a6?dmode=source&hl=zh
Basically, if the branch prediction logic makes the wrong choice, the CPU
would end up fetching an instruction with a wrong register value and this
can hang the system if the wrong value happens to be in I/O space.
What -mfix-loongson2f-jump does is to insert instructions before the
j reg or jr reg:
        lui at, 0xcfff
        ori at, at, 0xffff
        and reg, reg, at
        j(r) reg
(instead of jump reg we do jump (reg & 0xcfffffff)).

I have issues with this approach. For example, you can't do a jump to
0xbfc00000 anymore without special care. Basically, this assumes that all
kernel code is within the low part of kseg0, but with a 64bit kernel and/or
modules we may want to make calls outside of this region (for example,
calls to sseg or kseg3). Worse, the CPU is going to jump to the wrong
address silently, which can be tricky to debug.

Another problem I have with -mfix-loongson2f-jump as implemented right
now is that the fix is silently discarded in .set noat assembly sections.
I think it should warn in this case instead: instead of
      if (! mips_opts.at)
              return;

do
      if (! mips_opts.at) {
              as_warn (_("unable to apply loongson2f BTB workaround when .set noat"));
              return;
      }

Another workaround, implemented by OpenBSD, is to clear the branch target
buffer before doing the jump. This is done via a write to a cop0 register:
        li $at, COP_0_BTB_CLEAR | COP_0_RAS_DISABLE
        dmtc0 $at, COP_0_DIAG
        j(r) reg
It's one instruction fewer than the binutils way but it may be slower,
because of the dmtc0. But we don't change the address the compiler or
programmer wanted to jump to, so it looks much safer to me. Also,
it does an as_warn() if it can't use $at :)

The patch for this is attached. Any comments or objections to committing it?

-- 
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
     NetBSD: 26 ans d'experience feront toujours la difference
--
Index: dist/gas/config/tc-mips.c
===================================================================
RCS file: /cvsroot/src/external/gpl3/binutils/dist/gas/config/tc-mips.c,v
retrieving revision 1.3
diff -u -p -u -r1.3 tc-mips.c
--- dist/gas/config/tc-mips.c   14 Apr 2011 07:49:48 -0000      1.3
+++ dist/gas/config/tc-mips.c   2 Aug 2011 15:33:53 -0000
@@ -764,6 +764,8 @@ static int mips_fix_vr4130;
    efficient expansion.  */
 
 static int mips_relax_branch;
+
+static int mips_fix_loongson2f_btb;
 
 /* The expansion of many macros depends on the type of symbol that
    they refer to.  For example, when generating position-dependent code,
@@ -1036,6 +1038,7 @@ static void mips_no_prev_insn (void);
 static void mips16_macro_build
   (expressionS *, const char *, const char *, va_list);
 static void load_register (int, expressionS *, int);
+static void macro_build (expressionS *, const char *, const char *, ...);
 static void macro_start (void);
 static void macro_end (void);
 static void macro (struct mips_cl_insn * ip);
@@ -3485,6 +3488,41 @@ macro_read_relocs (va_list *args, bfd_re
       r[i] = (bfd_reloc_code_real_type) va_arg (*args, int);
 }
 
+/* Fix jump through register issue on loongson2f processor for kernel code:
+   force a BTB clear before the jump to prevent it from being incorrectly
+   prefetched by the branch prediction engine. */
+
+static void
+macro_build_jrpatch (expressionS *ep, unsigned int sreg)
+{
+  if (!mips_fix_loongson2f_btb)
+    return;
+
+  if (sreg == ZERO || sreg == KT0 || sreg == KT1 || sreg == AT)
+    return;
+
+  if (!mips_opts.at)
+    {
+      as_warn (_("unable to apply loongson2f BTB workaround when .set noat"));
+      return;
+    }
+
+  /* li $at, COP_0_BTB_CLEAR | COP_0_RAS_DISABLE */
+  ep->X_op = O_constant;
+  ep->X_add_number = 3;
+  macro_build (ep, "ori", "t,r,i", AT, ZERO, BFD_RELOC_LO16);
+
+  /* dmtc0 $at, COP_0_DIAG */
+  macro_build (NULL, "dmtc0", "t,G", AT, 22);
+
+  /* Hide these two instructions to avoid getting a ``macro expanded into
+     multiple instructions'' warning. */
+  if (mips_relax.sequence != 2)
+    mips_macro_warning.sizes[0] -= 2 * 4;
+  if (mips_relax.sequence != 1)
+    mips_macro_warning.sizes[1] -= 2 * 4;
+}
+
 /* Build an instruction created by a macro expansion.  This is passed
    a pointer to the count of instructions created so far, an
    expression, the name of the instruction to build, an operand format
@@ -3886,6 +3924,7 @@ macro_build_jalr (expressionS *ep)
       frag_grow (8);
       f = frag_more (0);
     }
+  macro_build_jrpatch (ep, PIC_CALL_REG);
   macro_build (NULL, "jalr", "d,s", RA, PIC_CALL_REG);
   if (HAVE_NEWABI)
     fix_new_exp (frag_now, f - frag_now->fr_literal,
@@ -6068,6 +6107,26 @@ macro (struct mips_cl_insn *ip)
       /* AT is not used, just return */
       return;
 
+    case M_JR_S:
+      macro_build_jrpatch (&expr1, sreg);
+      macro_build (NULL, "jr", "s", sreg);
+      return;  /* didn't modify $at */
+
+    case M_J_S:
+      macro_build_jrpatch (&expr1, sreg);
+      macro_build (NULL, "j", "s", sreg);
+      return;  /* didn't modify $at */
+
+    case M_JALR_S:
+      macro_build_jrpatch (&expr1, sreg);
+      macro_build (NULL, "jalr", "s", sreg);
+      return;  /* didn't modify $at */
+
+    case M_JALR_DS:
+      macro_build_jrpatch (&expr1, sreg);
+      macro_build (NULL, "jalr", "d,s", dreg, sreg);
+      return;  /* didn't modify $at */
+
     case M_J_A:
       /* The j instruction may not be used in PIC code, since it
         requires an absolute address.  We convert it to a b
@@ -6086,12 +6145,16 @@ macro (struct mips_cl_insn *ip)
       /* Fall through.  */
     case M_JAL_2:
       if (mips_pic == NO_PIC)
-       macro_build (NULL, "jalr", "d,s", dreg, sreg);
+       {
+         macro_build_jrpatch (&expr1, sreg);
+         macro_build (NULL, "jalr", "d,s", dreg, sreg);
+       }
       else
        {
          if (sreg != PIC_CALL_REG)
            as_warn (_("MIPS PIC call to register other than $25"));
 
+         macro_build_jrpatch (&expr1, sreg);
          macro_build (NULL, "jalr", "d,s", dreg, sreg);
          if (mips_pic == SVR4_PIC && !HAVE_NEWABI)
            {
@@ -11215,9 +11278,14 @@ struct option md_longopts[] =
 #define OPTION_NO_FIX_VR4130 (OPTION_FIX_BASE + 5)
   {"mfix-vr4130",    no_argument, NULL, OPTION_FIX_VR4130},
   {"mno-fix-vr4130", no_argument, NULL, OPTION_NO_FIX_VR4130},
+#define        OPTION_FIX_LOONGSON2F_BTB (OPTION_FIX_BASE + 6)
+#define OPTION_NO_FIX_LOONGSON2F_BTB (OPTION_FIX_BASE + 7)
+  {"mfix-loongson2f-btb", no_argument, NULL, OPTION_FIX_LOONGSON2F_BTB},
+  {"mno-fix-loongson2f-btb", no_argument, NULL, OPTION_NO_FIX_LOONGSON2F_BTB},
+
 
   /* Miscellaneous options.  */
-#define OPTION_MISC_BASE (OPTION_FIX_BASE + 6)
+#define OPTION_MISC_BASE (OPTION_FIX_BASE + 8)
 #define OPTION_TRAP (OPTION_MISC_BASE + 0)
   {"trap", no_argument, NULL, OPTION_TRAP},
   {"no-break", no_argument, NULL, OPTION_TRAP},
@@ -11515,6 +11583,14 @@ md_parse_option (int c, char *arg)
       mips_fix_vr4130 = 0;
       break;
 
+    case OPTION_FIX_LOONGSON2F_BTB:
+      mips_fix_loongson2f_btb = 1;
+      break;
+
+    case OPTION_NO_FIX_LOONGSON2F_BTB:
+      mips_fix_loongson2f_btb = 0;
+      break;
+
     case OPTION_RELAX_BRANCH:
       mips_relax_branch = 1;
       break;
@@ -15435,6 +15511,7 @@ MIPS options:\n\
   fprintf (stream, _("\
 -mfix-vr4120           work around certain VR4120 errata\n\
 -mfix-vr4130           work around VR4130 mflo/mfhi errata\n\
+-mfix-loongson2f-btb   work around Loongson2F BTB errata\n\
 -mgp32                 use 32-bit GPRs, regardless of the chosen ISA\n\
 -mfp32                 use 32-bit FPRs, regardless of the chosen ISA\n\
 -msym32                        assume all symbols have 32-bit values\n\
Index: dist/gas/doc/c-mips.texi
===================================================================
RCS file: /cvsroot/src/external/gpl3/binutils/dist/gas/doc/c-mips.texi,v
retrieving revision 1.3
diff -u -p -u -r1.3 c-mips.texi
--- dist/gas/doc/c-mips.texi    14 Apr 2011 07:49:48 -0000      1.3
+++ dist/gas/doc/c-mips.texi    2 Aug 2011 15:33:53 -0000
@@ -182,6 +182,14 @@ all problems in hand-written assembler c
 @itemx -no-mfix-vr4130
 Insert nops to work around the VR4130 @samp{mflo}/@samp{mfhi} errata.
 
+@item -mfix-loongson2f-btb
+@itemx -mno-fix-loongson2f-btb
+Clear the Branch Target Buffer before any jump through a register.  This
+option is intended to be used on kernel code for the Loongson 2F processor
+only; userland code compiled with this option will fault, and kernel code
+compiled with this option run on another processor than Loongson 2F will
+yield unpredictable results.
+
 @item -m4010
 @itemx -no-m4010
 Generate code for the LSI @sc{r4010} chip.  This tells the assembler to
Index: dist/include/opcode/mips.h
===================================================================
RCS file: /cvsroot/src/external/gpl3/binutils/dist/include/opcode/mips.h,v
retrieving revision 1.2
diff -u -p -u -r1.2 mips.h
--- dist/include/opcode/mips.h  14 Apr 2011 07:49:49 -0000      1.2
+++ dist/include/opcode/mips.h  2 Aug 2011 15:33:53 -0000
@@ -762,7 +762,11 @@ enum
   M_DSUB_I,
   M_DSUBU_I,
   M_DSUBU_I_2,
+  M_JR_S,
+  M_J_S,
   M_J_A,
+  M_JALR_S,
+  M_JALR_DS,
   M_JAL_1,
   M_JAL_2,
   M_JAL_A,
Index: dist/opcodes/mips-opc.c
===================================================================
RCS file: /cvsroot/src/external/gpl3/binutils/dist/opcodes/mips-opc.c,v
retrieving revision 1.5
diff -u -p -u -r1.5 mips-opc.c
--- dist/opcodes/mips-opc.c     14 Apr 2011 08:05:00 -0000      1.5
+++ dist/opcodes/mips-opc.c     2 Aug 2011 15:33:53 -0000
@@ -712,10 +712,12 @@ const struct mips_opcode mips_builtin_op
 {"floor.w.s", "D,S",   0x4600000f, 0xffff003f, WR_D|RD_S|FP_S,         0,      
        I2      },
 {"hibernate","",        0x42000023, 0xffffffff,        0,                      
0,              V1      },
 {"ins",     "t,r,+A,+B", 0x7c000004, 0xfc00003f, WR_t|RD_s,                    
0,              I33     },
+{"jr",      "s",       0,        (int) M_JR_S, INSN_MACRO,             0,      
        I1      },
 {"jr",      "s",       0x00000008, 0xfc1fffff, UBD|RD_s,               0,      
        I1      },
 /* jr.hb is officially MIPS{32,64}R2, but it works on R1 as jr with
    the same hazard barrier effect.  */
 {"jr.hb",   "s",       0x00000408, 0xfc1fffff, UBD|RD_s,               0,      
        I32     },
+{"j",       "s",       0,         (int) M_J_S, INSN_MACRO,             0,      
        I1      },
 {"j",       "s",       0x00000008, 0xfc1fffff, UBD|RD_s,               0,      
        I1      }, /* jr */
 /* SVR4 PIC code requires special handling for j, so it must be a
    macro.  */
@@ -724,7 +726,9 @@ const struct mips_opcode mips_builtin_op
    assembler, but will never match user input (because the line above
    will match first).  */
 {"j",       "a",       0x08000000, 0xfc000000, UBD,                    0,      
        I1      },
+{"jalr",    "s",       0,      (int) M_JALR_S, INSN_MACRO,             0,      
        I1      },
 {"jalr",    "s",       0x0000f809, 0xfc1fffff, UBD|RD_s|WR_d,          0,      
        I1      },
+{"jalr",    "d,s",     0,     (int) M_JALR_DS, INSN_MACRO,             0,      
        I1      },
 {"jalr",    "d,s",     0x00000009, 0xfc1f07ff, UBD|RD_s|WR_d,          0,      
        I1      },
 /* jalr.hb is officially MIPS{32,64}R2, but it works on R1 as jalr
    with the same hazard barrier effect.  */


Home | Main Index | Thread Index | Old Index