pkgsrc-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: pkg/48680



The following reply was made to PR pkg/48680; it has been noted by GNATS.

From: Onno van der Linden <o.vd.linden%quicknet.nl@localhost>
To: gnats-bugs%netbsd.org@localhost
Cc: 
Subject: Re: pkg/48680
Date: Mon, 16 Jun 2014 19:35:12 +0200

 Two problems in the current ffmpeg and ffmpeg2 packages
 for NetBSD/i386:
 1) compile error in ac3dsp_init.c
 2) alignment issues with gcc 4.8
 
 Fixed (1) by the diff from 
http://svnweb.freebsd.org/ports/head/multimedia/ffmpeg2/files/patch-libavcodec-x86-ac3dsp_init.c?view=markup&pathrev=329611
 (1) can also be fixed by changing libavutil/x86/asm.h as done in 
http://svnweb.freebsd.org/ports/head/multimedia/ffmpeg/files/patch-libavutil-x86-asm.h?revision=332397&view=markup
 with __clang__ replaced by ARCH_X86_32.
 As noted in http://ffmpeg.org/pipermail/ffmpeg-cvslog/2012-December/058557.html
 a better fix would be to change the inline asm to yasm. An attempt
 is available from https://patches.libav.org/patch/30666/ but that would
 change the package too much.
 
 Fixed (2) by changing the ffmpeg and ffmpeg2 Makefiles slightly
 for i386: gcc 4.[01].* gets --disable-asm, and all other gcc 4
 versions get the alignment options
 (-mstackrealign -mpreferred-stack-boundary=4).
 
 Onno
 
 --- libavcodec/x86/ac3dsp_init.c.orig  2013-08-11 01:23:26.000000000 +0200
 +++ libavcodec/x86/ac3dsp_init.c       2013-10-06 17:43:36.000000000 +0200
 @@ -70,11 +70,11 @@ void ff_ac3_extract_exponents_ssse3(uint
          "shufps     $0, %%xmm6, %%xmm6          \n"             \
          "shufps     $0, %%xmm7, %%xmm7          \n"             \
          "1:                                     \n"             \
 -        "movaps       (%0, %2), %%xmm0          \n"             \
 -        "movaps       (%0, %3), %%xmm1          \n"             \
 -        "movaps       (%0, %4), %%xmm2          \n"             \
 -        "movaps       (%0, %5), %%xmm3          \n"             \
 -        "movaps       (%0, %6), %%xmm4          \n"             \
 +        "movups       (%0, %2), %%xmm0          \n"             \
 +        "movups       (%0, %3), %%xmm1          \n"             \
 +        "movups       (%0, %4), %%xmm2          \n"             \
 +        "movups       (%0, %5), %%xmm3          \n"             \
 +        "movups       (%0, %6), %%xmm4          \n"             \
          "mulps          %%xmm5, %%xmm0          \n"             \
          "mulps          %%xmm6, %%xmm1          \n"             \
          "mulps          %%xmm5, %%xmm2          \n"             \
 @@ -85,8 +85,8 @@ void ff_ac3_extract_exponents_ssse3(uint
          "addps          %%xmm3, %%xmm0          \n"             \
          "addps          %%xmm4, %%xmm2          \n"             \
     mono("addps          %%xmm2, %%xmm0          \n")            \
 -        "movaps         %%xmm0, (%0, %2)        \n"             \
 - stereo("movaps         %%xmm2, (%0, %3)        \n")            \
 +        "movups         %%xmm0, (%0, %2)        \n"             \
 + stereo("movups         %%xmm2, (%0, %3)        \n")            \
          "add               $16, %0              \n"             \
          "jl                 1b                  \n"             \
          : "+&r"(i)                                              \
 @@ -106,24 +106,26 @@ void ff_ac3_extract_exponents_ssse3(uint
          "mov              %5, %2            \n"                 \
          "1:                                 \n"                 \
          "mov -%c7(%6, %2, %c8), %3          \n"                 \
 -        "movaps     (%3, %0), %%xmm0        \n"                 \
 +        "movups     (%3, %0), %%xmm0        \n"                 \
   stereo("movaps       %%xmm0, %%xmm1        \n")                \
          "mulps        %%xmm4, %%xmm0        \n"                 \
   stereo("mulps        %%xmm5, %%xmm1        \n")                \
          "2:                                 \n"                 \
          "mov   (%6, %2, %c8), %1            \n"                 \
 -        "movaps     (%1, %0), %%xmm2        \n"                 \
 +        "movups     (%1, %0), %%xmm2        \n"                 \
   stereo("movaps       %%xmm2, %%xmm3        \n")                \
 -        "mulps   (%4, %2, 8), %%xmm2        \n"                 \
 - stereo("mulps 16(%4, %2, 8), %%xmm3        \n")                \
 +        "movups  (%4, %2, 8), %%xmm4        \n"                 \
 +        "mulps        %%xmm4, %%xmm2        \n"                 \
 + stereo("movups 16(%4, %2, 8), %%xmm4       \n")                \
 + stereo("mulps        %%xmm4, %%xmm3        \n")                \
          "addps        %%xmm2, %%xmm0        \n"                 \
   stereo("addps        %%xmm3, %%xmm1        \n")                \
          "add              $4, %2            \n"                 \
          "jl               2b                \n"                 \
          "mov              %5, %2            \n"                 \
   stereo("mov   (%6, %2, %c8), %1            \n")                \
 -        "movaps       %%xmm0, (%3, %0)      \n"                 \
 - stereo("movaps       %%xmm1, (%1, %0)      \n")                \
 +        "movups       %%xmm0, (%3, %0)      \n"                 \
 + stereo("movups       %%xmm1, (%1, %0)      \n")                \
          "add             $16, %0            \n"                 \
          "jl               1b                \n"                 \
          : "+&r"(i), "=&r"(j), "=&r"(k), "=&r"(m)                \
 @@ -152,7 +154,7 @@ static void ac3_downmix_sse(float **samp
                 matrix_cmp[3][0] == matrix_cmp[4][0]) {
          MIX5(IF1, IF0);
      } else {
 -        DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4];
 +        float matrix_simd[AC3_MAX_CHANNELS][2][4];
          float *samp[AC3_MAX_CHANNELS];
  
          for (j = 0; j < in_ch; j++)
 @@ -166,8 +168,8 @@ static void ac3_downmix_sse(float **samp
              "movss    4(%2, %0), %%xmm5         \n"
              "shufps          $0, %%xmm4, %%xmm4 \n"
              "shufps          $0, %%xmm5, %%xmm5 \n"
 -            "movaps      %%xmm4,   (%1, %0, 4)  \n"
 -            "movaps      %%xmm5, 16(%1, %0, 4)  \n"
 +            "movups      %%xmm4,   (%1, %0, 4)  \n"
 +            "movups      %%xmm5, 16(%1, %0, 4)  \n"
              "jg              1b                 \n"
              : "+&r"(j)
              : "r"(matrix_simd), "r"(matrix)
 
 --- ffmpeg/Makefile.orig       2014-06-16 19:10:35.000000000 +0200
 +++ ffmpeg/Makefile    2014-06-16 19:05:39.000000000 +0200
 @@ -41,15 +41,14 @@
  .include "../../mk/compiler.mk"
  
  # disable asm on i386 for non-gcc and gcc < 4.2
 -# no alignment options needed for gcc >= 4.6 on i386
  .if ${MACHINE_ARCH} == "i386"
  .  if !empty(MACHINE_PLATFORM:MDarwin-*-i386) \
     || !empty(MACHINE_PLATFORM:MSunOS-*-i386) \
     || !empty(CC_VERSION:Mgcc-[123]*) \
 -   || !empty(CC_VERSION:Mgcc-4.[01]*) \
 +   || !empty(CC_VERSION:Mgcc-4.[01].*) \
     ||  empty(CC_VERSION:Mgcc*)
  CONFIGURE_ARGS+=      --disable-asm
 -.  elif !empty(CC_VERSION:Mgcc-4.[23456]*)
 +.  elif !empty(CC_VERSION:Mgcc-4.*)
  CFLAGS+=              -mstackrealign -mpreferred-stack-boundary=4
  .  endif
  .endif
 
 
 #### End of ffmpeg fixes, start of ffmpeg2 fixes
 
 
 --- ffmpeg2/Makefile.orig      2014-06-16 19:10:45.000000000 +0200
 +++ ffmpeg2/Makefile   2014-06-16 19:06:17.000000000 +0200
 @@ -37,15 +37,14 @@
  .include "../../mk/compiler.mk"
  
  # disable asm on i386 for non-gcc and gcc < 4.2
 -# no alignment options needed for gcc >= 4.6 on i386
  .if ${MACHINE_ARCH} == "i386"
  .  if !empty(MACHINE_PLATFORM:MDarwin-*-i386) \
     || !empty(MACHINE_PLATFORM:MSunOS-*-i386) \
     || !empty(CC_VERSION:Mgcc-[123]*) \
 -   || !empty(CC_VERSION:Mgcc-4.[01]*) \
 +   || !empty(CC_VERSION:Mgcc-4.[01].*) \
     || empty(CC_VERSION:Mgcc*)
  CONFIGURE_ARGS+=      --disable-asm
 -.  elif !empty(CC_VERSION:Mgcc-4.[23456]*)
 +.  elif !empty(CC_VERSION:Mgcc-4.*)
  CFLAGS+=              -mstackrealign -mpreferred-stack-boundary=4
  .  endif
  .endif
 
 --- libavcodec/x86/ac3dsp_init.c.orig  2014-06-02 23:18:54.000000000 +0200
 +++ libavcodec/x86/ac3dsp_init.c       2014-06-14 21:27:55.000000000 +0200
 @@ -84,11 +84,11 @@
          "shufps     $0, %%xmm6, %%xmm6          \n"             \
          "shufps     $0, %%xmm7, %%xmm7          \n"             \
          "1:                                     \n"             \
 -        "movaps       (%0, %2), %%xmm0          \n"             \
 -        "movaps       (%0, %3), %%xmm1          \n"             \
 -        "movaps       (%0, %4), %%xmm2          \n"             \
 -        "movaps       (%0, %5), %%xmm3          \n"             \
 -        "movaps       (%0, %6), %%xmm4          \n"             \
 +        "movups       (%0, %2), %%xmm0          \n"             \
 +        "movups       (%0, %3), %%xmm1          \n"             \
 +        "movups       (%0, %4), %%xmm2          \n"             \
 +        "movups       (%0, %5), %%xmm3          \n"             \
 +        "movups       (%0, %6), %%xmm4          \n"             \
          "mulps          %%xmm5, %%xmm0          \n"             \
          "mulps          %%xmm6, %%xmm1          \n"             \
          "mulps          %%xmm5, %%xmm2          \n"             \
 @@ -99,8 +99,8 @@
          "addps          %%xmm3, %%xmm0          \n"             \
          "addps          %%xmm4, %%xmm2          \n"             \
     mono("addps          %%xmm2, %%xmm0          \n")            \
 -        "movaps         %%xmm0, (%0, %2)        \n"             \
 - stereo("movaps         %%xmm2, (%0, %3)        \n")            \
 +        "movups         %%xmm0, (%0, %2)        \n"             \
 + stereo("movups         %%xmm2, (%0, %3)        \n")            \
          "add               $16, %0              \n"             \
          "jl                 1b                  \n"             \
          : "+&r"(i)                                              \
 @@ -120,24 +120,26 @@
          "mov              %5, %2            \n"                 \
          "1:                                 \n"                 \
          "mov -%c7(%6, %2, %c8), %3          \n"                 \
 -        "movaps     (%3, %0), %%xmm0        \n"                 \
 +        "movups     (%3, %0), %%xmm0        \n"                 \
   stereo("movaps       %%xmm0, %%xmm1        \n")                \
          "mulps        %%xmm4, %%xmm0        \n"                 \
   stereo("mulps        %%xmm5, %%xmm1        \n")                \
          "2:                                 \n"                 \
          "mov   (%6, %2, %c8), %1            \n"                 \
 -        "movaps     (%1, %0), %%xmm2        \n"                 \
 +        "movups     (%1, %0), %%xmm2        \n"                 \
   stereo("movaps       %%xmm2, %%xmm3        \n")                \
 -        "mulps   (%4, %2, 8), %%xmm2        \n"                 \
 - stereo("mulps 16(%4, %2, 8), %%xmm3        \n")                \
 +        "movups  (%4, %2, 8), %%xmm4        \n"                 \
 +        "mulps        %%xmm4, %%xmm2        \n"                 \
 + stereo("movups 16(%4, %2, 8), %%xmm4       \n")                \
 + stereo("mulps        %%xmm4, %%xmm3        \n")                \
          "addps        %%xmm2, %%xmm0        \n"                 \
   stereo("addps        %%xmm3, %%xmm1        \n")                \
          "add              $4, %2            \n"                 \
          "jl               2b                \n"                 \
          "mov              %5, %2            \n"                 \
   stereo("mov   (%6, %2, %c8), %1            \n")                \
 -        "movaps       %%xmm0, (%3, %0)      \n"                 \
 - stereo("movaps       %%xmm1, (%1, %0)      \n")                \
 +        "movups       %%xmm0, (%3, %0)      \n"                 \
 + stereo("movups       %%xmm1, (%1, %0)      \n")                \
          "add             $16, %0            \n"                 \
          "jl               1b                \n"                 \
          : "+&r"(i), "=&r"(j), "=&r"(k), "=&r"(m)                \
 @@ -166,7 +168,7 @@
                 matrix_cmp[3][0] == matrix_cmp[4][0]) {
          MIX5(IF1, IF0);
      } else {
 -        DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4];
 +        float matrix_simd[AC3_MAX_CHANNELS][2][4];
          float *samp[AC3_MAX_CHANNELS];
  
          for (j = 0; j < in_ch; j++)
 @@ -180,8 +182,8 @@
              "movss    4(%2, %0), %%xmm5         \n"
              "shufps          $0, %%xmm4, %%xmm4 \n"
              "shufps          $0, %%xmm5, %%xmm5 \n"
 -            "movaps      %%xmm4,   (%1, %0, 4)  \n"
 -            "movaps      %%xmm5, 16(%1, %0, 4)  \n"
 +            "movups      %%xmm4,   (%1, %0, 4)  \n"
 +            "movups      %%xmm5, 16(%1, %0, 4)  \n"
              "jg              1b                 \n"
              : "+&r"(j)
              : "r"(matrix_simd), "r"(matrix)
 


Home | Main Index | Thread Index | Old Index