Source-Changes-D archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: audio2



At Wed, 8 May 2019 18:14:28 +0000,
maya%netbsd.org@localhost wrote:
> Is there some scenario where this made a difference?
> it sounds like an optimization that a compiler should already do
> 
> 5234:#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
> 5235-				*d++ += ((aint2_t)*s++) * track->volume >> 8;
> 5236-#else
> 5237-				*d++ += ((aint2_t)*s++) * track->volume / 256;
> 5238-#endif

It's signed integer so these two output different code.

 Here is the fragment of amd64 asm of "(x * vol >> 8)"
    96a5:       0f af d1                imul   %ecx,%edx
    96a8:       c1 fa 08                sar    $0x8,%edx
    96ab:       66 89 14 07             mov    %dx,(%rdi,%rax,1)

 Here is "(x * vol / 256)"
    96a5:       0f af c1                imul   %ecx,%eax
    96a8:       85 c0                   test   %eax,%eax
    96aa:       79 05                   jns    96b1 <audio_track_chvol+0x82>
    96ac:       05 ff 00 00 00          add    $0xff,%eax
    96b1:       c1 f8 08                sar    $0x8,%eax
    96b4:       66 89 04 17             mov    %ax,(%rdi,%rdx,1)

I evaluated these (a year ago).  The former is 1.9 times faster on
my old amd64 than the later, 1.3 times faster on m68k, at that point.
# Although, above code is from current but evaluation is a year ago.

Let's consider about (-1 / 2).  The mathematical answer is -0.5 so
it's 0 for integers.  And (-1 >> 1) is -1.  These two answers are
different but this value is audio wave that human hear.  I took
performance.

However, I understand that my code looks strange.
How about this?

Index: sys/dev/audio/audio.c
===================================================================
RCS file: /cvsroot/src/sys/dev/audio/audio.c,v
retrieving revision 1.8
diff -u -r1.8 audio.c
--- sys/dev/audio/audio.c	21 May 2019 12:52:57 -0000	1.8
+++ sys/dev/audio/audio.c	23 May 2019 03:31:36 -0000
@@ -465,6 +465,29 @@
 #define SPECIFIED(x)	((x) != ~0)
 #define SPECIFIED_CH(x)	((x) != (u_char)~0)
 
+/*
+ * AUDIO_ASR() does Arithmetic Shift Right operation.
+ * This macro should be used for audio wave data only.
+ *
+ * Division by power of two is replaced with shift operation in the most
+ * compiler, but even then rounding-to-zero occurs on negative value.
+ * What we handle here is the audio wave data that human hear, so we can
+ * ignore the rounding difference.  Therefore we want to use faster
+ * arithmetic shift right operation.  But the right shift operator ('>>')
+ * for negative integer is "implementation defined" behavior in C (note
+ * that it's not "undefined" behavior).  So if implementation defines '>>'
+ * as ASR, we use it.
+ *
+ * Using ASR is 1.9 times faster than division on my amd64, and 1.3 times
+ * faster on my m68k.  -- isaki 201801.
+ */
+#if defined(__GNUC__)
+/* gcc defines '>>' as ASR. */
+#define AUDIO_ASR(value, shift)	((value) >> (shift))
+#else
+#define AUDIO_ASR(value, shift)	((value) / (1 << (shift)))
+#endif
+
 /* Device timeout in msec */
 #define AUDIO_TIMEOUT	(3000)
 
@@ -3304,11 +3327,7 @@
 		for (ch = 0; ch < channels; ch++) {
 			aint2_t val;
 			val = *s++;
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-			val = val * ch_volume[ch] >> 8;
-#else
-			val = val * ch_volume[ch] / 256;
-#endif
+			val = AUDIO_ASR(val * ch_volume[ch], 8);
 			*d++ = (aint_t)val;
 		}
 	}
@@ -3330,11 +3349,7 @@
 	d = arg->dst;
 
 	for (i = 0; i < arg->count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-		*d++ = (s[0] >> 1) + (s[1] >> 1);
-#else
-		*d++ = (s[0] / 2) + (s[1] / 2);
-#endif
+		*d++ = AUDIO_ASR(s[0], 1) + AUDIO_ASR(s[1], 1);
 		s += arg->srcfmt->channels;
 	}
 }
@@ -5131,11 +5146,7 @@
 		if (vol != 256) {
 			m = mixer->mixsample;
 			for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-				*m = *m * vol >> 8;
-#else
-				*m = *m * vol / 256;
-#endif
+				*m = AUDIO_ASR(*m * vol, 8);
 				m++;
 			}
 		}
@@ -5223,11 +5234,9 @@
 #if defined(AUDIO_SUPPORT_TRACK_VOLUME)
 		if (track->volume != 256) {
 			for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-				*d++ = ((aint2_t)*s++) * track->volume >> 8;
-#else
-				*d++ = ((aint2_t)*s++) * track->volume / 256;
-#endif
+				aint2_t v;
+				v = *s++;
+				*d++ = AUDIO_ASR(v * track->volume, 8)
 			}
 		} else
 #endif
@@ -5241,11 +5250,9 @@
 #if defined(AUDIO_SUPPORT_TRACK_VOLUME)
 		if (track->volume != 256) {
 			for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-				*d++ += ((aint2_t)*s++) * track->volume >> 8;
-#else
-				*d++ += ((aint2_t)*s++) * track->volume / 256;
-#endif
+				aint2_t v;
+				v = *s++;
+				*d++ += AUDIO_ASR(v * track->volume, 8);
 			}
 		} else
 #endif
Index: sys/dev/audio/audiodef.h
===================================================================
RCS file: /cvsroot/src/sys/dev/audio/audiodef.h,v
retrieving revision 1.2
diff -u -r1.2 audiodef.h
--- sys/dev/audio/audiodef.h	8 May 2019 13:40:17 -0000	1.2
+++ sys/dev/audio/audiodef.h	23 May 2019 03:31:36 -0000
@@ -63,12 +63,6 @@
  */
 /* #define AUDIO_SUPPORT_TRACK_VOLUME */
 
-/*
- * Whether use C language's "implementation defined" behavior (note that
- * it's not "undefined" behavior).  It improves performance well.
- */
-#define AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR
-
 /* conversion stage */
 typedef struct {
 	audio_ring_t srcbuf;

---
Tetsuya Isaki <isaki%pastel-flower.jp@localhost / isaki%NetBSD.org@localhost>


Home | Main Index | Thread Index | Old Index