about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sascha Silbe <sascha-pgp@silbe.org> 2019-03-26 10:35:55 +0100
committer: Arun Raghavan <arun@arunraghavan.net> 2019-03-29 06:04:28 +0000
commit: 034b77823ad45b5f02baaeea436863ed104ee66d (patch)
tree: 0b7fa256cec45d7e6c14308a3c2a6c65fe923c2f
parent: 1e4fb614360437cb88f8f41cb1d7d51c83adf454 (diff)
download: pulseaudio-034b77823ad45b5f02baaeea436863ed104ee66d.tar.gz
remap: support S32NE work format
So far PulseAudio only supported two different work formats: S16NE if it's sufficient to represent the input and output formats without loss of precision and FLOAT32NE in all other cases. For systems that use S32NE exclusively, this results in unnecessary conversions from S32NE to FLOAT32NE and back again. Add S32NE remap operations and make use of them (for the COPY and TRIVIAL resamplers) if both input and output format are S32NE. This avoids the back and forth conversions between S32NE and FLOAT32NE, significantly improving performance for those cases.
-rw-r--r-- src/pulsecore/remap.c 162
-rw-r--r-- src/pulsecore/remap.h 2
-rw-r--r-- src/pulsecore/remap_mmx.c 6
-rw-r--r-- src/pulsecore/remap_neon.c 59
-rw-r--r-- src/pulsecore/remap_sse.c 6
-rw-r--r-- src/pulsecore/resampler.c 8
-rw-r--r-- src/tests/cpu-remap-test.c 98
7 files changed, 327 insertions, 14 deletions
diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c
index 09e2c8f8..35fffd7d 100644
--- a/src/pulsecore/remap.c
+++ b/src/pulsecore/remap.c
@@ -51,6 +51,24 @@ static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
}
}
+static void remap_mono_to_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ unsigned i;
+
+ for (i = n >> 2; i; i--) {
+ dst[0] = dst[1] = src[0];
+ dst[2] = dst[3] = src[1];
+ dst[4] = dst[5] = src[2];
+ dst[6] = dst[7] = src[3];
+ src += 4;
+ dst += 8;
+ }
+ for (i = n & 3; i; i--) {
+ dst[0] = dst[1] = src[0];
+ src++;
+ dst += 2;
+ }
+}
+
static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@@ -87,6 +105,28 @@ static void remap_stereo_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int1
}
}
+static void remap_stereo_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ unsigned i;
+
+ for (i = n >> 2; i > 0; i--) {
+ /* Avoid overflow by performing division first. We accept a
+ * difference of +/- 1 to the ideal result. */
+ dst[0] = (src[0]/2 + src[1]/2);
+ dst[1] = (src[2]/2 + src[3]/2);
+ dst[2] = (src[4]/2 + src[5]/2);
+ dst[3] = (src[6]/2 + src[7]/2);
+ src += 8;
+ dst += 4;
+ }
+ for (i = n & 3; i; i--) {
+ /* Avoid overflow by performing division first. We accept a
+ * difference of +/- 1 to the ideal result. */
+ dst[0] = (src[0]/2 + src[1]/2);
+ src += 2;
+ dst += 1;
+ }
+}
+
static void remap_stereo_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@@ -123,6 +163,24 @@ static void remap_mono_to_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
}
}
+static void remap_mono_to_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ unsigned i;
+
+ for (i = n >> 2; i; i--) {
+ dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+ dst[4] = dst[5] = dst[6] = dst[7] = src[1];
+ dst[8] = dst[9] = dst[10] = dst[11] = src[2];
+ dst[12] = dst[13] = dst[14] = dst[15] = src[3];
+ src += 4;
+ dst += 16;
+ }
+ for (i = n & 3; i; i--) {
+ dst[0] = dst[1] = dst[2] = dst[3] = src[0];
+ src++;
+ dst += 4;
+ }
+}
+
static void remap_mono_to_ch4_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@@ -159,6 +217,28 @@ static void remap_ch4_to_mono_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
}
}
+static void remap_ch4_to_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ unsigned i;
+
+ for (i = n >> 2; i > 0; i--) {
+ /* Avoid overflow by performing division first. We accept a
+ * difference of +/- 3 to the ideal result. */
+ dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4);
+ dst[1] = (src[4]/4 + src[5]/4 + src[6]/4 + src[7]/4);
+ dst[2] = (src[8]/4 + src[9]/4 + src[10]/4 + src[11]/4);
+ dst[3] = (src[12]/4 + src[13]/4 + src[14]/4 + src[15]/4);
+ src += 16;
+ dst += 4;
+ }
+ for (i = n & 3; i; i--) {
+ /* Avoid overflow by performing division first. We accept a
+ * difference of +/- 3 to the ideal result. */
+ dst[0] = (src[0]/4 + src[1]/4 + src[2]/4 + src[3]/4);
+ src += 4;
+ dst += 1;
+ }
+}
+
static void remap_ch4_to_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned i;
@@ -208,6 +288,36 @@ static void remap_channels_matrix_s16ne_c(pa_remap_t *m, int16_t *dst, const int
}
}
+static void remap_channels_matrix_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ unsigned oc, ic, i;
+ unsigned n_ic, n_oc;
+
+ n_ic = m->i_ss.channels;
+ n_oc = m->o_ss.channels;
+
+ memset(dst, 0, n * sizeof(int32_t) * n_oc);
+
+ for (oc = 0; oc < n_oc; oc++) {
+
+ for (ic = 0; ic < n_ic; ic++) {
+ int32_t *d = dst + oc;
+ const int32_t *s = src + ic;
+ int32_t vol = m->map_table_i[oc][ic];
+
+ if (vol <= 0)
+ continue;
+
+ if (vol >= 0x10000) {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s;
+ } else {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += (int32_t) (((int64_t)*s * vol) >> 16);
+ }
+ }
+ }
+}
+
static void remap_channels_matrix_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
unsigned oc, ic, i;
unsigned n_ic, n_oc;
@@ -309,6 +419,44 @@ static void remap_arrange_ch4_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t
}
}
+static void remap_arrange_mono_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ const unsigned n_ic = m->i_ss.channels;
+ const int8_t *arrange = m->state;
+
+ src += arrange[0];
+ for (; n > 0; n--) {
+ *dst++ = *src;
+ src += n_ic;
+ }
+}
+
+static void remap_arrange_stereo_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ const unsigned n_ic = m->i_ss.channels;
+ const int8_t *arrange = m->state;
+ const int ic0 = arrange[0], ic1 = arrange[1];
+
+ for (; n > 0; n--) {
+ *dst++ = (ic0 >= 0) ? *(src + ic0) : 0;
+ *dst++ = (ic1 >= 0) ? *(src + ic1) : 0;
+ src += n_ic;
+ }
+}
+
+static void remap_arrange_ch4_s32ne_c(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ const unsigned n_ic = m->i_ss.channels;
+ const int8_t *arrange = m->state;
+ const int ic0 = arrange[0], ic1 = arrange[1],
+ ic2 = arrange[2], ic3 = arrange[3];
+
+ for (; n > 0; n--) {
+ *dst++ = (ic0 >= 0) ? *(src + ic0) : 0;
+ *dst++ = (ic1 >= 0) ? *(src + ic1) : 0;
+ *dst++ = (ic2 >= 0) ? *(src + ic2) : 0;
+ *dst++ = (ic3 >= 0) ? *(src + ic3) : 0;
+ src += n_ic;
+ }
+}
+
static void remap_arrange_mono_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
const unsigned n_ic = m->i_ss.channels;
const int8_t *arrange = m->state;
@@ -348,16 +496,19 @@ static void remap_arrange_ch4_float32ne_c(pa_remap_t *m, float *dst, const float
}
void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
- pa_do_remap_func_t func_float) {
+ pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float) {
pa_assert(m);
if (m->format == PA_SAMPLE_S16NE)
m->do_remap = func_s16;
+ else if (m->format == PA_SAMPLE_S32NE)
+ m->do_remap = func_s32;
else if (m->format == PA_SAMPLE_FLOAT32NE)
m->do_remap = func_float;
else
pa_assert_not_reached();
+ pa_assert(m->do_remap);
}
static bool force_generic_code = false;
@@ -374,6 +525,7 @@ static void init_remap_c(pa_remap_t *m) {
if (force_generic_code) {
pa_log_info("Forced to use generic matrix remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
+ (pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
(pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
return;
}
@@ -383,12 +535,14 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c,
+ (pa_do_remap_func_t) remap_mono_to_stereo_s32ne_c,
(pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c);
} else if (n_ic == 2 && n_oc == 1 &&
m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
pa_log_info("Using stereo to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_c,
+ (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_c,
(pa_do_remap_func_t) remap_stereo_to_mono_float32ne_c);
} else if (n_ic == 1 && n_oc == 4 &&
m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
@@ -396,6 +550,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using mono to 4-channel remapping");
pa_set_remap_func(m, (pa_do_remap_func_t)remap_mono_to_ch4_s16ne_c,
+ (pa_do_remap_func_t) remap_mono_to_ch4_s32ne_c,
(pa_do_remap_func_t) remap_mono_to_ch4_float32ne_c);
} else if (n_ic == 4 && n_oc == 1 &&
m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
@@ -403,11 +558,13 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using 4-channel to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_c,
+ (pa_do_remap_func_t) remap_ch4_to_mono_s32ne_c,
(pa_do_remap_func_t) remap_ch4_to_mono_float32ne_c);
} else if (pa_setup_remap_arrange(m, arrange) && n_oc == 1) {
pa_log_info("Using mono arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_mono_s16ne_c,
+ (pa_do_remap_func_t) remap_arrange_mono_s32ne_c,
(pa_do_remap_func_t) remap_arrange_mono_float32ne_c);
/* setup state */
@@ -416,6 +573,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using stereo arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_c,
+ (pa_do_remap_func_t) remap_arrange_stereo_s32ne_c,
(pa_do_remap_func_t) remap_arrange_stereo_float32ne_c);
/* setup state */
@@ -424,6 +582,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using 4-channel arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_c,
+ (pa_do_remap_func_t) remap_arrange_ch4_s32ne_c,
(pa_do_remap_func_t) remap_arrange_ch4_float32ne_c);
/* setup state */
@@ -432,6 +591,7 @@ static void init_remap_c(pa_remap_t *m) {
pa_log_info("Using generic matrix remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_channels_matrix_s16ne_c,
+ (pa_do_remap_func_t) remap_channels_matrix_s32ne_c,
(pa_do_remap_func_t) remap_channels_matrix_float32ne_c);
}
}
diff --git a/src/pulsecore/remap.h b/src/pulsecore/remap.h
index 4bad3ea3..473f0cea 100644
--- a/src/pulsecore/remap.h
+++ b/src/pulsecore/remap.h
@@ -55,6 +55,6 @@ void pa_set_init_remap_func(pa_init_remap_func_t func);
bool pa_setup_remap_arrange(const pa_remap_t *m, int8_t arrange[PA_CHANNELS_MAX]);
void pa_set_remap_func(pa_remap_t *m, pa_do_remap_func_t func_s16,
- pa_do_remap_func_t func_float);
+ pa_do_remap_func_t func_s32, pa_do_remap_func_t func_float);
#endif /* fooremapfoo */
diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c
index 688da6c1..9d076718 100644
--- a/src/pulsecore/remap_mmx.c
+++ b/src/pulsecore/remap_mmx.c
@@ -111,7 +111,8 @@ static void remap_mono_to_stereo_s16ne_mmx(pa_remap_t *m, int16_t *dst, const in
);
}
-static void remap_mono_to_stereo_float32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+/* Works for both S32NE and FLOAT32NE */
+static void remap_mono_to_stereo_any32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) {
pa_reg_x86 temp, temp2;
__asm__ __volatile__ (
@@ -135,7 +136,8 @@ static void init_remap_mmx(pa_remap_t *m) {
pa_log_info("Using MMX mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx,
- (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_mmx);
+ (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx,
+ (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx);
}
}
#endif /* defined (__i386__) || defined (__amd64__) */
diff --git a/src/pulsecore/remap_neon.c b/src/pulsecore/remap_neon.c
index ebacf922..41208986 100644
--- a/src/pulsecore/remap_neon.c
+++ b/src/pulsecore/remap_neon.c
@@ -143,6 +143,25 @@ static void remap_stereo_to_mono_float32ne_neon(pa_remap_t *m, float *dst, const
}
}
+static void remap_stereo_to_mono_s32ne_neon(pa_remap_t *m, int32_t *dst, const int32_t *src, unsigned n) {
+ for (; n >= 4; n -= 4) {
+ __asm__ __volatile__ (
+ "vld2.32 {q0,q1}, [%[src]]! \n\t"
+ "vrhadd.s32 q0, q0, q1 \n\t"
+ "vst1.32 {q0}, [%[dst]]! \n\t"
+ : [dst] "+r" (dst), [src] "+r" (src) /* output operands */
+ : /* input operands */
+ : "memory", "q0", "q1" /* clobber list */
+ );
+ }
+
+ for (; n > 0; n--) {
+ dst[0] = src[0]/2 + src[1]/2;
+ src += 2;
+ dst++;
+ }
+}
+
static void remap_stereo_to_mono_s16ne_neon(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
for (; n >= 8; n -= 8) {
__asm__ __volatile__ (
@@ -322,7 +341,8 @@ static void remap_arrange_stereo_float32ne_neon(pa_remap_t *m, float *dst, const
}
}
-static void remap_arrange_ch2_ch4_float32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+/* Works for both S32NE and FLOAT32NE */
+static void remap_arrange_ch2_ch4_any32ne_neon(pa_remap_t *m, float *dst, const float *src, unsigned n) {
const uint8x8_t t0 = ((uint8x8_t *)m->state)[0];
const uint8x8_t t1 = ((uint8x8_t *)m->state)[1];
@@ -365,39 +385,52 @@ static void init_remap_neon(pa_remap_t *m) {
n_oc = m->o_ss.channels;
n_ic = m->i_ss.channels;
+ /* We short-circuit remap function selection for S32NE in most
+ * cases as the corresponding generic C code is performing
+ * similarly or even better. However there are a few cases where
+ * there actually is a significant improvement from using
+ * hand-crafted NEON assembly so we cannot just bail out for S32NE
+ * here. */
if (n_ic == 1 && n_oc == 2 &&
m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {
+ if (m->format == PA_SAMPLE_S32NE)
+ return;
if (arm_flags & PA_CPU_ARM_CORTEX_A8) {
pa_log_info("Using ARM NEON/A8 mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon,
- (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8);
+ NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_neon_a8);
}
else {
pa_log_info("Using ARM NEON mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_neon,
- (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm);
+ NULL, (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_generic_arm);
}
} else if (n_ic == 1 && n_oc == 4 &&
m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000 &&
m->map_table_i[2][0] == 0x10000 && m->map_table_i[3][0] == 0x10000) {
+ if (m->format == PA_SAMPLE_S32NE)
+ return;
pa_log_info("Using ARM NEON mono to 4-channel remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_ch4_s16ne_neon,
- (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon);
+ NULL, (pa_do_remap_func_t) remap_mono_to_ch4_float32ne_neon);
} else if (n_ic == 2 && n_oc == 1 &&
m->map_table_i[0][0] == 0x8000 && m->map_table_i[0][1] == 0x8000) {
pa_log_info("Using ARM NEON stereo to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_stereo_to_mono_s16ne_neon,
+ (pa_do_remap_func_t) remap_stereo_to_mono_s32ne_neon,
(pa_do_remap_func_t) remap_stereo_to_mono_float32ne_neon);
} else if (n_ic == 4 && n_oc == 1 &&
m->map_table_i[0][0] == 0x4000 && m->map_table_i[0][1] == 0x4000 &&
m->map_table_i[0][2] == 0x4000 && m->map_table_i[0][3] == 0x4000) {
+ if (m->format == PA_SAMPLE_S32NE)
+ return;
pa_log_info("Using ARM NEON 4-channel to mono remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_to_mono_s16ne_neon,
- (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon);
+ NULL, (pa_do_remap_func_t) remap_ch4_to_mono_float32ne_neon);
} else if (pa_setup_remap_arrange(m, arrange) &&
((n_ic == 2 && n_oc == 2) ||
(n_ic == 2 && n_oc == 4) ||
@@ -405,17 +438,22 @@ static void init_remap_neon(pa_remap_t *m) {
unsigned o;
if (n_ic == 2 && n_oc == 2) {
+ if (m->format == PA_SAMPLE_S32NE)
+ return;
pa_log_info("Using NEON stereo arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_stereo_s16ne_neon,
- (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon);
+ NULL, (pa_do_remap_func_t) remap_arrange_stereo_float32ne_neon);
} else if (n_ic == 2 && n_oc == 4) {
pa_log_info("Using NEON 2-channel to 4-channel arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch2_ch4_s16ne_neon,
- (pa_do_remap_func_t) remap_arrange_ch2_ch4_float32ne_neon);
+ (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon,
+ (pa_do_remap_func_t) remap_arrange_ch2_ch4_any32ne_neon);
} else if (n_ic == 4 && n_oc == 4) {
+ if (m->format == PA_SAMPLE_S32NE)
+ return;
pa_log_info("Using NEON 4-channel arrange remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_arrange_ch4_s16ne_neon,
- (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon);
+ NULL, (pa_do_remap_func_t) remap_arrange_ch4_float32ne_neon);
}
/* setup state */
@@ -436,6 +474,8 @@ static void init_remap_neon(pa_remap_t *m) {
}
break;
}
+ case PA_SAMPLE_S32NE:
+ /* fall-through */
case PA_SAMPLE_FLOAT32NE: {
uint8x8_t *t = m->state = pa_xnew0(uint8x8_t, 2);
for (o = 0; o < n_oc; o++) {
@@ -461,8 +501,11 @@ static void init_remap_neon(pa_remap_t *m) {
} else if (n_ic == 4 && n_oc == 4) {
unsigned i, o;
+ if (m->format == PA_SAMPLE_S32NE)
+ return;
pa_log_info("Using ARM NEON 4-channel remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_ch4_s16ne_neon,
+ (pa_do_remap_func_t) NULL,
(pa_do_remap_func_t) remap_ch4_float32ne_neon);
/* setup state */
diff --git a/src/pulsecore/remap_sse.c b/src/pulsecore/remap_sse.c
index 73e1cc84..5c3b931f 100644
--- a/src/pulsecore/remap_sse.c
+++ b/src/pulsecore/remap_sse.c
@@ -110,7 +110,8 @@ static void remap_mono_to_stereo_s16ne_sse2(pa_remap_t *m, int16_t *dst, const i
);
}
-static void remap_mono_to_stereo_float32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+/* Works for both S32NE and FLOAT32NE */
+static void remap_mono_to_stereo_any32ne_sse2(pa_remap_t *m, float *dst, const float *src, unsigned n) {
pa_reg_x86 temp, temp2;
__asm__ __volatile__ (
@@ -134,7 +135,8 @@ static void init_remap_sse2(pa_remap_t *m) {
pa_log_info("Using SSE2 mono to stereo remapping");
pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_sse2,
- (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_sse2);
+ (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2,
+ (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_sse2);
}
}
#endif /* defined (__i386__) || defined (__amd64__) */
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 6a4ded69..ff9795ec 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -286,6 +286,14 @@ static pa_sample_format_t choose_work_format(
work_format = a;
break;
}
+ /* If both input and output are using S32NE and we don't
+ * need any resampling we can use S32NE directly, avoiding
+ * converting back and forth between S32NE and
+ * FLOAT32NE. */
+ if ((a == PA_SAMPLE_S32NE) && (b == PA_SAMPLE_S32NE)) {
+ work_format = PA_SAMPLE_S32NE;
+ break;
+ }
/* Else fall through */
case PA_RESAMPLER_PEAKS:
/* PEAKS, COPY and TRIVIAL do not benefit from increased
diff --git a/src/tests/cpu-remap-test.c b/src/tests/cpu-remap-test.c
index c8c8addd..7e2b7a49 100644
--- a/src/tests/cpu-remap-test.c
+++ b/src/tests/cpu-remap-test.c
@@ -141,6 +141,60 @@ static void run_remap_test_s16(
}
}
+
+static void run_remap_test_s32(
+ pa_remap_t *remap_func,
+ pa_remap_t *remap_orig,
+ int align,
+ bool correct,
+ bool perf) {
+
+ PA_DECLARE_ALIGNED(8, int32_t, out_buf_ref[SAMPLES*8]) = { 0 };
+ PA_DECLARE_ALIGNED(8, int32_t, out_buf[SAMPLES*8]) = { 0 };
+ PA_DECLARE_ALIGNED(8, int32_t, in_buf[SAMPLES*8]);
+ int32_t *out, *out_ref;
+ int32_t *in;
+ unsigned n_ic = remap_func->i_ss.channels;
+ unsigned n_oc = remap_func->o_ss.channels;
+ unsigned i, nsamples;
+
+ pa_assert(n_ic >= 1 && n_ic <= 8);
+ pa_assert(n_oc >= 1 && n_oc <= 8);
+
+ /* Force sample alignment as requested */
+ out = out_buf + (8 - align);
+ out_ref = out_buf_ref + (8 - align);
+ in = in_buf + (8 - align);
+ nsamples = SAMPLES - (8 - align);
+
+ pa_random(in, nsamples * n_ic * sizeof(int32_t));
+
+ if (correct) {
+ remap_orig->do_remap(remap_orig, out_ref, in, nsamples);
+ remap_func->do_remap(remap_func, out, in, nsamples);
+
+ for (i = 0; i < nsamples * n_oc; i++) {
+ if (abs(out[i] - out_ref[i]) > 4) {
+ pa_log_debug("Correctness test failed: align=%d", align);
+ pa_log_debug("%d: %d != %d", i, out[i], out_ref[i]);
+ ck_abort();
+ }
+ }
+ }
+
+ if (perf) {
+ pa_log_debug("Testing remap performance with %d sample alignment", align);
+
+ PA_RUNTIME_TEST_RUN_START("func", TIMES, TIMES2) {
+ remap_func->do_remap(remap_func, out, in, nsamples);
+ } PA_RUNTIME_TEST_RUN_STOP
+
+ PA_RUNTIME_TEST_RUN_START("orig", TIMES, TIMES2) {
+ remap_orig->do_remap(remap_orig, out_ref, in, nsamples);
+ } PA_RUNTIME_TEST_RUN_STOP
+ }
+}
+
static void setup_remap_channels(
pa_remap_t *m,
pa_sample_format_t f,
@@ -193,6 +247,12 @@ static void remap_test_channels(
run_remap_test_float(remap_func, remap_orig, 2, true, false);
run_remap_test_float(remap_func, remap_orig, 3, true, true);
break;
+ case PA_SAMPLE_S32NE:
+ run_remap_test_s32(remap_func, remap_orig, 0, true, false);
+ run_remap_test_s32(remap_func, remap_orig, 1, true, false);
+ run_remap_test_s32(remap_func, remap_orig, 2, true, false);
+ run_remap_test_s32(remap_func, remap_orig, 3, true, true);
+ break;
case PA_SAMPLE_S16NE:
run_remap_test_s16(remap_func, remap_orig, 0, true, false);
run_remap_test_s16(remap_func, remap_orig, 1, true, false);
@@ -251,6 +311,11 @@ START_TEST (remap_special_test) {
pa_log_debug("Checking special remap (float, mono->4-channel)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 1, 4, false);
+ pa_log_debug("Checking special remap (s32, mono->stereo)");
+ remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 2, false);
+ pa_log_debug("Checking special remap (s32, mono->4-channel)");
+ remap_init2_test_channels(PA_SAMPLE_S32NE, 1, 4, false);
+
pa_log_debug("Checking special remap (s16, mono->stereo)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 1, 2, false);
pa_log_debug("Checking special remap (s16, mono->4-channel)");
@@ -261,6 +326,11 @@ START_TEST (remap_special_test) {
pa_log_debug("Checking special remap (float, 4-channel->mono)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 1, false);
+ pa_log_debug("Checking special remap (s32, stereo->mono)");
+ remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 1, false);
+ pa_log_debug("Checking special remap (s32, 4-channel->mono)");
+ remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 1, false);
+
pa_log_debug("Checking special remap (s16, stereo->mono)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 1, false);
pa_log_debug("Checking special remap (s16, 4-channel->mono)");
@@ -271,11 +341,15 @@ END_TEST
START_TEST (rearrange_special_test) {
pa_log_debug("Checking special remap (s16, stereo rearrange)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 2, 2, true);
+ pa_log_debug("Checking special remap (s32, stereo rearrange)");
+ remap_init2_test_channels(PA_SAMPLE_S32NE, 2, 2, true);
pa_log_debug("Checking special remap (float, stereo rearrange)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 2, 2, true);
pa_log_debug("Checking special remap (s16, 4-channel rearrange)");
remap_init2_test_channels(PA_SAMPLE_S16NE, 4, 4, true);
+ pa_log_debug("Checking special remap (s32, 4-channel rearrange)");
+ remap_init2_test_channels(PA_SAMPLE_S32NE, 4, 4, true);
pa_log_debug("Checking special remap (float, 4-channel rearrange)");
remap_init2_test_channels(PA_SAMPLE_FLOAT32NE, 4, 4, true);
}
@@ -298,6 +372,9 @@ START_TEST (remap_mmx_test) {
init_func = pa_get_init_remap_func();
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false);
+ pa_log_debug("Checking MMX remap (s32, mono->stereo)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
+
pa_log_debug("Checking MMX remap (s16, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
}
@@ -319,6 +396,9 @@ START_TEST (remap_sse2_test) {
init_func = pa_get_init_remap_func();
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 2, false);
+ pa_log_debug("Checking SSE2 remap (s32, mono->stereo)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
+
pa_log_debug("Checking SSE2 remap (s16, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
}
@@ -345,6 +425,11 @@ START_TEST (remap_neon_test) {
pa_log_debug("Checking NEON remap (float, mono->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 1, 4, false);
+ pa_log_debug("Checking NEON remap (s32, mono->stereo)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 2, false);
+ pa_log_debug("Checking NEON remap (s32, mono->4-channel)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 1, 4, false);
+
pa_log_debug("Checking NEON remap (s16, mono->stereo)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 1, 2, false);
pa_log_debug("Checking NEON remap (s16, mono->4-channel)");
@@ -355,6 +440,11 @@ START_TEST (remap_neon_test) {
pa_log_debug("Checking NEON remap (float, 4-channel->mono)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 1, false);
+ pa_log_debug("Checking NEON remap (s32, stereo->mono)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 1, false);
+ pa_log_debug("Checking NEON remap (s32, 4-channel->mono)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 1, false);
+
pa_log_debug("Checking NEON remap (s16, stereo->mono)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 1, false);
pa_log_debug("Checking NEON remap (s16, 4-channel->mono)");
@@ -362,6 +452,8 @@ START_TEST (remap_neon_test) {
pa_log_debug("Checking NEON remap (float, 4-channel->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, false);
+ pa_log_debug("Checking NEON remap (s32, 4-channel->4-channel)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, false);
pa_log_debug("Checking NEON remap (s16, 4-channel->4-channel)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, false);
}
@@ -383,16 +475,22 @@ START_TEST (rearrange_neon_test) {
pa_log_debug("Checking NEON remap (float, stereo rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 2, true);
+ pa_log_debug("Checking NEON remap (s32, stereo rearrange)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 2, true);
pa_log_debug("Checking NEON remap (s16, stereo rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 2, true);
pa_log_debug("Checking NEON remap (float, 2-channel->4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 2, 4, true);
+ pa_log_debug("Checking NEON remap (s32, 2-channel->4-channel rearrange)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 2, 4, true);
pa_log_debug("Checking NEON remap (s16, 2-channel->4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 2, 4, true);
pa_log_debug("Checking NEON remap (float, 4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_FLOAT32NE, 4, 4, true);
+ pa_log_debug("Checking NEON remap (s32, 4-channel rearrange)");
+ remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S32NE, 4, 4, true);
pa_log_debug("Checking NEON remap (s16, 4-channel rearrange)");
remap_init_test_channels(init_func, orig_init_func, PA_SAMPLE_S16NE, 4, 4, true);
}