aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorSiarhei Siamashka <siarhei.siamashka@nokia.com>2009-01-15 19:45:36 +0200
committerMarcel Holtmann <marcel@holtmann.org>2012-07-29 19:48:27 -0700
commita159ddba212828d4bf2e5068340b420c1fdc1a0d (patch)
tree6c1296849891cc59475712b44fb66385825cb3fb
parent444d360c2ccaf6a72e92697aae8ee31fa93091db (diff)
sbc: SBC arrays and constant tables aligned at 16 byte boundary for SIMD
Most SIMD instruction sets benefit from data being naturally aligned. And even if it is not strictly required, performance is usually better with the aligned data. ARM NEON and SSE2 have different instruction variants for aligned/unaligned memory accesses.
-rw-r--r--sbc/sbc.c26
-rw-r--r--sbc/sbc.h1
-rw-r--r--sbc/sbc_primitives.h2
-rw-r--r--sbc/sbc_tables.h22
4 files changed, 36 insertions, 15 deletions
diff --git a/sbc/sbc.c b/sbc/sbc.c
index 534c935..0699ae0 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -80,10 +80,13 @@ struct sbc_frame {
uint8_t scale_factor[2][8];
/* raw integer subband samples in the frame */
+ int32_t SBC_ALIGNED sb_sample_f[16][2][8];
- int32_t sb_sample_f[16][2][8];
- int32_t sb_sample[16][2][8]; /* modified subband samples */
- int16_t pcm_sample[2][16*8]; /* original pcm audio samples */
+ /* modified subband samples */
+ int32_t SBC_ALIGNED sb_sample[16][2][8];
+
+ /* original pcm audio samples */
+ int16_t SBC_ALIGNED pcm_sample[2][16*8];
};
struct sbc_decoder_state {
@@ -912,9 +915,9 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,
struct sbc_priv {
int init;
- struct sbc_frame frame;
- struct sbc_decoder_state dec_state;
- struct sbc_encoder_state enc_state;
+ struct SBC_ALIGNED sbc_frame frame;
+ struct SBC_ALIGNED sbc_decoder_state dec_state;
+ struct SBC_ALIGNED sbc_encoder_state enc_state;
};
static void sbc_set_defaults(sbc_t *sbc, unsigned long flags)
@@ -940,10 +943,13 @@ int sbc_init(sbc_t *sbc, unsigned long flags)
memset(sbc, 0, sizeof(sbc_t));
- sbc->priv = malloc(sizeof(struct sbc_priv));
- if (!sbc->priv)
+ sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK);
+ if (!sbc->priv_alloc_base)
return -ENOMEM;
+ sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base +
+ SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK));
+
memset(sbc->priv, 0, sizeof(struct sbc_priv));
sbc_set_defaults(sbc, flags);
@@ -1091,8 +1097,8 @@ void sbc_finish(sbc_t *sbc)
if (!sbc)
return;
- if (sbc->priv)
- free(sbc->priv);
+ if (sbc->priv_alloc_base)
+ free(sbc->priv_alloc_base);
memset(sbc, 0, sizeof(sbc_t));
}
diff --git a/sbc/sbc.h b/sbc/sbc.h
index 8ac5930..b0a1488 100644
--- a/sbc/sbc.h
+++ b/sbc/sbc.h
@@ -74,6 +74,7 @@ struct sbc_struct {
uint8_t endian;
void *priv;
+ void *priv_alloc_base;
};
typedef struct sbc_struct sbc_t;
diff --git a/sbc/sbc_primitives.h b/sbc/sbc_primitives.h
index ca1ec27..a8b3df6 100644
--- a/sbc/sbc_primitives.h
+++ b/sbc/sbc_primitives.h
@@ -31,7 +31,7 @@
struct sbc_encoder_state {
int subbands;
int position[2];
- int16_t X[2][256];
+ int16_t SBC_ALIGNED X[2][256];
/* Polyphase analysis filter for 4 subbands configuration,
it handles 4 blocks at once */
void (*sbc_analyze_4b_4s)(int16_t *pcm, int16_t *x,
diff --git a/sbc/sbc_tables.h b/sbc/sbc_tables.h
index a9a995f..7c2af07 100644
--- a/sbc/sbc_tables.h
+++ b/sbc/sbc_tables.h
@@ -351,6 +351,20 @@ static const FIXED_T cos_table_fixed_8[128] = {
#undef F
/*
+ * Enforce 16 byte alignment for the data, which is supposed to be used
+ * with SIMD optimized code.
+ */
+
+#define SBC_ALIGN_BITS 4
+#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1)
+
+#ifdef __GNUC__
+#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS))))
+#else
+#define SBC_ALIGNED
+#endif
+
+/*
* Constant tables for the use in SIMD optimized analysis filters
* Each table consists of two parts:
* 1. reordered "proto" table
@@ -360,7 +374,7 @@ static const FIXED_T cos_table_fixed_8[128] = {
* and "odd" cases are needed
*/
-static const FIXED_T analysis_consts_fixed4_simd_even[40 + 16] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = {
#define F(x) F_PROTO4(x)
F(0.00000000E+00), F(3.83720193E-03),
F(5.36548976E-04), F(2.73370904E-03),
@@ -395,7 +409,7 @@ static const FIXED_T analysis_consts_fixed4_simd_even[40 + 16] = {
#undef F
};
-static const FIXED_T analysis_consts_fixed4_simd_odd[40 + 16] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = {
#define F(x) F_PROTO4(x)
F(2.73370904E-03), F(5.36548976E-04),
-F(1.49188357E-03), F(0.00000000E+00),
@@ -430,7 +444,7 @@ static const FIXED_T analysis_consts_fixed4_simd_odd[40 + 16] = {
#undef F
};
-static const FIXED_T analysis_consts_fixed8_simd_even[80 + 64] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = {
#define F(x) F_PROTO8(x)
F(0.00000000E+00), F(2.01182542E-03),
F(1.56575398E-04), F(1.78371725E-03),
@@ -509,7 +523,7 @@ static const FIXED_T analysis_consts_fixed8_simd_even[80 + 64] = {
#undef F
};
-static const FIXED_T analysis_consts_fixed8_simd_odd[80 + 64] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = {
#define F(x) F_PROTO8(x)
F(0.00000000E+00), -F(8.23919506E-04),
F(1.56575398E-04), F(1.78371725E-03),