summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Oliver Upton <oliver.upton@linux.dev> 2023-11-28 19:36:49 +0000
committer Oliver Upton <oliver.upton@linux.dev> 2023-11-28 19:36:49 +0000
commit 82da2059833f0235f35f8f913c8258865b1d3ee2 (patch)
tree e41022d54c6b5bb639d1d28d6dfe3445b984215c
download aarch64-memcpy-82da2059833f0235f35f8f913c8258865b1d3ee2.tar.gz
initial commit
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
-rw-r--r-- .gitignore 2
-rw-r--r-- Makefile 20
-rw-r--r-- copy_template.h 35
-rw-r--r-- main.c 128
-rw-r--r-- memcpy.h 10
-rw-r--r-- memcpy_ldp_stp.S 12
-rw-r--r-- memcpy_ldp_str.S 14
-rw-r--r-- memcpy_ldr_stp.S 14
8 files changed, 235 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d08ca4b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/memcpy
+/*.o
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e9557a3
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+# Build the aarch64 memcpy benchmark with a cross toolchain.
+CC := aarch64-linux-gnu-gcc
+CFLAGS += -O3 -Wall -Werror
+
+OBJS = main.o
+OBJS += memcpy_ldp_str.o
+OBJS += memcpy_ldp_stp.o
+OBJS += memcpy_ldr_stp.o
+
+# 'clean' names an action, not a file; keep it working even if a file
+# called 'clean' ever appears in the tree.
+.PHONY: clean
+
+memcpy: $(OBJS)
+	$(CC) $(CFLAGS) $(OBJS) -o $@
+
+%.o: %.S
+	$(CC) $(CFLAGS) -c $<
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $<
+
+# Header dependencies: without these, editing the shared copy loop or the
+# prototypes would not trigger a rebuild of the objects that include them.
+main.o: memcpy.h
+memcpy_ldp_str.o memcpy_ldp_stp.o memcpy_ldr_stp.o: copy_template.h
+
+clean:
+	rm -rf $(OBJS)
+	rm -rf memcpy
diff --git a/copy_template.h b/copy_template.h
new file mode 100644
index 0000000..3ed0124
--- /dev/null
+++ b/copy_template.h
@@ -0,0 +1,35 @@
+/*
+ * Shared copy loop, textually included by each memcpy_*.S variant.
+ *
+ * The including file must define two macros before the #include:
+ *   ldp1 Xt1, Xt2, Xn, imm - load a register pair from [Xn], advance Xn by imm
+ *   stp1 Xt1, Xt2, Xn, imm - store a register pair to [Xn], advance Xn by imm
+ *
+ * Data moves in 64-byte blocks (four 16-byte pairs). The loads for the next
+ * block are interleaved with the stores of the block loaded previously.
+ *
+ * count must be a multiple of 64 and at least 128: the loop only exits when
+ * count reaches exactly zero, and the code before and after the loop each
+ * move one full block unconditionally.
+ *
+ * Modifies x0-x2 (dst/src/count), x3-x10 and the condition flags.
+ */
+dst .req x0
+src .req x1
+count .req x2
+
+/* Four 16-byte register pairs, together holding one 64-byte block. */
+A_l .req x3
+A_h .req x4
+B_l .req x5
+B_h .req x6
+C_l .req x7
+C_h .req x8
+D_l .req x9
+D_h .req x10
+
+ /* Prime the registers with the first 64-byte block. */
+ ldp1 A_l, A_h, src, #16
+ ldp1 B_l, B_h, src, #16
+ ldp1 C_l, C_h, src, #16
+ ldp1 D_l, D_h, src, #16
+ /* Account for the block that is now held in registers. */
+ sub count, count, #64
+
+1:
+ /* Store the block loaded earlier while fetching the next one. */
+ stp1 A_l, A_h, dst, #16
+ ldp1 A_l, A_h, src, #16
+ stp1 B_l, B_h, dst, #16
+ ldp1 B_l, B_h, src, #16
+ stp1 C_l, C_h, dst, #16
+ ldp1 C_l, C_h, src, #16
+ stp1 D_l, D_h, dst, #16
+ ldp1 D_l, D_h, src, #16
+ /* Loop until every block has been loaded (count == 0). */
+ subs count, count, #64
+ b.ne 1b
+
+ /* Write out the final block still held in registers. */
+ stp1 A_l, A_h, dst, #16
+ stp1 B_l, B_h, dst, #16
+ stp1 C_l, C_h, dst, #16
+ stp1 D_l, D_h, dst, #16
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..21452ea
--- /dev/null
+++ b/main.c
@@ -0,0 +1,128 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "memcpy.h"
+
+/* Byte-size helpers. These are int expressions; GB evaluates to 2^30. */
+#define KB 1024
+#define MB (1024 * KB)
+#define GB (1024 * MB)
+
+/*
+ * Selects which assembly copy routine do_memcpy() calls.
+ * The numeric values are what the user passes to -m.
+ */
+enum mode {
+ LDP_STR = 0,
+ LDR_STP,
+ LDP_STP,
+
+ /* Number of valid modes; used to range-check -m. */
+ NR_MODES
+};
+
+/* Benchmark parameters; the defaults below can be overridden with -s, -i, -m. */
+static size_t test_size = 2UL * GB;
+static size_t test_iterations = 100;
+/* Static storage is zero-initialized, so the default mode is LDP_STR. */
+static enum mode test_mode;
+
+/*
+ * Print one line for a mode constant: its numeric value followed by its
+ * name (the argument is stringified with #).
+ *
+ * The expansion deliberately has no trailing semicolon, so a call site
+ * written as "PR_MODE(x);" is exactly one statement and stays safe inside
+ * an unbraced if/else.
+ */
+#define PR_MODE(mode) \
+	printf(" %d: "#mode"\n", mode)
+
+/* List every selectable copy mode together with the value to pass to -m. */
+static void pr_modes(void)
+{
+	PR_MODE(LDP_STR);
+	PR_MODE(LDR_STP);
+	PR_MODE(LDP_STP);
+}
+
+/*
+ * Print the usage text to stdout, showing the current values of the
+ * benchmark parameters as the defaults.
+ *
+ * progname: name to show on the usage line (callers pass argv[0]).
+ */
+static void pr_help(const char *progname)
+{
+	printf("%s [OPTIONS]\n", progname);
+	/* size_t must be printed with %zu; %lu only matches on LP64 targets. */
+	printf(" -s SIZE (default: %zu)\n", test_size);
+	printf(" -i ITERATIONS (default: %zu)\n", test_iterations);
+	printf(" -m MODE (default: %d)\n", test_mode);
+	pr_modes();
+	printf(" -h prints this message\n");
+}
+
+/*
+ * Map an anonymous, private, huge-page-backed buffer of test_size bytes
+ * and write 0xf to every byte of it.
+ *
+ * Returns the mapping; never returns on failure. A failed mmap() is
+ * reported with its errno text and the process exits, so the check still
+ * works when assert() is compiled out with NDEBUG.
+ */
+static void *setup_test_buffer(void)
+{
+	/* Portable MAP_ANONYMOUS usage passes fd == -1, not 0 (stdin). */
+	void *buf = mmap(NULL, test_size, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+	if (buf == MAP_FAILED) {
+		perror("mmap");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(buf, 0xf, test_size);
+	return buf;
+}
+
+/*
+ * Unmap a buffer obtained from setup_test_buffer().
+ *
+ * A failing munmap() is reported but not treated as fatal, since this only
+ * runs during teardown. Per mmap(2), it can fail with EINVAL on a
+ * huge-page mapping whose length is not a multiple of the huge page size.
+ */
+static void destroy_test_buffer(void *buf)
+{
+	if (munmap(buf, test_size) != 0)
+		perror("munmap");
+}
+
+/*
+ * Run one copy of test_size bytes from src to dst using the routine
+ * selected by test_mode. An unknown mode trips assert(0).
+ */
+static void do_memcpy(void *dst, const void *src)
+{
+	const size_t nbytes = test_size;
+
+	if (test_mode == LDP_STR)
+		memcpy_ldp_str(dst, src, nbytes);
+	else if (test_mode == LDR_STP)
+		memcpy_ldr_stp(dst, src, nbytes);
+	else if (test_mode == LDP_STP)
+		memcpy_ldp_stp(dst, src, nbytes);
+	else
+		assert(0);
+}
+
+/*
+ * Allocate the destination and source buffers, print the run parameters
+ * and buffer ranges, perform test_iterations copies, then release both
+ * buffers.
+ */
+static void run_test(void)
+{
+	void *dst = setup_test_buffer();
+	void *src = setup_test_buffer();
+	/* Arithmetic on void * is a GNU extension; go through char *. */
+	void *src_end = (char *)src + test_size;
+	void *dst_end = (char *)dst + test_size;
+	size_t i;
+
+	printf("Iters: %zu\n", test_iterations);
+	printf("Mode: %d\n", test_mode);
+	printf("Source: [%p, %p)\n", src, src_end);
+	printf("Dest: [%p, %p)\n", dst, dst_end);
+
+	for (i = 0; i < test_iterations; i++)
+		do_memcpy(dst, src);
+
+	destroy_test_buffer(dst);
+	destroy_test_buffer(src);
+}
+
+/*
+ * Parse the command line and run the benchmark.
+ *
+ *   -s SIZE        bytes per copy; must be a multiple of 64 and >= 128
+ *   -i ITERATIONS  number of copies to perform
+ *   -m MODE        numeric value of an enum mode constant
+ *   -h             print usage and exit successfully
+ *
+ * Returns 0 on success (or after -h), 1 on an invalid or unknown option.
+ */
+int main(int argc, char **argv)
+{
+	unsigned long val;
+	int c;
+
+	while ((c = getopt(argc, argv, "hs:i:m:")) != -1) {
+		switch (c) {
+		case 's':
+			/*
+			 * Validate before storing, so a rejected value is
+			 * not shown as the "default" in the help text.
+			 */
+			val = strtoul(optarg, NULL, 0);
+			if ((val % 64 != 0) || (val < 128)) {
+				pr_help(argv[0]);
+				return 1;
+			}
+			test_size = val;
+			break;
+		case 'i':
+			test_iterations = strtoul(optarg, NULL, 0);
+			break;
+		case 'm':
+			/*
+			 * Range-check the full-width value; converting to
+			 * the enum first could truncate a huge number into
+			 * a valid-looking mode.
+			 */
+			val = strtoul(optarg, NULL, 0);
+			if (val >= NR_MODES) {
+				pr_help(argv[0]);
+				return 1;
+			}
+			test_mode = (enum mode)val;
+			break;
+		case 'h':
+			pr_help(argv[0]);
+			return 0;
+		default:
+			/*
+			 * Unknown option or missing argument: getopt() has
+			 * already printed a diagnostic. Do not start the
+			 * benchmark with a command line we did not
+			 * understand.
+			 */
+			pr_help(argv[0]);
+			return 1;
+		}
+	}
+
+	run_test();
+	return 0;
+}
diff --git a/memcpy.h b/memcpy.h
new file mode 100644
index 0000000..f510c8f
--- /dev/null
+++ b/memcpy.h
@@ -0,0 +1,10 @@
+#ifndef __MEMCPY_H__
+#define __MEMCPY_H__
+
+/* size_t is declared by <stddef.h>; <stdint.h> alone does not guarantee it. */
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Assembly copy routines. Each copies count bytes from src to dst using
+ * the shared loop in copy_template.h. They differ only in the instructions
+ * used to move each 16-byte pair:
+ *
+ *   memcpy_ldp_str - paired loads (ldp), two single stores (str)
+ *   memcpy_ldr_stp - two single loads (ldr), paired stores (stp)
+ *   memcpy_ldp_stp - paired loads (ldp), paired stores (stp)
+ *
+ * count must be a multiple of 64 and at least 128; the loop works in
+ * 64-byte blocks and only terminates when the remaining count is exactly
+ * zero.
+ */
+void memcpy_ldp_str(void *dst, const void *src, size_t count);
+void memcpy_ldr_stp(void *dst, const void *src, size_t count);
+void memcpy_ldp_stp(void *dst, const void *src, size_t count);
+
+#endif /* __MEMCPY_H__ */
diff --git a/memcpy_ldp_stp.S b/memcpy_ldp_stp.S
new file mode 100644
index 0000000..a9851f5
--- /dev/null
+++ b/memcpy_ldp_stp.S
@@ -0,0 +1,12 @@
+/* Copy variant that uses paired loads and paired stores. */
+
+/* Load Xt1/Xt2 from [Xn] with one ldp, then advance Xn by imm (post-index). */
+.macro ldp1, Xt1, Xt2, Xn, imm
+ ldp \Xt1, \Xt2, [\Xn], \imm
+.endm
+
+/* Store Xt1/Xt2 to [Xn] with one stp, then advance Xn by imm (post-index). */
+.macro stp1, Xt1, Xt2, Xn, imm
+ stp \Xt1, \Xt2, [\Xn], \imm
+.endm
+
+/* void memcpy_ldp_stp(void *dst, const void *src, size_t count) */
+.globl memcpy_ldp_stp
+memcpy_ldp_stp:
+ /* The function body is the shared loop, expanded with the macros above. */
+ #include "copy_template.h"
+ ret
diff --git a/memcpy_ldp_str.S b/memcpy_ldp_str.S
new file mode 100644
index 0000000..0b51165
--- /dev/null
+++ b/memcpy_ldp_str.S
@@ -0,0 +1,14 @@
+/* Copy variant that uses paired loads and single-register stores. */
+
+/* Load Xt1/Xt2 from [Xn] with one ldp, then advance Xn by imm (post-index). */
+.macro ldp1, Xt1, Xt2, Xn, imm
+ ldp \Xt1, \Xt2, [\Xn], \imm
+.endm
+
+/*
+ * Store a register pair with two separate str instructions instead of one
+ * stp: Xt1 goes to [Xn], Xt2 to [Xn + 8], then Xn is advanced by imm.
+ */
+.macro stp1, Xt1, Xt2, Xn, imm
+ str \Xt1, [\Xn]
+ str \Xt2, [\Xn, #8]
+ add \Xn, \Xn, \imm
+.endm
+
+/* void memcpy_ldp_str(void *dst, const void *src, size_t count) */
+.globl memcpy_ldp_str
+memcpy_ldp_str:
+ /* The function body is the shared loop, expanded with the macros above. */
+ #include "copy_template.h"
+ ret
diff --git a/memcpy_ldr_stp.S b/memcpy_ldr_stp.S
new file mode 100644
index 0000000..78aed07
--- /dev/null
+++ b/memcpy_ldr_stp.S
@@ -0,0 +1,14 @@
+/* Copy variant that uses single-register loads and paired stores. */
+
+/*
+ * Load a register pair with two separate ldr instructions instead of one
+ * ldp: Xt1 comes from [Xn], Xt2 from [Xn + 8], then Xn is advanced by imm.
+ */
+.macro ldp1, Xt1, Xt2, Xn, imm
+ ldr \Xt1, [\Xn]
+ ldr \Xt2, [\Xn, #8]
+ add \Xn, \Xn, \imm
+.endm
+
+/* Store Xt1/Xt2 to [Xn] with one stp, then advance Xn by imm (post-index). */
+.macro stp1, Xt1, Xt2, Xn, imm
+ stp \Xt1, \Xt2, [\Xn], \imm
+.endm
+
+/* void memcpy_ldr_stp(void *dst, const void *src, size_t count) */
+.globl memcpy_ldr_stp
+memcpy_ldr_stp:
+ /* The function body is the shared loop, expanded with the macros above. */
+ #include "copy_template.h"
+ ret