diff options
author | Oliver Upton <oliver.upton@linux.dev> | 2023-11-28 19:36:49 +0000 |
---|---|---|
committer | Oliver Upton <oliver.upton@linux.dev> | 2023-11-28 19:36:49 +0000 |
commit | 82da2059833f0235f35f8f913c8258865b1d3ee2 (patch) | |
tree | e41022d54c6b5bb639d1d28d6dfe3445b984215c | |
download | aarch64-memcpy-82da2059833f0235f35f8f913c8258865b1d3ee2.tar.gz |
initial commit
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 26 | ||||
-rw-r--r-- | copy_template.h | 47 | ||||
-rw-r--r-- | main.c | 191 | ||||
-rw-r--r-- | memcpy.h | 15 | ||||
-rw-r--r-- | memcpy_ldp_stp.S | 13 | ||||
-rw-r--r-- | memcpy_ldp_str.S | 15 | ||||
-rw-r--r-- | memcpy_ldr_stp.S | 15 |
8 files changed, 324 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d08ca4b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/memcpy
+/*.o
diff --git a/Makefile b/Makefile
new file mode 100644
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+CC := aarch64-linux-gnu-gcc
+CFLAGS += -O3 -Wall -Werror
+
+OBJS = main.o
+OBJS += memcpy_ldp_str.o
+OBJS += memcpy_ldp_stp.o
+OBJS += memcpy_ldr_stp.o
+
+memcpy: $(OBJS)
+	$(CC) $(CFLAGS) $(OBJS) -o $@
+
+%.o: %.S
+	$(CC) $(CFLAGS) -c $<
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $<
+
+# Rebuild when a header changes; the pattern rules only track the source.
+main.o: memcpy.h
+memcpy_ldp_str.o memcpy_ldp_stp.o memcpy_ldr_stp.o: copy_template.h
+
+# clean names no file, so keep a stray "clean" file from masking it.
+.PHONY: clean
+clean:
+	rm -rf $(OBJS)
+	rm -rf memcpy
diff --git a/copy_template.h b/copy_template.h
new file mode 100644
--- /dev/null
+++ b/copy_template.h
@@ -0,0 +1,47 @@
+/*
+ * Shared body of the copy routines. The including file must define the
+ * ldp1/stp1 macros (load/store one 16-byte pair, then advance the pointer
+ * by the immediate) before including this template.
+ *
+ * count must be a multiple of 64 and at least 128: the loop is primed with
+ * 64 bytes of loads, always runs once, and only exits when count reaches
+ * exactly zero.
+ */
+dst	.req	x0
+src	.req	x1
+count	.req	x2
+
+A_l	.req	x3
+A_h	.req	x4
+B_l	.req	x5
+B_h	.req	x6
+C_l	.req	x7
+C_h	.req	x8
+D_l	.req	x9
+D_h	.req	x10
+
+	/* Prime with the first 64 bytes. */
+	ldp1	A_l, A_h, src, #16
+	ldp1	B_l, B_h, src, #16
+	ldp1	C_l, C_h, src, #16
+	ldp1	D_l, D_h, src, #16
+	sub	count, count, #64
+
+1:
+	/* Store the previous 64 bytes while loading the next 64. */
+	stp1	A_l, A_h, dst, #16
+	ldp1	A_l, A_h, src, #16
+	stp1	B_l, B_h, dst, #16
+	ldp1	B_l, B_h, src, #16
+	stp1	C_l, C_h, dst, #16
+	ldp1	C_l, C_h, src, #16
+	stp1	D_l, D_h, dst, #16
+	ldp1	D_l, D_h, src, #16
+	subs	count, count, #64
+	b.ne	1b
+
+	/* Store the final 64 bytes. */
+	stp1	A_l, A_h, dst, #16
+	stp1	B_l, B_h, dst, #16
+	stp1	C_l, C_h, dst, #16
+	stp1	D_l, D_h, dst, #16
diff --git a/main.c b/main.c
new file mode 100644
--- /dev/null
+++ b/main.c
@@ -0,0 +1,191 @@
+/*
+ * Driver for the AArch64 copy loops declared in memcpy.h: maps a source and
+ * a destination buffer of SIZE bytes each and copies one to the other
+ * ITERATIONS times using the loop selected by MODE.
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "memcpy.h"
+
+#define KB	1024
+#define MB	(1024 * KB)
+#define GB	(1024 * MB)
+
+/* Copy-loop variants; the numeric values are what -m accepts. */
+enum mode {
+	LDP_STR = 0,
+	LDR_STP,
+	LDP_STP,
+
+	NR_MODES
+};
+
+static size_t test_size = 2UL * GB;
+static size_t test_iterations = 100;
+static enum mode test_mode;
+
+/* Expands to an expression; the caller supplies the semicolon. */
+#define PR_MODE(mode) \
+	printf(" %d: "#mode"\n", mode)
+
+/* Print the numeric value and name of every selectable mode. */
+static void pr_modes(void)
+{
+	PR_MODE(LDP_STR);
+	PR_MODE(LDR_STP);
+	PR_MODE(LDP_STP);
+}
+
+/* Print usage along with the current value of each setting. */
+static void pr_help(const char *progname)
+{
+	printf("%s [OPTIONS]\n", progname);
+	printf(" -s SIZE (default: %zu)\n", test_size);
+	printf(" -i ITERATIONS (default: %zu)\n", test_iterations);
+	printf(" -m MODE (default: %d)\n", test_mode);
+	pr_modes();
+	printf(" -h prints this message\n");
+}
+
+/*
+ * Parse an unsigned option argument in any base strtoul() accepts.
+ * Returns 0 and stores the value in *out, or returns -1 if the argument is
+ * empty, contains a minus sign, has trailing characters or is out of range.
+ */
+static int parse_ulong(const char *arg, unsigned long *out)
+{
+	char *end;
+
+	/* strtoul() would silently negate "-N" instead of rejecting it. */
+	if (strchr(arg, '-'))
+		return -1;
+
+	errno = 0;
+	*out = strtoul(arg, &end, 0);
+	if (end == arg || *end != '\0' || errno == ERANGE)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Map an anonymous huge-page buffer of test_size bytes and set every byte
+ * to 0x0f. Prints the mmap() error and exits if the mapping fails.
+ */
+static void *setup_test_buffer(void)
+{
+	/* Anonymous mappings should pass fd -1; fd 0 is not portable. */
+	void *buf = mmap(NULL, test_size, PROT_READ|PROT_WRITE,
+			 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0);
+
+	/* Not assert(): the check must survive NDEBUG and report errno. */
+	if (buf == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	memset(buf, 0xf, test_size);
+	return buf;
+}
+
+/* Unmap a buffer returned by setup_test_buffer(). */
+static void destroy_test_buffer(void *buf)
+{
+	/* Fails for huge pages if test_size is not huge-page aligned. */
+	if (munmap(buf, test_size))
+		perror("munmap");
+}
+
+/* Copy test_size bytes once with the routine selected by test_mode. */
+static void do_memcpy(void *dst, const void *src)
+{
+	switch (test_mode) {
+	case LDP_STR:
+		memcpy_ldp_str(dst, src, test_size);
+		break;
+	case LDR_STP:
+		memcpy_ldr_stp(dst, src, test_size);
+		break;
+	case LDP_STP:
+		memcpy_ldp_stp(dst, src, test_size);
+		break;
+	default:
+		assert(0);
+	}
+}
+
+/* Map both buffers, print the configuration and run every iteration. */
+static void run_test(void)
+{
+	void *dst = setup_test_buffer();
+	void *src = setup_test_buffer();
+	size_t i;
+
+	printf("Iters: %zu\n", test_iterations);
+	printf("Mode: %d\n", test_mode);
+	/* Cast first: arithmetic on void * is a GNU extension. */
+	printf("Source: [%p, %p)\n", src, (void *)((char *)src + test_size));
+	printf("Dest: [%p, %p)\n", dst, (void *)((char *)dst + test_size));
+
+	for (i = 0; i < test_iterations; i++)
+		do_memcpy(dst, src);
+
+	destroy_test_buffer(dst);
+	destroy_test_buffer(src);
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long val = 0;
+	int c;
+
+	while ((c = getopt(argc, argv, "hs:i:m:")) != -1) {
+		switch (c) {
+		case 's':
+			/*
+			 * The copy loops move 64 bytes per step and run at
+			 * least two steps, so SIZE must be a multiple of 64
+			 * and no smaller than 128.
+			 */
+			if (parse_ulong(optarg, &val) ||
+			    (val % 64 != 0) || (val < 128)) {
+				pr_help(argv[0]);
+				return 1;
+			}
+			test_size = val;
+			break;
+		case 'i':
+			if (parse_ulong(optarg, &val)) {
+				pr_help(argv[0]);
+				return 1;
+			}
+			test_iterations = val;
+			break;
+		case 'm':
+			/* Range-check before narrowing to the enum type. */
+			if (parse_ulong(optarg, &val) || val >= NR_MODES) {
+				pr_help(argv[0]);
+				return 1;
+			}
+			test_mode = val;
+			break;
+		case 'h':
+			pr_help(argv[0]);
+			return 0;
+		default:
+			/* Unknown option or missing option argument. */
+			pr_help(argv[0]);
+			return 1;
+		}
+	}
+
+	run_test();
+	return 0;
+}
diff --git a/memcpy.h b/memcpy.h
new file mode 100644
--- /dev/null
+++ b/memcpy.h
@@ -0,0 +1,15 @@
+#ifndef __MEMCPY_H__
+#define __MEMCPY_H__
+
+#include <stddef.h>	/* size_t */
+#include <stdint.h>
+
+/*
+ * Copy count bytes from src to dst. count must be a multiple of 64 and at
+ * least 128; the suffix names the load and store instructions the loop uses.
+ */
+void memcpy_ldp_str(void *dst, const void *src, size_t count);
+void memcpy_ldr_stp(void *dst, const void *src, size_t count);
+void memcpy_ldp_stp(void *dst, const void *src, size_t count);
+
+#endif /* __MEMCPY_H__ */
diff --git a/memcpy_ldp_stp.S b/memcpy_ldp_stp.S
new file mode 100644
--- /dev/null
+++ b/memcpy_ldp_stp.S
@@ -0,0 +1,13 @@
+/* Copy loop that loads with ldp and stores with stp, both post-indexed. */
+.macro ldp1, Xt1, Xt2, Xn, imm
+	ldp	\Xt1, \Xt2, [\Xn], \imm
+.endm
+
+.macro stp1, Xt1, Xt2, Xn, imm
+	stp	\Xt1, \Xt2, [\Xn], \imm
+.endm
+
+.globl memcpy_ldp_stp
+memcpy_ldp_stp:
+	#include "copy_template.h"
+	ret
diff --git a/memcpy_ldp_str.S b/memcpy_ldp_str.S
new file mode 100644
--- /dev/null
+++ b/memcpy_ldp_str.S
@@ -0,0 +1,15 @@
+/* Copy loop that loads with ldp but stores each pair as two str. */
+.macro ldp1, Xt1, Xt2, Xn, imm
+	ldp	\Xt1, \Xt2, [\Xn], \imm
+.endm
+
+.macro stp1, Xt1, Xt2, Xn, imm
+	str	\Xt1, [\Xn]
+	str	\Xt2, [\Xn, #8]
+	add	\Xn, \Xn, \imm
+.endm
+
+.globl memcpy_ldp_str
+memcpy_ldp_str:
+	#include "copy_template.h"
+	ret
diff --git a/memcpy_ldr_stp.S b/memcpy_ldr_stp.S
new file mode 100644
--- /dev/null
+++ b/memcpy_ldr_stp.S
@@ -0,0 +1,15 @@
+/* Copy loop that loads each pair as two ldr but stores with stp. */
+.macro ldp1, Xt1, Xt2, Xn, imm
+	ldr	\Xt1, [\Xn]
+	ldr	\Xt2, [\Xn, #8]
+	add	\Xn, \Xn, \imm
+.endm
+
+.macro stp1, Xt1, Xt2, Xn, imm
+	stp	\Xt1, \Xt2, [\Xn], \imm
+.endm
+
+.globl memcpy_ldr_stp
+memcpy_ldr_stp:
+	#include "copy_template.h"
+	ret