summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@kernel.org>2023-12-12 20:41:08 -0800
committerPaul E. McKenney <paulmck@kernel.org>2023-12-14 09:58:29 -0800
commiteeeaebac158cf5cd08886c1263aac11f85d6c568 (patch)
tree4fcf526039082efaf9315d5bd6733eabd1518087
parent23553e431f641abc7b79493f9e5015b8a6b4bad4 (diff)
downloadperfbook-eeeaebac158cf5cd08886c1263aac11f85d6c568.tar.gz
CodeSamples/cpu: Add benchmark for load/store communication
The point is to illustrate that rfe is temporal, while fre can be anti-temporal. The underlying temporal.c file can also produce data for coe, but scripts to reduce that data are still TBD. Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
-rw-r--r--CodeSamples/cpu/.gitignore1
-rw-r--r--CodeSamples/cpu/Makefile5
-rwxr-xr-xCodeSamples/cpu/fre.sh35
-rwxr-xr-xCodeSamples/cpu/rfe.sh40
-rw-r--r--CodeSamples/cpu/temporal.c315
5 files changed, 395 insertions, 1 deletions
diff --git a/CodeSamples/cpu/.gitignore b/CodeSamples/cpu/.gitignore
index 46f6ca48..09c58115 100644
--- a/CodeSamples/cpu/.gitignore
+++ b/CodeSamples/cpu/.gitignore
@@ -1 +1,2 @@
cachetorture
+temporal
diff --git a/CodeSamples/cpu/Makefile b/CodeSamples/cpu/Makefile
index 49007790..13eaff69 100644
--- a/CodeSamples/cpu/Makefile
+++ b/CodeSamples/cpu/Makefile
@@ -17,7 +17,7 @@
include ../Makefile.arch
-PROGS = cachetorture
+PROGS = cachetorture temporal
top := ..
include $(top)/depends.mk
@@ -36,5 +36,8 @@ include $(top)/recipes.mk
cachetorture: cachetorture.c ../api.h
$(CC) $(GCC_ARGS) $(CFLAGS) -o cachetorture cachetorture.c -lpthread
+temporal: temporal.c ../api.h
+ $(CC) $(GCC_ARGS) $(CFLAGS) -o temporal temporal.c -lpthread
+
clean:
rm -f $(PROGS)
diff --git a/CodeSamples/cpu/fre.sh b/CodeSamples/cpu/fre.sh
new file mode 100755
index 00000000..9103c966
--- /dev/null
+++ b/CodeSamples/cpu/fre.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Produce and reduce temporal fre data.
+#
+# Usage: bash fre.sh [ nthreads ]
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+./temporal --fre --nthreads ${1-15} |
+awk '
+# Child sample line that still observed the initial value 0: remember
+# the end time of the last read that returned 0 for this thread.
+$1 ~ /^[0-9][0-9]*$/ && $3 == 0 {
+	last_old_read[$1] = $6;
+}
+
+# The Write line: compare the end time of the write against each recorded
+# read of the old value, flagging reads that ended after the write did.
+/^Write/ {
+	write_end = $4;
+	for (tid in last_old_read) {
+		flag = "";
+		if (write_end < last_old_read[tid])
+			flag = "!!!";
+		print tid, last_old_read[tid], write_end, write_end - last_old_read[tid] flag;
+	}
+}'
diff --git a/CodeSamples/cpu/rfe.sh b/CodeSamples/cpu/rfe.sh
new file mode 100755
index 00000000..5de5300e
--- /dev/null
+++ b/CodeSamples/cpu/rfe.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Produce and reduce temporal rfe data.
+#
+# Usage: bash rfe.sh [ nthreads ]
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+./temporal --rfe --nthreads ${1-15} |
+awk '
+# Child sample line that observed the new value 1: keep only the first
+# such line per thread, recording the end time of its first read.
+$1 ~ /^[0-9][0-9]*$/ && $3 == 1 {
+	if (first_new_read[$1] == "")
+		first_new_read[$1] = $4;
+}
+
+# The Write line: echo it, then compare the start time of the write
+# against each recorded read, flagging reads that ended before it began.
+/^Write/ {
+	print $0;
+	write_start = $2;
+	for (tid in first_new_read) {
+		flag = (write_start > first_new_read[tid]) ? "!!!" : "";
+		print tid, first_new_read[tid], write_start, first_new_read[tid] - write_start flag;
+	}
+}
+
+END {
+	print "Note: False positives possible due to lack of memory ordering."
+}'
diff --git a/CodeSamples/cpu/temporal.c b/CodeSamples/cpu/temporal.c
new file mode 100644
index 00000000..bcbcf022
--- /dev/null
+++ b/CodeSamples/cpu/temporal.c
@@ -0,0 +1,315 @@
+/*
+ * temporal.c: Demonstrate the temporal (and non-temporal) properties of coe, fre, and rfe
+ *
+ * This test produces output as follows:
+ *
+ * ./temporal --coe --nthreads 4
+ * ./temporal arguments: coe nthread 4 duration: 100
+ * 0 881008 0 881080 881008 881080
+ * 0 881368 1 881440 241140380 241140400
+ * 1 881376 1 881476 241140376 241140396
+ * 2 881304 2 881444 881304 881444
+ * 2 881720 1 881792 241140368 241140390
+ * 3 880652 3 880740 880652 880740
+ * 3 881012 2 881080 881012 881080
+ * 3 881724 1 882176 241140344 241140364
+ *
+ * The columns are thread number, start time of the first sample, value
+ * of shared variable, end time of the first sample, the start time of
+ * the last sample, and the end time of the last sample. All times are
+ * in nanoseconds since (roughly) the beginning of the run. If a given
+ * value was observed by only one read, the times for the first and last
+ * samples will be identical.
+ *
+ * The --fre and --rfe arguments also produce a line as follows:
+ *
+ * Write 121297118 1 121297136 121297118 121297136
+ *
+ * This records the times and values of the write that the other threads
+ * are reading from.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (c) 2023 Paul E. McKenney, Meta Platforms Inc.
+ */
+
+#include "../api.h"
+#include <time.h>
+#include <stdarg.h>
+
+/*
+ * Test variables.
+ */
+
+// States of goflag.  Child threads spin while it is GOFLAG_INIT, sample
+// while it is GOFLAG_RUN, and stop sampling once the parent sets it to
+// GOFLAG_STOP.
+#define GOFLAG_INIT 0
+#define GOFLAG_RUN 1
+#define GOFLAG_STOP 2
+
+// Written only by the parent, polled by every child.
+int goflag __attribute__((__aligned__(CACHE_LINE_SIZE))) = GOFLAG_INIT;
+
+// Command-line settings, filled in by main().  Exactly one of coe, fre,
+// and rfe is non-zero once argument parsing succeeds.
+static int coe;
+// Run length in milliseconds (passed to poll() as its timeout).
+static int duration = 100;
+static int fre;
+// Number of child (measurement) threads to create.
+static int nthreads = 1;
+static int rfe;
+
+// The variable whose value the children repeatedly sample in readval().
+static int sharedvar __attribute__((__aligned__(CACHE_LINE_SIZE)));
+
+// One observed value of sharedvar together with the timestamps bracketing
+// the first read that returned it and the most recent consecutive read
+// that returned it.
+struct sample {
+ // Value of sharedvar returned by the read.
+ int value;
+ // Timestamp taken just before the first read of this value.
+ long long tbefore;
+ // Timestamp taken just before the latest read of this value.
+ long long tbeforelast;
+ // Timestamp taken just after the first read of this value.
+ long long tafter;
+ // Timestamp taken just after the latest read of this value.
+ long long tafterlast;
+};
+
+// Per-child-thread state.  Allocated and initialized by
+// create_all_threads(), filled in by the child in collect_data(), and
+// printed by dump_all_threads().
+struct sample_data {
+ int sd_me; // My integer ID.
+ int sd_started; // This thread has begun executing.
+ void (*sd_func)(struct sample_data *); // Init function, if non-NULL.
+ int sd_n; // Number of entries in ->sd_samples array.
+ int sd_nsamples; // Number of samples collected.
+ struct sample *sd_samples; // Array for sample collection.
+};
+
+// Read clock, sharedvar, clock and store in the specified struct sample.
+// The "last" timestamps start out equal to the first-read timestamps;
+// collect_data() advances them when later reads return the same value.
+void readval(struct sample *sp)
+{
+ // Timestamp immediately preceding the read of sharedvar.
+ sp->tbefore = get_timestamp();
+ sp->tbeforelast = sp->tbefore;
+ sp->value = READ_ONCE(sharedvar);
+ // Timestamp immediately following the read of sharedvar.
+ sp->tafter = get_timestamp();
+ sp->tafterlast = sp->tafter;
+}
+
+// Collect unique data reads, timestamped.  If there are multiple
+// consecutive reads of the same value, the timestamps bracketing the
+// first such read and those bracketing the last such read are retained.
+//
+// Sampling continues while goflag is GOFLAG_RUN, or until a new value
+// shows up after all ->sd_n entries of ->sd_samples are in use.  On
+// return, ->sd_nsamples counts only entries that were actually stored.
+void collect_data(struct sample_data *sdp)
+{
+	int i = 1;	// Number of entries of ->sd_samples in use.
+	struct sample s;
+	struct sample *sp;
+
+	// The first read always lands in entry 0.
+	sp = &sdp->sd_samples[0];
+	readval(sp);
+	sdp->sd_nsamples = 1;
+	while (READ_ONCE(goflag) == GOFLAG_RUN) {
+		readval(&s);
+		if (s.value == sp->value) {
+			// Same value again: extend the current sample.
+			sp->tbeforelast = s.tbefore;
+			sp->tafterlast = s.tafter;
+		} else {
+			// New value: check for room before counting it, so
+			// that the count never covers an unwritten entry.
+			if (i >= sdp->sd_n)
+				break;
+			sp++;
+			*sp = s;
+			sdp->sd_nsamples = ++i;
+		}
+	}
+}
+
+// Body of every child thread: announce startup, spin until the parent
+// moves goflag away from GOFLAG_INIT, run the optional ->sd_func hook,
+// and then collect samples.  Always returns NULL.
+void *child_thread(void *args)
+{
+	struct sample_data *me = args;
+
+	WRITE_ONCE(me->sd_started, 1);
+	for (;;) {
+		if (READ_ONCE(goflag) != GOFLAG_INIT)
+			break;
+	}
+	if (me->sd_func != NULL)
+		me->sd_func(me);
+	collect_data(me);
+	return NULL;
+}
+
+// Allocate per-thread state for nthreads children, start each child, and
+// return once every child has reported that it is running.  cf() is the
+// per-child initialization hook (may be NULL) and n is the capacity of
+// each child's sample array.  Allocation failure trips BUG_ON().
+struct sample_data *create_all_threads(void (*cf)(struct sample_data *), int n)
+{
+	struct sample_data *all;
+	struct sample_data *cur;
+	int t;
+
+	all = calloc(nthreads, sizeof(*all));
+	BUG_ON(!all);
+
+	// Set up each child's state, then launch it.
+	for (t = 0; t < nthreads; t++) {
+		cur = &all[t];
+		cur->sd_samples = calloc(n, sizeof(*cur->sd_samples));
+		BUG_ON(!cur->sd_samples);
+		cur->sd_me = t;
+		cur->sd_n = n;
+		cur->sd_started = 0;
+		cur->sd_func = cf;
+		cur->sd_nsamples = 0;
+		create_thread(child_thread, cur);
+	}
+
+	// Spin until each child in turn has set its ->sd_started flag.
+	t = 0;
+	while (t < nthreads) {
+		if (READ_ONCE(all[t].sd_started))
+			t++;
+	}
+
+	return all;
+}
+
+// Print every sample collected by every child, one line per sample:
+// thread number, first-read start time, value, first-read end time,
+// last-read start time, and last-read end time.  All times are printed
+// relative to *tsp.  Anything the parent itself wants printed is the
+// parent's job.
+void dump_all_threads(struct sample_data *sdp, long long *tsp)
+{
+	int t;
+
+	for (t = 0; t < nthreads; t++) {
+		struct sample_data *tdp = &sdp[t];
+		int k;
+
+		for (k = 0; k < tdp->sd_nsamples; k++) {
+			struct sample *cur = &tdp->sd_samples[k];
+
+			printf("%d %lld %d %lld %lld %lld\n", t,
+			       cur->tbefore - *tsp, cur->value,
+			       cur->tafter - *tsp,
+			       cur->tbeforelast - *tsp,
+			       cur->tafterlast - *tsp);
+		}
+	}
+}
+
+// Initialization hook for --coe children: each child stores its own
+// thread ID into sharedvar before it begins sampling.
+void coe_start(struct sample_data *sdp)
+{
+	int my_id = sdp->sd_me;
+
+	WRITE_ONCE(sharedvar, my_id);
+}
+
+// The coe parent lets the children do the writing.
+// It seeds sharedvar, starts the children with coe_start() as their
+// init hook, lets them run for "duration" milliseconds, stops them, and
+// dumps their samples relative to the timestamp taken at the start.
+void coe_parent(void)
+{
+ struct sample_data *sdp;
+ long long ts;
+
+ // -1 is not a thread ID (IDs start at 0), so it cannot be confused
+ // with any child's write.
+ sharedvar = -1;
+ // Base time that all dumped timestamps are made relative to.
+ ts = get_timestamp();
+ // Each child gets room for 2 * nthreads distinct observations.
+ sdp = create_all_threads(coe_start, 2 * nthreads);
+ WRITE_ONCE(goflag, GOFLAG_RUN);
+ // poll() with no file descriptors serves as a millisecond sleep.
+ poll(NULL, 0, duration);
+ WRITE_ONCE(goflag, GOFLAG_STOP);
+ // NOTE(review): wait_all_threads() comes from ../api.h; presumably it
+ // blocks until every child has exited -- confirm.
+ wait_all_threads();
+ dump_all_threads(sdp, &ts);
+}
+
+// The fre and rfe parent threads do the writing themselves.
+// The children only read: they start out seeing 0, the parent stores 1
+// roughly halfway through the run with timestamps taken on either side
+// of the store, and the final "Write" line reports those timestamps so
+// that they can be compared against the children's samples.
+void fre_rfe_parent(void)
+{
+ struct sample_data *sdp;
+ long long ts;
+ long long tsafter;
+ long long tsbefore;
+
+ sharedvar = 0;
+ // Base time that all printed timestamps are made relative to.
+ ts = get_timestamp();
+ // No init hook; each child gets room for five distinct observations.
+ sdp = create_all_threads(NULL, 5);
+ WRITE_ONCE(goflag, GOFLAG_RUN);
+ // First half of the run (rounded up): children observe the old value.
+ poll(NULL, 0, (duration + 1) / 2);
+ // Bracket the one and only store with timestamps.
+ tsbefore = get_timestamp();
+ WRITE_ONCE(sharedvar, 1);
+ tsafter = get_timestamp();
+ // Second half of the run: children get a chance to observe the new value.
+ poll(NULL, 0, (duration + 1) / 2);
+ WRITE_ONCE(goflag, GOFLAG_STOP);
+ // NOTE(review): wait_all_threads() comes from ../api.h; presumably it
+ // blocks until every child has exited -- confirm.
+ wait_all_threads();
+ dump_all_threads(sdp, &ts);
+ // Rebase the write timestamps the same way dump_all_threads() does.
+ tsbefore = tsbefore - ts;
+ tsafter = tsafter - ts;
+ // Same column layout as a child sample line, with "Write" in place of
+ // the thread number and first/last times identical.
+ printf("Write %lld 1 %lld %lld %lld\n",
+ tsbefore, tsafter, tsbefore, tsafter);
+}
+
+
+/*
+ * Main program.
+ */
+
+// Print the caller's printf()-style complaint followed by the option
+// summary on stderr, then exit with failure status.  Does not return.
+void usage(char *progname, const char *format, ...)
+{
+	static const char *const option_help[] = {
+		"\t --coe\n",
+		"\t\tCollect coherence (modification order) times.\n",
+		"\t --fre\n",
+		"\t\tCollect from-read time.\n",
+		"\t --rfe\n",
+		"\t\tCollect read-from time.\n",
+		"\t --nthreads\n",
+		"\t\tNumber of measurement threads (#CPUs-1).\n",
+		"\t --duration\n",
+		"\t\tDuration of run in milliseconds (default 100).\n",
+	};
+	unsigned int k;
+	va_list args;
+
+	va_start(args, format);
+	vfprintf(stderr, format, args);
+	va_end(args);
+	fprintf(stderr, "Usage: %s\n", progname);
+	for (k = 0; k < sizeof(option_help) / sizeof(option_help[0]); k++)
+		fputs(option_help[k], stderr);
+	exit(EXIT_FAILURE);
+}
+
+// Record the test mode selected on the command line by setting *modep,
+// complaining via usage() if some mode has already been selected.
+static void select_mode(char *progname, int *modep)
+{
+	if (coe + fre + rfe != 0)
+		usage(progname, "Only one of --coe, --fre, and --rfe may be specified.\n");
+	*modep = 1;
+}
+
+// Return the positive integer that follows the option at argv[*ip],
+// leaving *ip indexing that value.  Complains via usage() if the value
+// is missing or is not a positive integer.
+static int positive_arg(int argc, char *argv[], int *ip)
+{
+	char *opt = argv[*ip];
+	int val;
+
+	// Without this check, a trailing option would hand NULL to atoi().
+	if (*ip + 1 >= argc)
+		usage(argv[0], "%s: missing argument.\n", opt);
+	*ip += 1;
+	val = atoi(argv[*ip]);
+	if (val <= 0)
+		usage(argv[0], "%s: %s argument must be positive integer.\n", argv[*ip], opt);
+	return val;
+}
+
+// Parse the command line, echo the resulting settings, and run the
+// selected test.  Exactly one of --coe, --fre, and --rfe is required;
+// --nthreads and --duration each take a positive integer.  Any parsing
+// problem is reported through usage(), which exits.
+int main(int argc, char *argv[])
+{
+	int i = 1;
+
+	smp_init();
+
+	while (i < argc) {
+		if (strcmp(argv[i], "--coe") == 0) {
+			select_mode(argv[0], &coe);
+		} else if (strcmp(argv[i], "--fre") == 0) {
+			select_mode(argv[0], &fre);
+		} else if (strcmp(argv[i], "--rfe") == 0) {
+			select_mode(argv[0], &rfe);
+		} else if (strcmp(argv[i], "--nthreads") == 0) {
+			nthreads = positive_arg(argc, argv, &i);
+		} else if (strcmp(argv[i], "--duration") == 0) {
+			duration = positive_arg(argc, argv, &i);
+		} else {
+			// The %s needs its argument, otherwise this is
+			// undefined behavior.
+			usage(argv[0], "Unrecognized argument: %s\n", argv[i]);
+		}
+		++i;
+	}
+	if (coe + fre + rfe == 0)
+		usage(argv[0], "At least one of --coe, --fre, and --rfe must be specified.\n");
+
+	// Dump arguments.
+	printf("%s arguments: %s nthread %d duration: %d\n",
+	       argv[0], coe ? "coe" : fre ? "fre" : "rfe", nthreads, duration);
+
+	if (coe)
+		coe_parent();
+	else
+		fre_rfe_parent();
+
+	return 0;
+}