From b4e54ae39c132ebd124002841204665b59eee05b Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 23 Jun 2016 13:31:37 +0200 Subject: rt-tests: Add cyclicdeadline and deadline_test Add cyclicdeadline and deadline_test - these are two programs written by Steven Rostedet to test the functionality and performance of SCHED_DEADLINE Signed-off-by: John Kacur --- src/sched_deadline/cyclicdeadline.c | 1267 +++++++++++++++++++++ src/sched_deadline/deadline_test.c | 2097 +++++++++++++++++++++++++++++++++++ 2 files changed, 3364 insertions(+) create mode 100644 src/sched_deadline/cyclicdeadline.c create mode 100644 src/sched_deadline/deadline_test.c diff --git a/src/sched_deadline/cyclicdeadline.c b/src/sched_deadline/cyclicdeadline.c new file mode 100644 index 0000000..9c50456 --- /dev/null +++ b/src/sched_deadline/cyclicdeadline.c @@ -0,0 +1,1267 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef __i386__ +#ifndef __NR_sched_setattr +#define __NR_sched_setattr 351 +#endif +#ifndef __NR_sched_getattr +#define __NR_sched_getattr 352 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 309 +#endif +#else /* x86_64 */ +#ifndef __NR_sched_setattr +#define __NR_sched_setattr 314 +#endif +#ifndef __NR_sched_getattr +#define __NR_sched_getattr 315 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 309 +#endif +#endif /* i386 or x86_64 */ +#ifndef SCHED_DEADLINE +#define SCHED_DEADLINE 6 +#endif + +#define _STR(x) #x +#define STR(x) _STR(x) +#ifndef MAXPATH +#define MAXPATH 1024 +#endif + +#define CPUSET_ALL "my_cpuset_all" +#define CPUSET_LOCAL "my_cpuset" + +#define gettid() syscall(__NR_gettid) +#define sched_setattr(pid, attr, flags) syscall(__NR_sched_setattr, pid, attr, flags) +#define sched_getattr(pid, attr, size, flags) syscall(__NR_sched_getattr, pid, attr, size, flags) +#define getcpu(cpup, 
nodep, unused) syscall(__NR_getcpu, cpup, nodep, unused) + +typedef unsigned long long u64; +typedef unsigned int u32; +typedef int s32; + +/* Struct to transfer parameters to the thread */ +struct thread_param { + u64 runtime_us; + u64 deadline_us; + + int mode; + int timermode; + int signal; + int clock; + unsigned long max_cycles; + struct thread_stat *stats; + unsigned long interval; + int cpu; + int node; + int tnum; +}; + +/* Struct for statistics */ +struct thread_stat { + unsigned long cycles; + unsigned long cyclesread; + long min; + long max; + long act; + double avg; + long *values; + long *hist_array; + long *outliers; + pthread_t thread; + int threadstarted; + int tid; + long reduce; + long redmax; + long cycleofmax; + long hist_overflow; + long num_outliers; +}; + +struct sched_data { + u64 runtime_us; + u64 deadline_us; + + int bufmsk; + + struct thread_stat stat; + + char buff[BUFSIZ+1]; +}; + +struct sched_attr { + u32 size; + + u32 sched_policy; + u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + u32 sched_priority; + + /* SCHED_DEADLINE */ + u64 sched_runtime; + u64 sched_deadline; + u64 sched_period; +}; + +static int shutdown; + +static pthread_barrier_t barrier; + +static int cpu_count; +static int all_cpus; + +static int nr_threads; +static int use_nsecs; + +static int mark_fd; + +static int find_mount(const char *mount, char *debugfs) +{ + char type[100]; + FILE *fp; + + if ((fp = fopen("/proc/mounts","r")) == NULL) + return 0; + + while (fscanf(fp, "%*s %" + STR(MAXPATH) + "s %99s %*s %*d %*d\n", + debugfs, type) == 2) { + if (strcmp(type, mount) == 0) + break; + } + fclose(fp); + + if (strcmp(type, mount) != 0) + return 0; + return 1; +} + +static const char *find_debugfs(void) +{ + static int debugfs_found; + static char debugfs[MAXPATH+1]; + + if (debugfs_found) + return debugfs; + + if (!find_mount("debugfs", debugfs)) + return ""; + + debugfs_found = 1; + + return debugfs; +} + 
+static int my_vsprintf(char *buf, int size, const char *fmt, va_list ap) +{ + const char *p; + char tmp[100]; + char *s = buf; + char *end = buf + size; + char *str; + long long lng; + int l; + int i; + + end[-1] = 0; + + for (p = fmt; *p && s < end; p++) { + if (*p == '%') { + l = 0; + again: + p++; + switch (*p) { + case 's': + if (l) { + fprintf(stderr, "Illegal print format l used with %%s\n"); + exit(-1); + } + str = va_arg(ap, char *); + l = strlen(str); + strncpy(s, str, end - s); + s += l; + break; + case 'l': + l++; + goto again; + case 'd': + if (l == 1) { + if (sizeof(long) == 8) + l = 2; + } + if (l == 2) + lng = va_arg(ap, long long); + else if (l > 2) { + fprintf(stderr, "Illegal print format l=%d\n", l); + exit(-1); + } else + lng = va_arg(ap, int); + i = 0; + while (lng > 0) { + tmp[i++] = (lng % 10) + '0'; + lng /= 10; + } + tmp[i] = 0; + l = strlen(tmp); + if (!l) { + *s++ = '0'; + } else { + while (l) + *s++ = tmp[--l]; + } + break; + default: + fprintf(stderr, "Illegal print format '%c'\n", *p); + exit(-1); + } + continue; + } + *s++ = *p; + } + + return s - buf; +} + +#if 0 +static int my_sprintf(char *buf, int size, const char *fmt, ...) +{ + va_list ap; + int n; + + va_start(ap, fmt); + n = vsnprintf(buf, size, fmt, ap); + va_end(ap); + return n; +} +#endif + +static void ftrace_write(char *buf, const char *fmt, ...) 
+{ + va_list ap; + int n; + + if (mark_fd < 0) + return; + + va_start(ap, fmt); + n = my_vsprintf(buf, BUFSIZ, fmt, ap); + va_end(ap); + + write(mark_fd, buf, n); +} + +static void setup_ftrace_marker(void) +{ + struct stat st; + const char *debugfs = find_debugfs(); + char files[strlen(debugfs) + 14]; + int ret; + + if (strlen(debugfs) == 0) + return; + + sprintf(files, "%s/tracing/trace_marker", debugfs); + ret = stat(files, &st); + if (ret >= 0) + goto found; + /* Do nothing if not mounted */ + return; +found: + mark_fd = open(files, O_WRONLY); +} + +static int setup_hr_tick(void) +{ + const char *debugfs = find_debugfs(); + char files[strlen(debugfs) + strlen("/sched_features") + 1]; + char buf[500]; + struct stat st; + static int set = 0; + char *p; + int ret; + int len; + int fd; + + if (set) + return 1; + + set = 1; + + if (strlen(debugfs) == 0) + return 0; + + sprintf(files, "%s/sched_features", debugfs); + ret = stat(files, &st); + if (ret < 0) + return 0; + + fd = open(files, O_RDWR); + perror(files); + if (fd < 0) + return 0; + + len = sizeof(buf); + + ret = read(fd, buf, len); + if (ret < 0) { + perror(files); + close(fd); + return 0; + } + if (ret >= len) + ret = len - 1; + buf[ret] = 0; + + ret = 1; + + p = strstr(buf, "HRTICK"); + if (p + 3 >= buf) { + p -= 3; + if (strncmp(p, "NO_HRTICK", 9) == 0) { + ret = write(fd, "HRTICK", 6); + if (ret != 6) + ret = 0; + else + ret = 1; + } + } + + close(fd); + return ret; +} + +static int mounted(const char *path, long magic) +{ + struct statfs st_fs; + + if (statfs(path, &st_fs) < 0) + return -1; + if ((long)st_fs.f_type != magic) + return 0; + return 1; +} + +#define CGROUP_PATH "/sys/fs/cgroup" +#define CPUSET_PATH CGROUP_PATH "/cpuset" + +static int open_cpuset(const char *path, const char *name) +{ + char buf[MAXPATH]; + struct stat st; + int ret; + int fd; + + buf[MAXPATH - 1] = 0; + snprintf(buf, MAXPATH - 1, "%s/%s", path, name); + + ret = stat(buf, &st); + if (ret < 0) + return ret; + + fd = open(buf, 
/*
 * NOTE(review): the three tokens below complete open_cpuset(), whose head
 * sits in the previous chunk of this newline-stripped patch.
 */
O_WRONLY);
	return fd;
}

/*
 * mount_cpuset - make sure the cgroup tmpfs and the cpuset controller are
 * mounted, then mark the root cpuset CPU-exclusive with load balancing off
 * so that child cpusets can take exclusive ownership of CPUs.
 * Returns 0 on success, negative on failure.
 */
static int mount_cpuset(void)
{
	struct stat st;
	int ret;
	int fd;

	/* Check if cgroups is already mounted. */
	ret = mounted(CGROUP_PATH, TMPFS_MAGIC);
	if (ret < 0)
		return ret;
	if (!ret) {
		ret = mount("cgroup_root", CGROUP_PATH, "tmpfs", 0, NULL);
		if (ret < 0)
			return ret;
	}
	ret = stat(CPUSET_PATH, &st);
	if (ret < 0) {
		ret = mkdir(CPUSET_PATH, 0755);
		if (ret < 0)
			return ret;
	}
	ret = mounted(CPUSET_PATH, CGROUP_SUPER_MAGIC);
	if (ret < 0)
		return ret;
	if (!ret) {
		ret = mount("cpuset", CPUSET_PATH, "cgroup", 0, "cpuset");
		if (ret < 0)
			return ret;
	}

	fd = open_cpuset(CPUSET_PATH, "cpuset.cpu_exclusive");
	if (fd < 0)
		return fd;
	/* "1" plus its NUL terminator; the kernel accepts the two bytes. */
	ret = write(fd, "1", 2);
	close(fd);

	fd = open_cpuset(CPUSET_PATH, "cpuset.sched_load_balance");
	if (fd < 0)
		return fd;
	ret = write(fd, "0", 2);
	close(fd);

	return 0;
}

/* Flags telling make_cpuset() what to configure on the new cpuset. */
enum {
	CPUSET_FL_CPU_EXCLUSIVE	= (1 << 0),
	CPUSET_FL_MEM_EXCLUSIVE	= (1 << 1),
	CPUSET_FL_ALL_TASKS	= (1 << 2),	/* move every task into it */
	CPUSET_FL_TASKS		= (1 << 3),	/* move a given pid list into it */
	CPUSET_FL_CLEAR_LOADBALANCE	= (1 << 4),
	CPUSET_FL_SET_LOADBALANCE	= (1 << 5),
	CPUSET_FL_CLONE_CHILDREN	= (1 << 6),
};

/*
 * make_cpuset - create and populate a cpuset
 * @name: directory name of the cpuset under CPUSET_PATH
 * @cpus: CPU list string to write into cpuset.cpus
 * @mems: memory node list for cpuset.mems (may be NULL to skip)
 * @flags: CPUSET_FL_* bits; CPUSET_FL_TASKS takes a trailing
 *         zero-terminated int array of pids as a variadic argument
 *
 * Returns NULL on success, otherwise a short string naming the step that
 * failed (suitable for perror()).
 */
static const char *make_cpuset(const char *name, const char *cpus,
			       const char *mems, unsigned flags, ...)
{
	struct stat st;
	char path[MAXPATH];
	char buf[100];
	va_list ap;
	int ret;
	int fd;

	printf("Creating cpuset '%s'\n", name);
	snprintf(path, MAXPATH - 1, "%s/%s", CPUSET_PATH, name);
	path[MAXPATH - 1] = 0;

	ret = mount_cpuset();
	if (ret < 0)
		return "mount_cpuset";

	ret = stat(path, &st);
	if (ret < 0) {
		ret = mkdir(path, 0755);
		if (ret < 0)
			return "mkdir";
	}

	fd = open_cpuset(path, "cpuset.cpus");
	if (fd < 0)
		return "cset";
	ret = write(fd, cpus, strlen(cpus));
	close(fd);
	if (ret < 0)
		return "write cpus";

	if (mems) {
		fd = open_cpuset(path, "cpuset.mems");
		if (fd < 0)
			return "open mems";
		ret = write(fd, mems, strlen(mems));
		close(fd);
		if (ret < 0)
			return "write mems";
	}

	if (flags & CPUSET_FL_CPU_EXCLUSIVE) {
		fd = open_cpuset(path, "cpuset.cpu_exclusive");
		if (fd < 0)
			return "open cpu_exclusive";
		ret = write(fd, "1", 2);
		close(fd);
		if (ret < 0)
			return "write cpu_exclusive";
	}

	if (flags & (CPUSET_FL_CLEAR_LOADBALANCE | CPUSET_FL_SET_LOADBALANCE)) {
		fd = open_cpuset(path, "cpuset.sched_load_balance");
		if (fd < 0)
			return "open sched_load_balance";
		if (flags & CPUSET_FL_SET_LOADBALANCE)
			ret = write(fd, "1", 2);
		else
			ret = write(fd, "0", 2);
		close(fd);
		if (ret < 0)
			return "write sched_load_balance";
	}

	if (flags & CPUSET_FL_CLONE_CHILDREN) {
		fd = open_cpuset(path, "cgroup.clone_children");
		if (fd < 0)
			return "open clone_children";
		ret = write(fd, "1", 2);
		close(fd);
		if (ret < 0)
			return "write clone_children";
	}


	if (flags & CPUSET_FL_TASKS) {
		int *pids;
		int i;

		va_start(ap, flags);

		fd = open_cpuset(path, "tasks");
		if (fd < 0)
			return "open tasks";

		ret = 0;
		/* The pid array is zero-terminated. */
		pids = va_arg(ap, int *);
		for (i = 0; pids[i]; i++) {
			sprintf(buf, "%d ", pids[i]);
			ret = write(fd, buf, strlen(buf));
		}
		va_end(ap);
		close(fd);
		if (ret < 0) {
			fprintf(stderr, "Failed on task %d\n", pids[i]);
			return "write tasks";
		}
	}

	if (flags & CPUSET_FL_ALL_TASKS) {
		FILE *fp;
		int pid;

		fd = open_cpuset(path, "tasks");

		/* Walk every task currently in the root cpuset. */
		snprintf(path, MAXPATH - 1, "%s/tasks", CPUSET_PATH);
		if ((fp = fopen(path,"r")) == NULL) {
			close(fd);
			return "opening cpuset tasks";
		}

		while (fscanf(fp, "%d", &pid) == 1) {
			sprintf(buf, "%d", pid);
			ret = write(fd, buf, strlen(buf));
			/*
			 * Tasks can come and go, the only error we care
			 * about is ENOSPC, as that means something went
			 * wrong that we did not expect.
			 */
			if (ret < 0 && errno == ENOSPC) {
				fclose(fp);
				close(fd);
				return "Can not move tasks";
			}
		}
		fclose(fp);
		close(fd);
	}

	return NULL;
}

/*
 * destroy_cpuset - move all tasks back to the root cpuset and rmdir @name.
 * @print: if non-zero, log each pid as it is moved out.
 *
 * Retries a few times because tasks may re-enter the cpuset while it is
 * being drained.
 */
static void destroy_cpuset(const char *name, int print)
{
	struct stat st;
	char path[MAXPATH];
	char buf[100];
	FILE *fp;
	int pid;
	int ret;
	int fd;
	int retry = 0;

	printf("Removing %s\n", name);
	snprintf(path, MAXPATH - 1, "%s/%s", CPUSET_PATH, name);
	path[MAXPATH - 1] = 0;

	ret = stat(path, &st);
	if (ret < 0)
		return;

 again:
	/* Read the cpuset's task list ... */
	strncat(path, "/tasks", MAXPATH - 1);
	if ((fp = fopen(path,"r")) == NULL) {
		fprintf(stderr, "Failed opening %s\n", path);
		perror("fopen");
		return;
	}
	/* ... and write each pid into the root cpuset's task list. */
	snprintf(path, MAXPATH - 1, "%s/tasks", CPUSET_PATH);
	path[MAXPATH - 1] = 0;

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		fclose(fp);
		fprintf(stderr, "Failed opening %s\n", path);
		perror("open");
		return;
	}

	while (fscanf(fp, "%d", &pid) == 1) {
		sprintf(buf, "%d", pid);
		if (print)
			printf("Moving %d out of %s\n", pid, name);
		write(fd, buf, strlen(buf));
	}
	fclose(fp);
	close(fd);

	snprintf(path, MAXPATH - 1, "%s/%s", CPUSET_PATH, name);
	path[MAXPATH - 1] = 0;

// return;
	sleep(1);
	ret = rmdir(path);
	if (ret < 0) {
		if (retry++ < 5)
			goto again;
		fprintf(stderr, "Failed to remove %s\n", path);
		perror("rmdir");
		if (retry++ < 5) {
			fprintf(stderr, "Trying again\n");
			goto again;
		}
	}
}

/*
 * teardown - undo the exclusive-cpuset setup at exit.
 * NOTE(review): this function's body continues in the next chunk of the
 * mangled patch; the dangling "fd =" below is completed there.
 */
static void teardown(void)
{
	int fd;

	if (all_cpus)
		return;

	fd =
open_cpuset(CPUSET_PATH, "cpuset.cpu_exclusive"); + if (fd >= 0) { + write(fd, "0", 2); + close(fd); + } + + fd = open_cpuset(CPUSET_PATH, "cpuset.sched_load_balance"); + if (fd >= 0) { + write(fd, "1", 2); + close(fd); + } + + destroy_cpuset(CPUSET_ALL, 0); + destroy_cpuset(CPUSET_LOCAL, 1); +} + +static void usage(char **argv) +{ + char *arg = argv[0]; + char *p = arg+strlen(arg); + + while (p >= arg && *p != '/') + p--; + p++; + + printf("usage: %s\n" + "\n",p); + exit(-1); +} + +static int fail; + +static u64 get_time_us(void) +{ + struct timespec ts; + u64 time; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + time = ts.tv_sec * 1000000; + time += ts.tv_nsec / 1000; + + return time; +} + +static void print_stat(FILE *fp, struct sched_data *sd, int index, int verbose, int quiet) +{ + struct thread_stat *stat = &sd->stat; + + if (!verbose) { + if (quiet != 1) { + char *fmt; + if (use_nsecs) + fmt = "T:%2d (%5d) I:%ld C:%7lu " + "Min:%7ld Act:%8ld Avg:%8ld Max:%8ld\n"; + else + fmt = "T:%2d (%5d) I:%ld C:%7lu " + "Min:%7ld Act:%5ld Avg:%5ld Max:%8ld\n"; + fprintf(fp, fmt, index, stat->tid, + sd->deadline_us, stat->cycles, stat->min, stat->act, + stat->cycles ? + (long)(stat->avg/stat->cycles) : 0, stat->max); + } + } else { + while (stat->cycles != stat->cyclesread) { + long diff = stat->values + [stat->cyclesread & sd->bufmsk]; + + if (diff > stat->redmax) { + stat->redmax = diff; + stat->cycleofmax = stat->cyclesread; + } + stat->cyclesread++; + } + } +} + +static u64 do_runtime(long tid, struct sched_data *sd, u64 period) +{ + struct thread_stat *stat = &sd->stat; + u64 next_period = period + sd->deadline_us; + u64 now = get_time_us(); + u64 diff; + + if (now < period) { + u64 delta = period - now; + /* + * The period could be off due to other deadline tasks + * preempting us when we started. If that's the case then + * adjust the current period. 
+ */ + ftrace_write(sd->buff, + "Adjusting period: now: %lld period: %lld delta:%lld%s\n", + now, period, delta, delta > sd->deadline_us / 2 ? + " HUGE ADJUSTMENT" : ""); + period = now; + next_period = period + sd->deadline_us; + } + + ftrace_write(sd->buff, "start at %lld off=%lld (period=%lld next=%lld)\n", + now, now - period, period, next_period); + + + diff = now - period; + if (diff > stat->max) + stat->max = diff; + if (!stat->min || diff < stat->min) + stat->min = diff; + stat->act = diff; + stat->avg += (double) diff; + + stat->cycles++; + + return next_period; +} + +void *run_deadline(void *data) +{ + struct sched_data *sd = data; + struct thread_stat *stat = &sd->stat; + struct sched_attr attr; + long tid = gettid(); + u64 period; + int ret; + + printf("deadline thread %ld\n", tid); + + stat->tid = tid; + + ret = sched_getattr(0, &attr, sizeof(attr), 0); + if (ret < 0) { + fprintf(stderr, "[%ld]", tid); + perror("sched_getattr"); + fail = 1; + pthread_barrier_wait(&barrier); + pthread_exit("Failed sched_getattr"); + return NULL; + } + + pthread_barrier_wait(&barrier); + + if (fail) + return NULL; + + attr.sched_policy = SCHED_DEADLINE; + attr.sched_runtime = sd->runtime_us * 1000; + attr.sched_deadline = sd->deadline_us * 1000; + + printf("thread[%ld] runtime=%lldus deadline=%lldus\n", + gettid(), sd->runtime_us, sd->deadline_us); + + pthread_barrier_wait(&barrier); + + ret = sched_setattr(0, &attr, 0); + if (ret < 0) { + fprintf(stderr, "[%ld]", tid); + perror("sched_setattr"); + fail = 1; + pthread_barrier_wait(&barrier); + pthread_exit("Failed sched_setattr"); + return NULL; + } + + pthread_barrier_wait(&barrier); + + if (fail) + return NULL; + + sched_yield(); + period = get_time_us(); + + while (!shutdown) { + period = do_runtime(tid, sd, period); + sched_yield(); + } + ret = sched_getattr(0, &attr, sizeof(attr), 0); + if (ret < 0) { + perror("sched_getattr"); + pthread_exit("Failed second sched_getattr"); + } + + return NULL; +} + +struct cpu_list 
/* A range of CPUs [start_cpu, end_cpu]; the list is kept sorted and disjoint. */
struct cpu_list {
	struct cpu_list *next;
	int start_cpu;
	int end_cpu;
};

/* Tentative redeclaration of the file-scope counter (merges with the one
 * declared earlier); lets this block build standalone. */
static int cpu_count;

/*
 * add_cpus - insert the range [@start_cpu, @end_cpu] into *@cpu_list,
 * merging with any existing ranges it touches or is adjacent to.
 *
 * Fix: a range that was fully below (and not adjacent to) the current
 * entry was linked in AFTER that entry, breaking the sorted order that
 * make_new_list() relies on (e.g. adding 1-2 to {5-7} produced "5-7,1-2").
 * It is now inserted before the entry it precedes.
 */
static void add_cpus(struct cpu_list **cpu_list, int start_cpu, int end_cpu)
{
	struct cpu_list *list;

	/* Skip entries that end strictly before (and not adjacent to) us. */
	while (*cpu_list && (*cpu_list)->end_cpu + 1 < start_cpu)
		cpu_list = &(*cpu_list)->next;

	if (!*cpu_list) {
		*cpu_list = malloc(sizeof(struct cpu_list));
		if (!*cpu_list)
			return;	/* best effort; caller detects via count */
		(*cpu_list)->start_cpu = start_cpu;
		(*cpu_list)->end_cpu = end_cpu;
		(*cpu_list)->next = NULL;
		return;
	}

	/* Look to concatenate: we overlap or touch the current entry. */
	if (end_cpu > (*cpu_list)->start_cpu &&
	    start_cpu <= (*cpu_list)->end_cpu + 1) {
		if (start_cpu < (*cpu_list)->start_cpu)
			(*cpu_list)->start_cpu = start_cpu;
		/* Swallow any following entries that we now reach. */
		list = (*cpu_list)->next;
		while (list && list->start_cpu <= end_cpu + 1) {
			(*cpu_list)->end_cpu = list->end_cpu;
			(*cpu_list)->next = list->next;
			free(list);
			list = (*cpu_list)->next;
		}
		if ((*cpu_list)->end_cpu < end_cpu)
			(*cpu_list)->end_cpu = end_cpu;
		return;
	}

	/* Adjacent below the current entry: extend it downward. */
	if (end_cpu >= (*cpu_list)->start_cpu - 1) {
		(*cpu_list)->start_cpu = start_cpu;
		return;
	}

	/* Strictly below: insert BEFORE the current entry (bug fix). */
	list = malloc(sizeof(struct cpu_list));
	if (!list)
		return;
	list->start_cpu = start_cpu;
	list->end_cpu = end_cpu;
	list->next = *cpu_list;
	*cpu_list = list;
}

/*
 * count_cpus - count the CPUs in @cpu_list and FREE the list.
 * Returns -1 if any range exceeds the detected cpu_count.
 * (The trailing "? -1 : cpus" tokens were stranded on the next mangled
 * line; the function is completed here.)
 */
static int count_cpus(struct cpu_list *cpu_list)
{
	struct cpu_list *list;
	int cpus = 0;
	int fail = 0;

	while (cpu_list) {
		list = cpu_list;
		cpus += (list->end_cpu - list->start_cpu) + 1;
		if (list->end_cpu >= cpu_count)
			fail = 1;
		cpu_list = list->next;
		free(list);
	}
	return fail ? -1 : cpus;
}
-1 : cpus; +} + +static char *append_cpus(char *buf, int start, int end, + const char *comma, int *total) +{ + int len; + + if (start == end) { + len = snprintf(NULL, 0, "%s%d", comma, start); + buf = realloc(buf, *total + len + 1); + buf[*total] = 0; + snprintf(buf + *total, len + 1, "%s%d", comma, start); + } else { + len = snprintf(NULL, 0, "%s%d-%d", comma, start, end); + buf = realloc(buf, *total + len + 1); + buf[*total] = 0; + snprintf(buf + *total, len + 1, "%s%d-%d", comma, + start, end); + } + *total += len; + return buf; +} + +static void make_new_list(struct cpu_list *cpu_list, char **buf) +{ + char *comma = ""; + int total = 0; + + while (cpu_list) { + *buf = append_cpus(*buf, cpu_list->start_cpu, cpu_list->end_cpu, + comma, &total); + comma = ","; + cpu_list = cpu_list->next; + } +} + +static void make_other_cpu_list(const char *setcpu, char **cpus) +{ + const char *p = setcpu; + const char *comma = ""; + int curr_cpu = 0; + int cpu; + int total = 0; + + while (*p && curr_cpu < cpu_count) { + cpu = atoi(p); + if (cpu > curr_cpu) { + *cpus = append_cpus(*cpus, curr_cpu, cpu - 1, + comma, &total); + comma = ","; + } + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + cpu = atoi(p); + while (isdigit(*p)) + p++; + } + curr_cpu = cpu + 1; + if (*p) + p++; + } + + if (curr_cpu < cpu_count) { + *cpus = append_cpus(*cpus, curr_cpu, cpu_count - 1, + comma, &total); + } +} + +static int calc_nr_cpus(const char *setcpu, char **buf) +{ + struct cpu_list *cpu_list = NULL; + const char *p; + int end_cpu; + int cpu; + + for (p = setcpu; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0')) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0')) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + add_cpus(&cpu_list, cpu, end_cpu); + if (*p == ',') + p++; + } + + make_new_list(cpu_list, buf); + return count_cpus(cpu_list); + err: + /* Frees the list */ + 
count_cpus(cpu_list); + return -1; +} + +static void sighand(int sig) +{ + shutdown = 1; +} + +static const char *join_thread(pthread_t *thread) +{ + void *result; + + pthread_join(*thread, &result); + return result; +} + +static void loop(struct sched_data *sched_data, int nr_threads) +{ + int i; + + while (!shutdown) { + for (i = 0; i < nr_threads; i++) { + print_stat(stdout, &sched_data[i], i, 0, 0); + } + usleep(10000); + printf("\033[%dA", nr_threads); + } + usleep(10000); + for (i = 0; i < nr_threads; i++) { + printf("\n"); + } +} + +int main (int argc, char **argv) +{ + struct sched_data *sched_data; + struct sched_data *sd; + const char *res; + const char *setcpu = NULL; + char *setcpu_buf = NULL; + char *allcpu_buf = NULL; + pthread_t *thread; + unsigned int interval = 1000; + unsigned int step = 500; + int percent = 60; + u64 runtime; + u64 start_period; + u64 end_period; + int nr_cpus; + int i; + int c; + + cpu_count = sysconf(_SC_NPROCESSORS_CONF); + if (cpu_count < 1) { + fprintf(stderr, "Can not calculate number of CPUS\n"); + exit(-1); + } + + while ((c = getopt(argc, argv, "+hac:t:")) >= 0) { + switch (c) { + case 'a': + all_cpus = 1; + if (!nr_threads) + nr_threads = cpu_count; + break; + case 'c': + setcpu = optarg; + break; + case 'i': + interval = atoi(optarg); + break; + case 's': + step = atoi(optarg); + break; + case 't': + nr_threads = atoi(optarg); + break; + case 'h': + default: + usage(argv); + } + } + + if (!nr_threads) + nr_threads = 1; + + if (setcpu) { + nr_cpus = calc_nr_cpus(setcpu, &setcpu_buf); + if (nr_cpus < 0 || nr_cpus > cpu_count) { + fprintf(stderr, "Invalid cpu input '%s'\n", setcpu); + exit(-1); + } + } else + nr_cpus = cpu_count; + + if (!all_cpus && cpu_count == nr_cpus) { + printf("Using all CPUS\n"); + all_cpus = 1; + } + + /* Default cpu to use is the last one */ + if (!all_cpus && !setcpu) { + setcpu_buf = malloc(10); + if (!setcpu_buf) { + perror("malloc"); + exit(-1); + } + sprintf(setcpu_buf, "%d", cpu_count - 1); 
+ } + + setcpu = setcpu_buf; + + if (setcpu) + make_other_cpu_list(setcpu, &allcpu_buf); + + if (mlockall(MCL_CURRENT|MCL_FUTURE) == -1) { + perror("mlockall"); + } + + setup_ftrace_marker(); + + thread = calloc(nr_threads, sizeof(*thread)); + sched_data = calloc(nr_threads, sizeof(*sched_data)); + if (!thread || !sched_data) { + perror("allocating threads"); + exit(-1); + } + + if (nr_threads > nr_cpus) { + /* + * More threads than CPUs, then have the total be + * no more than 80 percent. + */ + percent = nr_cpus * 80 / nr_threads; + } + + /* Set up the data while sill in SCHED_FIFO */ + for (i = 0; i < nr_threads; i++) { + sd = &sched_data[i]; + /* + * Interval is the deadline/period + * The runtime is the percentage of that period. + */ + runtime = interval * percent / 100; + + if (runtime < 2000) { + /* + * If the runtime is less than 2ms, then we better + * have HRTICK enabled. + */ + if (!setup_hr_tick()) { + fprintf(stderr, "For less that 2ms run times, you need to\n" + "have HRTICK enabled in debugfs/sched_features\n"); + exit(-1); + } + } + sd->runtime_us = runtime; + sd->deadline_us = interval; + + printf("interval: %lld:%lld\n", sd->runtime_us, sd->deadline_us); + + /* Make sure that we can make our deadlines */ + start_period = get_time_us(); + do_runtime(gettid(), sd, start_period); + end_period = get_time_us(); + if (end_period - start_period > sd->runtime_us) { + fprintf(stderr, "Failed to perform task within runtime: Missed by %lld us\n", + end_period - start_period - sd->runtime_us); + exit(-1); + } + + printf(" Tested at %lldus of %lldus\n", + end_period - start_period, sd->runtime_us); + + interval += step; + } + + + pthread_barrier_init(&barrier, NULL, nr_threads + 1); + + for (i = 0; i < nr_threads; i++) { + sd = &sched_data[i]; + pthread_create(&thread[i], NULL, run_deadline, sd); + } + + atexit(teardown); + + pthread_barrier_wait(&barrier); + + if (fail) { + printf("fail 1\n"); + exit(-1); + } + + all_cpus = 1; + if (!all_cpus) { + int *pids; 
+ + res = make_cpuset(CPUSET_ALL, allcpu_buf, "0", + CPUSET_FL_SET_LOADBALANCE | + CPUSET_FL_CLONE_CHILDREN | + CPUSET_FL_ALL_TASKS); + if (res) { + perror(res); + exit(-1); + } + + pids = calloc(nr_threads + 1, sizeof(int)); + if (!pids) { + perror("Allocating pids"); + exit(-1); + } + + for (i = 0; i < nr_threads; i++) + pids[i] = sched_data[i].stat.tid; + + res = make_cpuset(CPUSET_LOCAL, setcpu, "0", + CPUSET_FL_CPU_EXCLUSIVE | + CPUSET_FL_SET_LOADBALANCE | + CPUSET_FL_CLONE_CHILDREN | + CPUSET_FL_TASKS, pids); + free(pids); + if (res) { + perror(res); + exit(-1); + } + + system("cat /sys/fs/cgroup/cpuset/my_cpuset/tasks"); + } + + printf("main thread %ld\n", gettid()); + + pthread_barrier_wait(&barrier); + printf("fail 2 %d\n", fail); + + if (fail) + exit(-1); + + pthread_barrier_wait(&barrier); + + signal(SIGINT, sighand); + signal(SIGTERM, sighand); + + if (!fail) + loop(sched_data, nr_threads); + + for (i = 0; i < nr_threads; i++) { + + sd = &sched_data[i]; + + res = join_thread(&thread[i]); + if (res) { + printf("Thread %d failed: %s\n", i, res); + continue; + } + } + + free(setcpu_buf); + return 0; +} diff --git a/src/sched_deadline/deadline_test.c b/src/sched_deadline/deadline_test.c new file mode 100644 index 0000000..8a813c4 --- /dev/null +++ b/src/sched_deadline/deadline_test.c @@ -0,0 +1,2097 @@ +/* + * Copyright (C) 2016 Red Hat Inc, Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * deadline_test.c
 *
 * This program is used to test the deadline scheduler (SCHED_DEADLINE tasks).
 * It is broken up into various degrees of complexity that can be set with
 * options.
 *
 * Here are the test cases:
 *
 * 1) Simplest - create one deadline task that can migrate across all CPUS.
 *    Look for "simple_test"
 *
 */
#define _GNU_SOURCE
/*
 * NOTE(review): the header names after each #include were lost when this
 * patch was rendered to text (angle-bracketed names presumably stripped as
 * markup); they likely mirror cyclicdeadline.c's include list — restore
 * before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include
#include
#include
#include
#include
#include
#include

#include
#include

/**
 * usage - show the usage of the program and exit.
 * @argv: The program passed in args
 *
 * This is defined here to show people looking at this code how
 * to use this program as well.
 */
static void usage(char **argv)
{
	char *arg = argv[0];
	char *p = arg+strlen(arg);

	while (p >= arg && *p != '/')
		p--;
	p++;

	printf("usage: %s [options]\n"
	       " -h - Show this help menu\n"
	       " -b - Bind on the last cpu. (shortcut for -c )\n"
	       " -r prio - Add an RT task with given prio to stress system\n"
	       " -c cpulist - Comma/hyphen separated list of CPUs to run deadline tasks on\n"
	       " -p percent - The percent of bandwidth to use (1-90%%)\n"
	       " -P percent - The percent of runtime for execution completion\n"
	       " (Default 100%%)\n"
	       " -t threads - The number of threads to run as deadline (default 1)\n"
	       " -s step(us) - The amount to increase the deadline for each task (default 500us)\n"
	       "\n", p);
	exit(-1);
}

/*
 * sched_setattr() and sched_getattr() are new system calls. We need to
 * hardcode it here.
 */
#if defined(__i386__)

#ifndef __NR_sched_setattr
#define __NR_sched_setattr 351
#endif
#ifndef __NR_sched_getattr
#define __NR_sched_getattr 352
#endif

#elif defined(__x86_64__)

#ifndef __NR_sched_setattr
#define __NR_sched_setattr 314
#endif
#ifndef __NR_sched_getattr
#define __NR_sched_getattr 315
#endif

#endif /* i386 or x86_64 */

#if !defined(__NR_sched_setattr)
# error "Your arch does not support sched_setattr()"
#endif

#if !defined(__NR_sched_getattr)
# error "Your arch does not support sched_getattr()"
#endif

/* If not included in the headers, define sched deadline policy numbe */
#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6
#endif

#define _STR(x) #x
#define STR(x) _STR(x)

/* Max path for cpuset path names. 1K should be enough */
#ifndef MAXPATH
#define MAXPATH 1024
#endif

/*
 * "my_cpuset" is the cpuset that will hold the SCHED_DEADLINE tasks that
 * want to limit their affinity.
 *
 * "my_cpuset_all" is the cpuset that will have the affinity of all the
 * other CPUs outside the ones for SCHED_DEADLINE threads. It will hold
 * all other tasks.
 */
#define CPUSET_ALL "my_cpuset_all"
#define CPUSET_LOCAL "my_cpuset"

/* Define the system call interfaces */
#define gettid() syscall(__NR_gettid)
#define sched_setattr(pid, attr, flags) syscall(__NR_sched_setattr, pid, attr, flags)
#define sched_getattr(pid, attr, size, flags) syscall(__NR_sched_getattr, pid, attr, size, flags)

typedef unsigned long long u64;
typedef unsigned int u32;
typedef int s32;

/**
 * struct sched_attr - get/set attr system call descriptor.
 *
 * This is the descriptor defined for setting SCHED_DEADLINE tasks.
 * It will someday be in a header file.
 *
 * The fields specific for deadline:
 *
 * @sched_policy: 6 is for deadline
 * @sched_runtime: The runtime in nanoseconds
 * @sched_deadline: The deadline in nanoseconds.
 * @sched_period: The period, if different than deadline (not used here)
 */
struct sched_attr {
	u32 size;

	u32 sched_policy;
	u64 sched_flags;

	/* SCHED_NORMAL, SCHED_BATCH */
	s32 sched_nice;

	/* SCHED_FIFO, SCHED_RR */
	u32 sched_priority;

	/* SCHED_DEADLINE */
	u64 sched_runtime;
	u64 sched_deadline;
	u64 sched_period;
};

/**
 * struct sched_data - the descriptor for the threads.
 *
 * This is the descriptor that will be passed as the thread data.
 * It is used as both input to the thread, as well as output to
 * the main program.
 *
 * @runtime_us: The runtime for sched_deadline tasks in microseconds
 * @deadline_us: The deadline for sched_deadline tasks in microseconds
 * @loops_per_period: The amount of loops to run for the runtime
 * @max_time: Recorded max time to complete loops
 * @min_time: Recorded min time to complete loops
 * @total_time: The total time of all periods to perform the loops
 * @nr_periods: The number of periods executed
 * @prime: Calculating a prime number.
 * @missed_periods: The number of periods that were missed (started late)
 * @missed_deadlines: The number of deadlines that were missed (ended late)
 * @total_adjust: The time in microseconds adjusted for starting early
 * @nr_adjust: The number of times adjusted for starting early
 * @last_time: Last runtime of loops (used to calculate runtime to give)
 * @prio: The priority for SCHED_FIFO threads (uses same descriptor)
 * @tid: Stores the thread ID of the thread.
 * @vol: The number of voluntary schedules the thread made
 * @nonvol: The number of non-voluntary schedules the thread made (preempted)
 * @migrate: The number of migrations the thread made.
+ * @buff: A string buffer to store data to write to ftrace + * + */ +struct sched_data { + u64 runtime_us; + u64 deadline_us; + + u64 loops_per_period; + + u64 max_time; + u64 min_time; + u64 total_time; + u64 nr_periods; + + u64 prime; + + int missed_periods; + int missed_deadlines; + u64 total_adjust; + u64 nr_adjust; + + u64 last_time; + + int prio; + int tid; + + int vol; + int nonvol; + int migrate; + + char buff[BUFSIZ+1]; + + /* Try to keep each sched_data out of cache lines */ + char padding[256]; +}; + +/* Barrier to synchronize the threads for initialization */ +static pthread_barrier_t barrier; + +/* cpu_count is the number of detected cpus on the running machine */ +static int cpu_count; + +/* + * cpusetp and cpuset_size is for cpumasks, in case we run on a machine with + * more than 64 CPUs. + */ +static cpu_set_t *cpusetp; +static int cpuset_size; + +/* Number of threads to create to run deadline scheduler with (default two) */ +static int nr_threads = 2; + +/** + * find_mount - Find if a file system type is already mounted + * @mount: The type of files system to find + * @debugfs: Where to place the path to the found file system. + * + * Returns 1 if found and sets @debugfs. + * Returns 0 otherwise. + */ +static int find_mount(const char *mount, char *debugfs) +{ + char type[100]; + FILE *fp; + + if ((fp = fopen("/proc/mounts","r")) == NULL) + return 0; + + while (fscanf(fp, "%*s %" + STR(MAXPATH) + "s %99s %*s %*d %*d\n", + debugfs, type) == 2) { + if (strcmp(type, mount) == 0) + break; + } + fclose(fp); + + if (strcmp(type, mount) != 0) + return 0; + return 1; +} + +/** + * find_debugfs - Search for where debugfs is found + * + * Finds where debugfs is mounted and returns the path. + * The returned string is static and should not be modified. 
+ */ +static const char *find_debugfs(void) +{ + static int debugfs_found; + static char debugfs[MAXPATH+1]; + + if (debugfs_found) + return debugfs; + + if (!find_mount("debugfs", debugfs)) + return ""; + + debugfs_found = 1; + + return debugfs; +} + +/** + * my_vsprintf - simplified vsprintf() + * @buf: The buffer to write the string to + * @size: The allocated size of @buf + * @fmmt: The format to parse + * @ap: The variable arguments + * + * Because there's no real way to prevent glibc's vsprintf from + * allocating more memory, or doing any type of system call, + * This is a simple version of the function that is under + * our control, to make sure we stay in userspace when creating + * a ftrace_write string, and only do a system call for the + * actual ftrace_write. + */ +static int my_vsprintf(char *buf, int size, const char *fmt, va_list ap) +{ + const char *p; + char tmp[100]; + char *s = buf; + char *end = buf + size; + char *str; + long long lng; + int l; + int i; + + end[-1] = 0; + + for (p = fmt; *p && s < end; p++) { + if (*p == '%') { + l = 0; + again: + p++; + switch (*p) { + case 's': + if (l) { + fprintf(stderr, "Illegal print format l used with %%s\n"); + exit(-1); + } + str = va_arg(ap, char *); + l = strlen(str); + strncpy(s, str, end - s); + s += l; + break; + case 'l': + l++; + goto again; + case 'd': + if (l == 1) { + if (sizeof(long) == 8) + l = 2; + } + if (l == 2) + lng = va_arg(ap, long long); + else if (l > 2) { + fprintf(stderr, "Illegal print format l=%d\n", l); + exit(-1); + } else + lng = va_arg(ap, int); + i = 0; + while (lng > 0) { + tmp[i++] = (lng % 10) + '0'; + lng /= 10; + } + tmp[i] = 0; + l = strlen(tmp); + if (!l) { + *s++ = '0'; + } else { + while (l) + *s++ = tmp[--l]; + } + break; + default: + fprintf(stderr, "Illegal print format '%c'\n", *p); + exit(-1); + } + continue; + } + *s++ = *p; + } + + return s - buf; +} + +/* The ftrace tracing_marker file descriptor to write to ftrace */ +static int mark_fd; + +/** + * 
ftrace_write - write a string to ftrace tracing_marker + * @buf: A BUFSIZ + 1 allocated scratch pad + * @fmt: The format of the sting to write + * @va_arg: The arguments for @fmt + * + * If mark_fd is not less than zero, format the input + * and write it out to trace_marker (where mark_fd is a file + * descriptor of). + */ +static void ftrace_write(char *buf, const char *fmt, ...) +{ + va_list ap; + int n; + + if (mark_fd < 0) + return; + + va_start(ap, fmt); + n = my_vsprintf(buf, BUFSIZ, fmt, ap); + va_end(ap); + + write(mark_fd, buf, n); +} + +/** + * setup_ftrace_marker - Check if trace_marker exists and open if it does + * + * Tests if debugfs is mounted, and if it is, it tests to see if the + * trace_marker exists. If it does, it opens trace_marker and sets + * mark_fd to the file descriptor. Then ftrace_write() will be able + * to write to the ftrace marker, otherwise ftrace_write() becomes + * a nop. + * + * Failure to open the trace_marker file will not stop this application + * from executing. Only ftrace writes will not be performed. + */ +static void setup_ftrace_marker(void) +{ + struct stat st; + const char *debugfs = find_debugfs(); + char files[strlen(debugfs) + 14]; + int ret; + + if (strlen(debugfs) == 0) + return; + + sprintf(files, "%s/tracing/trace_marker", debugfs); + ret = stat(files, &st); + if (ret >= 0) + goto found; + /* Do nothing if not mounted */ + return; +found: + mark_fd = open(files, O_WRONLY); +} + +/** + * setup_hr_tick - Enable the HRTICK in sched_features (if available) + * + * SCHED_DEADLINE tasks are based on HZ, which could be as slow as + * 100 times a second (10ms). Which is incredibly slow for scheduling. + * For SCHED_DEADLINE to have finer resolution, HRTICK feature must be + * set. That's located in the debugfs/sched_features directory. + * + * This will not mount debugfs. If debugfs is not mounted, this simply + * will fail. 
+ */ +static int setup_hr_tick(void) +{ + const char *debugfs = find_debugfs(); + char files[strlen(debugfs) + strlen("/sched_features") + 1]; + char buf[500]; + struct stat st; + static int set = 0; + char *p; + int ret; + int len; + int fd; + + if (set) + return 1; + + set = 1; + + if (strlen(debugfs) == 0) + return 0; + + sprintf(files, "%s/sched_features", debugfs); + ret = stat(files, &st); + if (ret < 0) + return 0; + + fd = open(files, O_RDWR); + if (fd < 0) { + perror(files); + return 0; + } + + len = sizeof(buf); + + ret = read(fd, buf, len); + if (ret < 0) { + perror(files); + close(fd); + return 0; + } + if (ret >= len) + ret = len - 1; + buf[ret] = 0; + + ret = 1; + + p = strstr(buf, "HRTICK"); + if (p + 3 >= buf) { + p -= 3; + if (strncmp(p, "NO_HRTICK", 9) == 0) { + ret = write(fd, "HRTICK", 6); + if (ret != 6) + ret = 0; + else + ret = 1; + } + } + + close(fd); + return ret; +} + +/** + * mounted - test if a path is mounted via the given mount type + * @path: The path to check is mounted + * @magic: The magic number of the mount type. + * + * Returns -1 if the path does not exist. + * Returns 0 if it is mounted but not of the given @magic type. + * Returns 1 if mounted and the @magic type matches. + */ +static int mounted(const char *path, long magic) +{ + struct statfs st_fs; + + if (statfs(path, &st_fs) < 0) + return -1; + if ((long)st_fs.f_type != magic) + return 0; + return 1; +} + +#define CGROUP_PATH "/sys/fs/cgroup" +#define CPUSET_PATH CGROUP_PATH "/cpuset" + +/** + * open_cpuset - open a file (usually a cpuset file) + * @path: The path of the directory the file is in + * @name: The name of the file in the path to open. + * + * Open a file, used to open cpuset files. This function simply is + * made to open many files in the same directory. + * + * Returns the file descriptor of the opened file or less than zero + * on error. 
+ */ +static int open_cpuset(const char *path, const char *name) +{ + char buf[MAXPATH]; + struct stat st; + int ret; + int fd; + + buf[MAXPATH - 1] = 0; + snprintf(buf, MAXPATH - 1, "%s/%s", path, name); + + ret = stat(buf, &st); + if (ret < 0) + return ret; + + fd = open(buf, O_WRONLY); + return fd; +} + +/** + * mount_cpuset - Inialize the cpuset system + * + * Looks to see if cgroups are mounted, if it is not, then it mounts + * the cgroup_root to /sys/fs/cgroup. Then the directory cpuset exists + * and is mounted in that directory. If it is not, it is created and + * mounted. + * + * The toplevel cpuset "cpu_exclusive" flag is set, this allows child + * cpusets to set the flag too. + * + * The toplevel cpuset "load_balance" flag is cleared, letting the + * child cpusets take over load balancing. + */ +static int mount_cpuset(void) +{ + struct stat st; + int ret; + int fd; + + /* Check if cgroups is already mounted. */ + ret = mounted(CGROUP_PATH, TMPFS_MAGIC); + if (ret < 0) + return ret; + if (!ret) { + ret = mount("cgroup_root", CGROUP_PATH, "tmpfs", 0, NULL); + if (ret < 0) + return ret; + } + ret = stat(CPUSET_PATH, &st); + if (ret < 0) { + ret = mkdir(CPUSET_PATH, 0755); + if (ret < 0) + return ret; + } + ret = mounted(CPUSET_PATH, CGROUP_SUPER_MAGIC); + if (ret < 0) + return ret; + if (!ret) { + ret = mount("cpuset", CPUSET_PATH, "cgroup", 0, "cpuset"); + if (ret < 0) + return ret; + } + + fd = open_cpuset(CPUSET_PATH, "cpuset.cpu_exclusive"); + if (fd < 0) + return fd; + ret = write(fd, "1", 2); + close(fd); + + fd = open_cpuset(CPUSET_PATH, "cpuset.sched_load_balance"); + if (fd < 0) + return fd; + ret = write(fd, "0", 2); + close(fd); + + return 0; +} + +/* + * CPUSET flags: used for creating cpusets + * + * CPU_EXCLUSIVE - Set the cpu exclusive flag + * MEM_EXCLUSIVE - Set the mem exclusive flag + * ALL_TASKS - Move all tasks from the toplevel cpuset to this one + * TASKS - Supply a list of thread IDs to move to this cpuset + * CLEAR_LOADBALANCE - 
clear the loadbalance flag + * SET_LOADBALANCE - set the loadbalance flag + * CLONE_CHILDREN - set the clone_children flag + */ +enum { + CPUSET_FL_CPU_EXCLUSIVE = (1 << 0), + CPUSET_FL_MEM_EXCLUSIVE = (1 << 1), + CPUSET_FL_ALL_TASKS = (1 << 2), + CPUSET_FL_TASKS = (1 << 3), + CPUSET_FL_CLEAR_LOADBALANCE = (1 << 4), + CPUSET_FL_SET_LOADBALANCE = (1 << 5), + CPUSET_FL_CLONE_CHILDREN = (1 << 6), +}; + +/** + * make_cpuset - create a cpuset + * @name: The name of the cpuset + * @cpus: A string list of cpus this set is for e.g. "1,3,4-7" + * @mems: The memory nodes to use (usually just "0") (set to NULL to ignore) + * @flags: See the CPUSET_FL_* flags above for information + * @va_args: An array of tasks to move if TASKS flag is set. + * + * Creates a cpuset. + * + * If TASKS is set, then @va_args will be an array of PIDs to move from + * the main cpuset, to this cpuset. The last element of the array must + * be a zero, to stop the processing of arrays. + * + * Returns NULL on success, and a string to describe what went wrong on error. + */ +static const char *make_cpuset(const char *name, const char *cpus, + const char *mems, unsigned flags, ...) 
+{ + struct stat st; + char path[MAXPATH]; + char buf[100]; + va_list ap; + int ret; + int fd; + + printf("Creating cpuset '%s'\n", name); + snprintf(path, MAXPATH - 1, "%s/%s", CPUSET_PATH, name); + path[MAXPATH - 1] = 0; + + ret = mount_cpuset(); + if (ret < 0) + return "mount_cpuset"; + + /* Only create the new cpuset directory if it does not yet exist */ + ret = stat(path, &st); + if (ret < 0) { + ret = mkdir(path, 0755); + if (ret < 0) + return "mkdir"; + } + + /* Assign the CPUs */ + fd = open_cpuset(path, "cpuset.cpus"); + if (fd < 0) + return "cset"; + ret = write(fd, cpus, strlen(cpus)); + close(fd); + if (ret < 0) + return "write cpus"; + + /* Assign the "mems" if it exists */ + if (mems) { + fd = open_cpuset(path, "cpuset.mems"); + if (fd < 0) + return "open mems"; + ret = write(fd, mems, strlen(mems)); + close(fd); + if (ret < 0) + return "write mems"; + } + + if (flags & CPUSET_FL_CPU_EXCLUSIVE) { + fd = open_cpuset(path, "cpuset.cpu_exclusive"); + if (fd < 0) + return "open cpu_exclusive"; + ret = write(fd, "1", 2); + close(fd); + if (ret < 0) + return "write cpu_exclusive"; + } + + if (flags & (CPUSET_FL_CLEAR_LOADBALANCE | CPUSET_FL_SET_LOADBALANCE)) { + fd = open_cpuset(path, "cpuset.sched_load_balance"); + if (fd < 0) + return "open sched_load_balance"; + if (flags & CPUSET_FL_SET_LOADBALANCE) + ret = write(fd, "1", 2); + else + ret = write(fd, "0", 2); + close(fd); + if (ret < 0) + return "write sched_load_balance"; + } + + if (flags & CPUSET_FL_CLONE_CHILDREN) { + fd = open_cpuset(path, "cgroup.clone_children"); + if (fd < 0) + return "open clone_children"; + ret = write(fd, "1", 2); + close(fd); + if (ret < 0) + return "write clone_children"; + } + + + /* If TASKS flag is set, then an array of tasks is passed it */ + if (flags & CPUSET_FL_TASKS) { + int *pids; + int i; + + va_start(ap, flags); + + fd = open_cpuset(path, "tasks"); + if (fd < 0) + return "open tasks"; + + ret = 0; + pids = va_arg(ap, int *); + + /* The array ends with pids[i] == 
0 */ + for (i = 0; pids[i]; i++) { + sprintf(buf, "%d ", pids[i]); + ret = write(fd, buf, strlen(buf)); + if (ret < 0) + break; + } + va_end(ap); + close(fd); + if (ret < 0) { + fprintf(stderr, "Failed on task %d\n", pids[i]); + return "write tasks"; + } + } + + /* If ALL_TASKS flag is set, move all tasks from the top level cpuset */ + if (flags & CPUSET_FL_ALL_TASKS) { + FILE *fp; + int pid; + + fd = open_cpuset(path, "tasks"); + + snprintf(path, MAXPATH - 1, "%s/tasks", CPUSET_PATH); + if ((fp = fopen(path,"r")) == NULL) { + close (fd); + return "opening cpuset tasks"; + } + + while (fscanf(fp, "%d", &pid) == 1) { + sprintf(buf, "%d", pid); + ret = write(fd, buf, strlen(buf)); + /* + * Tasks can come and go, and some tasks are kernel + * threads that cannot be moved. The only error we care + * about is ENOSPC, as that means something went + * wrong that we did not expect. + */ + if (ret < 0 && errno == ENOSPC) { + fclose(fp); + close(fd); + return "Can not move tasks"; + } + } + fclose(fp); + close(fd); + } + + return NULL; +} + +/** + * destroy_cpuset - tear down a cpuset that was created + * @name: The name of the cpuset to destroy + * @print: If the tasks being moved should be displayed + * + * Reads the tasks in the cpuset and moves them to the top level cpuset + * then destroys the @name cpuset. + */ +static void destroy_cpuset(const char *name, int print) +{ + struct stat st; + char path[MAXPATH]; + char buf[100]; + FILE *fp; + int pid; + int ret; + int fd; + int retry = 0; + + printf("Removing %s\n", name); + + /* Set path to the cpuset name that we will destroy */ + snprintf(path, MAXPATH - 1, "%s/%s", CPUSET_PATH, name); + path[MAXPATH - 1] = 0; + + /* Make sure it exists! */ + ret = stat(path, &st); + if (ret < 0) + return; + + again: + /* + * Append "/tasks" to the cpuset name, to read the tasks that are + * in this cpuset, that must be moved before destroying the cpuset. 
+ */ + strncat(path, "/tasks", MAXPATH - 1); + if ((fp = fopen(path,"r")) == NULL) { + fprintf(stderr, "Failed opening %s\n", path); + perror("fopen"); + return; + } + /* Set path to the toplevel cpuset tasks file */ + snprintf(path, MAXPATH - 1, "%s/tasks", CPUSET_PATH); + path[MAXPATH - 1] = 0; + + fd = open(path, O_WRONLY); + if (fd < 0) { + fclose(fp); + fprintf(stderr, "Failed opening %s\n", path); + perror("open"); + return; + } + + /* + * Now fp points to the destroying cpuset tasks file, and + * fd is the toplevel cpuset file descriptor. Scan in the + * tasks that are in the cpuset that is being destroyed and + * write their pids into the toplevel cpuset. + */ + while (fscanf(fp, "%d", &pid) == 1) { + sprintf(buf, "%d", pid); + if (print) + printf("Moving %d out of %s\n", pid, name); + write(fd, buf, strlen(buf)); + } + fclose(fp); + close(fd); + + /* Reset the path name back to the cpuset to destroy */ + snprintf(path, MAXPATH - 1, "%s/%s", CPUSET_PATH, name); + path[MAXPATH - 1] = 0; + + /* Sleep a bit to let all tasks migrate out of this cpuset. */ + sleep(1); + + ret = rmdir(path); + if (ret < 0) { + /* + * Sometimes there appears to be a delay, and tasks don't + * always move when you expect them to. Try 5 times, and + * give up after that. + */ + if (retry++ < 5) + goto again; + fprintf(stderr, "Failed to remove %s\n", path); + perror("rmdir"); + if (retry++ < 5) { + fprintf(stderr, "Trying again\n"); + goto again; + } + } +} + +/** + * teardown - Called atexit() to reset the system back to normal + * + * If cpusets were created, this destroys them and puts all tasks + * back to the main cgroup. 
+ */ +static void teardown(void) +{ + int fd; + + fd = open_cpuset(CPUSET_PATH, "cpuset.cpu_exclusive"); + if (fd >= 0) { + write(fd, "0", 2); + close(fd); + } + + fd = open_cpuset(CPUSET_PATH, "cpuset.sched_load_balance"); + if (fd >= 0) { + write(fd, "1", 2); + close(fd); + } + + destroy_cpuset(CPUSET_ALL, 0); + destroy_cpuset(CPUSET_LOCAL, 1); +} + +/** + * bind_cpu - Set the affinity of a thread to a specific CPU. + * @cpu: The CPU to bind to. + * + * Sets the current thread to have an affinity of a sigle CPU. + * Does not work on SCHED_DEADLINE tasks. + */ +static void bind_cpu(int cpu) +{ + int ret; + + CPU_ZERO_S(cpuset_size, cpusetp); + CPU_SET_S(cpu, cpuset_size, cpusetp); + + ret = sched_setaffinity(0, cpuset_size, cpusetp); + if (ret < 0) + perror("sched_setaffinity bind"); +} + +/** + * unbind_cpu - Set the affinity of a task to all CPUs + * + * Sets the current thread to have an affinity for all CPUs. + * Does not work on SCHED_DEADLINE tasks. + */ +static void unbind_cpu(void) +{ + int cpu; + int ret; + + for (cpu = 0; cpu < cpu_count; cpu++) + CPU_SET_S(cpu, cpuset_size, cpusetp); + + ret = sched_setaffinity(0, cpuset_size, cpusetp); + if (ret < 0) + perror("sched_setaffinity unbind"); +} + +/* + * Used by set_prio, but can be used for any task not just current. + */ +static int set_thread_prio(pid_t pid, int prio) +{ + struct sched_param sp = { .sched_priority = prio }; + int policy = SCHED_FIFO; + + if (!prio) + policy = SCHED_OTHER; + + /* set up our priority */ + return sched_setscheduler(pid, policy, &sp); +} + +/** + * set_prio - Set the SCHED_FIFO priority of a thread + * @prio: The priority to set a thread to + * + * Converts a SCHED_OTHER task into a SCHED_FIFO task and sets + * its priority to @prio. If @prio is zero, then it converts + * a SCHED_FIFO task back to a SCHED_OTHER task. + * + * Returns 0 on success, otherwise it failed. 
+ */ +static int set_prio(int prio) +{ + return set_thread_prio(0, prio); +} + +/* done - set when the test is complete to have all threads stop */ +static int done; + +/* fail - set during setup if any thread fails to initialize. */ +static int fail; + +/** + * get_time_us - Git the current clock time in microseconds + * + * Returns the current clock time in microseconds. + */ +static u64 get_time_us(void) +{ + struct timespec ts; + u64 time; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + time = ts.tv_sec * 1000000; + time += ts.tv_nsec / 1000; + + return time; +} + +/** + * run_loops - execute a number of loops to perform + * @loops: The number of loops to execute. + * + * Calculates prime numbers, because what else should we do? + */ +static u64 run_loops(struct sched_data *data, u64 loops) +{ + u64 start = get_time_us(); + u64 end; + u64 i; + u64 prime; + u64 cnt = 2; + u64 result; + + prime = data->prime; + + for (i = 0; i < loops; i++) { + if (cnt > prime / 2) { + data->prime = prime; + prime++; + cnt = 2; + } + result = prime / cnt; + if (result * cnt == prime) { + prime++; + cnt = 2; + } else + cnt++; + } + + /* Memory barrier */ + asm("":::"memory"); + + end = get_time_us(); + return end - start; +} + +/* Helper function for read_ctx_switchs */ +static int get_value(const char *line) +{ + const char *p; + + for (p = line; isspace(*p); p++) + ; + if (*p != ':') + return -1; + p++; + for (; isspace(*p); p++) + ; + return atoi(p); +} + +/* Helper function for read_ctx_switchs */ +static int update_value(const char *line, int *val, const char *name) +{ + int ret; + + if (strncmp(line, name, strlen(name)) == 0) { + ret = get_value(line + strlen(name)); + if (ret < 0) + return 0; + *val = ret; + return 1; + } + return 0; +} + +/** + * read_ctx_switches - read the scheduling information of a task + * @vol: Output to place number of voluntary schedules + * @nonvol: Output to place number of non-voluntary schedules (preemption) + * @migrate: Output to place the 
number of times the task migrated + * + * Reads /proc//sched to get the statistics of the thread. + * + * For info only. + */ +static int read_ctx_switches(int *vol, int *nonvol, int *migrate) +{ + static int vol_once, nonvol_once; + const char *vol_name = "nr_voluntary_switches"; + const char *nonvol_name = "nr_involuntary_switches"; + const char *migrate_name = "se.nr_migrations"; + char file[1024]; + char buf[1024]; + char *pbuf; + int pid; + size_t *pn; + size_t n; + FILE *fp; + int r; + + pid = gettid(); + snprintf(file, 1024, "/proc/%d/sched", pid); + fp = fopen(file, "r"); + if (!fp) { + snprintf(file, 1024, "/proc/%d/status", pid); + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "could not open %s", file); + return -1; + } + vol_name = "voluntary_ctxt_switches"; + nonvol_name = "nonvoluntary_ctxt_switches"; + } + + *vol = *nonvol = *migrate = -1; + + n = 1024; + pn = &n; + pbuf = buf; + + while ((r = getline(&pbuf, pn, fp)) >= 0) { + + if (update_value(buf, vol, vol_name)) + continue; + + if (update_value(buf, nonvol, nonvol_name)) + continue; + + if (update_value(buf, migrate, migrate_name)) + continue; + } + fclose(fp); + + if (!vol_once && *vol == -1) { + vol_once++; + fprintf(stderr, "Warning, could not find voluntary ctx switch count\n"); + } + if (!nonvol_once && *nonvol == -1) { + nonvol_once++; + fprintf(stderr, "Warning, could not find nonvoluntary ctx switch count\n"); + } + + return 0; +} + +/** + * do_runtime - Run a loop to simulate a specific task + * @tid: The thread ID + * @data: The sched_data descriptor + * @period: The time of the last period. + * + * Returns the expected next period. + * + * This simulates some task that needs to be completed within the deadline. 
 *
 * Input:
 * @data->deadline_us - to calculate next period
 * @data->loops_per_period - to loop this amount of time
 *
 * Output:
 * @data->total_adjust - Time adjusted for starting a period early
 * @data->nr_adjusted - Number of times adjusted
 * @data->missed_deadlines - Counter of missed deadlines
 * @data->missed_periods - Counter of missed periods (started late)
 * @data->max_time - Maximum time it took to complete the loops
 * @data->min_time - Minimum time it took to complete the loops
 * @data->last_time - How much time it took to complete loops this time
 * @data->total_time - Total time it took to complete all loops
 * @data->nr_periods - Number of periods that were executed.
 */
static u64 do_runtime(long tid, struct sched_data *data, u64 period)
{
	/* All times below are in microseconds (see get_time_us()). */
	u64 next_period = period + data->deadline_us;
	u64 now = get_time_us();
	u64 end;
	u64 diff;
	u64 time;

	/*
	 * next_period is our new deadline. If now is passed that point
	 * we missed a period.
	 */
	if (now > next_period) {
		ftrace_write(data->buff,
			     "Missed a period start: %lld next: %lld now: %lld\n",
			     period, next_period, now);
		/* See how many periods were missed. */
		while (next_period < now) {
			next_period += data->deadline_us;
			data->missed_periods++;
		}
	} else if (now < period) {
		u64 delta = period - now;
		/*
		 * Currently, there's no way to find when the period actually
		 * does begin. If the first runtime starts late, due to another
		 * deadline task with a shorter deadline running, then it is
		 * possible that the next period comes in quicker than we
		 * expect it to.
		 *
		 * Adjust the period to start at now, and record the shift.
		 */
		ftrace_write(data->buff,
			     "Adjusting period: now: %lld period: %lld delta:%lld%s\n",
			     now, period, delta, delta > data->deadline_us / 2 ?
			     " HUGE ADJUSTMENT" : "");
		data->total_adjust += delta;
		data->nr_adjust++;
		period = now;
		next_period = period + data->deadline_us;
	}

	ftrace_write(data->buff, "start at %lld off=%lld (period=%lld next=%lld)\n",
		     now, now - period, period, next_period);

	/* Run the simulate task (loops) */
	time = run_loops(data, data->loops_per_period);

	end = get_time_us();

	/* Did we make our deadline? */
	if (end > next_period) {
		ftrace_write(data->buff,
			     "Failed runtime by %lld\n", end - next_period);
		data->missed_deadlines++;
		/*
		 * We missed our deadline, which means we entered the
		 * next period. Move it forward one, if we moved it too
		 * much, then the next interation will adjust.
		 */
		next_period += data->deadline_us;
	}


	/* diff is wall time for this pass; time is run_loops()'s own measure */
	diff = end - now;
	if (diff > data->max_time)
		data->max_time = diff;
	if (!data->min_time || diff < data->min_time)
		data->min_time = diff;

	data->last_time = time;
	data->total_time += diff;
	data->nr_periods++;
	ftrace_write(data->buff,
		     "end at %lld diff: %lld run loops: %lld us\n", end, diff, time);

	return next_period;
}

/**
 * run_deadline - Run deadline thread
 * @data: sched_data descriptor
 *
 * This is called by pthread_create() and executes the sched deadline
 * task. @data has the following:
 *
 * Input:
 * @data->runtime_us: The amount of requested runtime in microseconds
 * @data->deadline_us: The requested deadline in microseconds
 * @data->loops_per_period: The number of loops to make during its runtime
 *
 * Output:
 * @data->tid: The thread ID
 * @data->vol: The number of times the thread voluntarily scheduled out
 * @data->nonvol: The number of times the thread non-voluntarily scheduled out
 * @data->migrate: The number of times the thread migrated across CPUs.
 */
void *run_deadline(void *data)
{
	struct sched_data *sched_data = data;
	struct sched_attr attr;
	int vol, nonvol, migrate;
	long tid = gettid();
	void *heap;
	u64 period;
	int ret;

	/*
	 * The internal glibc vsnprintf() used by ftrace_write()
	 * may alloc more space to do conversions. Alloc a bunch of
	 * memory and free it, and hopefully glibc doesn't return that
	 * back to the system (we did do an mlockall after all).
	 */
	heap = malloc(1000000);
	if (!heap) {
		perror("malloc");
		fail = 1;
		pthread_barrier_wait(&barrier);
		pthread_exit("Failed to alloc heap");
		return NULL;
	}
	free(heap);

	printf("deadline thread %ld\n", tid);

	sched_data->tid = tid;
	sched_data->prime = 2;

	/* Fetch the current attributes to use as a template for setattr */
	ret = sched_getattr(0, &attr, sizeof(attr), 0);
	if (ret < 0) {
		fprintf(stderr, "[%ld]", tid);
		perror("sched_getattr");
		fail = 1;
		pthread_barrier_wait(&barrier);
		pthread_exit("Failed sched_getattr");
		return NULL;
	}

	/* Sync with main and the other threads after setup */
	pthread_barrier_wait(&barrier);

	if (fail)
		return NULL;

	attr.sched_policy = SCHED_DEADLINE;
	/* runtime/deadline are given to the kernel in nanoseconds */
	attr.sched_runtime = sched_data->runtime_us * 1000;
	attr.sched_deadline = sched_data->deadline_us * 1000;

	printf("thread[%ld] runtime=%lldus deadline=%lldus loops=%lld\n",
	       gettid(), sched_data->runtime_us,
	       sched_data->deadline_us, sched_data->loops_per_period);

	pthread_barrier_wait(&barrier);

	ret = sched_setattr(0, &attr, 0);
	if (ret < 0) {
		/*
		 * NOTE(review): "done = 0" on the failure path looks
		 * suspicious -- setting done to 1 would stop the other
		 * threads. Verify the intent before changing it.
		 */
		done = 0;
		fprintf(stderr, "[%ld]", tid);
		perror("sched_setattr");
		fail = 1;
		pthread_barrier_wait(&barrier);
		pthread_exit("Failed sched_setattr");
		return NULL;
	}

	pthread_barrier_wait(&barrier);

	if (fail)
		return NULL;

	sched_yield();
	period = get_time_us();

	/* One do_runtime() pass per period until main flags completion */
	while (!done) {
		period = do_runtime(tid, sched_data, period);
		sched_yield();
	}
	ret = sched_getattr(0, &attr, sizeof(attr), 0);
	if (ret < 0) {
		perror("sched_getattr");
		pthread_exit("Failed second sched_getattr");
	}

	read_ctx_switches(&vol, &nonvol, &migrate);

	sched_data->vol = vol;
	sched_data->nonvol = nonvol;
	sched_data->migrate = migrate;

	return NULL;
}

/**
 * run_rt_spin - the Real-Time task spinner
 * @data: The sched_data descriptor
 *
 * This function is called as a thread function. It will read @data->prio
 * and set its priority base on that parameter. It sets @data->tid to the
 * thread ID. Then after waiting through pthread barriers to sync with
 * the main thread as well as with sched deadline threads, it will
 * run in a tight loop until the global variable "done" is set.
 */
void *run_rt_spin(void *data)
{
	struct sched_data *sched_data = data;
	long tid = gettid();

	sched_data->tid = tid;

	if (set_prio(sched_data->prio) < 0) {
		fail = 1;
		pthread_barrier_wait(&barrier);
		pthread_exit("Failed setting prio");
		return NULL;
	}

	/* Three barriers to stay in lock step with run_deadline() */
	pthread_barrier_wait(&barrier);

	if (fail)
		return NULL;

	pthread_barrier_wait(&barrier);

	if (fail)
		return NULL;

	pthread_barrier_wait(&barrier);

	if (fail)
		return NULL;

	while (!done) {
		get_time_us();
	}

	return NULL;
}

/* A sorted linked list of inclusive [start_cpu, end_cpu] ranges */
struct cpu_list {
	struct cpu_list *next;
	int start_cpu;
	int end_cpu;
};

/**
 * add_cpus - Add cpus to cpu_list based on the passed in range
 * @cpu_list: The cpu list to add to
 * @start_cpu: The start of the range to add
 * @end_cpu: The end of the range to add.
 *
 * Adds a sorted unique item into @cpu_list based on @start_cpu and @end_cpu.
 * It removes duplicates in @cpu_list, and will even merge lists if a
 * new range is entered that will fill a gap. That is, if @cpu_list has
 * "1-3" and "6-7", and @start_cpu is 4 and @end_cpu is 5, it will combined
 * the two elements into a single list item of "1-7".
+ */ +static void add_cpus(struct cpu_list **cpu_list, int start_cpu, int end_cpu) +{ + struct cpu_list *list; + + while (*cpu_list && (*cpu_list)->end_cpu + 1 < start_cpu) + cpu_list = &(*cpu_list)->next; + + if (!*cpu_list) { + *cpu_list = malloc(sizeof(struct cpu_list)); + (*cpu_list)->start_cpu = start_cpu; + (*cpu_list)->end_cpu = end_cpu; + (*cpu_list)->next = NULL; + return; + } + + /* Look to concatinate */ + if (end_cpu > (*cpu_list)->start_cpu && + start_cpu <= (*cpu_list)->end_cpu + 1) { + if (start_cpu < (*cpu_list)->start_cpu) + (*cpu_list)->start_cpu = start_cpu; + list = (*cpu_list)->next; + while (list && list->start_cpu <= end_cpu + 1) { + (*cpu_list)->end_cpu = list->end_cpu; + (*cpu_list)->next = list->next; + free(list); + list = (*cpu_list)->next; + } + if ((*cpu_list)->end_cpu < end_cpu) + (*cpu_list)->end_cpu = end_cpu; + return; + } + + /* Check for overlaps */ + if (end_cpu >= (*cpu_list)->start_cpu - 1) { + (*cpu_list)->start_cpu = start_cpu; + return; + } + + list = malloc(sizeof(struct cpu_list)); + list->start_cpu = start_cpu; + list->end_cpu = end_cpu; + list->next = (*cpu_list)->next; + (*cpu_list)->next = list; +} + +/** + * count_cpus - Return the number of CPUs in a list + * @cpu_list: The list of CPUs to count + * + * Reads the list of CPUs in @cpu_list. It als will free the + * list as it reads it, so this can only be called once on @cpu_list. + * It also checks if the CPUs in @cpu_list are less than cpu_count + * (the number of discovered CPUs). + * + * Returns the number of CPUs in @cpu_list, or -1 if any CPU in + * @cpu_list is greater or equal to cpu_count. + */ +static int count_cpus(struct cpu_list *cpu_list) +{ + struct cpu_list *list; + int cpus = 0; + int fail = 0; + + while (cpu_list) { + list = cpu_list; + cpus += (list->end_cpu - list->start_cpu) + 1; + if (list->end_cpu >= cpu_count) + fail = 1; + cpu_list = list->next; + free(list); + } + return fail ? 
-1 : cpus; +} + +/** + * append_cpus - Append a set of consecutive cpus to a string + * @buf: The string to append to + * @start: The cpu to start at. + * @end: The cpu to end at. + * @comma: The "," or "" to append before the cpu list. + * @total: The total length of buf. + * + * Realloc @buf to include @comma@start-@end. + * Updates @total to the new length of @buf. + */ +static char *append_cpus(char *buf, int start, int end, + const char *comma, int *total) +{ + int len; + + if (start == end) { + len = snprintf(NULL, 0, "%s%d", comma, start); + buf = realloc(buf, *total + len + 1); + buf[*total] = 0; + snprintf(buf + *total, len + 1, "%s%d", comma, start); + } else { + len = snprintf(NULL, 0, "%s%d-%d", comma, start, end); + buf = realloc(buf, *total + len + 1); + buf[*total] = 0; + snprintf(buf + *total, len + 1, "%s%d-%d", comma, + start, end); + } + *total += len; + return buf; +} + +/** + * make_new_list - convert cpu_list into a string + * @cpu_list: The list of CPUs to include + * @buf: The pointer to the allocated string to return + * + * Reads @cpu_list which contains a link list of consecutive + * CPUs, and returns the combined list in @buf. + * If cpu_list has "1", "3" and "6-8", buf would return + * "1,3,6-8" + */ +static void make_new_list(struct cpu_list *cpu_list, char **buf) +{ + char *comma = ""; + int total = 0; + + while (cpu_list) { + *buf = append_cpus(*buf, cpu_list->start_cpu, cpu_list->end_cpu, + comma, &total); + comma = ","; + cpu_list = cpu_list->next; + } +} + +/** + * make_other_cpu_list - parse cpu list and return all other CPUs + * @setcpu: string listing the CPUs to exclude + * @cpus: The buffer to return the list of CPUs not in setcpu. + * + * @setcpu is expected to be compressed by calc_nr_cpus(). + * + * Reads @setcpu and uses cpu_count (number of all CPUs), to return + * a list of CPUs not included in @setcpu. For example, if + * @setcpu is "1-5" and cpu_count is 8, then @cpus would contain + * "0,6-7". 
+ */ +static void make_other_cpu_list(const char *setcpu, char **cpus) +{ + const char *p = setcpu; + const char *comma = ""; + int curr_cpu = 0; + int cpu; + int total = 0; + + while (*p && curr_cpu < cpu_count) { + cpu = atoi(p); + if (cpu > curr_cpu) { + *cpus = append_cpus(*cpus, curr_cpu, cpu - 1, + comma, &total); + comma = ","; + } + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + cpu = atoi(p); + while (isdigit(*p)) + p++; + } + curr_cpu = cpu + 1; + if (*p) + p++; + } + + if (curr_cpu < cpu_count) { + *cpus = append_cpus(*cpus, curr_cpu, cpu_count - 1, + comma, &total); + } +} + +/** + * calc_nr_cpus - parse cpu list for list of cpus. + * @setcpu: string listing the CPUs to include + * @buf: The buffer to return as a compressed list. + * + * Reads @setcpu and removes duplicates, it also sets @buf to be + * a consolidated list. For example, if @setcpu is "1,2,4,3-5" + * @buf would become "1-5" and 5 would be returned. + * + * Returns the number of cpus listed in @setcpu. + */ +static int calc_nr_cpus(const char *setcpu, char **buf) +{ + struct cpu_list *cpu_list = NULL; + const char *p; + int end_cpu; + int cpu; + + for (p = setcpu; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0')) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0')) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + add_cpus(&cpu_list, cpu, end_cpu); + if (*p == ',') + p++; + } + + make_new_list(cpu_list, buf); + return count_cpus(cpu_list); + err: + /* Frees the list */ + count_cpus(cpu_list); + return -1; +} + +static const char *join_thread(pthread_t *thread) +{ + void *result; + + pthread_join(*thread, &result); + return result; +} + +static void do_sleep(u64 next) +{ + struct timespec req; + + req.tv_nsec = next * 1000; + req.tv_sec = 0; + while (req.tv_nsec > 1000000000UL) { + req.tv_nsec -= 1000000000UL; + req.tv_sec++; + } + nanosleep(&req, NULL); +} + +/** + 
* calculate_loops_per_ms - calculate the number of loops per ms + * @overhead: Return the overhead of the call around run_loops() + * + * Runs the do_runtime() to see how long it takes. That returns + * how long the loops took (data->last_time), and the overhead can + * be calculated by that diff. + * + * Returns the length of time it took to run for 1000 loops. + */ +static u64 calculate_loops_per_ms(u64 *overhead) +{ + struct sched_data sd = { }; + u64 test_loops = 100000; + u64 loops; + u64 diff; + u64 odiff; + u64 start; + u64 end; + + sd.prime = 2; + + /* Sleep 1ms to help flush a bit of cache */ + do_sleep(1000); + + start = run_loops(&sd, test_loops); + + sd.deadline_us = start * 2; + sd.runtime_us = start; + sd.loops_per_period = test_loops; + + /* Again try to dirty some cache */ + do_sleep(1000); + + start = get_time_us(); + do_runtime(0, &sd, start + sd.deadline_us); + end = get_time_us(); + + diff = end - start; + + /* + * Based on the time it took to run test_loops, figure + * out how many loops it may take to run for 1000us. 
+ * + * last_time / test_loops = 1000us / loops + * or + * loops = test_loops * 1000us / last_time + */ + + loops = 1000ULL * test_loops / sd.last_time; + + printf("%lld test loops took %lldus total (%lld internal)\n" + "calculated loops for 1000us=%lld\n", + test_loops, diff, sd.last_time, loops); + + sd.deadline_us = 2000; + sd.runtime_us = 1000; + sd.loops_per_period = loops; + + test_loops = loops; + + do_sleep(1000); + + start = get_time_us(); + do_runtime(0, &sd, start + sd.deadline_us); + end = get_time_us(); + + odiff = end - start; + + /* + * Use this new calcualtion to recalculate the number of loops + * for 1000us + */ + loops = 1000ULL * loops / sd.last_time; + + *overhead = odiff - sd.last_time; + + printf("%lld test loops took %lldus total (%lld internal)\n" + "New calculated loops for 1000us=%lld\n" + "Diff from last calculation: %lld loops\n", + test_loops, odiff, sd.last_time, loops, loops - test_loops); + + return loops; +} + +int main (int argc, char **argv) +{ + struct sched_data *sched_data; + struct sched_data *sd; + struct sched_data rt_sched_data; + const char *res; + const char *setcpu = NULL; + char *setcpu_buf = NULL; + char *allcpu_buf = NULL; + pthread_t *thread; + pthread_t rt_thread; + unsigned int interval = 1000; + unsigned int step = 500; + u64 loop_time; + u64 loops; + u64 runtime; + u64 overhead; + u64 start_period; + u64 end_period; + int nr_cpus; + int all_cpus = 1; + int run_percent = 100; + int percent = 80; + int rt_task = 0; + int i; + int c; + + cpu_count = sysconf(_SC_NPROCESSORS_CONF); + if (cpu_count < 1) { + fprintf(stderr, "Can not calculate number of CPUS\n"); + exit(-1); + } + + while ((c = getopt(argc, argv, "+hbr:c:p:P:t:s:")) >= 0) { + switch (c) { + case 'b': + all_cpus = 0; + break; + case 'c': + all_cpus = 0; + setcpu = optarg; + break; + case 'i': + interval = atoi(optarg); + break; + case 'p': + percent = atoi(optarg); + break; + case 'P': + run_percent = atoi(optarg); + break; + case 's': + step = 
atoi(optarg); + break; + case 't': + nr_threads = atoi(optarg); + break; + case 'r': + rt_task = atoi(optarg); + break; + case 'h': + default: + usage(argv); + } + } + + if (rt_task < 0 || rt_task > 98) { + fprintf(stderr, "RT task can only be from 1 to 98\n"); + exit(-1); + } + + if (percent < 1 || percent > 100 || run_percent < 1 || run_percent > 100) { + fprintf(stderr, "Percent must be between 1 and 100\n"); + exit(-1); + } + + if (setcpu) { + nr_cpus = calc_nr_cpus(setcpu, &setcpu_buf); + if (nr_cpus < 0) { + fprintf(stderr, "Invalid cpu input '%s'\n", setcpu); + exit(-1); + } + } else + nr_cpus = 1; + + if (all_cpus) + nr_cpus = cpu_count; + + if (cpu_count == nr_cpus) + all_cpus = 1; + + /* -b has us bind to the last CPU. */ + if (!all_cpus && !setcpu) { + setcpu_buf = malloc(10); + if (!setcpu_buf) { + perror("malloc"); + exit(-1); + } + sprintf(setcpu_buf, "%d", cpu_count - 1); + setcpu = setcpu_buf; + } + + /* + * Now the amount of bandwidth each tasks takes will be + * percent * nr_cpus / nr_threads. Now if nr_threads is + * But the amount of any one thread can not be more than + * 90 of the CPUs. + */ + percent = (percent * nr_cpus) / nr_threads; + if (percent > 90) + percent = 90; + + cpusetp = CPU_ALLOC(cpu_count); + cpuset_size = CPU_ALLOC_SIZE(cpu_count); + if (!cpusetp) { + perror("allocating cpuset"); + exit(-1); + } + + setup_ftrace_marker(); + + thread = calloc(nr_threads, sizeof(*thread)); + sched_data = calloc(nr_threads, sizeof(*sched_data)); + if (!thread || !sched_data) { + perror("allocating threads"); + exit(-1); + } + + if (mlockall(MCL_CURRENT|MCL_FUTURE) == -1) { + perror("mlockall"); + } + + /* + * Run at prio 99 bound to the last CPU, and try to calculate + * the time it takes to run the loops. 
+ */ + set_prio(99); + bind_cpu(cpu_count - 1); + + loops = calculate_loops_per_ms(&overhead); + + printf("Setup:\n"); + printf(" percent per task:%d", percent); + if (run_percent < 100) + printf(" run-percent:%d", run_percent); + printf(" nr_cpus:%d", nr_cpus); + if (setcpu) + printf(" (%s)", setcpu); + printf(" loops:%lld overhead:%lldus\n", loops, overhead); + + again: + /* Set up the data while sill in SCHED_FIFO */ + for (i = 0; i < nr_threads; i++) { + sd = &sched_data[i]; + /* + * Interval is the deadline/period + * The runtime is the percentage of that period. + */ + runtime = interval * percent / 100; + if (runtime < overhead) { + fprintf(stderr, "Run time too short: %lld us\n", + runtime); + fprintf(stderr, "Read context takes %lld us\n", + overhead); + exit(-1); + } + if (runtime < 2000) { + /* + * If the runtime is less than 2ms, then we better + * have HRTICK enabled. + */ + if (!setup_hr_tick()) { + fprintf(stderr, "For less that 2ms run times, you need to\n" + "have HRTICK enabled in debugfs/sched_features\n"); + exit(-1); + } + } + sd->runtime_us = runtime; + /* Account for the reading of context switches */ + runtime -= overhead; + /* + * loops is # of loops per ms, convert to us and + * take 5% off of it. 
+ * loops * %run_percent / 1000 + */ + loop_time = runtime * run_percent / 100; + sd->loops_per_period = loop_time * loops / 1000; + + sd->deadline_us = interval; + + /* Make sure that we can make our deadlines */ + start_period = get_time_us(); + do_runtime(gettid(), sd, start_period); + end_period = get_time_us(); + if (end_period - start_period > sd->runtime_us) { + printf("Failed to perform task within runtime: Missed by %lld us\n", + end_period - start_period - sd->runtime_us); + overhead += end_period - start_period - sd->runtime_us; + printf("New overhead=%lldus\n", overhead); + goto again; + } + + printf(" Tested at %lldus of %lldus\n", + end_period - start_period, sd->runtime_us); + + interval += step; + } + + set_prio(0); + + unbind_cpu(); + + pthread_barrier_init(&barrier, NULL, nr_threads + 1 + !!rt_task); + + for (i = 0; i < nr_threads; i++) { + sd = &sched_data[i]; + pthread_create(&thread[i], NULL, run_deadline, sd); + } + + if (rt_task) { + /* Make sure we are a higher priority than the spinner */ + set_prio(rt_task + 1); + + rt_sched_data.prio = rt_task; + pthread_create(&rt_thread, NULL, run_rt_spin, &rt_sched_data); + } + + pthread_barrier_wait(&barrier); + + if (fail) { + exit(-1); + } + + if (!all_cpus) { + int *pids; + + atexit(teardown); + + make_other_cpu_list(setcpu, &allcpu_buf); + + res = make_cpuset(CPUSET_ALL, allcpu_buf, "0", + CPUSET_FL_SET_LOADBALANCE | + CPUSET_FL_CLONE_CHILDREN | + CPUSET_FL_ALL_TASKS); + if (res) { + perror(res); + exit(-1); + } + + pids = calloc(nr_threads + !!rt_task + 1, sizeof(int)); + if (!pids) { + perror("Allocating pids"); + exit(-1); + } + + for (i = 0; i < nr_threads; i++) + pids[i] = sched_data[i].tid; + if (rt_task) + pids[i++] = rt_sched_data.tid; + + res = make_cpuset(CPUSET_LOCAL, setcpu, "0", + CPUSET_FL_CPU_EXCLUSIVE | + CPUSET_FL_SET_LOADBALANCE | + CPUSET_FL_CLONE_CHILDREN | + CPUSET_FL_TASKS, pids); + free(pids); + if (res) { + perror(res); + fprintf(stderr, "Check if other cpusets exist that 
conflict\n"); + exit(-1); + } + + system("cat /sys/fs/cgroup/cpuset/my_cpuset/tasks"); + } + + pthread_barrier_wait(&barrier); + + if (fail) + exit(-1); + + pthread_barrier_wait(&barrier); + + if (!fail) + sleep(10); + + done = 1; + if (rt_task) { + res = join_thread(&rt_thread); + if (res) + printf("RT Thread failed: %s\n", res); + } + + for (i = 0; i < nr_threads; i++) { + + sd = &sched_data[i]; + + res = join_thread(&thread[i]); + if (res) { + printf("Thread %d failed: %s\n", i, res); + continue; + } + + printf("\n[%d]\n", sd->tid); + printf("missed deadlines = %d\n", sd->missed_deadlines); + printf("missed periods = %d\n", sd->missed_periods); + printf("Total adjustments = %lld us\n", sd->total_adjust); + printf("# adjustments = %lld avg: %lld us\n", + sd->nr_adjust, sd->total_adjust / sd->nr_adjust); + printf("deadline : %lld us\n", sd->deadline_us); + printf("runtime : %lld us\n", sd->runtime_us); + printf("nr_periods : %lld\n", sd->nr_periods); + printf("max_time: %lldus", sd->max_time); + printf("\tmin_time: %lldus", sd->min_time); + printf("\tavg_time: %lldus\n", sd->total_time / sd->nr_periods); + printf("ctx switches vol:%d nonvol:%d migration:%d\n", + sd->vol, sd->nonvol, sd->migrate); + printf("highes prime: %lld\n", sd->prime); + printf("\n"); + } + + free(setcpu_buf); + return 0; +} -- cgit 1.2.3-korg