aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Bristot de Oliveira <bristot@redhat.com>2020-10-12 11:19:00 +0200
committerDaniel Bristot de Oliveira <bristot@redhat.com>2020-10-12 11:19:00 +0200
commit741b46474cdbdbacf14c63361652835ed0da9358 (patch)
treea3c5be331c8c22fc4c898202ca32faf761afc272
parent1d3d9329a99b8047ea0886df370de6ddf4c7f5c9 (diff)
downloadstalld-741b46474cdbdbacf14c63361652835ed0da9358.tar.gz
stalld: Do not die if sched_debug returns an invalid value
Instead of dying when an invalid value for the number of tasks is read from sched_debug, warn and return an error to main. Main will then try again. Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
-rw-r--r--src/stalld.c68
1 files changed, 57 insertions, 11 deletions
diff --git a/src/stalld.c b/src/stalld.c
index 6d7746c..0f80369 100644
--- a/src/stalld.c
+++ b/src/stalld.c
@@ -148,7 +148,6 @@ int boost_policy;
/*
* print any error messages and exit
*/
-
void die(const char *fmt, ...)
{
va_list ap;
@@ -169,6 +168,25 @@ void die(const char *fmt, ...)
}
/*
+ * print the error messages but do not exit.
+ */
+void warn(const char *fmt, ...) /* fmt: printf-style format, followed by its arguments */
+{
+ va_list ap;
+
+ if (errno) /* a non-zero errno is reported first, through perror() */
+ perror("stalld: ");
+
+ va_start(ap, fmt);
+ fprintf(stderr, " "); /* a single space precedes the formatted message */
+ vfprintf(stderr, fmt, ap); /* the caller's message goes to stderr */
+ va_end(ap);
+
+ fprintf(stderr, "\n"); /* end the line; the function then returns normally */
+}
+
+
+/*
* path to file for storing daemon pid
*/
char pidfile[MAXPATHLEN];
@@ -506,8 +524,10 @@ long get_long_from_str(char *start)
errno = 0;
value = strtol(start, &end, 10);
- if (errno || start == end)
- die("Invalid ID '%s'", value);
+ if (errno || start == end) {
+ warn("Invalid ID '%s'", value);
+ return -1;
+ }
return value;
}
@@ -691,12 +711,22 @@ int parse_cpu_info(struct cpu_info *cpu_info, char *buffer, int buffer_size)
struct task_info *old_tasks = cpu_info->starving;
int nr_old_tasks = cpu_info->nr_waiting_tasks;
+ long nr_running, nr_rt_running;
int cpu = cpu_info->id;
char *cpu_buffer;
+ int retval = 0;
cpu_buffer = alloc_and_fill_cpu_buffer(cpu, buffer, buffer_size);
if (!cpu_buffer)
- return -1;
+ return -ENOMEM;
+
+ nr_running = get_variable_long_value(cpu_buffer, ".nr_running");
+ nr_rt_running = get_variable_long_value(cpu_buffer, ".rt_nr_running");
+
+ if ((nr_running == -1) || (nr_rt_running == -1)) {
+ retval = -EINVAL;
+ goto out_free;
+ }
cpu_info->nr_running = get_variable_long_value(cpu_buffer, ".nr_running");
cpu_info->nr_rt_running = get_variable_long_value(cpu_buffer, ".rt_nr_running");
@@ -708,9 +738,10 @@ int parse_cpu_info(struct cpu_info *cpu_info, char *buffer, int buffer_size)
free(old_tasks);
}
+out_free:
free(cpu_buffer);
- return 0;
+ return retval;
}
int get_current_policy(int pid, struct sched_attr *attr)
@@ -1169,10 +1200,18 @@ void *cpu_main(void *data)
while (cpu->thread_running) {
retval = read_sched_debug(buffer, BUFFER_SIZE);
- if(!retval)
- die("fail reading sched debug file!");
+ if(!retval) {
+ warn("fail reading sched debug file");
+ warn("Dazed and confused, but trying to continue");
+ continue;
+ }
- parse_cpu_info(cpu, buffer, BUFFER_SIZE);
+ retval = parse_cpu_info(cpu, buffer, BUFFER_SIZE);
+ if (retval) {
+ warn("error parsing CPU info");
+ warn("Dazed and confused, but trying to continue");
+ continue;
+ }
if (config_verbose)
print_waiting_tasks(cpu);
@@ -1248,8 +1287,10 @@ int conservative_main(struct cpu_info *cpus, int nr_cpus)
while (1) {
retval = read_sched_debug(buffer, BUFFER_SIZE);
- if(!retval)
- die("fail reading sched debug file!");
+ if(!retval) {
+ warn("Dazed and confused, but trying to continue");
+ continue;
+ }
for (i = 0; i < nr_cpus; i++) {
if (!should_monitor(i))
@@ -1260,7 +1301,12 @@ int conservative_main(struct cpu_info *cpus, int nr_cpus)
if (cpu->thread_running)
continue;
- parse_cpu_info(cpu, buffer, BUFFER_SIZE);
+ retval = parse_cpu_info(cpu, buffer, BUFFER_SIZE);
+ if (retval) {
+ warn("error parsing CPU info");
+ warn("Dazed and confused, but trying to continue");
+ continue;
+ }
if (config_verbose)
printf("\tchecking cpu %d - rt: %d - starving: %d\n",