author     Clark Williams <williams@redhat.com>    2017-01-09 20:48:02 -0600
committer  Clark Williams <williams@redhat.com>    2017-03-15 14:16:51 -0500
commit     6e4d54b8c6d3697efba0e3d44727b10485a07381 (patch)
tree       b73d67129b56d407b05e3e418f5d0c1a497ef1e6
parent     c98ab9e517208018099f9097080d4e703ab75c7e (diff)
download   rteval-6e4d54b8c6d3697efba0e3d44727b10485a07381.tar.gz
kcompile.py: modify to manage jobs on per-node basis
This modification to kcompile takes NUMA topology into account rather than
treating the system as one big SMP box. Make loads are started on a per-node
basis (e.g. on a four-node system, four makes are started), and each make
command is restricted to the CPU cores available on that node. This is an
attempt to reduce cross-node traffic and to better mimic a well-behaved
realtime application.

Signed-off-by: Clark Williams <williams@redhat.com>
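Illustrative sketch (not part of the commit): a minimal reconstruction of the
per-node approach described above, launching one make per NUMA node and
binding it to that node's CPUs with numactl. The build directory, node list,
and -j value are hypothetical placeholders, and each per-node object directory
is assumed to already contain a configured kernel tree.

    import subprocess

    builddir = "/root/rteval-build"        # hypothetical build area
    kdir = builddir + "/linux-4.9"         # hypothetical kernel source tree
    nodes = [0, 1, 2, 3]                   # e.g. a four-node system

    jobs = []
    for node in nodes:
        # separate object directory per node (assumed to hold a .config)
        objdir = "%s/node%d" % (builddir, node)
        # numactl --cpunodebind restricts make (and its child processes)
        # to the CPUs of this node
        cmd = "numactl --cpunodebind %d make O=%s -C %s -j8 bzImage modules" % \
              (node, objdir, kdir)
        jobs.append(subprocess.Popen(cmd, shell=True))

    # wait for every per-node build to complete
    for job in jobs:
        job.wait()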
-rw-r--r--  rteval/modules/loads/kcompile.py                                     161
-rw-r--r--  rteval/rtevalConfig.py                                                 5
-rw-r--r--  rteval/systopology.py (renamed from rteval/sysinfo/systopology.py)     3
3 files changed, 109 insertions(+), 60 deletions(-)
diff --git a/rteval/modules/loads/kcompile.py b/rteval/modules/loads/kcompile.py
index 1eb2cde..ef636c2 100644
--- a/rteval/modules/loads/kcompile.py
+++ b/rteval/modules/loads/kcompile.py
@@ -1,6 +1,7 @@
#
# Copyright 2009 - 2013 Clark Williams <williams@redhat.com>
# Copyright 2012 - 2013 David Sommerseth <davids@redhat.com>
+# Copyright 2014 - 2017 Clark Williams <williams@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -23,19 +24,89 @@
# are deemed to be part of the source code.
#
-import sys, os, glob, subprocess
+import sys, os, os.path, glob, subprocess
from signal import SIGTERM
from rteval.modules import rtevalRuntimeError
from rteval.modules.loads import CommandLineLoad
from rteval.Log import Log
from rteval.misc import expand_cpulist
+from rteval.systopology import SysTopology
+
+kernel_prefix="linux-4.9"
+
+class KBuildJob(object):
+    '''Class to manage a build job bound to a particular node'''
+
+    def __init__(self, node, kdir, logger=None):
+        self.kdir = kdir
+        self.jobid = None
+        self.node = node
+        self.logger = logger
+        self.builddir = os.path.dirname(kdir)
+        self.objdir = "%s/node%d" % (self.builddir, int(node))
+        if not os.path.isdir(self.objdir):
+            os.mkdir(self.objdir)
+        if os.path.exists('/usr/bin/numactl'):
+            self.binder = 'numactl --cpunodebind %d' % int(self.node)
+        else:
+            self.binder = 'taskset -c %s' % str(self.node)
+        self.jobs = self.calc_jobs_per_cpu() * len(self.node)
+        self.log(Log.DEBUG, "node %d: jobs == %d" % (int(node), self.jobs))
+        self.runcmd = "%s make O=%s -C %s -j%d bzImage modules" % (self.binder, self.objdir, self.kdir, self.jobs)
+        self.cleancmd = "%s make O=%s -C %s clean allmodconfig" % (self.binder, self.objdir, self.kdir)
+        self.log(Log.DEBUG, "node%d kcompile command: %s" % (int(node), self.runcmd))
+
+    def __str__(self):
+        return self.runcmd
+
+    def log(self, logtype, msg):
+        if self.logger:
+            self.logger.log(logtype, "[kcompile node%d] %s" % (int(self.node), msg))
+
+    def calc_jobs_per_cpu(self):
+        mult = 2
+        self.log(Log.DEBUG, "calculating jobs for node %d" % int(self.node))
+        # get memory total in gigabytes
+        mem = int(self.node.meminfo['MemTotal']) / 1024.0 / 1024.0 / 1024.0
+        # ratio of gigabytes to #cores
+        ratio = float(mem) / float(len(self.node))
+        if ratio < 1.0:
+            ratio = 1.0
+        if ratio < 1.0 or ratio > 2.0:
+            mult = 1
+        self.log(Log.DEBUG, "memory/cores ratio on node %d: %f" % (int(self.node), ratio))
+        self.log(Log.DEBUG, "returning jobs/core value of: %d" % (int(ratio) * mult))
+        return int(int(ratio) * int(mult))
+
+    def clean(self, sin=None, sout=None, serr=None):
+        self.log(Log.DEBUG, "cleaning objdir %s" % self.objdir)
+        subprocess.call(self.cleancmd, shell=True,
+                        stdin=sin, stdout=sout, stderr=serr)
+
+    def run(self, sin=None, sout=None, serr=None):
+        self.log(Log.INFO, "starting workload on node %d" % int(self.node))
+        self.log(Log.DEBUG, "running on node %d: %s" % (int(self.node), self.runcmd))
+        self.jobid = subprocess.Popen(self.runcmd, shell=True,
+                                      stdin=sin, stdout=sout, stderr=serr)
+
+    def isrunning(self):
+        if self.jobid == None:
+            return False
+        return (self.jobid.poll() == None)
+
+    def stop(self):
+        if not self.jobid:
+            return True
+        return self.jobid.terminate()
-kernel_prefix="linux-2.6"
class Kcompile(CommandLineLoad):
    def __init__(self, config, logger):
+        self.buildjobs = {}
+        self.config = config
+        self.topology = SysTopology()
        CommandLineLoad.__init__(self, "kcompile", config, logger)
-
+        self.logger = logger

    def _WorkloadSetup(self):
        # find our source tarball
@@ -80,13 +151,18 @@ class Kcompile(CommandLineLoad):
                break
        if kdir == None:
            raise rtevalRuntimeError(self, "Can't find kernel directory!")
-        self.jobs = 1 # We only run one instance of the kcompile job
        self.mydir = os.path.join(self.builddir, kdir)
        self._log(Log.DEBUG, "mydir = %s" % self.mydir)
+        self._log(Log.DEBUG, "systopology: %s" % self.topology)
+        self.jobs = len(self.topology)
+        self.args = []
+        for n in self.topology:
+            self._log(Log.DEBUG, "Configuring build job for node %d" % int(n))
+            self.buildjobs[n] = KBuildJob(n, self.mydir, self.logger)
+            self.args.append(str(self.buildjobs[n])+";")

    def _WorkloadBuild(self):
-        self._log(Log.DEBUG, "setting up all module config file in %s" % self.mydir)
        null = os.open("/dev/null", os.O_RDWR)
        if self._logging:
            out = self.open_logfile("kcompile-build.stdout")
@@ -94,9 +170,9 @@ class Kcompile(CommandLineLoad):
        else:
            out = err = null
-        # clean up from potential previous run
+        # clean up any damage from previous runs
        try:
-            ret = subprocess.call(["make", "-C", self.mydir, "mrproper", "allmodconfig"],
+            ret = subprocess.call(["make", "-C", self.mydir, "mrproper"],
                                  stdin=null, stdout=out, stderr=err)
            if ret:
                raise rtevalRuntimeError(self, "kcompile setup failed: %d" % ret)
@@ -104,31 +180,15 @@ class Kcompile(CommandLineLoad):
self._log(Log.DEBUG, "keyboard interrupt, aborting")
return
self._log(Log.DEBUG, "ready to run")
- os.close(null)
if self._logging:
os.close(out)
os.close(err)
+ # clean up object dirs and make sure each has a config file
+ for n in self.topology:
+ self.buildjobs[n].clean(sin=null,sout=null,serr=null)
+ os.close(null)
self._setReady()
-
-    def __calc_numjobs(self):
-        mult = int(self._cfg.setdefault('jobspercore', 1))
-        mem = self.memsize[0]
-        if self.memsize[1] == 'KB':
-            mem = mem / (1024.0 * 1024.0)
-        elif self.memsize[1] == 'MB':
-            mem = mem / 1024.0
-        elif self.memsize[1] == 'TB':
-            mem = mem * 1024
-        ratio = float(mem) / float(self.num_cpus)
-        if ratio > 1.0:
-            njobs = self.num_cpus * mult
-        else:
-            self._log(Log.DEBUG, "Low memory system (%f GB/core)! Dropping jobs to one per core" % ratio)
-            njobs = self.num_cpus
-        return njobs
-
-
    def _WorkloadPrepare(self):
        self.__nullfd = os.open("/dev/null", os.O_RDWR)
        if self._logging:
@@ -143,41 +203,30 @@ class Kcompile(CommandLineLoad):
        else:
            cpulist = ""
-        self.jobs = self.__calc_numjobs()
-        self._log(Log.DEBUG, "starting loop (jobs: %d)" % self.jobs)
-
-        self.args = ["make", "-C", self.mydir,
-                     "-j%d" % self.jobs ]
-
-        if cpulist:
-            self.args = ["taskset", '-c', cpulist] + self.args
-
-        self.__kcompileproc = None
-
-
    def _WorkloadTask(self):
-        if not self.__kcompileproc or self.__kcompileproc.poll() is not None:
-            # If kcompile has not been kicked off yet, or have completed,
-            # restart it
-            self._log(Log.DEBUG, "Kicking off kcompile: %s" % " ".join(self.args))
-            self.__kcompileproc = subprocess.Popen(self.args,
-                                                   stdin=self.__nullfd,
-                                                   stdout=self.__outfd,
-                                                   stderr=self.__errfd)
-
+        for n in self.topology:
+            if not self.buildjobs[n]:
+                raise RuntimeError, "Build job not set up for node %d" % int(n)
+            if self.buildjobs[n].jobid is None or self.buildjobs[n].jobid.poll() is not None:
+                self._log(Log.INFO, "Starting load on node %d" % n)
+                self.buildjobs[n].run(self.__nullfd, self.__outfd, self.__errfd)

    def WorkloadAlive(self):
-        # Let _WorkloadTask() kick off new runs, if it stops - thus
-        # kcompile will always be alive
+        # if any of the jobs has stopped, return False
+        for n in self.topology:
+            if self.buildjobs[n].jobid.poll() is not None:
+                return False
        return True
    def _WorkloadCleanup(self):
        self._log(Log.DEBUG, "out of stopevent loop")
-        if self.__kcompileproc.poll() == None:
-            self._log(Log.DEBUG, "killing compile job with SIGTERM")
-            os.kill(self.__kcompileproc.pid, SIGTERM)
-        self.__kcompileproc.wait()
+        for n in self.buildjobs:
+            if self.buildjobs[n].jobid.poll() == None:
+                self._log(Log.DEBUG, "stopping job on node %d" % int(n))
+                self.buildjobs[n].jobid.terminate()
+            self.buildjobs[n].jobid.wait()
+            del self.buildjobs[n].jobid
        os.close(self.__nullfd)
        del self.__nullfd
        if self._logging:
@@ -185,14 +234,12 @@ class Kcompile(CommandLineLoad):
            del self.__outfd
            os.close(self.__errfd)
            del self.__errfd
-        del self.__kcompileproc
        self._setFinished()
-
def ModuleParameters():
    return {"source":       {"descr": "Source tar ball",
-                             "default": "linux-2.6.21.tar.bz2",
+                             "default": "linux-4.9.tar.xz",
                             "metavar": "TARBALL"},
            "jobspercore":  {"descr": "Number of working threads per core",
                             "default": 2,
diff --git a/rteval/rtevalConfig.py b/rteval/rtevalConfig.py
index 47488a0..16ac2d0 100644
--- a/rteval/rtevalConfig.py
+++ b/rteval/rtevalConfig.py
@@ -33,7 +33,7 @@
import os, sys
import ConfigParser
from Log import Log
-from sysinfo.systopology import SysTopology
+from systopology import SysTopology
def get_user_name():
    name = os.getenv('SUDO_USER')
@@ -190,8 +190,7 @@ class rtevalConfig(object):
        # get our system topology info
        self.__systopology = SysTopology()
-        # debug
-        print self.__systopology
+        print("got system topology: %s" % self.__systopology)

        # Import the default config first
        for sect, vals in default_config.items():
diff --git a/rteval/sysinfo/systopology.py b/rteval/systopology.py
index 0295276..7c0985e 100644
--- a/rteval/sysinfo/systopology.py
+++ b/rteval/systopology.py
@@ -147,6 +147,9 @@ class NumaNode(object):
    def __str__(self):
        return self.getcpustr()

+    def __int__(self):
+        return self.nodeid
+
    # read info about memory attached to this node
    def getmeminfo(self):
        self.meminfo = {}