aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorRussell King <rmk+kernel@arm.linux.org.uk>2011-09-21 08:48:33 +0100
committerRussell King <rmk+kernel@arm.linux.org.uk>2011-09-21 08:48:33 +0100
commitf70cac8d9c7125f83048f8b3d1c60f5a041a165c (patch)
tree0d0efd72c1a41f973a919b16aac1d8210ed1ee30 /kernel
parent4722cd7741c6404f967f7a7b8b666540b6c1663e (diff)
parent08aab447c56a5388cf0c768da476ad022f00fef8 (diff)
downloadlinux-x86-syscalls-f70cac8d9c7125f83048f8b3d1c60f5a041a165c.tar.gz
Merge branch 'kprobes-test' of git://git.yxit.co.uk/linux into devel-stable
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c67
-rw-r--r--kernel/sched.c43
-rw-r--r--kernel/time/alarmtimer.c18
3 files changed, 95 insertions, 33 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 68c8017de969e3..fbe38f2e8edb93 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
local_irq_restore(flags);
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /*
+ * next is NULL when called from perf_event_enable_on_exec()
+ * that will systematically cause a cgroup_switch()
+ */
+ if (next)
+ cgrp2 = perf_cgroup_from_task(next);
+
+ /*
+ * only schedule out current cgroup events if we know
+ * that we are switching to a different cgroup. Otherwise,
+ * do no touch the cgroup events.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /* prev can never be NULL */
+ cgrp2 = perf_cgroup_from_task(prev);
+
+ /*
+ * only need to schedule in cgroup events if we are changing
+ * cgroup during ctxsw. Cgroup events were not scheduled
+ * out of ctxsw out if that was not the case.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWIN);
}
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
}
@@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_out(task);
+ perf_cgroup_sched_out(task, next);
}
static void task_ctx_sched_out(struct perf_event_context *ctx)
@@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void __perf_event_task_sched_in(struct task_struct *task)
+void __perf_event_task_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
struct perf_event_context *ctx;
int ctxn;
@@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task)
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_in(task);
+ perf_cgroup_sched_in(prev, task);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
* ctxswin cgroup events which are already scheduled
* in.
*/
- perf_cgroup_sched_out(current);
+ perf_cgroup_sched_out(current, NULL);
raw_spin_lock(&ctx->lock);
task_ctx_sched_out(ctx);
@@ -3353,8 +3396,8 @@ static int perf_event_index(struct perf_event *event)
}
static void calc_timer_values(struct perf_event *event,
- u64 *running,
- u64 *enabled)
+ u64 *enabled,
+ u64 *running)
{
u64 now, ctx_time;
diff --git a/kernel/sched.c b/kernel/sched.c
index ccacdbdecf452b..ec5f472bc5b9ce 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_disable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(current);
+ perf_event_task_sched_in(prev, current);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -4279,9 +4279,9 @@ pick_next_task(struct rq *rq)
}
/*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
*/
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
@@ -4322,16 +4322,6 @@ need_resched:
if (to_wakeup)
try_to_wake_up_local(to_wakeup);
}
-
- /*
- * If we are going to sleep and we have plugged IO
- * queued, make sure to submit it to avoid deadlocks.
- */
- if (blk_needs_flush_plug(prev)) {
- raw_spin_unlock(&rq->lock);
- blk_schedule_flush_plug(prev);
- raw_spin_lock(&rq->lock);
- }
}
switch_count = &prev->nvcsw;
}
@@ -4369,6 +4359,26 @@ need_resched:
if (need_resched())
goto need_resched;
}
+
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+ if (!tsk->state)
+ return;
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+ * make sure to submit it to avoid deadlocks.
+ */
+ if (blk_needs_flush_plug(tsk))
+ blk_schedule_flush_plug(tsk);
+}
+
+asmlinkage void schedule(void)
+{
+ struct task_struct *tsk = current;
+
+ sched_submit_work(tsk);
+ __schedule();
+}
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4435,7 +4445,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
do {
add_preempt_count_notrace(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
@@ -4463,7 +4473,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
- schedule();
+ __schedule();
local_irq_disable();
sub_preempt_count(PREEMPT_ACTIVE);
@@ -5588,7 +5598,7 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
add_preempt_count(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count(PREEMPT_ACTIVE);
}
@@ -7443,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
if (sd && (sd->flags & SD_OVERLAP))
free_sched_groups(sd->groups, 0);
+ kfree(*per_cpu_ptr(sdd->sd, j));
kfree(*per_cpu_ptr(sdd->sg, j));
kfree(*per_cpu_ptr(sdd->sgp, j));
}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 59f369f98a0431..ea5e1a928d5b08 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer)
static void alarm_timer_get(struct k_itimer *timr,
struct itimerspec *cur_setting)
{
+ memset(cur_setting, 0, sizeof(struct itimerspec));
+
cur_setting->it_interval =
ktime_to_timespec(timr->it.alarmtimer.period);
cur_setting->it_value =
@@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
if (!rtcdev)
return -ENOTSUPP;
- /* Save old values */
- old_setting->it_interval =
- ktime_to_timespec(timr->it.alarmtimer.period);
- old_setting->it_value =
- ktime_to_timespec(timr->it.alarmtimer.node.expires);
+ /*
+ * XXX HACK! Currently we can DOS a system if the interval
+ * period on alarmtimers is too small. Cap the interval here
+ * to 100us and solve this properly in a future patch! -jstultz
+ */
+ if ((new_setting->it_interval.tv_sec == 0) &&
+ (new_setting->it_interval.tv_nsec < 100000))
+ new_setting->it_interval.tv_nsec = 100000;
+
+ if (old_setting)
+ alarm_timer_get(timr, old_setting);
/* If the timer was already set, cancel it */
alarm_cancel(&timr->it.alarmtimer);