diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt new file mode 100644 index 0000000..cb61516 --- /dev/null +++ b/Documentation/hwlat_detector.txt @@ -0,0 +1,64 @@ +Introduction: +------------- + +The module hwlat_detector is a special purpose kernel module that is used to +detect large system latencies induced by the behavior of certain underlying +hardware or firmware, independent of Linux itself. The code was developed +originally to detect SMIs (System Management Interrupts) on x86 systems, +however there is nothing x86-specific about this patchset. It was +originally written for use by the "RT" patch since the Real Time +kernel is highly latency sensitive. + +SMIs are usually not serviced by the Linux kernel, which typically does not +even know that they are occurring. SMIs are instead set up by BIOS code +and are serviced by BIOS code, usually for "critical" events such as +management of thermal sensors and fans. Sometimes, though, SMIs are used for +other tasks and those tasks can spend an inordinate amount of time in the +handler (sometimes measured in milliseconds). Obviously this is a problem if +you are trying to keep event service latencies down in the microsecond range. + +The hardware latency detector works by hogging all of the CPUs for configurable +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter +for some period, then looking for gaps in the TSC data. Any gap indicates a +time when the polling was interrupted, and since the machine is stopped and +interrupts are turned off, the only thing that could do that would be an SMI. + +Note that the SMI detector should *NEVER* be used in a production environment. +It is intended to be run manually to determine if the hardware platform has a +problem with long system firmware service routines. + +Usage: +------ + +Loading the module hwlat_detector with the parameter "enabled=1" (or +toggling on the "enable" entry in the "hwlat_detector" debugfs directory) is the only +step required to start the hwlat_detector. It is possible to redefine the +threshold in microseconds (us) above which latency spikes will be taken +into account (parameter "threshold="). + +Example: + + # modprobe hwlat_detector enabled=1 threshold=100 + +After the module is loaded, it creates a directory named "hwlat_detector" under +the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary +to have debugfs mounted, which might be on /sys/debug on your system. + +The /debug/hwlat_detector interface contains the following files: + +count - number of latency spikes observed since last reset +enable - a global enable/disable toggle (0/1), resets count +max - maximum hardware latency actually observed (usecs) +sample - a pipe from which to read current raw sample data + in the format (can be opened O_NONBLOCK for a single sample) +threshold - minimum latency value to be considered (usecs) +width - time period to sample with CPUs held (usecs) + must be less than the total window size (enforced) +window - total period of sampling, width being inside (usecs) + +By default we will set width to 500,000 and window to 1,000,000, meaning that +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we +observe any latencies that exceed the threshold (initially 100 usecs), +then we write to a global sample ring buffer of 8K samples, which is +consumed by reading from the "sample" (pipe) debugfs file interface. 
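[Editorial illustration, not part of the patch above.] Since the "sample" entry behaves like a pipe and can be opened O_NONBLOCK to fetch a single sample, a minimal userspace reader might look like the sketch below. The debugfs path is an assumption (the text above uses /debug; /sys/kernel/debug is another common mount point), and the file name sample_reader.c is hypothetical.

/* sample_reader.c - hypothetical example, not part of the hwlat_detector patch.
 * Reads one raw sample from the hwlat_detector "sample" debugfs file.
 * Path below assumes debugfs is mounted at /sys/kernel/debug.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];
	ssize_t n;
	/* O_NONBLOCK returns at most one queued sample instead of blocking */
	int fd = open("/sys/kernel/debug/hwlat_detector/sample",
		      O_RDONLY | O_NONBLOCK);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);	/* raw sample data, format as described above */
	}
	close(fd);
	return 0;
}

Without O_NONBLOCK the read would block until the detector records its next sample, which is the behavior a long-running consumer of the pipe would want.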
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7936b80..8e91863 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2480,6 +2480,11 @@ and is between 256 and 4096 characters. It is defined in the file trace_buf_size=nn[KMG] [FTRACE] will set tracing buffer size. + trace_event=[event-list] + [FTRACE] Set and start specified trace events in order + to facilitate early boot debugging. + See also Documentation/trace/events.txt + trix= [HW,OSS] MediaTrix AudioTrix Pro Format: ,,,,,,,, diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index f157d75..6e5f35e 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -1,7 +1,7 @@ Event Tracing Documentation written by Theodore Ts'o - Updated by Li Zefan + Updated by Li Zefan and Tom Zanussi 1. Introduction =============== @@ -83,8 +83,199 @@ When reading one of these enable files, there are four results: X - there is a mixture of events enabled and disabled ? - this file does not affect any event +2.3 Boot option +--------------- + +In order to facilitate early boot debugging, use boot option: + + trace_event=[event-list] + +The format of this boot option is the same as described in section 2.1. + 3. Defining an event-enabled tracepoint ======================================= See The example provided in samples/trace_events +4. Event formats +================ + +Each trace event has a 'format' file associated with it that contains +a description of each field in a logged event. This information can +be used to parse the binary trace stream, and is also the place to +find the field names that can be used in event filters (see section 5). + +It also displays the format string that will be used to print the +event in text mode, along with the event name and ID used for +profiling. + +Every event has a set of 'common' fields associated with it; these are +the fields prefixed with 'common_'. The other fields vary between +events and correspond to the fields defined in the TRACE_EVENT +definition for that event. + +Each field in the format has the form: + + field:field-type field-name; offset:N; size:N; + +where offset is the offset of the field in the trace record and size +is the size of the data item, in bytes. + +For example, here's the information displayed for the 'sched_wakeup' +event: + +# cat /debug/tracing/events/sched/sched_wakeup/format + +name: sched_wakeup +ID: 60 +format: + field:unsigned short common_type; offset:0; size:2; + field:unsigned char common_flags; offset:2; size:1; + field:unsigned char common_preempt_count; offset:3; size:1; + field:int common_pid; offset:4; size:4; + field:int common_tgid; offset:8; size:4; + + field:char comm[TASK_COMM_LEN]; offset:12; size:16; + field:pid_t pid; offset:28; size:4; + field:int prio; offset:32; size:4; + field:int success; offset:36; size:4; + field:int cpu; offset:40; size:4; + +print fmt: "task %s:%d [%d] success=%d [%03d]", REC->comm, REC->pid, + REC->prio, REC->success, REC->cpu + +This event contains 10 fields, the first 5 common and the remaining 5 +event-specific. All the fields for this event are numeric, except for +'comm' which is a string, a distinction important for event filtering. + +5. Event filtering +================== + +Trace events can be filtered in the kernel by associating boolean +'filter expressions' with them. 
As soon as an event is logged into +the trace buffer, its fields are checked against the filter expression +associated with that event type. An event with field values that +'match' the filter will appear in the trace output, and an event whose +values don't match will be discarded. An event with no filter +associated with it matches everything, and is the default when no +filter has been set for an event. + +5.1 Expression syntax +--------------------- + +A filter expression consists of one or more 'predicates' that can be +combined using the logical operators '&&' and '||'. A predicate is +simply a clause that compares the value of a field contained within a +logged event with a constant value and returns either 0 or 1 depending +on whether the field value matched (1) or didn't match (0): + + field-name relational-operator value + +Parentheses can be used to provide arbitrary logical groupings and +double-quotes can be used to prevent the shell from interpreting +operators as shell metacharacters. + +The field-names available for use in filters can be found in the +'format' files for trace events (see section 4). + +The relational-operators depend on the type of the field being tested: + +The operators available for numeric fields are: + +==, !=, <, <=, >, >= + +And for string fields they are: + +==, != + +Currently, only exact string matches are supported. + +Currently, the maximum number of predicates in a filter is 16. + +5.2 Setting filters +------------------- + +A filter for an individual event is set by writing a filter expression +to the 'filter' file for the given event. + +For example: + +# cd /debug/tracing/events/sched/sched_wakeup +# echo "common_preempt_count > 4" > filter + +A slightly more involved example: + +# cd /debug/tracing/events/sched/sched_signal_send +# echo "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter + +If there is an error in the expression, you'll get an 'Invalid +argument' error when setting it, and the erroneous string along with +an error message can be seen by looking at the filter, e.g.: + +# cd /debug/tracing/events/sched/sched_signal_send +# echo "((sig >= 10 && sig < 15) || dsig == 17) && comm != bash" > filter +-bash: echo: write error: Invalid argument +# cat filter +((sig >= 10 && sig < 15) || dsig == 17) && comm != bash +^ +parse_error: Field not found + +Currently the caret ('^') for an error always appears at the beginning of +the filter string; the error message should still be useful though +even without more accurate position info. + +5.3 Clearing filters +-------------------- + +To clear the filter for an event, write a '0' to the event's filter +file. + +To clear the filters for all events in a subsystem, write a '0' to the +subsystem's filter file. + +5.4 Subsystem filters +--------------------- + +For convenience, filters for every event in a subsystem can be set or +cleared as a group by writing a filter expression into the filter file +at the root of the subsystem. Note, however, that if a filter for any +event within the subsystem lacks a field specified in the subsystem +filter, or if the filter can't be applied for any other reason, the +filter for that event will retain its previous setting. This can +result in an unintended mixture of filters which could lead to +confusing (to the user who might think different filters are in +effect) trace output. Only filters that reference just the common +fields can be guaranteed to propagate successfully to all events. 
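[Editorial illustration, not part of the patch.] The per-event 'filter' files described in section 5.2 can also be driven from a program rather than the shell. The sketch below is an assumption-laden example: the helper name set_event_filter() is hypothetical, and the tracing path assumes debugfs mounted at /sys/kernel/debug (the shell examples above use /debug/tracing instead).

/* filter_set.c - hypothetical example, not part of this patch.
 * Writes a filter expression to an event's 'filter' file; an invalid
 * expression is rejected with EINVAL and the parse error can then be
 * read back from the same file, as shown in section 5.2.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int set_event_filter(const char *event_dir, const char *expr)
{
	char path[512];
	int fd, err = 0;

	snprintf(path, sizeof(path), "%s/filter", event_dir);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;
	if (write(fd, expr, strlen(expr)) < 0)
		err = -errno;	/* -EINVAL indicates a parse error */
	close(fd);
	return err;
}

int main(void)
{
	int err = set_event_filter(
		"/sys/kernel/debug/tracing/events/sched/sched_wakeup",
		"common_preempt_count > 4");

	if (err)
		fprintf(stderr, "setting filter failed: %s\n", strerror(-err));
	return err ? 1 : 0;
}

Writing "0" through the same helper clears the filter again, matching the behavior described in section 5.3.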
+ +Here are a few subsystem filter examples that also illustrate the +above points: + +Clear the filters on all events in the sched subsytem: + +# cd /sys/kernel/debug/tracing/events/sched +# echo 0 > filter +# cat sched_switch/filter +none +# cat sched_wakeup/filter +none + +Set a filter using only common fields for all events in the sched +subsytem (all events end up with the same filter): + +# cd /sys/kernel/debug/tracing/events/sched +# echo common_pid == 0 > filter +# cat sched_switch/filter +common_pid == 0 +# cat sched_wakeup/filter +common_pid == 0 + +Attempt to set a filter using a non-common field for all events in the +sched subsytem (all events but those that have a prev_pid field retain +their old filters): + +# cd /sys/kernel/debug/tracing/events/sched +# echo prev_pid == 0 > filter +# cat sched_switch/filter +prev_pid == 0 +# cat sched_wakeup/filter +common_pid == 0 diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index a39b3c7..a4fcad7 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -85,26 +85,19 @@ of ftrace. Here is a list of some of the key files: This file holds the output of the trace in a human readable format (described below). - latency_trace: - - This file shows the same trace but the information - is organized more to display possible latencies - in the system (described below). - trace_pipe: The output is the same as the "trace" file but this file is meant to be streamed with live tracing. - Reads from this file will block until new data - is retrieved. Unlike the "trace" and "latency_trace" - files, this file is a consumer. This means reading - from this file causes sequential reads to display - more current data. Once data is read from this - file, it is consumed, and will not be read - again with a sequential read. The "trace" and - "latency_trace" files are static, and if the - tracer is not adding more data, they will display - the same information every time they are read. + Reads from this file will block until new data is + retrieved. Unlike the "trace" file, this file is a + consumer. This means reading from this file causes + sequential reads to display more current data. Once + data is read from this file, it is consumed, and + will not be read again with a sequential read. The + "trace" file is static, and if the tracer is not + adding more data,they will display the same + information every time they are read. trace_options: @@ -117,10 +110,15 @@ of ftrace. Here is a list of some of the key files: Some of the tracers record the max latency. For example, the time interrupts are disabled. This time is saved in this file. The max trace - will also be stored, and displayed by either - "trace" or "latency_trace". A new max trace will - only be recorded if the latency is greater than - the value in this file. (in microseconds) + will also be stored, and displayed by "trace". + A new max trace will only be recorded, if the + latency is greater than the value in this + file (in microseconds). Note that the max latency + recorded by the wakeup and the wakeup_rt tracer + do not necessarily reflect the worst-case latency + of the system, but may be erroneously high in + case two or more processes share the maximum + priority of the system. buffer_size_kb: @@ -210,7 +208,7 @@ Here is the list of current tracers that may be configured. the trace with the longest max latency. See tracing_max_latency. When a new max is recorded, it replaces the old trace. 
It is best to view this - trace via the latency_trace file. + trace with the latency-format option enabled. "preemptoff" @@ -307,8 +305,8 @@ the lowest priority thread (pid 0). Latency trace format -------------------- -For traces that display latency times, the latency_trace file -gives somewhat more information to see why a latency happened. +When the latency-format option is enabled, the trace file gives +somewhat more information to see why a latency happened. Here is a typical trace. # tracer: irqsoff @@ -380,9 +378,10 @@ explains which is which. The above is mostly meaningful for kernel developers. - time: This differs from the trace file output. The trace file output - includes an absolute timestamp. The timestamp used by the - latency_trace file is relative to the start of the trace. + time: When the latency-format option is enabled, the trace file + output includes a timestamp relative to the start of the + trace. This differs from the output when latency-format + is disabled, which includes an absolute timestamp. delay: This is just to help catch your eye a bit better. And needs to be fixed to be only relative to the same CPU. @@ -440,7 +439,8 @@ Here are the available options: sym-addr: bash-4000 [01] 1477.606694: simple_strtoul - verbose - This deals with the latency_trace file. + verbose - This deals with the trace file when the + latency-format option is enabled. bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ (+0.000ms): simple_strtoul (strict_strtoul) @@ -472,7 +472,7 @@ Here are the available options: the app is no longer running The lookup is performed when you read - trace,trace_pipe,latency_trace. Example: + trace,trace_pipe. Example: a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] @@ -481,6 +481,11 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] every scheduling event. Will add overhead if there's a lot of tasks running at once. + latency-format - This option changes the trace. When + it is enabled, the trace displays + additional information about the + latencies, as described in "Latency + trace format". sched_switch ------------ @@ -596,12 +601,13 @@ To reset the maximum, echo 0 into tracing_max_latency. Here is an example: # echo irqsoff > current_tracer + # echo latency-format > trace_options # echo 0 > tracing_max_latency # echo 1 > tracing_enabled # ls -ltr [...] # echo 0 > tracing_enabled - # cat latency_trace + # cat trace # tracer: irqsoff # irqsoff latency trace v1.1.5 on 2.6.26 @@ -703,12 +709,13 @@ which preemption was disabled. The control of preemptoff tracer is much like the irqsoff tracer. # echo preemptoff > current_tracer + # echo latency-format > trace_options # echo 0 > tracing_max_latency # echo 1 > tracing_enabled # ls -ltr [...] # echo 0 > tracing_enabled - # cat latency_trace + # cat trace # tracer: preemptoff # preemptoff latency trace v1.1.5 on 2.6.26-rc8 @@ -850,12 +857,13 @@ Again, using this trace is much like the irqsoff and preemptoff tracers. # echo preemptirqsoff > current_tracer + # echo latency-format > trace_options # echo 0 > tracing_max_latency # echo 1 > tracing_enabled # ls -ltr [...] # echo 0 > tracing_enabled - # cat latency_trace + # cat trace # tracer: preemptirqsoff # preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 @@ -1012,11 +1020,12 @@ Instead of performing an 'ls', we will run 'sleep 1' under 'chrt' which changes the priority of the task. 
# echo wakeup > current_tracer + # echo latency-format > trace_options # echo 0 > tracing_max_latency # echo 1 > tracing_enabled # chrt -f 5 sleep 1 # echo 0 > tracing_enabled - # cat latency_trace + # cat trace # tracer: wakeup # wakeup latency trace v1.1.5 on 2.6.26-rc8 diff --git a/Documentation/trace/function-graph-fold.vim b/Documentation/trace/function-graph-fold.vim new file mode 100644 index 0000000..0544b50 --- /dev/null +++ b/Documentation/trace/function-graph-fold.vim @@ -0,0 +1,42 @@ +" Enable folding for ftrace function_graph traces. +" +" To use, :source this file while viewing a function_graph trace, or use vim's +" -S option to load from the command-line together with a trace. You can then +" use the usual vim fold commands, such as "za", to open and close nested +" functions. While closed, a fold will show the total time taken for a call, +" as would normally appear on the line with the closing brace. Folded +" functions will not include finish_task_switch(), so folding should remain +" relatively sane even through a context switch. +" +" Note that this will almost certainly only work well with a +" single-CPU trace (e.g. trace-cmd report --cpu 1). + +function! FunctionGraphFoldExpr(lnum) + let line = getline(a:lnum) + if line[-1:] == '{' + if line =~ 'finish_task_switch() {$' + return '>1' + endif + return 'a1' + elseif line[-1:] == '}' + return 's1' + else + return '=' + endif +endfunction + +function! FunctionGraphFoldText() + let s = split(getline(v:foldstart), '|', 1) + if getline(v:foldend+1) =~ 'finish_task_switch() {$' + let s[2] = ' task switch ' + else + let e = split(getline(v:foldend), '|', 1) + let s[2] = e[2] + endif + return join(s, '|') +endfunction + +setlocal foldexpr=FunctionGraphFoldExpr(v:lnum) +setlocal foldtext=FunctionGraphFoldText() +setlocal foldcolumn=12 +setlocal foldmethod=expr diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt new file mode 100644 index 0000000..2f17967 --- /dev/null +++ b/Documentation/trace/histograms.txt @@ -0,0 +1,200 @@ + Using the Linux Kernel Latency Histograms + + +This document gives a short explanation how to enable, configure and use +latency histograms. Latency histograms are primarily relevant in the +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT) +and are used in the quality management of the Linux real-time +capabilities. + + +* Purpose of latency histograms + +A latency histogram continuously accumulates the frequencies of latency +data. There are two types of histograms +- potential sources of latencies +- effective latencies + + +* Potential sources of latencies + +Potential sources of latencies are code segments where interrupts, +preemption or both are disabled (aka critical sections). To create +histograms of potential sources of latency, the kernel stores the time +stamp at the start of a critical section, determines the time elapsed +when the end of the section is reached, and increments the frequency +counter of that latency value - irrespective of whether any concurrently +running process is affected by this latency or not. +- Configuration items (in the Kernel hacking/Tracers submenu) + CONFIG_INTERRUPT_OFF_HIST + CONFIG_PREEMPT_OFF_HIST + + +* Effective latencies +There are two types of effective latencies, wakeup latencies and missed +timer latencies + +* Wakeup latencies +Wakeup latencies may occur during wakeup of a process. 
To determine +wakeup latencies, the kernel stores the time stamp when a process is +scheduled to be woken up, and determines the duration of the wakeup time +shortly before control is passed over to this process. Note that the +apparent latency in user space may be considerably longer, since +i) interrupts may be disabled preventing the timer from waking up a process +in time +ii) the process may be interrupted after control is passed over to it +but before user space execution takes place. +If a particular wakeup latency is highest so far, details of the task +that is suffering from this latency are stored as well (see below). +- Configuration item (in the Kernel hacking/Tracers submenu) + CONFIG_WAKEUP_LATENCY_HIST + +* Missed timer latencies + +Missed timer latencies occur when a timer interrupt is serviced later +than it should. This is mainly due to disabled interrupts. To determine +the missed timer latency, the expected and the real execution time of a +timer are compared. If the former precedes the latter, the difference is +entered into the missed timer offsets histogram. If the timer is +responsible to wakeup a sleeping process and the latency is highest so +far among previous wakeup timers, details of the related task are stored +as well (see below). +- Configuration item (in the Kernel hacking/Tracers submenu) + CONFIG_MISSED_TIMER_OFFSETS_HIST + + +* Usage + +The interface to the administration of the latency histograms is located +in the debugfs file system. To mount it, either enter + +mount -t sysfs nodev /sys +mount -t debugfs nodev /sys/kernel/debug + +from shell command line level, or add + +nodev /sys sysfs defaults 0 0 +nodev /sys/kernel/debug debugfs defaults 0 0 + +to the file /etc/fstab in order to implicitly mount the debug file +system at every reboot. All latency histogram related files are +available in the directory /sys/kernel/debug/tracing/latency_hist. A +particular histogram type is enabled by writing non-zero to the related +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. +Select "preemptirqsoff" for histograms of potential sources of +latencies, "wakeup" for histograms of wakeup latencies and +"missed_timer_offsets" for histograms of missed timer offsets, +respectively. + +The histogram data - one per CPU - are available in the files +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx. + +The histograms are reset by writing non-zero to the file "reset" in a +particular latency directory. To reset all latency data, use + +#!/bin/bash + +TRACINGDIR=/sys/kernel/debug/tracing +HISTDIR=$TRACINGDIR/latency_hist + +if test -d $HISTDIR +then + cd $HISTDIR + for i in `find . | grep /reset$` + do + echo 1 >$i + done +fi + + +* Data format + +Latency data are stored with a resolution of one microsecond. The +maximum latency is 10,240 microseconds. Every output line contains the +latency in microseconds in the first row and the number of samples in +the second row. 
To display only lines with a positive latency count, +use, for example, + +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 + +#Minimum latency: 0 microseconds +#Average latency: 0 microseconds +#Maximum latency: 25 microseconds +#Total samples: 3104770694 +#There are 0 samples lower than 0 microseconds. +#There are 0 samples greater or equal than 10240 microseconds. +#usecs samples + 0 2984486876 + 1 49843506 + 2 58219047 + 3 5348126 + 4 2187960 + 5 3388262 + 6 959289 + 7 208294 + 8 40420 + 9 4485 + 10 14918 + 11 18340 + 12 25052 + 13 19455 + 14 5602 + 15 969 + 16 47 + 17 18 + 18 14 + 19 1 + 20 3 + 21 2 + 22 5 + 23 2 + 25 1 + + +* Two types of wakeup latency histograms + +Two different algorithms are used to determine the wakeup latency of a +process. One of them only considers processes that exclusively use the +highest priority of the system, the other one records the wakeup latency +of a process even if it shares the highest system latency with other +processes. The former is used to determine the worst-case latency of a +system; if higher than expected, the hardware and or system software +(e.g. the Linux kernel) may need to be debugged and fixed. The latter +reflects the priority design of a given system; if higher than expected, +the system design may need to be re-evaluated - the hardware +manufacturer or the kernel developers must not be blamed for such +latencies. The exclusive-priority wakeup latency histograms are located +in the "wakeup" subdirectory, the shared-priority histograms are located +in the "wakeup/sharedprio" subdirectory. + + +* Wakeup latency of a selected process + +To only collect wakeup latency data of a particular process, write the +PID of the requested process to + +/sys/kernel/debug/tracing/latency_hist/wakeup/pid. + +PIDs are not considered, if this variable is set to 0. + + +* Details of processes with the highest wakeup or missed timer +latency so far + +Selected data of processes that suffered from the highest wakeup or +missed timer latency that occurred on a particular CPU are available in +the files + +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/max_latency-CPUx +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/max_latency-CPUx + +The format of the data is + + +These data are also reset when the related histograms are reset. diff --git a/Documentation/trace/ring-buffer-design.txt b/Documentation/trace/ring-buffer-design.txt new file mode 100644 index 0000000..5b1d23d --- /dev/null +++ b/Documentation/trace/ring-buffer-design.txt @@ -0,0 +1,955 @@ + Lockless Ring Buffer Design + =========================== + +Copyright 2009 Red Hat Inc. + Author: Steven Rostedt + License: The GNU Free Documentation License, Version 1.2 + (dual licensed under the GPL v2) +Reviewers: Mathieu Desnoyers, Huang Ying, Hidetoshi Seto, + and Frederic Weisbecker. + + +Written for: 2.6.31 + +Terminology used in this Document +--------------------------------- + +tail - where new writes happen in the ring buffer. + +head - where new reads happen in the ring buffer. + +producer - the task that writes into the ring buffer (same as writer) + +writer - same as producer + +consumer - the task that reads from the buffer (same as reader) + +reader - same as consumer. + +reader_page - A page outside the ring buffer used solely (for the most part) + by the reader. 
+ +head_page - a pointer to the page that the reader will use next + +tail_page - a pointer to the page that will be written to next + +commit_page - a pointer to the page with the last finished non-nested write. + +cmpxchg - hardware-assisted atomic transaction that performs the following: + + A = B iff previous A == C + + R = cmpxchg(A, C, B) is saying that we replace A with B if and only if + current A is equal to C, and we put the old (current) A into R + + R gets the previous A regardless of whether A is updated with B or not. + + To see if the update was successful a compare of R == C may be used. + +The Generic Ring Buffer +----------------------- + +The ring buffer can be used in either an overwrite mode or in +producer/consumer mode. + +Producer/consumer mode is where, if the producer were to fill up the +buffer before the consumer could free up anything, the producer +will stop writing to the buffer. This will lose the most recent events. + +Overwrite mode is where, if the producer were to fill up the buffer +before the consumer could free up anything, the producer will +overwrite the older data. This will lose the oldest events. + +No two writers can write at the same time (on the same per-cpu buffer); +a writer may interrupt another writer, but it must finish writing +before the previous writer may continue. This is very important to the +algorithm. The writers act like a "stack". The way interrupts work +enforces this behavior. + + + writer1 start + writer2 start + writer3 start + writer3 finishes + writer2 finishes + writer1 finishes + +This is very much like a writer being preempted by an interrupt and +the interrupt doing a write as well. + +Readers can happen at any time. But no two readers may run at the +same time, nor can a reader preempt/interrupt another reader. A reader +can not preempt/interrupt a writer, but it may read/consume from the +buffer at the same time as a writer is writing, but the reader must be +on another processor to do so. A reader may read on its own processor +and can be preempted by a writer. + +A writer can preempt a reader, but a reader can not preempt a writer. +But a reader can read the buffer at the same time (on another processor) +as a writer. + +The ring buffer is made up of a list of pages held together by a linked list. + +At initialization a reader page is allocated for the reader that is not +part of the ring buffer. + +The head_page, tail_page and commit_page are all initialized to point +to the same page. + +The reader page is initialized to have its next pointer pointing to +the head page, and its previous pointer pointing to a page before +the head page. + +The reader has its own page to use. At start up time, this page is +allocated but is not attached to the list. When the reader wants +to read from the buffer, if its page is empty (like it is on start up) +it will swap its page with the head_page. The old reader page will +become part of the ring buffer and the head_page will be removed. +The page after the inserted page (old reader_page) will become the +new head page. + +Once the new page is given to the reader, the reader could do what +it wants with it, as long as a writer has left that page. + +A sample of how the reader page is swapped: Note this does not +show the head page in the buffer; it is for demonstrating a swap +only. 
+ + +------+ + |reader| RING BUFFER + |page | + +------+ + +---+ +---+ +---+ + | |-->| |-->| | + | |<--| |<--| | + +---+ +---+ +---+ + ^ | ^ | + | +-------------+ | + +-----------------+ + + + +------+ + |reader| RING BUFFER + |page |-------------------+ + +------+ v + | +---+ +---+ +---+ + | | |-->| |-->| | + | | |<--| |<--| |<-+ + | +---+ +---+ +---+ | + | ^ | ^ | | + | | +-------------+ | | + | +-----------------+ | + +------------------------------------+ + + +------+ + |reader| RING BUFFER + |page |-------------------+ + +------+ <---------------+ v + | ^ +---+ +---+ +---+ + | | | |-->| |-->| | + | | | | | |<--| |<-+ + | | +---+ +---+ +---+ | + | | | ^ | | + | | +-------------+ | | + | +-----------------------------+ | + +------------------------------------+ + + +------+ + |buffer| RING BUFFER + |page |-------------------+ + +------+ <---------------+ v + | ^ +---+ +---+ +---+ + | | | | | |-->| | + | | New | | | |<--| |<-+ + | | Reader +---+ +---+ +---+ | + | | page ----^ | | + | | | | + | +-----------------------------+ | + +------------------------------------+ + + + +It is possible that the page swapped is the commit page and the tail page, +if what is in the ring buffer is less than what is held in a buffer page. + + + reader page commit page tail page + | | | + v | | + +---+ | | + | |<----------+ | + | |<------------------------+ + | |------+ + +---+ | + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +This case is still valid for this algorithm. +When the writer leaves the page, it simply goes into the ring buffer +since the reader page still points to the next location in the ring +buffer. + + +The main pointers: + + reader page - The page used solely by the reader and is not part + of the ring buffer (may be swapped in) + + head page - the next page in the ring buffer that will be swapped + with the reader page. + + tail page - the page where the next write will take place. + + commit page - the page that last finished a write. + +The commit page only is updated by the outer most writer in the +writer stack. A writer that preempts another writer will not move the +commit page. + +When data is written into the ring buffer, a position is reserved +in the ring buffer and passed back to the writer. When the writer +is finished writing data into that position, it commits the write. + +Another write (or a read) may take place at anytime during this +transaction. If another write happens it must finish before continuing +with the previous write. 
+ + + Write reserve: + + Buffer page + +---------+ + |written | + +---------+ <--- given back to writer (current commit) + |reserved | + +---------+ <--- tail pointer + | empty | + +---------+ + + Write commit: + + Buffer page + +---------+ + |written | + +---------+ + |written | + +---------+ <--- next positon for write (current commit) + | empty | + +---------+ + + + If a write happens after the first reserve: + + Buffer page + +---------+ + |written | + +---------+ <-- current commit + |reserved | + +---------+ <--- given back to second writer + |reserved | + +---------+ <--- tail pointer + + After second writer commits: + + + Buffer page + +---------+ + |written | + +---------+ <--(last full commit) + |reserved | + +---------+ + |pending | + |commit | + +---------+ <--- tail pointer + + When the first writer commits: + + Buffer page + +---------+ + |written | + +---------+ + |written | + +---------+ + |written | + +---------+ <--(last full commit and tail pointer) + + +The commit pointer points to the last write location that was +committed without preempting another write. When a write that +preempted another write is committed, it only becomes a pending commit +and will not be a full commit till all writes have been committed. + +The commit page points to the page that has the last full commit. +The tail page points to the page with the last write (before +committing). + +The tail page is always equal to or after the commit page. It may +be several pages ahead. If the tail page catches up to the commit +page then no more writes may take place (regardless of the mode +of the ring buffer: overwrite and produce/consumer). + +The order of pages are: + + head page + commit page + tail page + +Possible scenario: + tail page + head page commit page | + | | | + v v v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +There is a special case that the head page is after either the commit page +and possibly the tail page. That is when the commit (and tail) page has been +swapped with the reader page. This is because the head page is always +part of the ring buffer, but the reader page is not. When ever there +has been less than a full page that has been committed inside the ring buffer, +and a reader swaps out a page, it will be swapping out the commit page. + + + reader page commit page tail page + | | | + v | | + +---+ | | + | |<----------+ | + | |<------------------------+ + | |------+ + +---+ | + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + ^ + | + head page + + +In this case, the head page will not move when the tail and commit +move back into the ring buffer. + +The reader can not swap a page into the ring buffer if the commit page +is still on that page. If the read meets the last commit (real commit +not pending or reserved), then there is nothing more to read. +The buffer is considered empty until another full commit finishes. + +When the tail meets the head page, if the buffer is in overwrite mode, +the head page will be pushed ahead one. If the buffer is in producer/consumer +mode, the write will fail. 
+ +Overwrite mode: + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + ^ + | + head page + + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + ^ + | + head page + + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + ^ + | + head page + +Note, the reader page will still point to the previous head page. +But when a swap takes place, it will use the most recent head page. + + +Making the Ring Buffer Lockless: +-------------------------------- + +The main idea behind the lockless algorithm is to combine the moving +of the head_page pointer with the swapping of pages with the reader. +State flags are placed inside the pointer to the page. To do this, +each page must be aligned in memory by 4 bytes. This will allow the 2 +least significant bits of the address to be used as flags. Since +they will always be zero for the address. To get the address, +simply mask out the flags. + + MASK = ~3 + + address & MASK + +Two flags will be kept by these two bits: + + HEADER - the page being pointed to is a head page + + UPDATE - the page being pointed to is being updated by a writer + and was or is about to be a head page. + + + reader page + | + v + +---+ + | |------+ + +---+ | + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-H->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + + +The above pointer "-H->" would have the HEADER flag set. That is +the next page is the next page to be swapped out by the reader. +This pointer means the next page is the head page. + +When the tail page meets the head pointer, it will use cmpxchg to +change the pointer to the UPDATE state: + + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-H->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +"-U->" represents a pointer in the UPDATE state. + +Any access to the reader will need to take some sort of lock to serialize +the readers. But the writers will never take a lock to write to the +ring buffer. This means we only need to worry about a single reader, +and writes only preempt in "stack" formation. + +When the reader tries to swap the page with the ring buffer, it +will also use cmpxchg. If the flag bit in the pointer to the +head page does not have the HEADER flag set, the compare will fail +and the reader will need to look for the new head page and try again. +Note, the flag UPDATE and HEADER are never set at the same time. + +The reader swaps the reader page as follows: + + +------+ + |reader| RING BUFFER + |page | + +------+ + +---+ +---+ +---+ + | |--->| |--->| | + | |<---| |<---| | + +---+ +---+ +---+ + ^ | ^ | + | +---------------+ | + +-----H-------------+ + +The reader sets the reader page next pointer as HEADER to the page after +the head page. + + + +------+ + |reader| RING BUFFER + |page |-------H-----------+ + +------+ v + | +---+ +---+ +---+ + | | |--->| |--->| | + | | |<---| |<---| |<-+ + | +---+ +---+ +---+ | + | ^ | ^ | | + | | +---------------+ | | + | +-----H-------------+ | + +--------------------------------------+ + +It does a cmpxchg with the pointer to the previous head page to make it +point to the reader page. 
Note that the new pointer does not have the HEADER +flag set. This action atomically moves the head page forward. + + +------+ + |reader| RING BUFFER + |page |-------H-----------+ + +------+ v + | ^ +---+ +---+ +---+ + | | | |-->| |-->| | + | | | |<--| |<--| |<-+ + | | +---+ +---+ +---+ | + | | | ^ | | + | | +-------------+ | | + | +-----------------------------+ | + +------------------------------------+ + +After the new head page is set, the previous pointer of the head page is +updated to the reader page. + + +------+ + |reader| RING BUFFER + |page |-------H-----------+ + +------+ <---------------+ v + | ^ +---+ +---+ +---+ + | | | |-->| |-->| | + | | | | | |<--| |<-+ + | | +---+ +---+ +---+ | + | | | ^ | | + | | +-------------+ | | + | +-----------------------------+ | + +------------------------------------+ + + +------+ + |buffer| RING BUFFER + |page |-------H-----------+ <--- New head page + +------+ <---------------+ v + | ^ +---+ +---+ +---+ + | | | | | |-->| | + | | New | | | |<--| |<-+ + | | Reader +---+ +---+ +---+ | + | | page ----^ | | + | | | | + | +-----------------------------+ | + +------------------------------------+ + +Another important point. The page that the reader page points back to +by its previous pointer (the one that now points to the new head page) +never points back to the reader page. That is because the reader page is +not part of the ring buffer. Traversing the ring buffer via the next pointers +will always stay in the ring buffer. Traversing the ring buffer via the +prev pointers may not. + +Note, the way to determine a reader page is simply by examining the previous +pointer of the page. If the next pointer of the previous page does not +point back to the original page, then the original page is a reader page: + + + +--------+ + | reader | next +----+ + | page |-------->| |<====== (buffer page) + +--------+ +----+ + | | ^ + | v | next + prev | +----+ + +------------->| | + +----+ + +The way the head page moves forward: + +When the tail page meets the head page and the buffer is in overwrite mode +and more writes take place, the head page must be moved forward before the +writer may move the tail page. The way this is done is that the writer +performs a cmpxchg to convert the pointer to the head page from the HEADER +flag to have the UPDATE flag set. Once this is done, the reader will +not be able to swap the head page from the buffer, nor will it be able to +move the head page, until the writer is finished with the move. + +This eliminates any races that the reader can have on the writer. The reader +must spin, and this is why the reader can not preempt the writer. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-H->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +The following page will be made into the new head page. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +After the new head page has been set, we can set the old head page +pointer back to NORMAL. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +After the head page has been moved, the tail page may now move forward. 
+ + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + + +The above are the trivial updates. Now for the more complex scenarios. + + +As stated before, if enough writes preempt the first write, the +tail page may make it all the way around the buffer and meet the commit +page. At this time, we must start dropping writes (usually with some kind +of warning to the user). But what happens if the commit was still on the +reader page? The commit page is not part of the ring buffer. The tail page +must account for this. + + + reader page commit page + | | + v | + +---+ | + | |<----------+ + | | + | |------+ + +---+ | + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-H->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + ^ + | + tail page + +If the tail page were to simply push the head page forward, the commit when +leaving the reader page would not be pointing to the correct page. + +The solution to this is to test if the commit page is on the reader page +before pushing the head page. If it is, then it can be assumed that the +tail page wrapped the buffer, and we must drop new writes. + +This is not a race condition, because the commit page can only be moved +by the outter most writer (the writer that was preempted). +This means that the commit will not move while a writer is moving the +tail page. The reader can not swap the reader page if it is also being +used as the commit page. The reader can simply check that the commit +is off the reader page. Once the commit page leaves the reader page +it will never go back on it unless a reader does another swap with the +buffer page that is also the commit page. + + +Nested writes +------------- + +In the pushing forward of the tail page we must first push forward +the head page if the head page is the next page. If the head page +is not the next page, the tail page is simply updated with a cmpxchg. + +Only writers move the tail page. This must be done atomically to protect +against nested writers. + + temp_page = tail_page + next_page = temp_page->next + cmpxchg(tail_page, temp_page, next_page) + +The above will update the tail page if it is still pointing to the expected +page. If this fails, a nested write pushed it forward, the the current write +does not need to push it. + + + temp page + | + v + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +Nested write comes in and moves the tail page forward: + + tail page (moved by nested writer) + temp page | + | | + v v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +The above would fail the cmpxchg, but since the tail page has already +been moved forward, the writer will just try again to reserve storage +on the new tail page. + +But the moving of the head page is a bit more complex. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-H->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +The write converts the head page pointer to UPDATE. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +But if a nested writer preempts here. It will see that the next +page is a head page, but it is also nested. It will detect that +it is nested and will save that information. 
The detection is the +fact that it sees the UPDATE flag instead of a HEADER or NORMAL +pointer. + +The nested writer will set the new head page pointer. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +But it will not reset the update back to normal. Only the writer +that converted a pointer from HEAD to UPDATE will convert it back +to NORMAL. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +After the nested writer finishes, the outer most writer will convert +the UPDATE pointer to NORMAL. + + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + + +It can be even more complex if several nested writes came in and moved +the tail page ahead several pages: + + +(first writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-H->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +The write converts the head page pointer to UPDATE. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +Next writer comes in, and sees the update and sets up the new +head page. + +(second writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +The nested writer moves the tail page forward. But does not set the old +update page to NORMAL because it is not the outer most writer. + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-H->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +Another writer preempts and sees the page after the tail page is a head page. +It changes it from HEAD to UPDATE. + +(third writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-U->| |---> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +The writer will move the head page forward: + + +(third writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-U->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +But now that the third writer did change the HEAD flag to UPDATE it +will convert it to normal: + + +(third writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + + +Then it will move the tail page, and return back to the second writer. + + +(second writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + + +The second writer will fail to move the tail page because it was already +moved, so it will try again and add its data to the new tail page. +It will return to the first writer. + + +(first writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +The first writer can not know atomically test if the tail page moved +while it updates the HEAD page. It will then update the head page to +what it thinks is the new head page. 
+ + +(first writer) + + tail page + | + v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-H->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +Since the cmpxchg returns the old value of the pointer the first writer +will see it succeeded in updating the pointer from NORMAL to HEAD. +But as we can see, this is not good enough. It must also check to see +if the tail page is either where it use to be or on the next page: + + +(first writer) + + A B tail page + | | | + v v v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |-H->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +If tail page != A and tail page does not equal B, then it must reset the +pointer back to NORMAL. The fact that it only needs to worry about +nested writers, it only needs to check this after setting the HEAD page. + + +(first writer) + + A B tail page + | | | + v v v + +---+ +---+ +---+ +---+ +<---| |--->| |-U->| |--->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + +Now the writer can update the head page. This is also why the head page must +remain in UPDATE and only reset by the outer most writer. This prevents +the reader from seeing the incorrect head page. + + +(first writer) + + A B tail page + | | | + v v v + +---+ +---+ +---+ +---+ +<---| |--->| |--->| |--->| |-H-> +--->| |<---| |<---| |<---| |<--- + +---+ +---+ +---+ +---+ + diff --git a/MAINTAINERS b/MAINTAINERS index 94138c4..759b3f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2266,6 +2266,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git S: Maintained F: drivers/media/video/gspca/ +HARDWARE LATENCY DETECTOR +P: Jon Masters +M: jcm@jonmasters.org +W: http://www.kernel.org/pub/linux/kernel/people/jcm/hwlat_detector/ +S: Supported +L: linux-kernel@vger.kernel.org +F: Documentation/hwlat_detector.txt +F: drivers/misc/hwlat_detector.c + HARDWARE MONITORING L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ diff --git a/Makefile b/Makefile index 8190a1c..2480be6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 31 -EXTRAVERSION = .5 +EXTRAVERSION =.5-rt17 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 99193b1..1b28306 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -33,6 +33,11 @@ config OPROFILE_IBS config HAVE_OPROFILE bool +config PROFILE_NMI + bool + depends on OPROFILE + default y + config KPROBES bool "Kprobes" depends on KALLSYMS && MODULES diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 1570c0b..55f4f13 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -18,15 +18,18 @@ struct rwsem_waiter; -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { long count; #define RWSEM_UNLOCKED_VALUE 0x0000000000000000L #define RWSEM_ACTIVE_BIAS 
0x0000000000000001L @@ -38,6 +41,31 @@ struct rw_semaphore { struct list_head wait_list; }; +#define __RWSEM_ANON_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) } + +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) + +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + long count; + spinlock_t wait_lock; + struct list_head wait_list; +}; + #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ LIST_HEAD_INIT((name).wait_list) } @@ -47,12 +75,15 @@ struct rw_semaphore { static inline void init_rwsem(struct rw_semaphore *sem) { - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); + init_anon_rwsem((struct rw_anon_semaphore *)sem); } -static inline void __down_read(struct rw_semaphore *sem) +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + +static inline void __down_read(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -79,7 +110,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { long old, new, res; @@ -94,7 +125,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) return res >= 0 ? 1 : 0; } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -121,7 +152,7 @@ static inline void __down_write(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); @@ -130,7 +161,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) return 0; } -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -155,7 +186,7 @@ static inline void __up_read(struct rw_semaphore *sem) rwsem_wake(sem); } -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { long count; #ifndef CONFIG_SMP @@ -184,7 +215,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -208,7 +239,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(long val, struct rw_anon_semaphore *sem) { #ifndef CONFIG_SMP sem->count += val; @@ -227,7 +258,7 @@ static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) #endif } -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) 
+static inline long rwsem_atomic_update(long val, struct rw_anon_semaphore *sem) { #ifndef CONFIG_SMP sem->count += val; @@ -250,10 +281,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) #endif } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index cc78346..2e0b75e 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -81,7 +81,7 @@ show_interrupts(struct seq_file *p, void *v) #endif if (irq < ACTUAL_NR_IRQS) { - spin_lock_irqsave(&irq_desc[irq].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[irq].lock, flags); action = irq_desc[irq].action; if (!action) goto unlock; @@ -105,7 +105,7 @@ show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } else if (irq == ACTUAL_NR_IRQS) { #ifdef CONFIG_SMP seq_puts(p, "IPI: "); diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index b04e2cb..991a967 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -106,7 +106,7 @@ irqreturn_t timer_interrupt(int irq, void *dev) profile_tick(CPU_PROFILING); #endif - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); /* * Calculate how many ticks have passed since the last update, @@ -136,7 +136,7 @@ irqreturn_t timer_interrupt(int irq, void *dev) state.last_rtc_update = xtime.tv_sec - (tmp ? 600 : 0); } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP while (nticks--) @@ -416,14 +416,14 @@ do_gettimeofday(struct timeval *tv) unsigned long delta_cycles, delta_usec, partial_tick; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); delta_cycles = rpcc() - state.last_time; sec = xtime.tv_sec; usec = (xtime.tv_nsec / 1000); partial_tick = state.partial_tick; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); #ifdef CONFIG_SMP /* Until and unless we figure out how to get cpu cycle counters @@ -470,7 +470,7 @@ do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* The offset that is added into time in do_gettimeofday above must be subtracted out here to keep a coherent view of the @@ -496,7 +496,7 @@ do_settimeofday(struct timespec *tv) ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index aef63c8..eac1a92 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -962,18 +962,7 @@ config LOCAL_TIMERS accounting to be spread across the timer interval, preventing a "thundering herd" at every timer tick. -config PREEMPT - bool "Preemptible Kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. 
+source kernel/Kconfig.preempt config HZ int diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index 7edf353..c682ef4 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -31,18 +31,18 @@ #define DMA_MODE_CASCADE 0xc0 #define DMA_AUTOINIT 0x10 -extern spinlock_t dma_spin_lock; +extern atomic_spinlock_t dma_spin_lock; static inline unsigned long claim_dma_lock(void) { unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); + atomic_spin_lock_irqsave(&dma_spin_lock, flags); return flags; } static inline void release_dma_lock(unsigned long flags) { - spin_unlock_irqrestore(&dma_spin_lock, flags); + atomic_spin_unlock_irqrestore(&dma_spin_lock, flags); } /* Clear the 'DMA Pointer Flip Flop'. diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 39c8bc1..d74265c 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -11,4 +11,38 @@ extern void mcount(void); #endif +#ifndef __ASSEMBLY__ + +#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) +/* + * return_address uses walk_stackframe to do it's work. If both + * CONFIG_FRAME_POINTER=y and CONFIG_ARM_UNWIND=y walk_stackframe uses unwind + * information. For this to work in the function tracer many functions would + * have to be marked with __notrace. So for now just depend on + * !CONFIG_ARM_UNWIND. + */ + +void *return_address(unsigned int); + +#else + +extern inline void *return_address(unsigned int level) +{ + return NULL; +} + +#endif + +#define HAVE_ARCH_CALLER_ADDR + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)return_address(1)) +#define CALLER_ADDR2 ((unsigned long)return_address(2)) +#define CALLER_ADDR3 ((unsigned long)return_address(3)) +#define CALLER_ADDR4 ((unsigned long)return_address(4)) +#define CALLER_ADDR5 ((unsigned long)return_address(5)) +#define CALLER_ADDR6 ((unsigned long)return_address(6)) + +#endif /* ifndef __ASSEMBLY__ */ + #endif /* _ASM_ARM_FTRACE */ diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index acac530..3981cf2 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -26,9 +26,9 @@ extern int show_fiq_list(struct seq_file *, void *); */ #define do_bad_IRQ(irq,desc) \ do { \ - spin_lock(&desc->lock); \ + atomic_spin_lock(&desc->lock); \ handle_bad_irq(irq, desc); \ - spin_unlock(&desc->lock); \ + atomic_spin_unlock(&desc->lock); \ } while(0) #endif diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index d65b2f5..e849ed9 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -60,6 +60,8 @@ #include #include +#include + #define __exception __attribute__((section(".exception.text"))) struct thread_info; diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index f41a6f5..dd667f2 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -40,17 +40,12 @@ struct mmu_gather { unsigned long range_end; }; -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); - -static inline struct mmu_gather * -tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned int full_mm_flush) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); - tlb->mm = mm; tlb->fullmm = full_mm_flush; - - return tlb; } static inline void @@ -61,8 +56,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) /* 
keep the page table cache within bounds */ check_pgt_cache(); - - put_cpu_var(mmu_gathers); } /* diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 60be28d..c446aef 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -9,10 +9,12 @@ ifdef CONFIG_DYNAMIC_FTRACE CFLAGS_REMOVE_ftrace.o = -pg endif +CFLAGS_REMOVE_return_address.o = -pg + # Object file lists. obj-y := compat.o elf.o entry-armv.o entry-common.o irq.o \ - process.o ptrace.o setup.o signal.o \ + process.o ptrace.o return_address.o setup.o signal.o \ sys_arm.o stacktrace.o time.o traps.o obj-$(CONFIG_ISA_DMA_API) += dma.o diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index 7d5b9fb..7a6f3d2 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -21,7 +21,7 @@ #include -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_ATOMIC_SPINLOCK(dma_spin_lock); EXPORT_SYMBOL(dma_spin_lock); static dma_t *dma_chan[MAX_DMA_CHANNELS]; diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8c3de1a..aa5df73 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -59,7 +59,8 @@ work_pending: b ret_slow_syscall @ Check work again work_resched: - bl schedule + bl __schedule + /* * "slow" syscall return path. "why" tells us if this was a real syscall. */ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index b7c3490..6ea2a03 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -84,7 +84,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { #ifdef CONFIG_ARCH_ACORN show_fiq_list(p, v); @@ -139,7 +139,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) } desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; if (iflags & IRQF_VALID) desc->status &= ~IRQ_NOREQUEST; @@ -147,7 +147,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) desc->status &= ~IRQ_NOPROBE; if (!(iflags & IRQF_NOAUTOEN)) desc->status &= ~IRQ_NOAUTOEN; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } void __init init_IRQ(void) @@ -166,9 +166,9 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu) { pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->node, cpu); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); desc->chip->set_affinity(irq, cpumask_of(cpu)); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } /* diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 39196df..fcecbb2 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -174,9 +174,11 @@ void cpu_idle(void) } leds_event(led_idle_end); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c new file mode 100644 index 0000000..df246da --- /dev/null +++ 
b/arch/arm/kernel/return_address.c @@ -0,0 +1,71 @@ +/* + * arch/arm/kernel/return_address.c + * + * Copyright (C) 2009 Uwe Kleine-Koenig + * for Pengutronix + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#include + +#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) +#include + +#include + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->lr; + + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 1; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.lr = (unsigned long)__builtin_return_address(0); + frame.pc = (unsigned long)return_address; + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} + +#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ + +#if defined(CONFIG_ARM_UNWIND) +#warning "TODO: return_address should use unwind tables" +#endif + +void *return_address(unsigned int level) +{ + return NULL; +} + +#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */ + +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index f6bc5d4..95eb911 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -627,6 +627,14 @@ static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall) siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif + /* * We want the common case to go fast, which * is why we may in certain cases get here from diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index de885fd..d825d4e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -451,17 +451,17 @@ void __cpuinit percpu_timer_setup(void) local_timer_setup(evt); } -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_ATOMIC_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() */ static void ipi_cpu_stop(unsigned int cpu) { - spin_lock(&stop_lock); + atomic_spin_lock(&stop_lock); printk(KERN_CRIT "CPU%u: stopping\n", cpu); dump_stack(); - spin_unlock(&stop_lock); + atomic_spin_unlock(&stop_lock); set_cpu_online(cpu, false); diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 9f444e5..20b7411 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -21,7 +21,7 @@ * Note that with framepointer enabled, even the leaf functions have the same * prologue and epilogue, therefore we can ignore the LR value in this case. 
*/ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; @@ -43,7 +43,7 @@ int unwind_frame(struct stackframe *frame) } #endif -void walk_stackframe(struct stackframe *frame, +void notrace walk_stackframe(struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) { while (1) { diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 4cdc4a0..8a545e2 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -244,11 +244,11 @@ void do_gettimeofday(struct timeval *tv) unsigned long usec, sec; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = system_timer->offset(); sec = xtime.tv_sec; usec += xtime.tv_nsec / 1000; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); /* usec may have gone up a lot: be safe */ while (usec >= 1000000) { @@ -270,7 +270,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. However, the * value in this location is the value at the most recent update of @@ -286,7 +286,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } @@ -336,9 +336,9 @@ void timer_tick(void) profile_tick(CPU_PROFILING); do_leds(); do_set_rtc(); - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 57eb0f6..0cf0ae3 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -239,7 +239,7 @@ static void __die(const char *str, int err, struct thread_info *thread, struct p } } -DEFINE_SPINLOCK(die_lock); +DEFINE_ATOMIC_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
@@ -251,12 +251,12 @@ NORET_TYPE void die(const char *str, struct pt_regs *regs, int err) oops_enter(); console_verbose(); - spin_lock_irq(&die_lock); + atomic_spin_lock_irq(&die_lock); bust_spinlocks(1); __die(str, err, thread, regs); bust_spinlocks(0); add_taint(TAINT_DIE); - spin_unlock_irq(&die_lock); + atomic_spin_unlock_irq(&die_lock); if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -282,24 +282,24 @@ void arm_notify_die(const char *str, struct pt_regs *regs, } static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_ATOMIC_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + atomic_spin_lock_irqsave(&undef_lock, flags); list_add(&hook->node, &undef_hook); - spin_unlock_irqrestore(&undef_lock, flags); + atomic_spin_unlock_irqrestore(&undef_lock, flags); } void unregister_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + atomic_spin_lock_irqsave(&undef_lock, flags); list_del(&hook->node); - spin_unlock_irqrestore(&undef_lock, flags); + atomic_spin_unlock_irqrestore(&undef_lock, flags); } static int call_undef_hook(struct pt_regs *regs, unsigned int instr) @@ -308,12 +308,12 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) unsigned long flags; int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL; - spin_lock_irqsave(&undef_lock, flags); + atomic_spin_lock_irqsave(&undef_lock, flags); list_for_each_entry(hook, &undef_hook, node) if ((instr & hook->instr_mask) == hook->instr_val && (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val) fn = hook->fn; - spin_unlock_irqrestore(&undef_lock, flags); + atomic_spin_unlock_irqrestore(&undef_lock, flags); return fn ? fn(regs, instr) : 1; } diff --git a/arch/arm/mach-at91/gpio.c b/arch/arm/mach-at91/gpio.c index f2236f0..cf609f8 100644 --- a/arch/arm/mach-at91/gpio.c +++ b/arch/arm/mach-at91/gpio.c @@ -375,12 +375,18 @@ static int gpio_irq_type(unsigned pin, unsigned type) } } +static void gpio_irq_ack_noop(unsigned int irq) +{ + /* Dummy function. */ +} + static struct irq_chip gpio_irqchip = { .name = "GPIO", .mask = gpio_irq_mask, .unmask = gpio_irq_unmask, .set_type = gpio_irq_type, .set_wake = gpio_irq_set_wake, + .ack = gpio_irq_ack_noop, }; static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) @@ -527,7 +533,7 @@ void __init at91_gpio_irq_setup(void) * shorter, and the AIC handles interrupts sanely. 
*/ set_irq_chip(pin, &gpio_irqchip); - set_irq_handler(pin, handle_simple_irq); + set_irq_handler(pin, handle_edge_irq); set_irq_flags(pin, IRQF_VALID); } diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h index 51dd902..2276ebf 100644 --- a/arch/arm/mach-footbridge/include/mach/hardware.h +++ b/arch/arm/mach-footbridge/include/mach/hardware.h @@ -86,7 +86,7 @@ #define CPLD_FLASH_WR_ENABLE 1 #ifndef __ASSEMBLY__ -extern spinlock_t nw_gpio_lock; +extern atomic_spinlock_t nw_gpio_lock; extern void nw_gpio_modify_op(unsigned int mask, unsigned int set); extern void nw_gpio_modify_io(unsigned int mask, unsigned int in); extern unsigned int nw_gpio_read(void); diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index ac7ffa6..574a57e 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c @@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int val) /* * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE */ -DEFINE_SPINLOCK(nw_gpio_lock); +DEFINE_ATOMIC_SPINLOCK(nw_gpio_lock); EXPORT_SYMBOL(nw_gpio_lock); static unsigned int current_gpio_op; @@ -327,9 +327,9 @@ static inline void wb977_init_gpio(void) /* * Set Group1/Group2 outputs */ - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(-1, GPIO_RED_LED | GPIO_FAN); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } /* @@ -390,9 +390,9 @@ static void __init cpld_init(void) { unsigned long flags; - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_cpld_modify(-1, CPLD_UNMUTE | CPLD_7111_DISABLE); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } static unsigned char rwa_unlock[] __initdata = @@ -616,9 +616,9 @@ static int __init nw_hw_init(void) cpld_init(); rwa010_init(); - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(GPIO_RED_LED|GPIO_GREEN_LED, DEFAULT_LEDS); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } return 0; } diff --git a/arch/arm/mach-footbridge/netwinder-leds.c b/arch/arm/mach-footbridge/netwinder-leds.c index 00269fe..642a443 100644 --- a/arch/arm/mach-footbridge/netwinder-leds.c +++ b/arch/arm/mach-footbridge/netwinder-leds.c @@ -31,13 +31,13 @@ static char led_state; static char hw_led_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_ATOMIC_SPINLOCK(leds_lock); static void netwinder_leds_event(led_event_t evt) { unsigned long flags; - spin_lock_irqsave(&leds_lock, flags); + atomic_spin_lock_irqsave(&leds_lock, flags); switch (evt) { case led_start: @@ -117,12 +117,12 @@ static void netwinder_leds_event(led_event_t evt) break; } - spin_unlock_irqrestore(&leds_lock, flags); + atomic_spin_unlock_irqrestore(&leds_lock, flags); if (led_state & LED_STATE_ENABLED) { - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(GPIO_RED_LED | GPIO_GREEN_LED, hw_led_state); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } } diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index a0f60e5..523b160 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -199,7 +199,7 @@ static struct 
amba_pl010_data integrator_uart_data = { #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET -static DEFINE_SPINLOCK(cm_lock); +static DEFINE_ATOMIC_SPINLOCK(cm_lock); /** * cm_control - update the CM_CTRL register. @@ -211,10 +211,10 @@ void cm_control(u32 mask, u32 set) unsigned long flags; u32 val; - spin_lock_irqsave(&cm_lock, flags); + atomic_spin_lock_irqsave(&cm_lock, flags); val = readl(CM_CTRL) & ~mask; writel(val | set, CM_CTRL); - spin_unlock_irqrestore(&cm_lock, flags); + atomic_spin_unlock_irqrestore(&cm_lock, flags); } EXPORT_SYMBOL(cm_control); diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index f1d72b2..50dcb35 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -162,7 +162,7 @@ * 7:2 register number * */ -static DEFINE_SPINLOCK(v3_lock); +static DEFINE_ATOMIC_SPINLOCK(v3_lock); #define PCI_BUS_NONMEM_START 0x00000000 #define PCI_BUS_NONMEM_SIZE SZ_256M @@ -283,7 +283,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where, unsigned long flags; u32 v; - spin_lock_irqsave(&v3_lock, flags); + atomic_spin_lock_irqsave(&v3_lock, flags); addr = v3_open_config_window(bus, devfn, where); switch (size) { @@ -301,7 +301,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where, } v3_close_config_window(); - spin_unlock_irqrestore(&v3_lock, flags); + atomic_spin_unlock_irqrestore(&v3_lock, flags); *val = v; return PCIBIOS_SUCCESSFUL; @@ -313,7 +313,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where, unsigned long addr; unsigned long flags; - spin_lock_irqsave(&v3_lock, flags); + atomic_spin_lock_irqsave(&v3_lock, flags); addr = v3_open_config_window(bus, devfn, where); switch (size) { @@ -334,7 +334,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where, } v3_close_config_window(); - spin_unlock_irqrestore(&v3_lock, flags); + atomic_spin_unlock_irqrestore(&v3_lock, flags); return PCIBIOS_SUCCESSFUL; } @@ -509,7 +509,7 @@ void __init pci_v3_preinit(void) hook_fault_code(8, v3_pci_fault, SIGBUS, "external abort on non-linefetch"); hook_fault_code(10, v3_pci_fault, SIGBUS, "external abort on non-linefetch"); - spin_lock_irqsave(&v3_lock, flags); + atomic_spin_lock_irqsave(&v3_lock, flags); /* * Unlock V3 registers, but only if they were previously locked. @@ -582,7 +582,7 @@ void __init pci_v3_preinit(void) printk(KERN_ERR "PCI: unable to grab PCI error " "interrupt: %d\n", ret); - spin_unlock_irqrestore(&v3_lock, flags); + atomic_spin_unlock_irqrestore(&v3_lock, flags); } void __init pci_v3_postinit(void) diff --git a/arch/arm/mach-ixp2000/core.c b/arch/arm/mach-ixp2000/core.c index babb225..e24e3d0 100644 --- a/arch/arm/mach-ixp2000/core.c +++ b/arch/arm/mach-ixp2000/core.c @@ -197,7 +197,7 @@ unsigned long ixp2000_gettimeoffset (void) return offset / ticks_per_usec; } -static int ixp2000_timer_interrupt(int irq, void *dev_id) +static irqreturn_t ixp2000_timer_interrupt(int irq, void *dev_id) { /* clear timer 1 */ ixp2000_reg_wrb(IXP2000_T1_CLR, 1); diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 70afcfe..ef50a20 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -54,7 +54,7 @@ unsigned long ixp4xx_pci_reg_base = 0; * these transactions are atomic or we will end up * with corrupt data on the bus or in a driver. 
*/ -static DEFINE_SPINLOCK(ixp4xx_pci_lock); +static DEFINE_ATOMIC_SPINLOCK(ixp4xx_pci_lock); /* * Read from PCI config space @@ -62,10 +62,10 @@ static DEFINE_SPINLOCK(ixp4xx_pci_lock); static void crp_read(u32 ad_cbe, u32 *data) { unsigned long flags; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_CRP_AD_CBE = ad_cbe; *data = *PCI_CRP_RDATA; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); } /* @@ -74,10 +74,10 @@ static void crp_read(u32 ad_cbe, u32 *data) static void crp_write(u32 ad_cbe, u32 data) { unsigned long flags; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_CRP_AD_CBE = CRP_AD_CBE_WRITE | ad_cbe; *PCI_CRP_WDATA = data; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); } static inline int check_master_abort(void) @@ -101,7 +101,7 @@ int ixp4xx_pci_read_errata(u32 addr, u32 cmd, u32* data) int retval = 0; int i; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -118,7 +118,7 @@ int ixp4xx_pci_read_errata(u32 addr, u32 cmd, u32* data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } @@ -127,7 +127,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data) unsigned long flags; int retval = 0; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -140,7 +140,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } @@ -149,7 +149,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data) unsigned long flags; int retval = 0; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -162,7 +162,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } diff --git a/arch/arm/mach-msm/proc_comm.c b/arch/arm/mach-msm/proc_comm.c index 915ee70..e825c36 100644 --- a/arch/arm/mach-msm/proc_comm.c +++ b/arch/arm/mach-msm/proc_comm.c @@ -14,6 +14,7 @@ * */ +#include #include #include #include diff --git a/arch/arm/mach-ns9xxx/irq.c b/arch/arm/mach-ns9xxx/irq.c index feb0e54..6873b7b 100644 --- a/arch/arm/mach-ns9xxx/irq.c +++ b/arch/arm/mach-ns9xxx/irq.c @@ -66,7 +66,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); BUG_ON(desc->status & IRQ_INPROGRESS); @@ -78,7 +78,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) goto out_mask; desc->status |= IRQ_INPROGRESS; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); @@ -87,7 +87,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) * Maybe this function should go to kernel/irq/chip.c? 
*/ note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; if (desc->status & IRQ_DISABLED) @@ -97,7 +97,7 @@ out_mask: /* ack unconditionally to unmask lower prio irqs */ desc->chip->ack(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } #define handle_irq handle_prio_irq #endif diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c index ab5883b..0f0d555 100644 --- a/arch/arm/mach-sa1100/badge4.c +++ b/arch/arm/mach-sa1100/badge4.c @@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, int on) /* detect on->off and off->on transitions */ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { /* was off, now on */ - printk(KERN_INFO "%s: enabling 5V supply rail\n", __func__); GPSR = BADGE4_GPIO_PCMEN5V; } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { /* was on, now off */ - printk(KERN_INFO "%s: disabling 5V supply rail\n", __func__); GPCR = BADGE4_GPIO_PCMEN5V; } local_irq_restore(flags); + + /* detect on->off and off->on transitions */ + if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { + /* was off, now on */ + printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); + } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { + /* was on, now off */ + printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); + } } EXPORT_SYMBOL(badge4_set_5V); diff --git a/arch/arm/mach-shark/leds.c b/arch/arm/mach-shark/leds.c index c9e32de..6ae3314 100644 --- a/arch/arm/mach-shark/leds.c +++ b/arch/arm/mach-shark/leds.c @@ -36,7 +36,7 @@ static char led_state; static short hw_led_state; static short saved_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_ATOMIC_SPINLOCK(leds_lock); short sequoia_read(int addr) { outw(addr,0x24); @@ -52,7 +52,7 @@ static void sequoia_leds_event(led_event_t evt) { unsigned long flags; - spin_lock_irqsave(&leds_lock, flags); + atomic_spin_lock_irqsave(&leds_lock, flags); hw_led_state = sequoia_read(0x09); @@ -144,7 +144,7 @@ static void sequoia_leds_event(led_event_t evt) if (led_state & LED_STATE_ENABLED) sequoia_write(hw_led_state,0x09); - spin_unlock_irqrestore(&leds_lock, flags); + atomic_spin_unlock_irqrestore(&leds_lock, flags); } static int __init leds_init(void) diff --git a/arch/arm/mach-w90x900/mfp-w90p910.c b/arch/arm/mach-w90x900/mfp-w90p910.c index a3520fe..92adadf 100644 --- a/arch/arm/mach-w90x900/mfp-w90p910.c +++ b/arch/arm/mach-w90x900/mfp-w90p910.c @@ -34,7 +34,7 @@ #define GPSELEI0 (0x01 << 26) #define GPSELEI1 (0x01 << 27) -static DECLARE_MUTEX(mfp_sem); +static DEFINE_MUTEX(mfp_sem); void mfp_set_groupf(struct device *dev) { @@ -43,7 +43,7 @@ void mfp_set_groupf(struct device *dev) BUG_ON(!dev); - down(&mfp_sem); + mutex_lock(&mfp_sem); dev_id = dev_name(dev); @@ -56,7 +56,7 @@ void mfp_set_groupf(struct device *dev) __raw_writel(mfpen, REG_MFSEL); - up(&mfp_sem); + mutex_unlock(&mfp_sem); } EXPORT_SYMBOL(mfp_set_groupf); @@ -67,7 +67,7 @@ void mfp_set_groupc(struct device *dev) BUG_ON(!dev); - down(&mfp_sem); + mutex_lock(&mfp_sem); dev_id = dev_name(dev); @@ -86,7 +86,7 @@ void mfp_set_groupc(struct device *dev) __raw_writel(mfpen, REG_MFSEL); - up(&mfp_sem); + mutex_unlock(&mfp_sem); } EXPORT_SYMBOL(mfp_set_groupc); @@ -97,7 +97,7 @@ void mfp_set_groupi(struct device *dev, int gpio) BUG_ON(!dev); - down(&mfp_sem); + mutex_lock(&mfp_sem); dev_id = dev_name(dev); @@ -110,7 +110,7 @@ void mfp_set_groupi(struct device *dev, int gpio) __raw_writel(mfpen, REG_MFSEL); - up(&mfp_sem); + mutex_unlock(&mfp_sem); } 
EXPORT_SYMBOL(mfp_set_groupi); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index b480f1d..ade5628 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -26,19 +26,19 @@ #define CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; -static DEFINE_SPINLOCK(l2x0_lock); +static DEFINE_ATOMIC_SPINLOCK(l2x0_lock); static inline void sync_writel(unsigned long val, unsigned long reg, unsigned long complete_mask) { unsigned long flags; - spin_lock_irqsave(&l2x0_lock, flags); + atomic_spin_lock_irqsave(&l2x0_lock, flags); writel(val, l2x0_base + reg); /* wait for the operation to complete */ while (readl(l2x0_base + reg) & complete_mask) ; - spin_unlock_irqrestore(&l2x0_lock, flags); + atomic_spin_unlock_irqrestore(&l2x0_lock, flags); } static inline void cache_sync(void) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index fc84fcc..072b7e9 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -14,7 +14,7 @@ #include #include -static DEFINE_SPINLOCK(cpu_asid_lock); +static DEFINE_ATOMIC_SPINLOCK(cpu_asid_lock); unsigned int cpu_last_asid = ASID_FIRST_VERSION; /* @@ -32,7 +32,7 @@ void __new_context(struct mm_struct *mm) { unsigned int asid; - spin_lock(&cpu_asid_lock); + atomic_spin_lock(&cpu_asid_lock); asid = ++cpu_last_asid; if (asid == 0) asid = cpu_last_asid = ASID_FIRST_VERSION; @@ -57,7 +57,7 @@ void __new_context(struct mm_struct *mm) dsb(); } } - spin_unlock(&cpu_asid_lock); + atomic_spin_unlock(&cpu_asid_lock); mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id()); mm->context.id = asid; diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index 7370a71..8b77925 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -30,7 +30,7 @@ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_ATOMIC_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_highpage @@ -76,14 +76,14 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from, if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + atomic_spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(0xffff8000), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); flush_tlb_kernel_page(0xffff8000); mc_copy_user_page((void *)0xffff8000, kto); - spin_unlock(&minicache_lock); + atomic_spin_unlock(&minicache_lock); kunmap_atomic(kto, KM_USER1); } diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 4127a7b..7c81541 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -27,7 +27,7 @@ #define from_address (0xffff8000) #define to_address (0xffffc000) -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_ATOMIC_SPINLOCK(v6_lock); /* * Copy the user page. No aliasing to deal with so we can just @@ -88,7 +88,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, * Now copy the page using the same cache colour as the * pages ultimate destination. 
*/ - spin_lock(&v6_lock); + atomic_spin_lock(&v6_lock); set_pte_ext(TOP_PTE(from_address) + offset, pfn_pte(page_to_pfn(from), PAGE_KERNEL), 0); set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(to), PAGE_KERNEL), 0); @@ -101,7 +101,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, copy_page((void *)kto, (void *)kfrom); - spin_unlock(&v6_lock); + atomic_spin_unlock(&v6_lock); } /* @@ -121,13 +121,13 @@ static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vad * Now clear the page using the same cache colour as * the pages ultimate destination. */ - spin_lock(&v6_lock); + atomic_spin_lock(&v6_lock); set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(page), PAGE_KERNEL), 0); flush_tlb_kernel_page(to); clear_page((void *)to); - spin_unlock(&v6_lock); + atomic_spin_unlock(&v6_lock); } struct cpu_user_fns v6_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 76824d3..4320cbe 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -32,7 +32,7 @@ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_ATOMIC_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_highpage @@ -98,14 +98,14 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from, if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + atomic_spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); flush_tlb_kernel_page(COPYPAGE_MINICACHE); mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); - spin_unlock(&minicache_lock); + atomic_spin_unlock(&minicache_lock); kunmap_atomic(kto, KM_USER1); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 510c179..1576176 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -41,7 +41,7 @@ * These are the page tables (2MB each) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_ATOMIC_SPINLOCK(consistent_lock); /* * VM region handling support. 
@@ -97,7 +97,7 @@ arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp) if (!new) goto out; - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); list_for_each_entry(c, &head->vm_list, vm_list) { if ((addr + size) < addr) @@ -118,11 +118,11 @@ arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp) new->vm_end = addr + size; new->vm_active = 1; - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); return new; nospc: - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); kfree(new); out: return NULL; @@ -317,9 +317,9 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); if (c) { unsigned long off = vma->vm_pgoff; @@ -378,13 +378,13 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr size = PAGE_ALIGN(size); - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); if (!c) goto no_area; c->vm_active = 0; - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); if ((c->vm_end - c->vm_start) != size) { printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", @@ -431,15 +431,15 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr flush_tlb_kernel_range(c->vm_start, c->vm_end); - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); list_del(&c->vm_list); - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); kfree(c); return; no_area: - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", __func__, cpu_addr); dump_stack(); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 6fdcbb7..02a07d8 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -258,7 +258,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || !mm || current->pagefault_disabled) goto no_context; /* diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4426ee6..50b51f2 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -29,8 +29,6 @@ #include "mm.h" -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. 
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 3fcd752..695f8e2 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -48,9 +48,9 @@ static int op_arm_setup(void) { int ret; - spin_lock(&oprofilefs_lock); + atomic_spin_lock(&oprofilefs_lock); ret = op_arm_model->setup_ctrs(); - spin_unlock(&oprofilefs_lock); + atomic_spin_unlock(&oprofilefs_lock); return ret; } diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 4ce0f98..55764a0 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -263,10 +263,10 @@ static void em_route_irq(int irq, unsigned int cpu) struct irq_desc *desc = irq_desc + irq; const struct cpumask *mask = cpumask_of(cpu); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); cpumask_copy(desc->affinity, mask); desc->chip->set_affinity(irq, mask); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } static int em_setup(void) diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c index 724ab9c..cbe91ee 100644 --- a/arch/arm/oprofile/op_model_xscale.c +++ b/arch/arm/oprofile/op_model_xscale.c @@ -381,8 +381,9 @@ static int xscale_pmu_start(void) { int ret; u32 pmnc = read_pmnc(); + unsigned long irq_flags = IRQF_DISABLED | IRQF_NODELAY; - ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED, + ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, irq_flags, "XScale PMU", (void *)results); if (ret < 0) { diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index e8c327a..a4447ce 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -108,15 +108,12 @@ EXPORT_SYMBOL(clk_disable); unsigned long clk_get_rate(struct clk *clk) { - unsigned long flags; unsigned long ret = 0; if (clk == NULL || IS_ERR(clk)) return 0; - spin_lock_irqsave(&clockfw_lock, flags); ret = clk->rate; - spin_unlock_irqrestore(&clockfw_lock, flags); return ret; } diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c index 9f57222..778f6ef 100644 --- a/arch/avr32/kernel/irq.c +++ b/arch/avr32/kernel/irq.c @@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -66,7 +66,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index 4b5fd36..d82bcb0 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -46,7 +46,7 @@ void ack_bad_irq(unsigned int irq) static struct irq_desc bad_irq_desc = { .handle_irq = handle_bad_irq, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(irq_desc->lock), }; #ifdef CONFIG_CPUMASK_OFFSTACK @@ -62,7 +62,7 @@ int show_interrupts(struct seq_file *p, void *v) unsigned long flags; if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i 
== NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c index adb54aa..96b676a 100644 --- a/arch/blackfin/kernel/time.c +++ b/arch/blackfin/kernel/time.c @@ -128,7 +128,7 @@ irqreturn_t timer_interrupt(int irq, void *dummy) /* last time the cmos clock got updated */ static long last_rtc_update; - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); /* @@ -148,7 +148,7 @@ irqreturn_t timer_interrupt(int irq, void *dummy) /* Do it again in 60s. */ last_rtc_update = xtime.tv_sec - 600; } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifdef CONFIG_IPIPE update_root_process_times(get_irq_regs()); @@ -192,12 +192,12 @@ void do_gettimeofday(struct timeval *tv) unsigned long usec, sec; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = gettimeoffset(); sec = xtime.tv_sec; usec += (xtime.tv_nsec / NSEC_PER_USEC); } - while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= USEC_PER_SEC) { usec -= USEC_PER_SEC; @@ -217,7 +217,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set the xtime.tv_usec * correctly. However, the value in this location is @@ -235,7 +235,7 @@ int do_settimeofday(struct timespec *tv) ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 7f642fc..45a270e 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -57,7 +57,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 074fe7d..72408ed 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -87,7 +87,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. 
However, the * value in this location is the value at the most recent update of @@ -103,7 +103,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } diff --git a/arch/frv/include/asm/highmem.h b/arch/frv/include/asm/highmem.h index 68e4677..5cb8dff 100644 --- a/arch/frv/include/asm/highmem.h +++ b/arch/frv/include/asm/highmem.h @@ -116,6 +116,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) { unsigned long paddr; + preempt_disable(); pagefault_disable(); debug_kmap_atomic(type); paddr = page_to_phys(page); @@ -173,6 +174,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) BUG(); } pagefault_enable(); + preempt_enable(); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index af3e824..7b29f55 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (action) { seq_printf(p, "%3d: ", i); @@ -85,7 +85,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); } diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index fb0ce75..fced1e3 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -70,7 +70,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) * the irq version of write_lock because as just said we have irq * locally disabled. 
-arca */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); @@ -96,7 +96,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) __set_LEDS(n); #endif /* CONFIG_HEARTBEAT */ - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); update_process_times(user_mode(get_irq_regs())); diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c index 74f8dd7..7dde350 100644 --- a/arch/h8300/kernel/irq.c +++ b/arch/h8300/kernel/irq.c @@ -191,7 +191,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_puts(p, " CPU0"); if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -205,7 +205,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c index 7f2d6cf..cb110a4 100644 --- a/arch/h8300/kernel/time.c +++ b/arch/h8300/kernel/time.c @@ -35,9 +35,9 @@ void h8300_timer_tick(void) { if (current->pid) profile_tick(CPU_PROFILING); - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); update_process_times(user_mode(get_irq_regs())); } diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index fbee74b..55d006d 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -33,7 +33,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; @@ -51,26 +51,47 @@ struct rw_semaphore { LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -static inline void -init_rwsem (struct rw_semaphore *sem) + struct rw_anon_semaphore name = __RWSEM_INITIALIZER(name) + +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); + +static inline void init_anon_rwsem (struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } +struct rw_anon_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +}; + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_semaphore *sem) +{ + init_anon_rwsem((struct rw_anon_semaphore *)sem); +} + /* * lock for reading */ static inline void -__down_read (struct rw_semaphore *sem) +__down_read (struct rw_anon_semaphore *sem) { long result = ia64_fetchadd8_acq((unsigned long 
*)&sem->count, 1); @@ -82,7 +103,7 @@ __down_read (struct rw_semaphore *sem) * lock for writing */ static inline void -__down_write (struct rw_semaphore *sem) +__down_write (struct rw_anon_semaphore *sem) { long old, new; @@ -99,7 +120,7 @@ __down_write (struct rw_semaphore *sem) * unlock after reading */ static inline void -__up_read (struct rw_semaphore *sem) +__up_read (struct rw_anon_semaphore *sem) { long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); @@ -111,7 +132,7 @@ __up_read (struct rw_semaphore *sem) * unlock after writing */ static inline void -__up_write (struct rw_semaphore *sem) +__up_write (struct rw_anon_semaphore *sem) { long old, new; @@ -128,7 +149,7 @@ __up_write (struct rw_semaphore *sem) * trylock for reading -- returns 1 if successful, 0 if contention */ static inline int -__down_read_trylock (struct rw_semaphore *sem) +__down_read_trylock (struct rw_anon_semaphore *sem) { long tmp; while ((tmp = sem->count) >= 0) { @@ -143,7 +164,7 @@ __down_read_trylock (struct rw_semaphore *sem) * trylock for writing -- returns 1 if successful, 0 if contention */ static inline int -__down_write_trylock (struct rw_semaphore *sem) +__down_write_trylock (struct rw_anon_semaphore *sem) { long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); @@ -154,7 +175,7 @@ __down_write_trylock (struct rw_semaphore *sem) * downgrade write lock to read lock */ static inline void -__downgrade_write (struct rw_semaphore *sem) +__downgrade_write (struct rw_anon_semaphore *sem) { long old, new; @@ -174,6 +195,11 @@ __downgrade_write (struct rw_semaphore *sem) #define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) #define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 7d89512..26f5123 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -71,7 +71,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -91,7 +91,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); return 0; diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index dd9d7b5..70763a0 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -345,7 +345,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) desc = irq_desc + irq; cfg = irq_cfg + irq; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -358,7 +358,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&vector_lock, flags); cfg->move_cleanup_count--; unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } return IRQ_HANDLED; } diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index e6676fc..414a22e 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -643,7 +643,7 @@ 
salinfo_init(void) for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { data = salinfo_data + i; data->type = i; - init_MUTEX(&data->mutex); + semaphore_init(&data->mutex); dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); if (!dir) continue; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 4990495..c6e8a37 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -197,10 +197,10 @@ timer_interrupt (int irq, void *dev_id) * another CPU. We need to avoid to SMP race by acquiring the * xtime_lock. */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); local_cpu_data->itm_next = new_itm; - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } else local_cpu_data->itm_next = new_itm; @@ -477,7 +477,7 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c) { unsigned long flags; - write_seqlock_irqsave(&fsyscall_gtod_data.lock, flags); + write_atomic_seqlock_irqsave(&fsyscall_gtod_data.lock, flags); /* copy fsyscall clock data */ fsyscall_gtod_data.clk_mask = c->mask; @@ -500,6 +500,6 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c) fsyscall_gtod_data.monotonic_time.tv_sec++; } - write_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags); + write_atomic_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags); } diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index fb83326..7ec3f56 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -141,10 +141,10 @@ consider_steal_time(unsigned long new_itm) delta_itm += local_cpu_data->itm_delta * (stolen + blocked); if (cpu == time_keeper_id) { - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(stolen + blocked); local_cpu_data->itm_next = delta_itm + new_itm; - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } else { local_cpu_data->itm_next = delta_itm + new_itm; } diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index 8dfd31e..351e82d 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c @@ -40,7 +40,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -59,7 +59,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c index cada3ba..3e00242 100644 --- a/arch/m32r/kernel/time.c +++ b/arch/m32r/kernel/time.c @@ -106,7 +106,7 @@ void do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); usec = do_gettimeoffset(); @@ -120,7 +120,7 @@ void do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); while (usec >= 1000000) { usec -= 1000000; @@ -141,7 +141,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. 
However, the * value in this location is the value at the most recent update of @@ -157,7 +157,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; @@ -202,7 +202,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned)TICK_SIZE) / 2 @@ -213,7 +213,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) else /* do it again in 60 s */ last_rtc_update = xtime.tv_sec - 600; } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); /* As we return to user mode fire off the other CPU schedulers.. this is basically because we don't yet share IRQ's around. This message is rigged to be safe on the 386 - basically it's diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 54d9807..612259c 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -102,7 +102,7 @@ void do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = mach_gettimeoffset(); @@ -116,7 +116,7 @@ void do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += xtime.tv_nsec/1000; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= 1000000) { @@ -138,7 +138,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* This is revolting. We need to set the xtime.tv_nsec * correctly. However, the value in this location is * is value at the last tick. 
@@ -154,7 +154,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f..d3c646d 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -44,11 +44,11 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy) if (current->pid) profile_tick(CPU_PROFILING); - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c index 7d5ddd6..cd7cdf5 100644 --- a/arch/microblaze/kernel/irq.c +++ b/arch/microblaze/kernel/irq.c @@ -68,7 +68,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < nr_irq) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -89,7 +89,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h index 032ca73..ed7899f 100644 --- a/arch/mips/include/asm/i8253.h +++ b/arch/mips/include/asm/i8253.h @@ -12,7 +12,7 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 -extern spinlock_t i8253_lock; +extern atomic_spinlock_t i8253_lock; extern void setup_pit_timer(void); diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index f7d8d5d..4ac943d 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -15,7 +15,7 @@ #include #include -DEFINE_SPINLOCK(i8253_lock); +DEFINE_ATOMIC_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); /* @@ -26,7 +26,7 @@ EXPORT_SYMBOL(i8253_lock); static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); switch(mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -55,7 +55,7 @@ static void init_pit_timer(enum clock_event_mode mode, /* Nothing to do here */ break; } - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); } /* @@ -65,10 +65,10 @@ static void init_pit_timer(enum clock_event_mode mode, */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); outb_p(delta & 0xff , PIT_CH0); /* LSB */ outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); return 0; } @@ -137,7 +137,7 @@ static cycle_t pit_read(struct clocksource *cs) static int old_count; static u32 old_jifs; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); /* * Although our caller may have the read side of xtime_lock, * this is now a seqlock, and we are cheating in this routine @@ -183,7 +183,7 @@ static cycle_t pit_read(struct clocksource *cs) old_count = count; old_jifs = jifs; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); count = (LATCH - 1) - count; diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 7b845ba..50a7451 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -99,7 +99,7 @@ int 
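
The conversions in the hunks above (irq_desc[i].lock in show_interrupts(), the i8253 PIT lock) all apply one mechanical rule of this series: a lock that is taken from hard-interrupt or clockevent context is switched from spinlock_t to atomic_spinlock_t, so it keeps spinning with interrupts off even where ordinary spinlocks are made preemptible. A minimal sketch of the resulting call pattern, using the atomic_spin_* API introduced by this series (the device and function names below are illustrative only, not taken from this patch):

	/* Sketch: a lock shared between a hard-irq handler and process
	 * context must remain a truly spinning lock.
	 */
	static DEFINE_ATOMIC_SPINLOCK(hw_lock);	/* was: DEFINE_SPINLOCK(hw_lock) */

	static irqreturn_t demo_irq_handler(int irq, void *dev_id)
	{
		atomic_spin_lock(&hw_lock);		/* was: spin_lock() */
		/* touch hardware registers ... */
		atomic_spin_unlock(&hw_lock);
		return IRQ_HANDLED;
	}

	static void demo_configure(void)
	{
		unsigned long flags;

		/* process context: irqsave variant, exactly as in the hunks above */
		atomic_spin_lock_irqsave(&hw_lock, flags);
		/* reprogram the device ... */
		atomic_spin_unlock_irqrestore(&hw_lock, flags);
	}
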
show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -118,7 +118,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_putc(p, '\n'); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index f956ecb..87714f7 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -69,7 +69,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || !mm || current->pagefault_disabled) goto bad_area_nosemaphore; down_read(&mm->mmap_sem); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index e274fda..c0c038b 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -45,7 +45,7 @@ void *__kmap_atomic(struct page *page, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -71,6 +71,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -85,6 +86,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -97,6 +99,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); pagefault_disable(); debug_kmap_atomic(type); diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 6d39e22..3da2ed2 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -159,9 +159,9 @@ void vr41xx_enable_piuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MPIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -174,9 +174,9 @@ void vr41xx_disable_piuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MPIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -189,9 +189,9 @@ void vr41xx_enable_aiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MAIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -204,9 +204,9 @@ void vr41xx_disable_aiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MAIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -219,9 +219,9 
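
The MIPS fault and highmem hunks above pair two changes: do_page_fault() now also bails out early when current->pagefault_disabled is set, and the kmap_atomic()/kunmap_atomic() paths gain explicit preempt_disable()/preempt_enable() calls, since pagefault_disable() alone no longer implies disabled preemption. A rough sketch of how a caller sees that pairing (demo_copy() and its arguments are illustrative, not part of this patch):

	/* Sketch: user copies inside an atomic kmap section must not sleep;
	 * if they fault, the fault handler sees pagefault_disabled and goes
	 * straight to the exception-fixup path instead of taking mmap_sem.
	 */
	static int demo_copy(struct page *page, const void __user *usrc, size_t len)
	{
		void *dst = kmap_atomic(page, KM_USER0);	/* preempt_disable() + pagefault_disable() */
		unsigned long left;

		left = __copy_from_user_inatomic(dst, usrc, len);
		kunmap_atomic(dst, KM_USER0);			/* pagefault_enable() + preempt_enable() */

		return left ? -EFAULT : 0;
	}
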
@@ void vr41xx_enable_kiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MKIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -234,9 +234,9 @@ void vr41xx_disable_kiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MKIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -247,9 +247,9 @@ void vr41xx_enable_macint(uint16_t mask) struct irq_desc *desc = irq_desc + ETHERNET_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MMACINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_macint); @@ -259,9 +259,9 @@ void vr41xx_disable_macint(uint16_t mask) struct irq_desc *desc = irq_desc + ETHERNET_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MMACINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_macint); @@ -271,9 +271,9 @@ void vr41xx_enable_dsiuint(uint16_t mask) struct irq_desc *desc = irq_desc + DSIU_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MDSIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_dsiuint); @@ -283,9 +283,9 @@ void vr41xx_disable_dsiuint(uint16_t mask) struct irq_desc *desc = irq_desc + DSIU_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MDSIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_dsiuint); @@ -295,9 +295,9 @@ void vr41xx_enable_firint(uint16_t mask) struct irq_desc *desc = irq_desc + FIR_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_set(MFIRINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_firint); @@ -307,9 +307,9 @@ void vr41xx_disable_firint(uint16_t mask) struct irq_desc *desc = irq_desc + FIR_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_clear(MFIRINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_firint); @@ -322,9 +322,9 @@ void vr41xx_enable_pciint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MPCIINTREG, PCIINT0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -338,9 +338,9 @@ void vr41xx_disable_pciint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() 
== CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MPCIINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -354,9 +354,9 @@ void vr41xx_enable_scuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MSCUINTREG, SCUINT0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -370,9 +370,9 @@ void vr41xx_disable_scuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MSCUINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -386,9 +386,9 @@ void vr41xx_enable_csiint(uint16_t mask) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_set(MCSIINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -402,9 +402,9 @@ void vr41xx_disable_csiint(uint16_t mask) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_clear(MCSIINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -418,9 +418,9 @@ void vr41xx_enable_bcuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MBCUINTREG, BCUINTR); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -434,9 +434,9 @@ void vr41xx_disable_bcuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MBCUINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -486,7 +486,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) pin = SYSINT1_IRQ_TO_PIN(irq); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); intassign0 = icu1_read(INTASSIGN0); intassign1 = icu1_read(INTASSIGN1); @@ -525,7 +525,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) intassign1 |= (uint16_t)assign << 9; break; default: - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return -EINVAL; } @@ -533,7 +533,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) icu1_write(INTASSIGN0, intassign0); icu1_write(INTASSIGN1, intassign1); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return 0; } @@ -546,7 +546,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) pin = SYSINT2_IRQ_TO_PIN(irq); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); intassign2 = 
icu1_read(INTASSIGN2); intassign3 = icu1_read(INTASSIGN3); @@ -593,7 +593,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) intassign3 |= (uint16_t)assign << 12; break; default: - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return -EINVAL; } @@ -601,7 +601,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) icu1_write(INTASSIGN2, intassign2); icu1_write(INTASSIGN3, intassign3); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return 0; } diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c index 4c3c58e..c076cff 100644 --- a/arch/mn10300/kernel/irq.c +++ b/arch/mn10300/kernel/irq.c @@ -215,7 +215,7 @@ int show_interrupts(struct seq_file *p, void *v) /* display information rows, one per active CPU */ case 1 ... NR_IRQS - 1: - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (action) { @@ -235,7 +235,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); break; /* polish off with NMI and error counters */ diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index 395caf0..b25588c 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -99,7 +99,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) { unsigned tsc, elapse; - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); while (tsc = get_cycles(), elapse = mn10300_last_tsc - tsc, /* time elapsed since last @@ -114,7 +114,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) check_rtc_time(); } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); update_process_times(user_mode(get_irq_regs())); diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 330f536..5bbc62b 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -180,7 +180,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i < NR_IRQS) { struct irqaction *action; - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -224,7 +224,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index a79c6f9..908cfde 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -163,9 +163,9 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) } if (cpu == 0) { - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(ticks_elapsed); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } return IRQ_HANDLED; @@ -268,12 +268,12 @@ void __init time_init(void) if (pdc_tod_read(&tod_data) == 0) { unsigned long flags; - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); xtime.tv_sec = tod_data.tod_sec; xtime.tv_nsec = tod_data.tod_usec * 1000; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); } else { printk(KERN_ERR "Error reading tod clock\n"); xtime.tv_sec = 0; diff --git a/arch/powerpc/Kconfig 
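
The xtime_lock changes in the preceding m32r, m68k, m68knommu, mn10300 and parisc hunks are all instances of the same pattern: the timer tick runs in hard-irq context, so the seqlock is converted to the atomic seqlock variant on both the writer (timer_interrupt) and reader (do_gettimeofday) sides. Reduced to its core, the pattern looks like this (sketch only, using the *_atomic_seqlock API added by the series):

	/* Writer side: the timer tick, hard-irq context, interrupts already off. */
	static irqreturn_t demo_timer_interrupt(int irq, void *dev_id)
	{
		write_atomic_seqlock(&xtime_lock);	/* was: write_seqlock() */
		do_timer(1);
		write_atomic_sequnlock(&xtime_lock);
		return IRQ_HANDLED;
	}

	/* Reader side: the usual retry loop, only the lock flavour changes. */
	static void demo_gettime(struct timespec *ts)
	{
		unsigned long seq;

		do {
			seq = read_atomic_seqbegin(&xtime_lock);
			*ts = xtime;
		} while (read_atomic_seqretry(&xtime_lock, seq));
	}
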
b/arch/powerpc/Kconfig index d00131c..0b46b68 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -68,13 +68,6 @@ config LOCKDEP_SUPPORT bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_LOCKBREAK bool default y @@ -252,6 +245,14 @@ config HIGHMEM source kernel/time/Kconfig source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "fs/Kconfig.binfmt" config HUGETLB_PAGE_SIZE_VARIABLE diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index a002682..582e47d 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -289,7 +289,7 @@ struct mpic #ifdef CONFIG_MPIC_U3_HT_IRQS /* The fixup table */ struct mpic_irq_fixup *fixups; - spinlock_t fixup_lock; + atomic_spinlock_t fixup_lock; #endif /* Register access method */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 8cd083c..a1ddb07 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -202,8 +202,15 @@ static inline unsigned long pte_update(struct mm_struct *mm, assert_pte_locked(mm, addr); #ifdef CONFIG_PPC_STD_MMU_64 - if (old & _PAGE_HASHPTE) + if (old & _PAGE_HASHPTE) { +#ifdef CONFIG_PREEMPT_RT + preempt_disable(); +#endif hpte_need_flush(mm, addr, ptep, old, huge); +#ifdef CONFIG_PREEMPT_RT + preempt_enable(); +#endif + } #endif return old; diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index 877c35a..ba11723 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -378,7 +378,7 @@ extern struct macio_chip* macio_find(struct device_node* child, int type); * Those are exported by pmac feature for internal use by arch code * only like the platform function callbacks, do not use directly in drivers */ -extern spinlock_t feature_lock; +extern atomic_spinlock_t feature_lock; extern struct device_node *uninorth_node; extern u32 __iomem *uninorth_base; diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h index 24cd928..c2494d4 100644 --- a/arch/powerpc/include/asm/rwsem.h +++ b/arch/powerpc/include/asm/rwsem.h @@ -21,7 +21,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { /* XXX this should be able to be an atomic_t -- paulus */ signed int count; #define RWSEM_UNLOCKED_VALUE 0x00000000 @@ -38,43 +38,47 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + LIST_HEAD_INIT((name).wait_list) __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct 
rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { if (unlikely(atomic_inc_return((atomic_t *)(&sem->count)) <= 0)) rwsem_down_read_failed(sem); } -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -90,7 +94,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write_nested(struct rw_anon_semaphore *sem, int subclass) { int tmp; @@ -100,12 +104,12 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { __down_write_nested(sem, 0); } -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -117,7 +121,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { int tmp; @@ -129,7 +133,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { if (unlikely(atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, (atomic_t *)(&sem->count)) < 0)) @@ -139,7 +143,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } @@ -147,7 +151,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { int tmp; @@ -159,15 +163,59 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) 
+{ + return (sem->count != 0); +} + +#ifndef CONFIG_PREEMPT_RT + +struct rw_semaphore { + /* XXX this should be able to be an atomic_t -- paulus */ + signed int count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ + } while (0) + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); } +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_RWSEM_H */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index e20ff75..3ddc8f6 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -30,26 +30,38 @@ struct mmu_gather; #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) +#define HAVE_ARCH_MMU_GATHER 1 + +struct pte_freelist_batch; + +struct arch_mmu_gather { + struct pte_freelist_batch *batch; +}; + +#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, } + #if !defined(CONFIG_PPC_STD_MMU) #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) #elif defined(__powerpc64__) -extern void pte_free_finish(void); +extern void pte_free_finish(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { - struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch); /* If there's a TLB batch pending, then we must flush it because the * pages are going to be freed and we really don't want to have a CPU * access a freed page because it has a stale TLB */ - if (tlbbatch->index) + if (tlbbatch->index) { __flush_tlb_pending(tlbbatch); + } - pte_free_finish(); + put_cpu_var(ppc64_tlb_batch); + pte_free_finish(tlb); } #else diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index abbe341..3f67596 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -101,18 +101,25 @@ extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); batch->active = 1; + + put_cpu_var(ppc64_tlb_batch); } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); + + if (batch->active) { + if (batch->index) { + __flush_tlb_pending(batch); + } + batch->active = 0; + } - if (batch->index) - __flush_tlb_pending(batch); - batch->active = 0; + put_cpu_var(ppc64_tlb_batch); } #define arch_flush_lazy_mmu_mode() do {} while (0) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 43e0734..4bb9ce4 100644 
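
The rwsem.h hunks above mirror the ia64 conversion earlier in the series: the xchg-add algorithm is renamed to operate on struct rw_anon_semaphore, and for !CONFIG_PREEMPT_RT a layout-identical struct rw_semaphore plus a casting __init_rwsem() keep the classic API working on top of it; the RT-side replacement for the plain rw_semaphore is not part of these hunks. A sketch of how the two declarations end up being used, exercising only the helpers the header above provides (struct and function names are illustrative):

	struct demo_object {
		struct rw_anon_semaphore anon_sem;	/* classic rwsem behaviour in all configs */
		struct rw_semaphore      sem;		/* with !PREEMPT_RT: the wrapper shown above */
	};

	static void demo_object_init(struct demo_object *d)
	{
		init_anon_rwsem(&d->anon_sem);
		init_rwsem(&d->sem);

		WARN_ON(anon_rwsem_is_locked(&d->anon_sem));
		WARN_ON(rwsem_is_locked(&d->sem));
	}
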
--- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -625,44 +625,52 @@ do_work: bne restore /* here we are preempting the current task */ 1: + /* + * preempt_schedule_irq() expects interrupts disabled and returns + * with interrupts disabled. No need to check preemption again, + * preempt_schedule_irq just did that for us. + */ + bl .preempt_schedule_irq #ifdef CONFIG_TRACE_IRQFLAGS bl .trace_hardirqs_on +#endif /* CONFIG_TRACE_IRQFLAGS */ + /* Note: we just clobbered r10 which used to contain the previous * MSR before the hard-disabling done by the caller of do_work. * We don't have that value anymore, but it doesn't matter as * we will hard-enable unconditionally, we can just reload the * current MSR into r10 */ + bl .preempt_schedule_irq mfmsr r10 -#endif /* CONFIG_TRACE_IRQFLAGS */ - li r0,1 - stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) - ori r10,r10,MSR_EE - mtmsrd r10,1 /* reenable interrupts */ - bl .preempt_schedule - mfmsr r10 - clrrdi r9,r1,THREAD_SHIFT - rldicl r10,r10,48,1 /* disable interrupts again */ - rotldi r10,r10,16 - mtmsrd r10,1 - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED - bne 1b + clrrdi r9,r1,THREAD_SHIFT + rldicl r10,r10,48,1 /* disable interrupts again */ + rotldi r10,r10,16 + mtmsrd r10,1 + ld r4,TI_FLAGS(r9) + andi. r0,r4,(_TIF_NEED_RESCHED) + bne 1b b restore user_work: #endif - /* Enable interrupts */ - ori r10,r10,MSR_EE - mtmsrd r10,1 - andi. r0,r4,_TIF_NEED_RESCHED beq 1f - bl .schedule + + /* preempt_schedule_irq() expects interrupts disabled. */ + bl .preempt_schedule_irq b .ret_from_except_lite -1: bl .save_nvgprs + /* here we are preempting the current task */ +1: li r0,1 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + + /* Enable interrupts */ + ori r10,r10,MSR_EE + mtmsrd r10,1 + + bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .do_signal b .ret_from_except diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 88d9c1d..1a82d48 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -96,9 +96,11 @@ void cpu_idle(void) tick_nohz_restart_sched_tick(); if (cpu_should_die()) cpu_die(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f7f376e..9f56521 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -191,7 +191,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i < NR_IRQS) { desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); action = desc->action; if (!action || !action->handler) goto skip; @@ -212,7 +212,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized){ @@ -453,7 +453,7 @@ void do_softirq(void) */ static LIST_HEAD(irq_hosts); -static DEFINE_SPINLOCK(irq_big_lock); +static DEFINE_ATOMIC_SPINLOCK(irq_big_lock); static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); struct irq_map_entry irq_map[NR_IRQS]; @@ -499,14 +499,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (host->ops->match == NULL) host->ops->match = default_irq_host_match; - spin_lock_irqsave(&irq_big_lock, 
flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); /* If it's a legacy controller, check for duplicates and * mark it as allocated (we use irq 0 host pointer for that */ if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); /* If we are early boot, we can't free the structure, * too bad... * this will be fixed once slab is made available early @@ -520,7 +520,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, } list_add(&host->link, &irq_hosts); - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); /* Additional setups per revmap type */ switch(revmap_type) { @@ -571,13 +571,13 @@ struct irq_host *irq_find_host(struct device_node *node) * the absence of a device node. This isn't a problem so far * yet though... */ - spin_lock_irqsave(&irq_big_lock, flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); list_for_each_entry(h, &irq_hosts, link) if (h->ops->match(h, node)) { found = h; break; } - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } EXPORT_SYMBOL_GPL(irq_find_host); @@ -935,7 +935,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS)) return NO_IRQ; - spin_lock_irqsave(&irq_big_lock, flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); /* Use hint for 1 interrupt if any */ if (count == 1 && hint >= NUM_ISA_INTERRUPTS && @@ -959,7 +959,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, } } if (found == NO_IRQ) { - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); return NO_IRQ; } hint_found: @@ -968,7 +968,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, smp_wmb(); irq_map[i].host = host; } - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } @@ -980,7 +980,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); - spin_lock_irqsave(&irq_big_lock, flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; @@ -993,7 +993,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) smp_wmb(); irq_map[i].host = NULL; } - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); } void irq_early_init(void) @@ -1065,7 +1065,7 @@ static int virq_debug_show(struct seq_file *m, void *private) for (i = 1; i < NR_IRQS; i++) { desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); if (desc->action && desc->action->handler) { seq_printf(m, "%5d ", i); @@ -1084,7 +1084,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%s\n", p); } - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c932978..dcbf960 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -263,7 +263,7 @@ ss_probe: kcb->kprobe_status = KPROBE_HIT_SSDONE; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } else if (ret < 0) { /* @@ -282,7 +282,7 @@ ss_probe: return 1; no_kprobe: - 
preempt_enable_no_resched(); + preempt_enable(); return ret; } @@ -412,7 +412,7 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, msr diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 0516e2d..e38729a 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -37,7 +37,7 @@ static void dummy_perf(struct pt_regs *regs) } -static DEFINE_SPINLOCK(pmc_owner_lock); +static DEFINE_ATOMIC_SPINLOCK(pmc_owner_lock); static void *pmc_owner_caller; /* mostly for debugging */ perf_irq_t perf_irq = dummy_perf; @@ -45,7 +45,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) { int err = 0; - spin_lock(&pmc_owner_lock); + atomic_spin_lock(&pmc_owner_lock); if (pmc_owner_caller) { printk(KERN_WARNING "reserve_pmc_hardware: " @@ -59,21 +59,21 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) perf_irq = new_perf_irq ? new_perf_irq : dummy_perf; out: - spin_unlock(&pmc_owner_lock); + atomic_spin_unlock(&pmc_owner_lock); return err; } EXPORT_SYMBOL_GPL(reserve_pmc_hardware); void release_pmc_hardware(void) { - spin_lock(&pmc_owner_lock); + atomic_spin_lock(&pmc_owner_lock); WARN_ON(! pmc_owner_caller); pmc_owner_caller = NULL; perf_irq = dummy_perf; - spin_unlock(&pmc_owner_lock); + atomic_spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 892a9f2..ef9d506 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -305,6 +305,10 @@ struct task_struct *__switch_to(struct task_struct *prev, struct thread_struct *new_thread, *old_thread; unsigned long flags; struct task_struct *last; +#if defined(CONFIG_PPC64) && defined (CONFIG_PREEMPT_RT) + struct ppc64_tlb_batch *batch; + int hadbatch; +#endif #ifdef CONFIG_SMP /* avoid complexity of lazy save/restore of fpu @@ -396,6 +400,17 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread->accum_tb += (current_tb - start_tb); new_thread->start_tb = current_tb; } + +#ifdef CONFIG_PREEMPT_RT + batch = &__get_cpu_var(ppc64_tlb_batch); + if (batch->active) { + hadbatch = 1; + if (batch->index) { + __flush_tlb_pending(batch); + } + batch->active = 0; + } +#endif /* #ifdef CONFIG_PREEMPT_RT */ #endif local_irq_save(flags); @@ -414,6 +429,13 @@ struct task_struct *__switch_to(struct task_struct *prev, local_irq_restore(flags); +#if defined(CONFIG_PPC64) && defined(CONFIG_PREEMPT_RT) + if (hadbatch) { + batch = &__get_cpu_var(ppc64_tlb_batch); + batch->active = 1; + } +#endif + return last; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d4405b9..de2295b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -81,7 +81,7 @@ struct boot_param_header *initial_boot_params; extern struct device_node *allnodes; /* temporary while merging */ -extern rwlock_t devtree_lock; /* temporary while merging */ +extern atomic_spinlock_t devtree_lock; /* temporary while merging */ /* export that to outside world */ struct device_node *of_chosen; @@ -1275,12 +1275,12 @@ struct device_node *of_find_node_by_phandle(phandle handle) { struct device_node *np; - read_lock(&devtree_lock); + atomic_spin_lock(&devtree_lock); for (np = allnodes; np != 0; np = np->allnext) if (np->linux_phandle == handle) break; of_node_get(np); - read_unlock(&devtree_lock); + atomic_spin_unlock(&devtree_lock); return np; } 
EXPORT_SYMBOL(of_find_node_by_phandle); @@ -1328,13 +1328,13 @@ struct device_node *of_find_all_nodes(struct device_node *prev) { struct device_node *np; - read_lock(&devtree_lock); + atomic_spin_lock(&devtree_lock); np = prev ? prev->allnext : allnodes; for (; np != 0; np = np->allnext) if (of_node_get(np)) break; of_node_put(prev); - read_unlock(&devtree_lock); + atomic_spin_unlock(&devtree_lock); return np; } EXPORT_SYMBOL(of_find_all_nodes); @@ -1419,12 +1419,12 @@ void of_attach_node(struct device_node *np) { unsigned long flags; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); np->sibling = np->parent->child; np->allnext = allnodes; np->parent->child = np; allnodes = np; - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); } /* @@ -1437,7 +1437,7 @@ void of_detach_node(struct device_node *np) struct device_node *parent; unsigned long flags; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); parent = np->parent; if (!parent) @@ -1468,7 +1468,7 @@ void of_detach_node(struct device_node *np) of_node_set_flag(np, OF_DETACHED); out_unlock: - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); } #ifdef CONFIG_PPC_PSERIES @@ -1552,18 +1552,18 @@ int prom_add_property(struct device_node* np, struct property* prop) unsigned long flags; prop->next = NULL; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = &np->properties; while (*next) { if (strcmp(prop->name, (*next)->name) == 0) { /* duplicate ! don't insert it */ - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return -1; } next = &(*next)->next; } *next = prop; - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); #ifdef CONFIG_PROC_DEVICETREE /* try to add to proc as well if it was initialized */ @@ -1586,7 +1586,7 @@ int prom_remove_property(struct device_node *np, struct property *prop) unsigned long flags; int found = 0; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = &np->properties; while (*next) { if (*next == prop) { @@ -1599,7 +1599,7 @@ int prom_remove_property(struct device_node *np, struct property *prop) } next = &(*next)->next; } - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); if (!found) return -ENODEV; @@ -1628,7 +1628,7 @@ int prom_update_property(struct device_node *np, unsigned long flags; int found = 0; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = &np->properties; while (*next) { if (*next == oldprop) { @@ -1642,7 +1642,7 @@ int prom_update_property(struct device_node *np, } next = &(*next)->next; } - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); if (!found) return -ENODEV; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511..15d4291 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1000,7 +1000,7 @@ void __init time_init(void) /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ boot_tb = get_tb_or_rtc(); - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); /* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) { @@ 
-1014,7 +1014,7 @@ void __init time_init(void) vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC; vdso_data->tb_to_xs = tb_to_xs; - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); /* Register the clocksource, if we're not running on iSeries */ if (!firmware_has_feature(FW_FEATURE_ISERIES)) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a..451a756 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -102,11 +102,11 @@ static inline void pmac_backlight_unblank(void) { } int die(const char *str, struct pt_regs *regs, long err) { static struct { - spinlock_t lock; + atomic_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -120,7 +120,7 @@ int die(const char *str, struct pt_regs *regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + atomic_spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); @@ -155,7 +155,7 @@ int die(const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + atomic_spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current) || kexec_sr_activated(smp_processor_id())) @@ -193,6 +193,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) addr, regs->nip, regs->link, code); } +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 79d0fa3..106d6a5 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -86,8 +86,10 @@ void __raw_spin_unlock_wait(raw_spinlock_t *lock) { while (lock->slock) { HMT_low(); + preempt_disable(); if (SHARED_PROCESSOR) __spin_yield(lock); + preempt_enable(); } HMT_medium(); } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 830bef0..03c4343 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -159,7 +159,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, } #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ - if (in_atomic() || mm == NULL) { + if (in_atomic() || mm == NULL || current->pagefault_disabled) { if (!user_mode(regs)) return SIGSEGV; /* in_atomic() in user mode is really bad, diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 056d23a..a99d114 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -37,7 +37,7 @@ #define HPTE_LOCK_BIT 3 -static DEFINE_SPINLOCK(native_tlbie_lock); +static DEFINE_ATOMIC_SPINLOCK(native_tlbie_lock); static inline void __tlbie(unsigned long va, int psize, int ssize) { @@ -104,7 +104,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) - spin_lock(&native_tlbie_lock); + atomic_spin_lock(&native_tlbie_lock); asm volatile("ptesync": : :"memory"); if (use_local) { __tlbiel(va, psize, ssize); @@ -114,7 +114,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie 
&& !use_local) - spin_unlock(&native_tlbie_lock); + atomic_spin_unlock(&native_tlbie_lock); } static inline void native_lock_hpte(struct hash_pte *hptep) @@ -434,7 +434,7 @@ static void native_hpte_clear(void) /* we take the tlbie lock and hold it. Some hardware will * deadlock if we try to tlbie from two processors at once. */ - spin_lock(&native_tlbie_lock); + atomic_spin_lock(&native_tlbie_lock); slots = pteg_count * HPTES_PER_GROUP; @@ -458,7 +458,7 @@ static void native_hpte_clear(void) } asm volatile("eieio; tlbsync; ptesync":::"memory"); - spin_unlock(&native_tlbie_lock); + atomic_spin_unlock(&native_tlbie_lock); local_irq_restore(flags); } @@ -521,7 +521,7 @@ static void native_flush_hash_range(unsigned long number, int local) int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) - spin_lock(&native_tlbie_lock); + atomic_spin_lock(&native_tlbie_lock); asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { @@ -536,7 +536,7 @@ static void native_flush_hash_range(unsigned long number, int local) asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) - spin_unlock(&native_tlbie_lock); + atomic_spin_unlock(&native_tlbie_lock); } local_irq_restore(flags); diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index c2186c7..81310e2 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -35,6 +35,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -73,5 +74,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) local_flush_tlb_page(NULL, vaddr); #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(kunmap_atomic); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 3de6a0d..3ef5084 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -54,8 +54,6 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - phys_addr_t total_memory; phys_addr_t total_lowmem; diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index b1a727d..64fb358 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -46,7 +46,7 @@ static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; static unsigned long *stale_map[NR_CPUS]; static struct mm_struct **context_mm; -static DEFINE_SPINLOCK(context_lock); +static DEFINE_ATOMIC_SPINLOCK(context_lock); #define CTX_MAP_SIZE \ (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) @@ -104,9 +104,9 @@ static unsigned int steal_context_smp(unsigned int id) /* This will happen if you have more CPUs than available contexts, * all we can do here is wait a bit and try again */ - spin_unlock(&context_lock); + atomic_spin_unlock(&context_lock); cpu_relax(); - spin_lock(&context_lock); + atomic_spin_lock(&context_lock); /* This will cause the caller to try again */ return MMU_NO_CONTEXT; @@ -177,7 +177,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) unsigned long *map; /* No lockless fast path .. 
yet */ - spin_lock(&context_lock); + atomic_spin_lock(&context_lock); #ifndef DEBUG_STEAL_ONLY pr_devel("[%d] activating context for mm @%p, active=%d, id=%d\n", @@ -258,7 +258,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* Flick the MMU and release lock */ set_context(id, next->pgd); - spin_unlock(&context_lock); + atomic_spin_unlock(&context_lock); } /* @@ -285,7 +285,7 @@ void destroy_context(struct mm_struct *mm) WARN_ON(mm->context.active != 0); - spin_lock_irqsave(&context_lock, flags); + atomic_spin_lock_irqsave(&context_lock, flags); id = mm->context.id; if (id != MMU_NO_CONTEXT) { __clear_bit(id, context_map); @@ -296,7 +296,7 @@ void destroy_context(struct mm_struct *mm) context_mm[id] = NULL; nr_free_contexts++; } - spin_unlock_irqrestore(&context_lock, flags); + atomic_spin_unlock_irqrestore(&context_lock, flags); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index d8e6725..ecaa316 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -32,7 +32,6 @@ #include "mmu_decl.h" -static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); static unsigned long pte_freelist_forced_free; struct pte_freelist_batch @@ -83,11 +82,11 @@ static void pte_free_submit(struct pte_freelist_batch *batch) void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) { - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + struct pte_freelist_batch **batchp; - if (atomic_read(&tlb->mm->mm_users) < 2 || - cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ + batchp = &tlb->arch.batch; + + if (atomic_read(&tlb->mm->mm_users) < 2) { pgtable_free(pgf); return; } @@ -107,15 +106,16 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) } } -void pte_free_finish(void) +void pte_free_finish(struct mmu_gather *tlb) { - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + struct pte_freelist_batch **batchp; - if (*batchp == NULL) - return; - pte_free_submit(*batchp); - *batchp = NULL; + batchp = &tlb->arch.batch; + + if (*batchp) { + pte_free_submit(*batchp); + *batchp = NULL; + } } /* diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 937eb90..33c458f 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -30,14 +30,10 @@ #include #include #include +#include DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); -/* This is declared as we are using the more or less generic - * arch/powerpc/include/asm/tlb.h file -- tgall - */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - /* * A linux PTE was changed and the corresponding hash table entry * neesd to be flushed. 
This function will either perform the flush @@ -49,7 +45,7 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); void hpte_need_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pte, int huge) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); unsigned long vsid, vaddr; unsigned int psize; int ssize; @@ -100,6 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ if (!batch->active) { flush_hash_page(vaddr, rpte, psize, ssize, 0); + put_cpu_var(ppc64_tlb_batch); return; } @@ -126,8 +123,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, batch->pte[i] = rpte; batch->vaddr[i] = vaddr; batch->index = ++i; + +#ifdef CONFIG_PREEMPT_RT + /* + * Since flushing tlb needs expensive hypervisor call(s) on celleb, + * always flush it on RT to reduce scheduling latency. + */ + if (machine_is(celleb)) { + __flush_tlb_pending(batch); + put_cpu_var(ppc64_tlb_batch); + return; + } +#endif /* CONFIG_PREEMPT_RT */ + if (i >= PPC64_TLB_BATCH_NR) __flush_tlb_pending(batch); + put_cpu_var(ppc64_tlb_batch); } /* diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index ad2eb4d..f627eb6 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(local_flush_tlb_page); */ #ifdef CONFIG_SMP -static DEFINE_SPINLOCK(tlbivax_lock); +static DEFINE_ATOMIC_SPINLOCK(tlbivax_lock); struct tlb_flush_param { unsigned long addr; @@ -158,10 +158,10 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); if (lock) - spin_lock(&tlbivax_lock); + atomic_spin_lock(&tlbivax_lock); _tlbivax_bcast(vmaddr, pid); if (lock) - spin_unlock(&tlbivax_lock); + atomic_spin_unlock(&tlbivax_lock); goto bail; } else { struct tlb_flush_param p = { .pid = pid, .addr = vmaddr }; @@ -189,7 +189,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) _tlbil_pid(0); preempt_enable(); #else + preempt_disable(); _tlbil_pid(0); + preempt_enable(); #endif } EXPORT_SYMBOL(flush_tlb_kernel_range); diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 68e4f16..afffc6d 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -86,9 +86,9 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) u32 status, enable; /* Mask off the cascaded IRQ */ - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->chip->mask(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); /* Ask the FPGA for IRQ status. If 'val' is 0, then no irqs * are pending. 'ffs()' is 1 based */ @@ -104,11 +104,11 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) } /* Processing done; can reenable the cascade now */ - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->chip->ack(virq); if (!(desc->status & IRQ_DISABLED)) desc->chip->unmask(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static int media5200_irq_map(struct irq_host *h, unsigned int virq, diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index 35b1ec4..e1afbdc 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -40,7 +40,7 @@ #define DBG_LOW(fmt...) 
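
The pgtable.c, tlb.h and tlb_hash64.c hunks above share one theme: code can no longer rely on tlb_gather_mmu() or lazy-MMU mode having disabled preemption, so every access to the per-CPU ppc64_tlb_batch is bracketed by get_cpu_var()/put_cpu_var(), and the PTE freelist batch moves off a per-CPU variable onto the mmu_gather itself (tlb->arch.batch). The access pattern, stripped down (sketch; demo_queue_flush() is illustrative and omits the PTE bookkeeping of the real hpte_need_flush()):

	static void demo_queue_flush(unsigned long vaddr)
	{
		/* pin the per-CPU batch explicitly instead of assuming
		 * the caller already disabled preemption */
		struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);

		if (!batch->active) {
			/* no batching in progress: flush immediately */
			put_cpu_var(ppc64_tlb_batch);
			return;
		}

		batch->vaddr[batch->index++] = vaddr;
		if (batch->index >= PPC64_TLB_BATCH_NR)
			__flush_tlb_pending(batch);

		put_cpu_var(ppc64_tlb_batch);
	}
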
do { } while (0) #endif -static DEFINE_SPINLOCK(beat_htab_lock); +static DEFINE_ATOMIC_SPINLOCK(beat_htab_lock); static inline unsigned int beat_read_mask(unsigned hpte_group) { @@ -114,18 +114,18 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group, if (rflags & _PAGE_NO_CACHE) hpte_r &= ~_PAGE_COHERENT; - spin_lock(&beat_htab_lock); + atomic_spin_lock(&beat_htab_lock); lpar_rc = beat_read_mask(hpte_group); if (lpar_rc == 0) { if (!(vflags & HPTE_V_BOLTED)) DBG_LOW(" full\n"); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); return -1; } lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48, hpte_v, hpte_r, &slot); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); /* * Since we try and ioremap PHBs we don't own, the pte insert @@ -198,17 +198,17 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ", want_v & HPTE_V_AVPN, slot, psize, newpp); - spin_lock(&beat_htab_lock); + atomic_spin_lock(&beat_htab_lock); dummy0 = beat_lpar_hpte_getword0(slot); if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) { DBG_LOW("not found !\n"); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); return -1; } lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0, &dummy1); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); if (lpar_rc != 0 || dummy0 == 0) { DBG_LOW("not found !\n"); return -1; @@ -262,13 +262,13 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); va = (vsid << 28) | (ea & 0x0fffffff); - spin_lock(&beat_htab_lock); + atomic_spin_lock(&beat_htab_lock); slot = beat_lpar_hpte_find(va, psize); BUG_ON(slot == -1); lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0, &dummy1); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); BUG_ON(lpar_rc != 0); } @@ -285,18 +285,18 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va, slot, va, psize, local); want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); - spin_lock_irqsave(&beat_htab_lock, flags); + atomic_spin_lock_irqsave(&beat_htab_lock, flags); dummy1 = beat_lpar_hpte_getword0(slot); if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) { DBG_LOW("not found !\n"); - spin_unlock_irqrestore(&beat_htab_lock, flags); + atomic_spin_unlock_irqrestore(&beat_htab_lock, flags); return; } lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0, &dummy1, &dummy2); - spin_unlock_irqrestore(&beat_htab_lock, flags); + atomic_spin_unlock_irqrestore(&beat_htab_lock, flags); BUG_ON(lpar_rc != 0); } diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 7225484..e99c998 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -30,7 +30,7 @@ #include "beat_wrapper.h" #define MAX_IRQS NR_IRQS -static DEFINE_SPINLOCK(beatic_irq_mask_lock); +static DEFINE_ATOMIC_SPINLOCK(beatic_irq_mask_lock); static uint64_t beatic_irq_mask_enable[(MAX_IRQS+255)/64]; static uint64_t beatic_irq_mask_ack[(MAX_IRQS+255)/64]; @@ -65,30 +65,30 @@ static void beatic_mask_irq(unsigned int irq_plug) { unsigned long flags; - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_enable[irq_plug/64] &= ~(1UL << (63 - (irq_plug%64))); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + 
atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static void beatic_unmask_irq(unsigned int irq_plug) { unsigned long flags; - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_enable[irq_plug/64] |= 1UL << (63 - (irq_plug%64)); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static void beatic_ack_irq(unsigned int irq_plug) { unsigned long flags; - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_ack[irq_plug/64] &= ~(1UL << (63 - (irq_plug%64))); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static void beatic_end_irq(unsigned int irq_plug) @@ -103,10 +103,10 @@ static void beatic_end_irq(unsigned int irq_plug) printk(KERN_ERR "IRQ over-downcounted, plug %d\n", irq_plug); } - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_ack[irq_plug/64] |= 1UL << (63 - (irq_plug%64)); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static struct irq_chip beatic_pic = { diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 882e470..0d3022e 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -237,7 +237,7 @@ extern int noirqdebug; static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) { - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); @@ -265,18 +265,18 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) goto out_eoi; desc->status &= ~IRQ_PENDING; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); desc->status &= ~IRQ_INPROGRESS; out_eoi: desc->chip->eoi(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static int iic_host_map(struct irq_host *h, unsigned int virq, diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c index 054dfe5..8f1d8cd 100644 --- a/arch/powerpc/platforms/chrp/time.c +++ b/arch/powerpc/platforms/chrp/time.c @@ -83,7 +83,12 @@ int chrp_set_rtc_time(struct rtc_time *tmarg) unsigned char save_control, save_freq_select; struct rtc_time tm = *tmarg; +#ifdef CONFIG_PREEMPT_RT + if (!spin_trylock(&rtc_lock)) + return -1; +#else spin_lock(&rtc_lock); +#endif save_control = chrp_cmos_clock_read(RTC_CONTROL); /* tell the clock it's being set */ diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index 94f4447..af7cce0 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -217,9 +217,9 @@ void __init iSeries_activate_IRQs() struct irq_desc *desc = get_irq_desc(irq); if (desc && desc->chip && desc->chip->startup) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->chip->startup(irq); - spin_unlock_irqrestore(&desc->lock, flags); +
atomic_spin_unlock_irqrestore(&desc->lock, flags); } } } diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index e6c0040..276c335 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -59,10 +59,10 @@ extern struct device_node *k2_skiplist[2]; * We use a single global lock to protect accesses. Each driver has * to take care of its own locking */ -DEFINE_SPINLOCK(feature_lock); +DEFINE_ATOMIC_SPINLOCK(feature_lock); -#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); -#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); +#define LOCK(flags) atomic_spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) atomic_spin_unlock_irqrestore(&feature_lock, flags); /* diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index c6f0f9e..7d57748 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -80,7 +80,7 @@ static int is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; // XXX Turn that into a sem -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_ATOMIC_SPINLOCK(nv_lock); static int (*core99_write_bank)(int bank, u8* datas); static int (*core99_erase_bank)(int bank); @@ -165,10 +165,10 @@ static unsigned char indirect_nvram_read_byte(int addr) unsigned char val; unsigned long flags; - spin_lock_irqsave(&nv_lock, flags); + atomic_spin_lock_irqsave(&nv_lock, flags); out_8(nvram_addr, addr >> 5); val = in_8(&nvram_data[(addr & 0x1f) << 4]); - spin_unlock_irqrestore(&nv_lock, flags); + atomic_spin_unlock_irqrestore(&nv_lock, flags); return val; } @@ -177,10 +177,10 @@ static void indirect_nvram_write_byte(int addr, unsigned char val) { unsigned long flags; - spin_lock_irqsave(&nv_lock, flags); + atomic_spin_lock_irqsave(&nv_lock, flags); out_8(nvram_addr, addr >> 5); out_8(&nvram_data[(addr & 0x1f) << 4], val); - spin_unlock_irqrestore(&nv_lock, flags); + atomic_spin_unlock_irqrestore(&nv_lock, flags); } @@ -481,7 +481,7 @@ static void core99_nvram_sync(void) if (!is_core_99 || !nvram_data || !nvram_image) return; - spin_lock_irqsave(&nv_lock, flags); + atomic_spin_lock_irqsave(&nv_lock, flags); if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE, NVRAM_SIZE)) goto bail; @@ -503,7 +503,7 @@ static void core99_nvram_sync(void) if (core99_write_bank(core99_bank, nvram_image)) printk("nvram: Error writing bank %d\n", core99_bank); bail: - spin_unlock_irqrestore(&nv_lock, flags); + atomic_spin_unlock_irqrestore(&nv_lock, flags); #ifdef DEBUG mdelay(2000); diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index db20de5..3ab56ec 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -50,13 +50,13 @@ static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask) value = ~value; /* Toggle the GPIO */ - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); tmp = readb(addr); tmp = (tmp & ~mask) | (value & mask); DBG("Do write 0x%02x to GPIO %s (%p)\n", tmp, func->node->full_name, addr); writeb(tmp, addr); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -145,9 +145,9 @@ static int macio_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) struct macio_chip *macio = func->driver_data; unsigned long flags; - 
spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); MACIO_OUT32(offset, (MACIO_IN32(offset) & ~mask) | (value & mask)); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -168,9 +168,9 @@ static int macio_do_write_reg8(PMF_STD_ARGS, u32 offset, u8 value, u8 mask) struct macio_chip *macio = func->driver_data; unsigned long flags; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); MACIO_OUT8(offset, (MACIO_IN8(offset) & ~mask) | (value & mask)); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -223,12 +223,12 @@ static int macio_do_write_reg32_slm(PMF_STD_ARGS, u32 offset, u32 shift, if (args == NULL || args->count == 0) return -EINVAL; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); tmp = MACIO_IN32(offset); val = args->u[0].v << shift; tmp = (tmp & ~mask) | (val & mask); MACIO_OUT32(offset, tmp); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -243,12 +243,12 @@ static int macio_do_write_reg8_slm(PMF_STD_ARGS, u32 offset, u32 shift, if (args == NULL || args->count == 0) return -EINVAL; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); tmp = MACIO_IN8(offset); val = args->u[0].v << shift; tmp = (tmp & ~mask) | (val & mask); MACIO_OUT8(offset, tmp); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -278,12 +278,12 @@ static int unin_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) { unsigned long flags; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); /* This is fairly bogus in darwin, but it should work for our needs * implemeted that way: */ UN_OUT(offset, (UN_IN(offset) & ~mask) | (value & mask)); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index d212006..9814414 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -57,7 +57,7 @@ static int max_irqs; static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_ATOMIC_SPINLOCK(pmac_pic_lock); #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; @@ -85,7 +85,7 @@ static void pmac_mask_and_ack_irq(unsigned int virq) int i = src >> 5; unsigned long flags; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __clear_bit(src, ppc_cached_irq_mask); if (__test_and_clear_bit(src, ppc_lost_interrupts)) atomic_dec(&ppc_n_lost_interrupts); @@ -97,7 +97,7 @@ static void pmac_mask_and_ack_irq(unsigned int virq) mb(); } while((in_le32(&pmac_irq_hw[i]->enable) & bit) != (ppc_cached_irq_mask[i] & bit)); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static void pmac_ack_irq(unsigned int virq) @@ -107,12 +107,12 @@ static void pmac_ack_irq(unsigned int virq) int i = src >> 5; unsigned long flags; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); if (__test_and_clear_bit(src, ppc_lost_interrupts)) 
atomic_dec(&ppc_n_lost_interrupts); out_le32(&pmac_irq_hw[i]->ack, bit); (void)in_le32(&pmac_irq_hw[i]->ack); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) @@ -152,12 +152,12 @@ static unsigned int pmac_startup_irq(unsigned int virq) unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); if ((irq_desc[virq].status & IRQ_LEVEL) == 0) out_le32(&pmac_irq_hw[i]->ack, bit); __set_bit(src, ppc_cached_irq_mask); __pmac_set_irq_mask(src, 0); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); return 0; } @@ -167,10 +167,10 @@ static void pmac_mask_irq(unsigned int virq) unsigned long flags; unsigned int src = irq_map[virq].hwirq; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __clear_bit(src, ppc_cached_irq_mask); __pmac_set_irq_mask(src, 1); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static void pmac_unmask_irq(unsigned int virq) @@ -178,19 +178,19 @@ static void pmac_unmask_irq(unsigned int virq) unsigned long flags; unsigned int src = irq_map[virq].hwirq; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __set_bit(src, ppc_cached_irq_mask); __pmac_set_irq_mask(src, 0); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static int pmac_retrigger(unsigned int virq) { unsigned long flags; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __pmac_retrigger(irq_map[virq].hwirq); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); return 1; } @@ -210,7 +210,7 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id) int irq, bits; int rc = IRQ_NONE; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { int i = irq >> 5; bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; @@ -220,12 +220,12 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id) if (bits == 0) continue; irq += __ilog2(bits); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); generic_handle_irq(irq); - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); rc = IRQ_HANDLED; } - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); return rc; } @@ -244,7 +244,7 @@ static unsigned int pmac_pic_get_irq(void) return NO_IRQ_IGNORE; /* ignore, already handled */ } #endif /* CONFIG_SMP */ - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_real_irqs; (irq -= 32) >= 0; ) { int i = irq >> 5; bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; @@ -256,7 +256,7 @@ static unsigned int pmac_pic_get_irq(void) irq += __ilog2(bits); break; } - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); if (unlikely(irq < 0)) return NO_IRQ; return irq_linear_revmap(pmac_pic_host, irq); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 989d646..2db0689 100644 --- 
a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -100,7 +100,7 @@ int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); /* Lock to avoid races due to multiple reports of an error */ -static DEFINE_SPINLOCK(confirm_error_lock); +static DEFINE_ATOMIC_SPINLOCK(confirm_error_lock); /* Buffer for reporting slot-error-detail rtas calls. Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -436,7 +436,7 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) void eeh_clear_slot (struct device_node *dn, int mode_flag) { unsigned long flags; - spin_lock_irqsave(&confirm_error_lock, flags); + atomic_spin_lock_irqsave(&confirm_error_lock, flags); dn = find_device_pe (dn); @@ -447,7 +447,7 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) PCI_DN(dn)->eeh_mode &= ~mode_flag; PCI_DN(dn)->eeh_check_count = 0; __eeh_clear_slot(dn, mode_flag); - spin_unlock_irqrestore(&confirm_error_lock, flags); + atomic_spin_unlock_irqrestore(&confirm_error_lock, flags); } /** @@ -506,7 +506,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * in one slot might report errors simultaneously, and we * only want one error recovery routine running. */ - spin_lock_irqsave(&confirm_error_lock, flags); + atomic_spin_lock_irqsave(&confirm_error_lock, flags); rc = 1; if (pdn->eeh_mode & EEH_MODE_ISOLATED) { pdn->eeh_check_count ++; @@ -575,7 +575,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * with other functions on this device, and functions under * bridges. */ eeh_mark_slot (dn, EEH_MODE_ISOLATED); - spin_unlock_irqrestore(&confirm_error_lock, flags); + atomic_spin_unlock_irqrestore(&confirm_error_lock, flags); eeh_send_failure_event (dn, dev); @@ -586,7 +586,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) return 1; dn_unlock: - spin_unlock_irqrestore(&confirm_error_lock, flags); + atomic_spin_unlock_irqrestore(&confirm_error_lock, flags); return rc; } @@ -1056,7 +1056,7 @@ void __init eeh_init(void) struct device_node *phb, *np; struct eeh_early_enable_info info; - spin_lock_init(&confirm_error_lock); + atomic_spin_lock_init(&confirm_error_lock); spin_lock_init(&slot_errbuf_lock); np = of_find_node_by_path("/rtas"); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 0e8db67..82d4513 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -70,12 +70,12 @@ static int irq_in_use(unsigned int irq) { int rc = 0; unsigned long flags; - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); if (desc->action) rc = 1; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return rc; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 661c8e0..bcdce0a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -140,7 +140,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static DEFINE_PER_CPU(u64 *, tce_page) = NULL; +static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL; static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, @@ -154,13 +154,14 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long 
tcenum, long l, limit; long tcenum_start = tcenum, npages_start = npages; int ret = 0; + int cpu; if (npages == 1) { return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - tcep = __get_cpu_var(tce_page); + tcep = get_cpu_var_locked(tce_page, &cpu); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -169,10 +170,11 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (u64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { + put_cpu_var_locked(tce_page, cpu); return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + per_cpu_var_locked(tce_page, cpu) = tcep; } rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; @@ -216,6 +218,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, printk("\ttce[0] val = 0x%llx\n", tcep[0]); show_stack(current, (unsigned long *)__get_SP()); } + put_cpu_var_locked(tce_page, cpu); return ret; } diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index b3cbac8..493d8de 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -208,7 +208,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) break; case ERR_TYPE_KERNEL_PANIC: default: - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -228,7 +228,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) /* Check to see if we need to or have stopped logging */ if (fatal || !logging_enabled) { logging_enabled = 0; - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -251,13 +251,13 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) else rtas_log_start += 1; - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); wake_up_interruptible(&rtas_log_wait); break; case ERR_TYPE_KERNEL_PANIC: default: - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 419f8a6..d4fbbdc 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -851,7 +851,7 @@ void xics_migrate_irqs_away(void) || desc->chip->set_affinity == NULL) continue; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { @@ -875,7 +875,7 @@ void xics_migrate_irqs_away(void) cpumask_setall(irq_desc[virq].affinity); desc->chip->set_affinity(virq, cpu_all_mask); unlock: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } #endif diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index da38a1f..d5702c4 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -173,7 +173,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 intr_index; u32 have_shift = 0; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if 
((msi_data->feature & FSL_PIC_IP_MASK) == FSL_PIC_IP_IPIC) { if (desc->chip->mask_ack) desc->chip->mask_ack(irq); @@ -225,7 +225,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) break; } unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static int __devinit fsl_of_msi_probe(struct of_device *dev, diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index a96584a..60c4e42 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -23,7 +23,7 @@ static unsigned char cached_8259[2] = { 0xff, 0xff }; #define cached_A1 (cached_8259[0]) #define cached_21 (cached_8259[1]) -static DEFINE_SPINLOCK(i8259_lock); +static DEFINE_ATOMIC_SPINLOCK(i8259_lock); static struct irq_host *i8259_host; @@ -42,7 +42,7 @@ unsigned int i8259_irq(void) if (pci_intack) irq = readb(pci_intack); else { - spin_lock(&i8259_lock); + atomic_spin_lock(&i8259_lock); lock = 1; /* Perform an interrupt acknowledge cycle on controller 1. */ @@ -74,7 +74,7 @@ unsigned int i8259_irq(void) irq = NO_IRQ; if (lock) - spin_unlock(&i8259_lock); + atomic_spin_unlock(&i8259_lock); return irq; } @@ -82,7 +82,7 @@ static void i8259_mask_and_ack_irq(unsigned int irq_nr) { unsigned long flags; - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); if (irq_nr > 7) { cached_A1 |= 1 << (irq_nr-8); inb(0xA1); /* DUMMY */ @@ -95,7 +95,7 @@ static void i8259_mask_and_ack_irq(unsigned int irq_nr) outb(cached_21, 0x21); outb(0x20, 0x20); /* Non-specific EOI */ } - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); } static void i8259_set_irq_mask(int irq_nr) @@ -110,13 +110,13 @@ static void i8259_mask_irq(unsigned int irq_nr) pr_debug("i8259_mask_irq(%d)\n", irq_nr); - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); if (irq_nr < 8) cached_21 |= 1 << irq_nr; else cached_A1 |= 1 << (irq_nr-8); i8259_set_irq_mask(irq_nr); - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); } static void i8259_unmask_irq(unsigned int irq_nr) @@ -125,13 +125,13 @@ static void i8259_unmask_irq(unsigned int irq_nr) pr_debug("i8259_unmask_irq(%d)\n", irq_nr); - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); if (irq_nr < 8) cached_21 &= ~(1 << irq_nr); else cached_A1 &= ~(1 << (irq_nr-8)); i8259_set_irq_mask(irq_nr); - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); } static struct irq_chip i8259_pic = { @@ -241,7 +241,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr) unsigned long flags; /* initialize the controller */ - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); /* Mask all first */ outb(0xff, 0xA1); @@ -273,7 +273,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr) outb(cached_A1, 0xA1); outb(cached_21, 0x21); - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); /* create a legacy host */ i8259_host = irq_alloc_host(node, IRQ_HOST_MAP_LEGACY, diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 69e2630..7c84d27 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -32,7 +32,7 @@ static struct ipic * primary_ipic; static struct irq_chip ipic_level_irq_chip, ipic_edge_irq_chip; -static DEFINE_SPINLOCK(ipic_lock); +static DEFINE_ATOMIC_SPINLOCK(ipic_lock); static 
struct ipic_info ipic_info[] = { [1] = { @@ -530,13 +530,13 @@ static void ipic_unmask_irq(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = ipic_read(ipic->regs, ipic_info[src].mask); temp |= (1 << (31 - ipic_info[src].bit)); ipic_write(ipic->regs, ipic_info[src].mask, temp); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static void ipic_mask_irq(unsigned int virq) @@ -546,7 +546,7 @@ static void ipic_mask_irq(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = ipic_read(ipic->regs, ipic_info[src].mask); temp &= ~(1 << (31 - ipic_info[src].bit)); @@ -556,7 +556,7 @@ static void ipic_mask_irq(unsigned int virq) * for nearly all cases. */ mb(); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static void ipic_ack_irq(unsigned int virq) @@ -566,7 +566,7 @@ static void ipic_ack_irq(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = 1 << (31 - ipic_info[src].bit); ipic_write(ipic->regs, ipic_info[src].ack, temp); @@ -575,7 +575,7 @@ static void ipic_ack_irq(unsigned int virq) * for nearly all cases. */ mb(); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static void ipic_mask_irq_and_ack(unsigned int virq) @@ -585,7 +585,7 @@ static void ipic_mask_irq_and_ack(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = ipic_read(ipic->regs, ipic_info[src].mask); temp &= ~(1 << (31 - ipic_info[src].bit)); @@ -598,7 +598,7 @@ static void ipic_mask_irq_and_ack(unsigned int virq) * for nearly all cases. 
*/ mb(); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static int ipic_set_irq_type(unsigned int virq, unsigned int flow_type) diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3981ae4..b0f66f9 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -46,7 +46,7 @@ static struct mpic *mpics; static struct mpic *mpic_primary; -static DEFINE_SPINLOCK(mpic_lock); +static DEFINE_ATOMIC_SPINLOCK(mpic_lock); #ifdef CONFIG_PPC32 /* XXX for now */ #ifdef CONFIG_IRQ_ALL_CPUS @@ -344,10 +344,10 @@ static inline void mpic_ht_end_irq(struct mpic *mpic, unsigned int source) unsigned int mask = 1U << (fixup->index & 0x1f); writel(mask, fixup->applebase + soff); } else { - spin_lock(&mpic->fixup_lock); + atomic_spin_lock(&mpic->fixup_lock); writeb(0x11 + 2 * fixup->index, fixup->base + 2); writel(fixup->data, fixup->base + 4); - spin_unlock(&mpic->fixup_lock); + atomic_spin_unlock(&mpic->fixup_lock); } } @@ -363,7 +363,7 @@ static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source, DBG("startup_ht_interrupt(0x%x, 0x%x) index: %d\n", source, irqflags, fixup->index); - spin_lock_irqsave(&mpic->fixup_lock, flags); + atomic_spin_lock_irqsave(&mpic->fixup_lock, flags); /* Enable and configure */ writeb(0x10 + 2 * fixup->index, fixup->base + 2); tmp = readl(fixup->base + 4); @@ -371,7 +371,7 @@ static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source, if (irqflags & IRQ_LEVEL) tmp |= 0x22; writel(tmp, fixup->base + 4); - spin_unlock_irqrestore(&mpic->fixup_lock, flags); + atomic_spin_unlock_irqrestore(&mpic->fixup_lock, flags); #ifdef CONFIG_PM /* use the lowest bit inverted to the actual HW, @@ -393,12 +393,12 @@ static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source, DBG("shutdown_ht_interrupt(0x%x, 0x%x)\n", source, irqflags); /* Disable */ - spin_lock_irqsave(&mpic->fixup_lock, flags); + atomic_spin_lock_irqsave(&mpic->fixup_lock, flags); writeb(0x10 + 2 * fixup->index, fixup->base + 2); tmp = readl(fixup->base + 4); tmp |= 1; writel(tmp, fixup->base + 4); - spin_unlock_irqrestore(&mpic->fixup_lock, flags); + atomic_spin_unlock_irqrestore(&mpic->fixup_lock, flags); #ifdef CONFIG_PM /* use the lowest bit inverted to the actual HW, @@ -512,7 +512,7 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) BUG_ON(mpic->fixups == NULL); /* Init spinlock */ - spin_lock_init(&mpic->fixup_lock); + atomic_spin_lock_init(&mpic->fixup_lock); /* Map U3 config space. We assume all IO-APICs are on the primary bus * so we only need to map 64kB. @@ -572,12 +572,12 @@ static int irq_choose_cpu(unsigned int virt_irq) cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; - static DEFINE_SPINLOCK(irq_rover_lock); + static DEFINE_ATOMIC_SPINLOCK(irq_rover_lock); unsigned long flags; /* Round-robin distribution... 
*/ do_round_robin: - spin_lock_irqsave(&irq_rover_lock, flags); + atomic_spin_lock_irqsave(&irq_rover_lock, flags); while (!cpu_online(irq_rover)) { if (++irq_rover >= NR_CPUS) @@ -589,7 +589,7 @@ static int irq_choose_cpu(unsigned int virt_irq) irq_rover = 0; } while (!cpu_online(irq_rover)); - spin_unlock_irqrestore(&irq_rover_lock, flags); + atomic_spin_unlock_irqrestore(&irq_rover_lock, flags); } else { cpumask_t tmp; @@ -1372,14 +1372,14 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable) unsigned long flags; u32 v; - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1); if (enable) v |= MPIC_GREG_GLOBAL_CONF_1_SIE; else v &= ~MPIC_GREG_GLOBAL_CONF_1_SIE; mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v); - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); } void mpic_irq_set_priority(unsigned int irq, unsigned int pri) @@ -1392,7 +1392,7 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) if (!mpic) return; - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); if (mpic_is_ipi(mpic, irq)) { reg = mpic_ipi_read(src - mpic->ipi_vecs[0]) & ~MPIC_VECPRI_PRIORITY_MASK; @@ -1404,7 +1404,7 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); } void mpic_setup_this_cpu(void) @@ -1419,7 +1419,7 @@ void mpic_setup_this_cpu(void) DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); /* let the mpic know we want intrs. default affinity is 0xffffffff * until changed via /proc. That's how it's done on x86. If we want @@ -1435,7 +1435,7 @@ void mpic_setup_this_cpu(void) /* Set current processor priority to 0 */ mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0); - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); #endif /* CONFIG_SMP */ } @@ -1464,7 +1464,7 @@ void mpic_teardown_this_cpu(int secondary) BUG_ON(mpic == NULL); DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); /* let the mpic know we don't want intrs. 
*/ for (i = 0; i < mpic->num_sources ; i++) @@ -1478,7 +1478,7 @@ void mpic_teardown_this_cpu(int secondary) */ mpic_eoi(mpic); - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); } diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 466ce9a..2492a4a 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -225,12 +225,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) int src; int subvirq; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (desc->status & IRQ_LEVEL) desc->chip->mask(virq); else desc->chip->mask_ack(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); msr = mfdcr(uic->dcrbase + UIC_MSR); if (!msr) /* spurious interrupt */ @@ -242,12 +242,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) generic_handle_irq(subvirq); uic_irq_ret: - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (desc->status & IRQ_LEVEL) desc->chip->ack(virq); if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static struct uic * __init uic_init_one(struct device_node *node) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e1f33a8..db7cf0d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -348,6 +348,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) unsigned long timeout; #endif + preempt_disable(); local_irq_save(flags); bp = in_breakpoint_table(regs->nip, &offset); @@ -524,6 +525,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) insert_cpu_bpts(); local_irq_restore(flags); + preempt_enable(); return cmd != 'X' && cmd != EOF; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2ae5d72..7238ef4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -84,7 +84,7 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FTRACE_SYSCALLS + select HAVE_SYSCALL_TRACEPOINTS select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_GRAPH_TRACER select HAVE_DEFAULT_NO_SPIN_MUTEXES diff --git a/arch/s390/defconfig b/arch/s390/defconfig index fcba206..4e91a25 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -900,7 +900,7 @@ CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_TRACING_SUPPORT=y CONFIG_FTRACE=y # CONFIG_FUNCTION_TRACER is not set diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 9d2a179..e70d6dd 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -48,16 +48,21 @@ struct rwsem_waiter; -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_wake(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *); +extern 
struct rw_anon_semaphore * +rwsem_downgrade_write(struct rw_anon_semaphore *); /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; @@ -85,40 +90,40 @@ struct rw_semaphore { */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + LIST_HEAD_INIT((name).wait_list) __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -static inline void init_rwsem(struct rw_semaphore *sem) +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { signed long old, new; @@ -146,7 +151,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { signed long old, new; @@ -177,7 +182,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void +__down_write_nested(struct rw_anon_semaphore *sem, int subclass) { signed long old, new, tmp; @@ -203,7 +209,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { __down_write_nested(sem, 0); } @@ -211,7 +217,7 @@ static inline void __down_write(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { signed long old; @@ -239,7 +245,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { signed long old, new; @@ -269,7 +275,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { signed long old, new, tmp; 
@@ -299,7 +305,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { signed long old, new, tmp; @@ -328,7 +334,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(long delta, struct rw_anon_semaphore *sem) { signed long old, new; @@ -354,7 +360,8 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) +static inline long +rwsem_atomic_update(long delta, struct rw_anon_semaphore *sem) { signed long old, new; @@ -378,10 +385,52 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) return new; } -static inline int rwsem_is_locked(struct rw_semaphore *sem) +static inline int rwsem_is_locked(struct rw_anon_semaphore *sem) { return (sem->count != 0); } +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_anon_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + #endif /* __KERNEL__ */ #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index ba1cab9..07eb61b 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -92,7 +92,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ -#define TIF_SYSCALL_FTRACE 11 /* ftrace syscall instrumentation */ +#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -111,7 +111,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACE (1<>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8) + _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index f6618e9..3ceb53c 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -57,7 +57,7 @@ 
_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_FTRACE>>8) + _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) #define BASED(name) name-system_call(%r13) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 3e298e6..57bdcb1 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -220,6 +220,29 @@ struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + for (i = 0; i < NR_syscalls; i++) + if (syscalls_metadata[i]) + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + static struct syscall_metadata *find_syscall_meta(unsigned long syscall) { struct syscall_metadata *start; @@ -237,24 +260,19 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) return NULL; } -void arch_init_ftrace_syscalls(void) +static int __init arch_init_ftrace_syscalls(void) { struct syscall_metadata *meta; int i; - static atomic_t refs; - - if (atomic_inc_return(&refs) != 1) - goto out; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, GFP_KERNEL); if (!syscalls_metadata) - goto out; + return -ENOMEM; for (i = 0; i < NR_syscalls; i++) { meta = find_syscall_meta((unsigned long)sys_call_table[i]); syscalls_metadata[i] = meta; } - return; -out: - atomic_dec(&refs); + return 0; } +arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 43acd73..f3ddd7a 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -51,6 +51,9 @@ #include "compat_ptrace.h" #endif +#define CREATE_TRACE_POINTS +#include + enum s390_regset { REGSET_GENERAL, REGSET_FP, @@ -661,8 +664,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) ret = -1; } - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_enter(regs); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gprs[2]); if (unlikely(current->audit_context)) audit_syscall_entry(is_compat_task() ? @@ -679,8 +682,8 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_exit(regs); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->gprs[2]); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c..5582ff1 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -272,14 +272,14 @@ void __init time_init(void) * small for /proc/uptime to be accurate. * Reset xtime and wall_to_monotonic to sane values. 
*/ - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); now = get_clock(); tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); clocksource_tod.cycle_last = now; clocksource_tod.raw_time = xtime; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h index 1987f3e..ed8c771 100644 --- a/arch/sh/include/asm/rwsem.h +++ b/arch/sh/include/asm/rwsem.h @@ -19,7 +19,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { long count; #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 @@ -35,35 +35,38 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } + __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) -static inline void init_rwsem(struct rw_semaphore *sem) +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); @@ -73,7 +76,7 @@ static inline void init_rwsem(struct rw_semaphore *sem) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { if (atomic_inc_return((atomic_t *)(&sem->count)) > 0) smp_wmb(); @@ -81,7 +84,7 @@ static inline void __down_read(struct rw_semaphore *sem) rwsem_down_read_failed(sem); } -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -98,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ 
-static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { int tmp; @@ -110,7 +113,7 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -123,7 +126,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { int tmp; @@ -136,7 +139,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { smp_wmb(); if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, @@ -147,7 +150,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } @@ -155,7 +158,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { int tmp; @@ -165,7 +168,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void + __down_write_nested(struct rw_anon_semaphore *sem, int subclass) { __down_write(sem); } @@ -173,12 +177,60 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { smp_mb(); return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + long count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 3d09062..b17f303 100644 --- 
a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -1,4 +1,4 @@ -/* +/* * linux/arch/sh/kernel/irq.c * * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar @@ -67,7 +67,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc) return 0; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); action = desc->action; @@ -88,7 +88,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } #endif diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h index 1dc129a..0825088 100644 --- a/arch/sparc/include/asm/rwsem.h +++ b/arch/sparc/include/asm/rwsem.h @@ -19,7 +19,7 @@ struct rwsem_waiter; -struct rw_semaphore { +struct rw_anon_semaphore { signed int count; spinlock_t wait_lock; struct list_head wait_list; @@ -29,51 +29,92 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } + __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) -extern void __down_read(struct rw_semaphore *sem); -extern int __down_read_trylock(struct rw_semaphore *sem); -extern void __down_write(struct rw_semaphore *sem); -extern int __down_write_trylock(struct rw_semaphore *sem); -extern void __up_read(struct rw_semaphore *sem); -extern void __up_write(struct rw_semaphore *sem); -extern void __downgrade_write(struct rw_semaphore *sem); +extern void __down_read(struct rw_anon_semaphore *sem); +extern int __down_read_trylock(struct rw_anon_semaphore *sem); +extern void __down_write(struct rw_anon_semaphore *sem); +extern int __down_write_trylock(struct rw_anon_semaphore *sem); +extern void __up_read(struct rw_anon_semaphore *sem); +extern void __up_write(struct rw_anon_semaphore *sem); +extern void __downgrade_write(struct rw_anon_semaphore *sem); -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void +__down_write_nested(struct rw_anon_semaphore *sem, int subclass) { __down_write(sem); } -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { return atomic_add_return(delta, (atomic_t *)(&sem->count)); } -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } +static inline
int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + signed int count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ +{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 8daab33..c72b1d5 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -176,7 +176,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -195,7 +195,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -785,14 +785,14 @@ void fixup_irqs(void) for (irq = 0; irq < NR_IRQS; irq++) { unsigned long flags; - spin_lock_irqsave(&irq_desc[irq].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[irq].lock, flags); if (irq_desc[irq].action && !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, irq_desc[irq].affinity); } - spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } tick_ops->disable_irq(); diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 85e7037..bea30b2 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -703,10 +703,10 @@ static void pcic_clear_clock_irq(void) static irqreturn_t pcic_timer_handler (int irq, void *h) { - write_seqlock(&xtime_lock); /* Dummy, to show that we remember */ + write_atomic_seqlock(&xtime_lock); /* Dummy, to show that we remember */ pcic_clear_clock_irq(); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif @@ -766,7 +766,7 @@ static void pci_do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = do_gettimeoffset(); /* @@ -779,7 +779,7 @@ static void pci_do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= 1000000) { usec -= 1000000; diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 614ac7b..6530942 100644 --- 
a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -93,7 +93,7 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id) #endif /* Protect counter clear so that do_gettimeoffset works */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); clear_clock_irq(); @@ -109,7 +109,7 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id) else last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */ } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); @@ -251,7 +251,7 @@ void do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = do_gettimeoffset(); /* @@ -264,7 +264,7 @@ void do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= 1000000) { usec -= 1000000; @@ -281,9 +281,9 @@ int do_settimeofday(struct timespec *tv) { int ret; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); ret = bus_do_settimeofday(tv); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return ret; } diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 7916feb..a9f414c 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -34,7 +34,7 @@ void *kmap_atomic(struct page *page, enum km_type type) unsigned long idx; unsigned long vaddr; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -73,6 +73,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -99,6 +100,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(kunmap_atomic); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 454cdb4..048ca4a 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -33,7 +33,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -52,7 +52,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) seq_putc(p, '\n'); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 13ffa5d..bc24095 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -38,7 +38,7 @@ config X86 select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE - select HAVE_FTRACE_SYSCALLS + select HAVE_SYSCALL_TRACEPOINTS select HAVE_KVM select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK @@ -123,10 +123,18 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y config RWSEM_GENERIC_SPINLOCK - def_bool !X86_XADD + bool + depends on !X86_XADD || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y config RWSEM_XCHGADD_ALGORITHM - def_bool X86_XADD + bool + depends on X86_XADD && !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + default y config 
ARCH_HAS_CPU_IDLE_WAIT def_bool y @@ -273,6 +281,7 @@ config X86_X2APIC config SPARSE_IRQ bool "Support sparse irq numbering" depends on PCI_MSI || HT_IRQ + depends on !PREEMPT_RT ---help--- This enables support for sparse irqs. This is useful for distro kernels that want to define a high CONFIG_NR_CPUS value but still @@ -672,7 +681,7 @@ config IOMMU_API config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL + depends on 0 && X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL select CPUMASK_OFFSTACK default n ---help--- @@ -1894,7 +1903,7 @@ config PCI_MMCONFIG config DMAR bool "Support for DMA Remapping Devices (EXPERIMENTAL)" - depends on PCI_MSI && ACPI && EXPERIMENTAL + depends on PCI_MSI && ACPI && EXPERIMENTAL && !PREEMPT_RT help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. @@ -1937,6 +1946,7 @@ config DMAR_FLOPPY_WA config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + depends on !PREEMPT_RT ---help--- Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29..3347029 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -76,6 +76,7 @@ config DEBUG_PER_CPU_MAPS bool "Debug access to per_cpu maps" depends on DEBUG_KERNEL depends on SMP + depends on !PREEMPT_RT default n ---help--- Say Y to verify that the per_cpu map being accessed has @@ -126,6 +127,7 @@ config DEBUG_NX_TEST config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on X86_32 + default y ---help--- If you say Y here the kernel will use a 4Kb stacksize for the kernel stack attached to each process/thread. 
This facilitates diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edb992e..d28fad1 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2355,7 +2355,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_HW_BRANCH_TRACER=y -CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index cee1dd2..6c86acd 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2329,7 +2329,7 @@ CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_HW_BRANCH_TRACER=y -CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 20d1465..b87dff9 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -50,8 +50,8 @@ #define ACPI_ASM_MACROS #define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() local_irq_disable() -#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_DISABLE_IRQS() local_irq_disable_nort() +#define ACPI_ENABLE_IRQS() local_irq_enable_nort() #define ACPI_FLUSH_CPU_CACHE() wbinvd() int __acpi_acquire_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index dc5a667..166704f 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -186,10 +186,10 @@ static inline int atomic_add_return(int i, atomic_t *v) #ifdef CONFIG_M386 no_xadd: /* Legacy 386 processor */ - local_irq_save(flags); + raw_local_irq_save(flags); __i = atomic_read(v); atomic_set(v, i + __i); - local_irq_restore(flags); + raw_local_irq_restore(flags); return i + __i; #endif } diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4a28d22..9d8c7f6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -95,6 +95,7 @@ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ +#define X86_FEATURE_APERFMPERF (3*32+27) /* APERFMPERF */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index bd2c651..db24c22 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,13 +28,6 @@ #endif -/* FIXME: I don't want to stay hardcoded */ -#ifdef CONFIG_X86_64 -# define FTRACE_SYSCALL_MAX 296 -#else -# define FTRACE_SYSCALL_MAX 333 -#endif - #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 014c2b8..433ae1f 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -58,16 +58,22 @@ extern void *kmap_high(struct page *page); extern void kunmap_high(struct page *page); void *kmap(struct page *page); +extern void kunmap_virt(void *ptr); +extern struct page *kmap_to_page(void *ptr); void kunmap(struct page *page); -void 
*kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); -void *kmap_atomic(struct page *page, enum km_type type); -void kunmap_atomic(void *kvaddr, enum km_type type); -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); + +void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); +void *__kmap_atomic(struct page *page, enum km_type type); +void *__kmap_atomic_direct(struct page *page, enum km_type type); +void __kunmap_atomic(void *kvaddr, enum km_type type); +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type); +struct page *__kmap_atomic_to_page(void *ptr); + void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); -struct page *kmap_atomic_to_page(void *ptr); #ifndef CONFIG_PARAVIRT -#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#define kmap_atomic_pte_direct(page, type) kmap_atomic_direct(page, type) #endif #define flush_cache_kmaps() do { } while (0) @@ -75,6 +81,27 @@ struct page *kmap_atomic_to_page(void *ptr); extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn); +/* + * on PREEMPT_RT kmap_atomic() is a wrapper that uses kmap(): + */ +#ifdef CONFIG_PREEMPT_RT +# define kmap_atomic_prot(page, type, prot) ({ pagefault_disable(); kmap(page); }) +# define kmap_atomic(page, type) ({ pagefault_disable(); kmap(page); }) +# define kmap_atomic_pfn(pfn, type) kmap(pfn_to_page(pfn)) +# define kunmap_atomic(kvaddr, type) do { pagefault_enable(); kunmap_virt(kvaddr); } while(0) +# define kmap_atomic_to_page(kvaddr) kmap_to_page(kvaddr) +# define kmap_atomic_direct(page, type) __kmap_atomic_direct(page, type) +# define kunmap_atomic_direct(kvaddr, type) __kunmap_atomic(kvaddr, type) +#else +# define kmap_atomic_prot(page, type, prot) __kmap_atomic_prot(page, type, prot) +# define kmap_atomic(page, type) __kmap_atomic(page, type) +# define kmap_atomic_pfn(pfn, type) __kmap_atomic_pfn(pfn, type) +# define kunmap_atomic(kvaddr, type) __kunmap_atomic(kvaddr, type) +# define kmap_atomic_to_page(kvaddr) __kmap_atomic_to_page(kvaddr) +# define kmap_atomic_direct(page, type) __kmap_atomic(page, type) +# define kunmap_atomic_direct(kvaddr, type) __kunmap_atomic(kvaddr, type) +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index 1edbf89..a2bd5b5 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h @@ -6,7 +6,7 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 -extern spinlock_t i8253_lock; +extern atomic_spinlock_t i8253_lock; extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091..4720126 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; #define SLAVE_ICW4_DEFAULT 0x01 #define PIC_ICW4_AEOI 2 -extern spinlock_t i8259A_lock; +extern atomic_spinlock_t i8259A_lock; extern void init_8259A(int auto_eoi); extern void enable_8259A_irq(unsigned int irq); diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index ad2668e..6d8723a 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -65,6 +65,8 @@ 6: osp nopl 0x00(%eax,%eax,1) 7: nopl 0x00000000(%eax) 8: nopl 0x00000000(%eax,%eax,1) + Note: All the above are assumed to be a single instruction. + There is kernel code that depends on this. 
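A minimal usage sketch of the kmap_atomic() interface that the highmem.h hunk above rewires (illustrative only, not part of the patch; it assumes the two-argument form with the KM_USER0/KM_USER1 slots of this kernel generation). The point of keeping the old names as macros is that a caller like this builds unchanged whether they resolve to the fixmap-based __kmap_atomic() or, under PREEMPT_RT, to pagefault_disable() plus a sleeping kmap():

#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/string.h>

/* Copy one page through temporary kernel mappings, much like the
 * existing copy_highpage() helper does. */
static void copy_page_via_kmap_atomic(struct page *dst, struct page *src)
{
        void *vto   = kmap_atomic(dst, KM_USER0);
        void *vfrom = kmap_atomic(src, KM_USER1);

        memcpy(vto, vfrom, PAGE_SIZE);

        kunmap_atomic(vfrom, KM_USER1);         /* release in LIFO order */
        kunmap_atomic(vto, KM_USER0);
}

Only the *_direct variants added above are new API; they are intended for the few callers (e.g. the PTE mapping helpers) that must get the non-sleeping mapping even on PREEMPT_RT.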
*/ #define P6_NOP1 GENERIC_NOP1 #define P6_NOP2 ".byte 0x66,0x90\n" diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 7639dbf..0ec050a 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,21 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#ifdef CONFIG_PREEMPT_RT +# define STACKFAULT_STACK 0 +# define DOUBLEFAULT_STACK 1 +# define NMI_STACK 2 +# define DEBUG_STACK 0 +# define MCE_STACK 3 +# define N_EXCEPTION_STACKS 3 /* hw limit: 7 */ +#else +# define STACKFAULT_STACK 1 +# define DOUBLEFAULT_STACK 2 +# define NMI_STACK 3 +# define DEBUG_STACK 4 +# define MCE_STACK 5 +# define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#endif #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 43b8adb..132c002 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -340,6 +340,7 @@ struct pv_mmu_ops { #ifdef CONFIG_HIGHPTE void *(*kmap_atomic_pte)(struct page *page, enum km_type type); + void *(*kmap_atomic_pte_direct)(struct page *page, enum km_type type); #endif struct pv_lazy_ops lazy_mode; @@ -1138,6 +1139,14 @@ static inline void *kmap_atomic_pte(struct page *page, enum km_type type) ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); return (void *)ret; } + +static inline void *kmap_atomic_pte_direct(struct page *page, enum km_type type) +{ + unsigned long ret; + ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte_direct, + page, type); + return (void *)ret; +} #endif static inline void pte_update(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988..a2baf26 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -83,7 +83,7 @@ struct irq_routing_table { extern unsigned int pcibios_irq_mask; extern int pcibios_scanned; -extern spinlock_t pci_config_lock; +extern atomic_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 177b016..0e989a1 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -71,6 +71,7 @@ static inline void pud_clear(pud_t *pudp) { unsigned long pgd; + preempt_disable(); set_pud(pudp, __pud(0)); /* @@ -86,6 +87,7 @@ static inline void pud_clear(pud_t *pudp) if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD)) write_cr3(pgd); + preempt_enable(); } #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 01fd946..b323c3d 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -59,14 +59,20 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #define pte_offset_map_nested(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ pte_index((address))) +#define pte_offset_map_direct(dir, address) \ + ((pte_t *)kmap_atomic_pte_direct(pmd_page(*(dir)), __KM_PTE) + \ + pte_index((address))) #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) #define 
pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) +#define pte_unmap_direct(pte) kunmap_atomic_direct((pte), __KM_PTE) #else #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) #define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address)) +#define pte_offset_map_direct(dir, address) pte_offset_map((dir), (address)) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) +#define pte_unmap_direct(pte) do { } while (0) #endif /* Clear a kernel PTE and flush it from the TLB */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index c57a301..efc01ae 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -126,8 +126,10 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* x86-64 always has all page tables mapped. */ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) /* NOP */ -#define pte_unmap_nested(pte) /* NOP */ +#define pte_offset_map_direct(dir, address) pte_offset_kernel((dir), (address)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) +#define pte_unmap_direct(pte) do { } while (0) #define update_mmu_cache(vma, address, pte) do { } while (0) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e597ecc..df2a4c7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -27,6 +27,7 @@ struct mm_struct; #include #include #include +#include #include /* @@ -1010,4 +1011,33 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); +struct aperfmperf { + u64 aperf, mperf; +}; + +static inline void get_aperfmperf(struct aperfmperf *am) +{ + WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF)); + + rdmsrl(MSR_IA32_APERF, am->aperf); + rdmsrl(MSR_IA32_MPERF, am->mperf); +} + +#define APERFMPERF_SHIFT 10 + +static inline +unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, + struct aperfmperf *new) +{ + u64 aperf = new->aperf - old->aperf; + u64 mperf = new->mperf - old->mperf; + unsigned long ratio = aperf; + + mperf >>= APERFMPERF_SHIFT; + if (mperf) + ratio = div64_u64(aperf, mperf); + + return ratio; +} + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index ca7517d..92d67a6 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -44,14 +44,14 @@ struct rwsem_waiter; -extern asmregparm struct rw_semaphore * - rwsem_down_read_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_down_write_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_wake(struct rw_semaphore *); -extern asmregparm struct rw_semaphore * - rwsem_downgrade_wake(struct rw_semaphore *sem); +extern asmregparm struct rw_anon_semaphore * + rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern asmregparm struct rw_anon_semaphore * + rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern asmregparm struct rw_anon_semaphore * + rwsem_wake(struct rw_anon_semaphore *); +extern asmregparm struct rw_anon_semaphore * + rwsem_downgrade_wake(struct rw_anon_semaphore *sem); /* * the semaphore definition @@ -64,7 +64,7 @@ extern asmregparm struct rw_semaphore * #define 
RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; @@ -74,35 +74,34 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif - -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { \ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \ } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { asm volatile("# beginning down_read\n\t" LOCK_PREFIX " incl (%%eax)\n\t" @@ -119,7 +118,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { __s32 result, tmp; asm volatile("# beginning __down_read_trylock\n\t" @@ -141,7 +140,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void +__down_write_nested(struct rw_anon_semaphore *sem, int subclass) { int tmp; @@ -160,7 +160,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) : "memory", "cc"); } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { __down_write_nested(sem, 0); } @@ -168,7 +168,7 @@ static inline void __down_write(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { signed long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, @@ -181,7 +181,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; asm volatile("# beginning __up_read\n\t" @@ -199,7 +199,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { asm volatile("# beginning __up_write\n\t" " movl %2,%%edx\n\t" @@ -218,7 +218,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * 
downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { asm volatile("# beginning __downgrade_write\n\t" LOCK_PREFIX " addl %2,(%%eax)\n\t" @@ -235,7 +235,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (sem->count) @@ -245,7 +245,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { int tmp = delta; @@ -256,10 +256,54 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return tmp + delta; } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +#ifndef CONFIG_PREEMPT_RT + +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ +{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); } +#endif #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 4e77853..135097c 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -298,9 +298,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define __raw_spin_relax(lock) cpu_relax() +#define __raw_read_relax(lock) cpu_relax() +#define __raw_write_relax(lock) cpu_relax() /* The {read|write|spin}_lock() on x86 are full memory barriers. 
*/ static inline void smp_mb__after_lock(void) { } diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fad7d40..6f7786a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -95,7 +95,7 @@ struct thread_info { #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ -#define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ +#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -118,17 +118,17 @@ struct thread_info { #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) -#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) +#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE | \ - _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_FTRACE) + _TIF_SYSCALL_TRACEPOINT) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -137,7 +137,8 @@ struct thread_info { _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) /* work to do on any return to user space */ -#define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_FTRACE) +#define _TIF_ALLWORK_MASK \ + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 20ca9c4..1c4277a 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -62,9 +62,9 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) unsigned long long ns; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); ns = __cycles_2_ns(cyc); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ns; } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 7f3eba0..1c77e81 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,21 @@ #include #include +/* + * TLB-flush needs to be nonpreemptible on PREEMPT_RT due to the + * following complex race scenario: + * + * if the current task is lazy-TLB and does a TLB flush and + * gets preempted after the movl %%cr3, %0 but before the + * movl %0, %%cr3 then its ->active_mm might change and it will + * install the wrong cr3 when it switches back. This is not a + * problem for the lazy-TLB task itself, but if the next task it + * switches to has an ->mm that is also the lazy-TLB task's + * new ->active_mm, then the scheduler will assume that cr3 is + * the new one, while we overwrote it with the old one. The result + * is the wrong cr3 in the new (non-lazy-TLB) task, which typically + * causes an infinite pagefault upon the next userspace access. 
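To make that window concrete, a hypothetical sketch of the interleaving (OLD/NEW and the helper name are made up for illustration; the hunk that follows closes the window by bracketing the cr3 read/write with preempt_disable()/preempt_enable()):

/* A lazy-TLB task whose active_mm is OLD performs the flush: */
static inline void racy_flush_tlb_example(void)
{
        unsigned long cr3 = native_read_cr3();  /* page tables of OLD */

        /*
         * <-- preempted here: another task installs NEW in cr3, and when
         *     this lazy-TLB task is scheduled back in it silently borrows
         *     active_mm = NEW without cr3 being reloaded.
         */

        native_write_cr3(cr3);                  /* reinstalls OLD */

        /*
         * The scheduler still believes cr3 matches NEW, so the next switch
         * to a task whose ->mm == NEW skips the cr3 reload and that task
         * keeps faulting on OLD's page tables.
         */
}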
+ */ #ifdef CONFIG_PARAVIRT #include #else @@ -17,7 +32,9 @@ static inline void __native_flush_tlb(void) { + preempt_disable(); native_write_cr3(native_read_cr3()); + preempt_enable(); } static inline void __native_flush_tlb_global(void) @@ -95,6 +112,13 @@ static inline void __flush_tlb_one(unsigned long addr) static inline void flush_tlb_mm(struct mm_struct *mm) { + /* + * This is safe on PREEMPT_RT because if we preempt + * right after the check but before the __flush_tlb(), + * and if ->active_mm changes, then we might miss a + * TLB flush, but that TLB flush happened already when + * ->active_mm was changed: + */ if (mm == current->active_mm) __flush_tlb(); } diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 732a307..8deaada 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -345,6 +345,8 @@ #ifdef __KERNEL__ +#define NR_syscalls 337 + #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 900e161..b9f3c60 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -688,6 +688,12 @@ __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) #endif /* __NO_STUBS */ #ifdef __KERNEL__ + +#ifndef COMPILE_OFFSETS +#include +#define NR_syscalls (__NR_syscall_max + 1) +#endif + /* * "Conditional" syscalls * diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69..c536000 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,7 +5,7 @@ #include struct vsyscall_gtod_data { - seqlock_t lock; + atomic_seqlock_t lock; /* open coded 'struct timespec' */ time_t wall_time_sec; diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 133b40a..7a6aa68 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -865,7 +865,21 @@ static struct xor_block_template xor_block_pIII_sse = { #include #undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ +/* + * MMX/SSE ops disable preemption for long periods of time, + * so on PREEMPT_RT use the register-based ops only: + */ +#ifdef CONFIG_PREEMPT_RT +# define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + } while (0) +# define XOR_SELECT_TEMPLATE(FASTEST) (FASTEST) +#else +# define XOR_TRY_TEMPLATES \ do { \ xor_speed(&xor_block_8regs); \ xor_speed(&xor_block_8regs_p); \ @@ -882,7 +896,8 @@ do { \ /* We force the use of the SSE xor block because it can write around L2. We may also be able to load into the L1 only depending on how the cpu deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ +# define XOR_SELECT_TEMPLATE(FASTEST) \ (cpu_has_xmm ? 
&xor_block_pIII_sse : FASTEST) +#endif /* CONFIG_PREEMPT_RT */ #endif /* _ASM_X86_XOR_32_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d2ed6c5..a3bf1db 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -73,8 +73,8 @@ */ int sis_apic_bug = -1; -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static DEFINE_ATOMIC_SPINLOCK(ioapic_lock); +static DEFINE_ATOMIC_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -413,7 +413,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) struct irq_pin_list *entry; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@ -425,14 +425,14 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return true; } if (!entry->next) break; entry = entry->next; } - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return false; } @@ -446,10 +446,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return eu.entry; } @@ -472,9 +472,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -487,10 +487,10 @@ static void ioapic_mask_entry(int apic, int pin) unsigned long flags; union entry_union eu = { .entry.mask = 1 }; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -622,9 +622,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) BUG_ON(!cfg); - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __mask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) @@ -632,9 +632,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) struct irq_cfg *cfg = desc->chip_data; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } static void mask_IO_APIC_irq(unsigned int irq) @@ -1158,12 +1158,12 @@ void lock_vector_lock(void) /* Used to the online set of cpus does not change * during assign_irq_vector. 
*/ - spin_lock(&vector_lock); + atomic_spin_lock(&vector_lock); } void unlock_vector_lock(void) { - spin_unlock(&vector_lock); + atomic_spin_unlock(&vector_lock); } static int @@ -1251,9 +1251,9 @@ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int err; unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); err = __assign_irq_vector(irq, cfg, mask); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); return err; } @@ -1623,14 +1623,14 @@ __apicdebuginit(void) print_IO_APIC(void) for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); if (reg_01.bits.version >= 0x20) reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); @@ -1856,7 +1856,7 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "\nprinting PIC contents\n"); - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); v = inb(0xa1) << 8 | inb(0x21); printk(KERN_DEBUG "... PIC IMR: %04x\n", v); @@ -1870,7 +1870,7 @@ __apicdebuginit(void) print_PIC(void) outb(0x0a,0xa0); outb(0x0a,0x20); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); printk(KERN_DEBUG "... PIC ISR: %04x\n", v); @@ -1909,9 +1909,9 @@ void __init enable_IO_APIC(void) * The number of IO-APIC IRQ registers (== #pins): */ for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } for(apic = 0; apic < nr_ioapics; apic++) { @@ -2045,9 +2045,9 @@ static void __init setup_ioapic_ids_from_mpc(void) for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic_id].apicid; @@ -2106,16 +2106,16 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid); reg_00.bits.ID = mp_ioapics[apic_id].apicid; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic_id, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else @@ -2164,8 +2164,10 @@ static int __init timer_irq_works(void) */ /* jiffies wrap? 
*/ - if (time_after(jiffies, t1 + 4)) + if (time_after(jiffies, t1 + 4) && + time_before(jiffies, t1 + 16)) return 1; + return 0; } @@ -2198,7 +2200,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) unsigned long flags; struct irq_cfg *cfg; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); if (irq < NR_IRQS_LEGACY) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) @@ -2206,7 +2208,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) } cfg = irq_cfg(irq); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } @@ -2218,9 +2220,9 @@ static int ioapic_retrigger_irq(unsigned int irq) struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); return 1; } @@ -2333,7 +2335,7 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); dest = set_desc_affinity(desc, mask); if (dest != BAD_APICID) { /* Only the high 8 bits are valid. */ @@ -2341,7 +2343,7 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) __target_IO_APIC_irq(irq, dest, cfg); ret = 0; } - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } @@ -2454,7 +2456,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; cfg = irq_cfg(irq); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -2476,7 +2478,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) __get_cpu_var(vector_irq)[vector] = -1; cfg->move_cleanup_count--; unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } irq_exit(); @@ -2526,7 +2528,8 @@ static void ack_apic_level(unsigned int irq) irq_complete_move(&desc); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + if (unlikely(desc->status & IRQ_MOVE_PENDING) && + !(desc->status & IRQ_INPROGRESS)) { do_unmask_irq = 1; mask_IO_APIC_irq_desc(desc); } @@ -2597,14 +2600,23 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } +#if (defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)) && \ + defined(CONFIG_PREEMPT_HARDIRQS) + /* + * With threaded interrupts, we always have IRQ_INPROGRESS + * when acking. 
+ */ + else if (unlikely(desc->status & IRQ_MOVE_PENDING)) + move_masked_irq(irq); +#endif #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); + atomic_spin_lock(&ioapic_lock); __mask_and_edge_IO_APIC_irq(cfg); __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); + atomic_spin_unlock(&ioapic_lock); } #endif } @@ -2638,9 +2650,9 @@ eoi_ioapic_irq(struct irq_desc *desc) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } static void ir_ack_apic_edge(unsigned int irq) @@ -3116,13 +3128,13 @@ static int ioapic_resume(struct sys_device *dev) data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ioapic_write_entry(dev->id, i, entry[i]); @@ -3186,7 +3198,6 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) if (irq_want < nr_irqs_gsi) irq_want = nr_irqs_gsi; - spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { @@ -3198,13 +3209,14 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) if (cfg_new->vector != 0) continue; + atomic_spin_lock_irqsave(&vector_lock, flags); desc_new = move_irq_desc(desc_new, node); if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; + atomic_spin_unlock_irqrestore(&vector_lock, flags); break; } - spin_unlock_irqrestore(&vector_lock, flags); if (irq > 0) { dynamic_irq_init(irq); @@ -3245,9 +3257,9 @@ void destroy_irq(unsigned int irq) desc->chip_data = cfg; free_irte(irq); - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); } /* @@ -3775,10 +3787,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, irq_name); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; @@ -3825,9 +3837,9 @@ int __init io_apic_get_redir_entries (int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } @@ -3968,9 +3980,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (physids_empty(apic_id_map)) apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 
0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " @@ -4004,10 +4016,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0, reg_00.raw); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ if (reg_00.bits.ID != apic_id) { @@ -4028,9 +4040,9 @@ int __init io_apic_get_version(int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 6ef00ba..c83acda 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; - if (WARN_ONCE(!mask, "empty IPI mask")) + if (!mask) return; local_irq_save(flags); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b4..f6755e5 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -90,7 +90,9 @@ static inline unsigned int get_timer_irqs(int cpu) */ static __init void nmi_cpu_busy(void *data) { +#ifndef CONFIG_PREEMPT_RT local_irq_enable_in_hardirq(); +#endif /* * Intentionally don't use cpu_relax here. This is * to make sure that the performance counter really ticks, @@ -416,12 +418,12 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) /* We can be called before check_nmi_watchdog, hence NULL check. */ if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + static DEFINE_ATOMIC_SPINLOCK(lock); /* Serialise the printks */ - spin_lock(&lock); + atomic_spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); dump_stack(); - spin_unlock(&lock); + atomic_spin_unlock(&lock); cpumask_clear_cpu(cpu, backtrace_mask); } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 442b550..ac6d430 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1216,7 +1216,7 @@ static void reinit_timer(void) #ifdef INIT_TIMER_AFTER_SUSPEND unsigned long flags; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); /* set the clock to HZ */ outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ udelay(10); @@ -1224,7 +1224,7 @@ static void reinit_timer(void) udelay(10); outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ udelay(10); - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); #endif } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 898ecc4..4a6aeed 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -3,6 +3,7 @@ * This code generates raw asm output which is post-processed to extract * and format the required data. 
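The COMPILE_OFFSETS define added in the next hunk pairs with the unistd_64.h change earlier: NR_syscalls is now derived from the generated offsets header, and asm-offsets_64.c is the very file the build runs to produce that header, so it has to opt out. Schematically (the exact include target is an assumption for illustration):

/* arch/x86/include/asm/unistd_64.h (earlier hunk), in outline: */
#ifndef COMPILE_OFFSETS
# include <asm/asm-offsets.h>   /* assumed: the generated offsets header */
# define NR_syscalls (__NR_syscall_max + 1)
#endif

/* asm-offsets_64.c defines COMPILE_OFFSETS before any includes, so this
 * chicken-and-egg include is skipped while the offsets are generated. */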
*/ +#define COMPILE_OFFSETS #include #include diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c1f253d..8dd3063 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o +obj-y += vmware.o hypervisor.o sched.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e338b5c..8ac5975 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1004,7 +1004,9 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1; */ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, +#if DEBUG_STACK > 0 [DEBUG_STACK - 1] = DEBUG_STKSZ +#endif }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ae9b503..4109679 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -60,7 +60,6 @@ enum { }; #define INTEL_MSR_RANGE (0xffff) -#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; @@ -71,11 +70,7 @@ struct acpi_cpufreq_data { static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); -struct acpi_msr_data { - u64 saved_aperf, saved_mperf; -}; - -static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); +static DEFINE_PER_CPU(struct aperfmperf, old_perf); DEFINE_TRACE(power_mark); @@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask) return cmd.val; } -struct perf_pair { - union { - struct { - u32 lo; - u32 hi; - } split; - u64 whole; - } aperf, mperf; -}; - /* Called via smp_call_function_single(), on the target CPU */ static void read_measured_perf_ctrs(void *_cur) { - struct perf_pair *cur = _cur; + struct aperfmperf *am = _cur; - rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); - rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); + get_aperfmperf(am); } /* @@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur) static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int cpu) { - struct perf_pair readin, cur; - unsigned int perf_percent; + struct aperfmperf perf; + unsigned long ratio; unsigned int retval; - if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) + if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) return 0; - cur.aperf.whole = readin.aperf.whole - - per_cpu(msr_data, cpu).saved_aperf; - cur.mperf.whole = readin.mperf.whole - - per_cpu(msr_data, cpu).saved_mperf; - per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; - per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; - -#ifdef __i386__ - /* - * We dont want to do 64 bit divide with 32 bit kernel - * Get an approximate value. Return failure in case we cannot get - * an approximate value. 
- */ - if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { - int shift_count; - u32 h; - - h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); - shift_count = fls(h); - - cur.aperf.whole >>= shift_count; - cur.mperf.whole >>= shift_count; - } - - if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { - int shift_count = 7; - cur.aperf.split.lo >>= shift_count; - cur.mperf.split.lo >>= shift_count; - } - - if (cur.aperf.split.lo && cur.mperf.split.lo) - perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; - else - perf_percent = 0; + ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); + per_cpu(old_perf, cpu) = perf; -#else - if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { - int shift_count = 7; - cur.aperf.whole >>= shift_count; - cur.mperf.whole >>= shift_count; - } - - if (cur.aperf.whole && cur.mperf.whole) - perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; - else - perf_percent = 0; - -#endif - - retval = (policy->cpuinfo.max_freq * perf_percent) / 100; + retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; return retval; } @@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) acpi_processor_notify_smm(THIS_MODULE); /* Check for APERF/MPERF support in hardware */ - if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { - unsigned int ecx; - ecx = cpuid_ecx(6); - if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) - acpi_cpufreq_driver.getavg = get_measured_perf; - } + if (cpu_has(c, X86_FEATURE_APERFMPERF)) + acpi_cpufreq_driver.getavg = get_measured_perf; dprintk("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3260ab0..30cf8be 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -349,6 +349,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } + if (c->cpuid_level > 6) { + unsigned ecx = cpuid_ecx(6); + if (ecx & 0x01) + set_cpu_cap(c, X86_FEATURE_APERFMPERF); + } + if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (cpu_has_ds) { diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0543f69..d486197 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -549,7 +549,7 @@ static unsigned long set_mtrr_state(void) static unsigned long cr4 = 0; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_ATOMIC_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -566,7 +566,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) for this CPU while the MTRRs are changed, but changing this requires more invasive changes to the way the kernel boots */ - spin_lock(&set_atomicity_lock); + atomic_spin_lock(&set_atomicity_lock); /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. 
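For reference, the fixed-point arithmetic behind the new get_measured_perf() path above, as a self-contained sketch with made-up sample values (not patch code):

#include <linux/math64.h>
#include <asm/processor.h>

/*
 * Example: since the last sample APERF advanced 1,500,000 cycles and
 * MPERF 2,000,000 cycles, and max_freq is 2,000,000 kHz.  Then
 *
 *      ratio = 1,500,000 / (2,000,000 >> 10) = 768   (0.75 in 10-bit fixed point)
 *      freq  = (2,000,000 * 768) >> 10       = 1,500,000 kHz
 */
static unsigned long example_scaled_freq(u64 aperf_delta, u64 mperf_delta,
                                         unsigned long max_freq)
{
        unsigned long ratio = aperf_delta;      /* same fallback as calc_aperfmperf_ratio() */

        mperf_delta >>= APERFMPERF_SHIFT;
        if (mperf_delta)
                ratio = div64_u64(aperf_delta, mperf_delta);

        return (max_freq * ratio) >> APERFMPERF_SHIFT;
}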
*/ cr0 = read_cr0() | X86_CR0_CD; @@ -603,7 +603,7 @@ static void post_set(void) __releases(set_atomicity_lock) /* Restore value of CR4 */ if ( cpu_has_pge ) write_cr4(cr4); - spin_unlock(&set_atomicity_lock); + atomic_spin_unlock(&set_atomicity_lock); } static void generic_set_all(void) diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c new file mode 100644 index 0000000..2e59c71 --- /dev/null +++ b/arch/x86/kernel/cpu/sched.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include + +#include +#include + +static DEFINE_PER_CPU(struct aperfmperf, old_aperfmperf); + +static unsigned long scale_aperfmperf(void) +{ + struct aperfmperf cur, val, *old = &__get_cpu_var(old_aperfmperf); + unsigned long ratio = SCHED_LOAD_SCALE; + unsigned long flags; + + local_irq_save(flags); + get_aperfmperf(&val); + local_irq_restore(flags); + + cur = val; + cur.aperf -= old->aperf; + cur.mperf -= old->mperf; + *old = val; + + cur.mperf >>= SCHED_LOAD_SHIFT; + if (cur.mperf) + ratio = div_u64(cur.aperf, cur.mperf); + + return ratio; +} + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + /* + * do aperf/mperf on the cpu level because it includes things + * like turbo mode, which are relevant to full cores. + */ + if (boot_cpu_has(X86_FEATURE_APERFMPERF)) + return scale_aperfmperf(); + + /* + * maybe have something cpufreq here + */ + + return default_scale_freq_power(sd, cpu); +} + +unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) +{ + /* + * aperf/mperf already includes the smt gain + */ + if (boot_cpu_has(X86_FEATURE_APERFMPERF)) + return SCHED_LOAD_SCALE; + + return default_scale_smt_power(sd, cpu); +} diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bca5fba..ffb9886 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -99,6 +99,12 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, } +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_EVENT_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 54b0a32..5a11d2a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -21,10 +21,14 @@ static char x86_stack_ids[][8] = { +#if DEBUG_STACK > 0 [DEBUG_STACK - 1] = "#DB", +#endif [NMI_STACK - 1] = "NMI", [DOUBLEFAULT_STACK - 1] = "#DF", +#if STACKFAULT_STACK > 0 [STACKFAULT_STACK - 1] = "#SS", +#endif [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ [N_EXCEPTION_STACKS ... diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 335f049..8b60080 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -59,7 +59,7 @@ static void early_vga_write(struct console *con, const char *str, unsigned n) static struct console early_vga_console = { .name = "earlyvga", .write = early_vga_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ATOMIC, .index = -1, }; @@ -156,7 +156,7 @@ static __init void early_serial_init(char *s) static struct console early_serial_console = { .name = "earlyser", .write = early_serial_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ATOMIC, .index = -1, }; @@ -881,7 +881,7 @@ static int __initdata early_console_initialized; asmlinkage void early_printk(const char *fmt, ...) 
{ - char buf[512]; + static char buf[512]; int n; va_list ap; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d..f86fc3b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -371,13 +371,13 @@ END(ret_from_exception) ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all + jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all + jz restore_nocheck call preempt_schedule_irq jmp need_resched END(resume_kernel) @@ -627,12 +627,9 @@ work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: - call schedule + call __schedule LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c251be7..d59fe32 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller) END(ftrace_graph_caller) GLOBAL(return_to_handler) - subq $80, %rsp + subq $24, %rsp /* Save the return values */ movq %rax, (%rsp) @@ -155,10 +155,10 @@ GLOBAL(return_to_handler) call ftrace_return_to_handler - movq %rax, 72(%rsp) + movq %rax, 16(%rsp) movq 8(%rsp), %rdx movq (%rsp), %rax - addq $72, %rsp + addq $16, %rsp retq #endif diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d94e1ea..9dbb527 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -417,10 +417,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long return_hooker = (unsigned long) &return_to_handler; - /* Nmi's are currently unsupported */ - if (unlikely(in_nmi())) - return; - if (unlikely(atomic_read(&current->tracing_graph_pause))) return; @@ -498,37 +494,56 @@ static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) struct syscall_metadata *syscall_nr_to_meta(int nr) { - if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) return NULL; return syscalls_metadata[nr]; } -void arch_init_ftrace_syscalls(void) +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + + for (i = 0; i < NR_syscalls; i++) { + if (syscalls_metadata[i]) { + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + } + } + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + +static int __init arch_init_ftrace_syscalls(void) { int i; struct syscall_metadata *meta; unsigned long **psys_syscall_table = &sys_call_table; - static atomic_t refs; - - if (atomic_inc_return(&refs) != 1) - goto end; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - FTRACE_SYSCALL_MAX, GFP_KERNEL); + NR_syscalls, GFP_KERNEL); if (!syscalls_metadata) { WARN_ON(1); - return; + return -ENOMEM; } - for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { + for (i = 0; i < NR_syscalls; i++) { meta = find_syscall_meta(psys_syscall_table[i]); syscalls_metadata[i] = meta; } - return; - - /* Paranoid: avoid overflow */
-end: - atomic_dec(&refs); + return 0; } +arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 70eaa85..4d7255c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -30,7 +30,11 @@ static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); pgd_clear(pgd); - __flush_tlb_all(); + /* + * preempt_disable/enable does not work this early in the + * bootup yet: + */ + write_cr3(read_cr3()); } /* Don't add a printk in there. printk relies on the PDA which is not initialized diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 7ffec6b..6d98b18 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -591,6 +591,7 @@ ignore_int: call dump_stack addl $(5*4),%esp + call dump_stack popl %ds popl %es popl %edx diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index da890f0..3b4ea29 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -16,7 +16,7 @@ #include #include -DEFINE_SPINLOCK(i8253_lock); +DEFINE_ATOMIC_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); #ifdef CONFIG_X86_32 @@ -41,7 +41,7 @@ struct clock_event_device *global_clock_event; static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); switch (mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -72,7 +72,7 @@ static void init_pit_timer(enum clock_event_mode mode, pit_enable_clocksource(); break; } - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); } /* @@ -82,10 +82,10 @@ static void init_pit_timer(enum clock_event_mode mode, */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); outb_pit(delta & 0xff , PIT_CH0); /* LSB */ outb_pit(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); return 0; } @@ -140,7 +140,7 @@ static cycle_t pit_read(struct clocksource *cs) int count; u32 jifs; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); /* * Although our caller may have the read side of xtime_lock, * this is now a seqlock, and we are cheating in this routine @@ -186,7 +186,7 @@ static cycle_t pit_read(struct clocksource *cs) old_count = count; old_jifs = jifs; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); count = (LATCH - 1) - count; diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102..6fbe669 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -32,7 +32,7 @@ */ static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_ATOMIC_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); struct irq_chip i8259A_chip = { @@ -68,13 +68,13 @@ void disable_8259A_irq(unsigned int irq) unsigned int mask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } void enable_8259A_irq(unsigned int irq) @@ -82,13 +82,13 @@ void enable_8259A_irq(unsigned int irq) unsigned int mask = ~(1 << irq); unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; 
if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } int i8259A_irq_pending(unsigned int irq) @@ -97,12 +97,12 @@ int i8259A_irq_pending(unsigned int irq) unsigned long flags; int ret; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) ret = inb(PIC_MASTER_CMD) & mask; else ret = inb(PIC_SLAVE_CMD) & (mask >> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); return ret; } @@ -150,7 +150,7 @@ static void mask_and_ack_8259A(unsigned int irq) unsigned int irqmask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); /* * Lightweight spurious IRQ detection. We do not want * to overdo spurious IRQ handling - it's usually a sign @@ -168,6 +168,8 @@ static void mask_and_ack_8259A(unsigned int irq) */ if (cached_irq_mask & irqmask) goto spurious_8259A_irq; + if (irq & 8) + outb(0x60+(irq&7), PIC_SLAVE_CMD); /* 'Specific EOI' to slave */ cached_irq_mask |= irqmask; handle_real_irq: @@ -183,7 +185,7 @@ handle_real_irq: outb(cached_master_mask, PIC_MASTER_IMR); outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ } - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); return; spurious_8259A_irq: @@ -285,24 +287,24 @@ void mask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } void unmask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } void init_8259A(int auto_eoi) @@ -311,7 +313,7 @@ void init_8259A(int auto_eoi) i8259A_auto_eoi = auto_eoi; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ @@ -328,10 +330,10 @@ void init_8259A(int auto_eoi) /* 8259A-1 (the master) has a slave on IR2 */ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + if (!auto_eoi) /* master expects normal EOI */ + outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + else /* master does Auto EOI */ + outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ @@ -356,5 +358,5 @@ void init_8259A(int auto_eoi) outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b0cdde6..c51d8e6 100644 --- a/arch/x86/kernel/irq.c +++ 
b/arch/x86/kernel/irq.c @@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc) return 0; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); action = desc->action; @@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b4..1aa5228 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -81,12 +81,12 @@ void fixup_irqs(void) continue; /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); continue; } @@ -106,7 +106,7 @@ void fixup_irqs(void) if (desc->chip->unmask) desc->chip->unmask(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); if (break_affinity && set_affinity) printk("Broke affinity for irq %i\n", irq); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 92b7703..49dcf16 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -72,6 +72,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) static struct irqaction fpu_irq = { .handler = math_error_irq, .name = "fpu", + .flags = IRQF_NODELAY, }; #endif @@ -81,6 +82,7 @@ static struct irqaction fpu_irq = { static struct irqaction irq2 = { .handler = no_action, .name = "cascade", + .flags = IRQF_NODELAY, }; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d..d23c755 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -454,7 +454,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); regs->ip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); + preempt_enable(); return; } #endif @@ -480,7 +480,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, arch_disarm_kprobe(p); regs->ip = (unsigned long)p->addr; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; #endif case KPROBE_HIT_ACTIVE: @@ -576,7 +576,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } } /* else: not a kprobe fault; let the kernel handle it */ - preempt_enable_no_resched(); + preempt_enable(); return 0; } @@ -876,7 +876,7 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, flags @@ -910,7 +910,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: @@ -1051,7 +1051,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), kcb->jprobes_stack, MIN_STACK_SIZE(kcb->jprobe_saved_sp)); - preempt_enable_no_resched(); + preempt_enable(); return 1; } return 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b9..5611ed6 100644 --- 
a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -422,6 +422,20 @@ struct pv_apic_ops pv_apic_ops = { #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif +#ifdef CONFIG_HIGHPTE +/* + * kmap_atomic() might be an inline or a macro: + */ +static void *kmap_atomic_func(struct page *page, enum km_type idx) +{ + return kmap_atomic(page, idx); +} +static void *kmap_atomic_direct_func(struct page *page, enum km_type idx) +{ + return kmap_atomic_direct(page, idx); +} +#endif + struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, @@ -462,7 +476,8 @@ struct pv_mmu_ops pv_mmu_ops = { .ptep_modify_prot_commit = __ptep_modify_prot_commit, #ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = kmap_atomic, + .kmap_atomic_pte = kmap_atomic_func, + .kmap_atomic_pte_direct = kmap_atomic_direct_func, #endif #if PAGETABLE_LEVELS >= 3 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524..b9e7a3f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -105,7 +105,6 @@ void cpu_idle(void) tick_nohz_stop_sched_tick(1); while (!need_resched()) { - check_pgt_cache(); rmb(); if (cpu_is_offline(cpu)) @@ -117,10 +116,12 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); } + local_irq_disable(); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } @@ -162,8 +163,10 @@ void __show_regs(struct pt_regs *regs, int all) regs->ax, regs->bx, regs->cx, regs->dx); printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); - printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x" + " preempt:%08x\n", + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, + preempt_count()); if (!all) return; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb54..c8d0ece 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -152,9 +152,11 @@ void cpu_idle(void) } tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde..8d7d5c9 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -35,10 +35,11 @@ #include #include -#include - #include "tls.h" +#define CREATE_TRACE_POINTS +#include + enum x86_regset { REGSET_GENERAL, REGSET_FP, @@ -1497,8 +1498,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) tracehook_report_syscall_entry(regs)) ret = -1L; - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_enter(regs); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->orig_ax); if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1523,8 +1524,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_exit(regs); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/arch/x86/kernel/signal.c 
b/arch/x86/kernel/signal.c index 4c57875..5777895 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -782,6 +782,13 @@ static void do_signal(struct pt_regs *regs) int signr; sigset_t *oldset; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97..a83e38d 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -120,6 +120,16 @@ static void native_smp_send_reschedule(int cpu) apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + apic->send_IPI_allbutself(RESCHEDULE_VECTOR); +} + void native_send_call_func_single_ipi(int cpu) { apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 6bc211a..45e00eb 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -18,9 +18,9 @@ #include #include -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off) +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off) { long error; struct file *file; @@ -226,7 +226,7 @@ bottomup: } -asmlinkage long sys_uname(struct new_utsname __user *name) +SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) { int err; down_read(&uts_sem); diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 5c5d87f..9e2cb0b 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -84,11 +84,11 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) * manually to deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + atomic_spin_lock(&i8259A_lock); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. 
*/ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + atomic_spin_unlock(&i8259A_lock); } #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5204332..20ee0b1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -91,9 +91,10 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } -static inline void preempt_conditional_sti(struct pt_regs *regs) +static inline void preempt_conditional_sti(struct pt_regs *regs, int stack) { - inc_preempt_count(); + if (stack) + inc_preempt_count(); if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -104,11 +105,12 @@ static inline void conditional_cli(struct pt_regs *regs) local_irq_disable(); } -static inline void preempt_conditional_cli(struct pt_regs *regs) +static inline void preempt_conditional_cli(struct pt_regs *regs, int stack) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - dec_preempt_count(); + if (stack) + dec_preempt_count(); } #ifdef CONFIG_X86_32 @@ -235,9 +237,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 12, SIGBUS) == NOTIFY_STOP) return; - preempt_conditional_sti(regs); + preempt_conditional_sti(regs, STACKFAULT_STACK); do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, STACKFAULT_STACK); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -473,9 +475,9 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) return; #endif - preempt_conditional_sti(regs); + preempt_conditional_sti(regs, DEBUG_STACK); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, DEBUG_STACK); } #ifdef CONFIG_X86_64 @@ -552,7 +554,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) return; /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); + preempt_conditional_sti(regs, DEBUG_STACK); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -587,7 +589,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) */ clear_dr7: set_debugreg(0, 7); - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, DEBUG_STACK); return; #ifdef CONFIG_X86_32 @@ -602,7 +604,7 @@ debug_vm86: clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, DEBUG_STACK); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 027b5b4..76315a4 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -104,6 +104,7 @@ static __cpuinit void check_tsc_warp(void) */ void __cpuinit check_tsc_sync_source(int cpu) { + unsigned long flags; int cpus = 2; /* @@ -129,8 +130,11 @@ void __cpuinit check_tsc_sync_source(int cpu) /* * Wait for the target to arrive: */ + local_save_flags(flags); + local_irq_enable(); while (atomic_read(&start_count) != cpus-1) cpu_relax(); + local_irq_restore(flags); /* * Trigger the target to continue into the measurement too: */ diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 31ffc24..24e6d2b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -581,7 +581,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) struct irq_desc *desc; 
unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); /* Find out what's interrupting in the PIIX4 master 8259 */ outb(0x0c, 0x20); /* OCW3 Poll command */ @@ -618,7 +618,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) outb(0x60 + realirq, 0x20); } - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); desc = irq_to_desc(realirq); @@ -636,18 +636,20 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) return IRQ_HANDLED; out_unlock: - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); return IRQ_NONE; } static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = IRQF_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = IRQF_NODELAY, }; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 9c4e625..cdaeef2 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -137,6 +137,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) local_irq_enable(); if (!current->thread.vm86_info) { + local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a..a6c5525 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -59,7 +59,7 @@ int __vgetcpu_mode __section_vgetcpu_mode; struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = { - .lock = SEQLOCK_UNLOCKED, + .lock = __ATOMIC_SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), .sysctl_enabled = 1, }; @@ -67,27 +67,54 @@ void update_vsyscall_tz(void) { unsigned long flags; - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + write_atomic_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); /* sys_tz has changed */ vsyscall_gtod_data.sys_tz = sys_tz; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + write_atomic_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { unsigned long flags; - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + write_atomic_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + + if (likely(vsyscall_gtod_data.sysctl_enabled == 2)) { + struct timespec tmp = *(wall_time); + cycle_t (*vread)(void); + cycle_t now; + + vread = vsyscall_gtod_data.clock.vread; + if (likely(vread)) + now = vread(); + else + now = clock->read(clock); + + /* calculate interval: */ + now = (now - clock->cycle_last) & clock->mask; + /* convert to nsecs: */ + tmp.tv_nsec += ( now * clock->mult) >> clock->shift; + + while (tmp.tv_nsec >= NSEC_PER_SEC) { + tmp.tv_sec += 1; + tmp.tv_nsec -= NSEC_PER_SEC; + } + + vsyscall_gtod_data.wall_time_sec = tmp.tv_sec; + vsyscall_gtod_data.wall_time_nsec = tmp.tv_nsec; + } else { + vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; + vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + } + /* copy vsyscall data */ vsyscall_gtod_data.clock.vread = clock->vread; vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; vsyscall_gtod_data.clock.mask = clock->mask; vsyscall_gtod_data.clock.mult = clock->mult; vsyscall_gtod_data.clock.shift = clock->shift; - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; - 
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + write_atomic_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } /* RED-PEN may want to readd seq locking, but then the variable should be @@ -123,8 +150,28 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) unsigned seq; unsigned long mult, shift, nsec; cycle_t (*vread)(void); + + if (likely(__vsyscall_gtod_data.sysctl_enabled == 2)) { + struct timeval tmp; + + do { + barrier(); + tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; + tv->tv_usec = __vsyscall_gtod_data.wall_time_nsec; + barrier(); + tmp.tv_sec = __vsyscall_gtod_data.wall_time_sec; + tmp.tv_usec = __vsyscall_gtod_data.wall_time_nsec; + + } while (tmp.tv_usec != tv->tv_usec || + tmp.tv_sec != tv->tv_sec); + + tv->tv_usec /= NSEC_PER_MSEC; + tv->tv_usec *= USEC_PER_MSEC; + return; + } + do { - seq = read_seqbegin(&__vsyscall_gtod_data.lock); + seq = read_atomic_seqbegin(&__vsyscall_gtod_data.lock); vread = __vsyscall_gtod_data.clock.vread; if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { @@ -133,6 +180,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) } now = vread(); + base = __vsyscall_gtod_data.clock.cycle_last; mask = __vsyscall_gtod_data.clock.mask; mult = __vsyscall_gtod_data.clock.mult; @@ -140,7 +188,9 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; nsec = __vsyscall_gtod_data.wall_time_nsec; - } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); + } while (read_atomic_seqretry(&__vsyscall_gtod_data.lock, seq)); + + now = vread(); /* calculate interval: */ cycle_delta = (now - base) & mask; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8600a09..2d89b40 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -65,7 +65,7 @@ config KVM_AMD config KVM_TRACE bool "KVM trace support" - depends on KVM && SYSFS + depends on KVM && SYSFS && !PREEMPTRT select MARKERS select RELAY select DEBUG_FS diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 21f68e0..463f15a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -240,11 +240,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) { struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, irq_ack_notifier); - spin_lock(&ps->inject_lock); + atomic_spin_lock(&ps->inject_lock); if (atomic_dec_return(&ps->pit_timer.pending) < 0) atomic_inc(&ps->pit_timer.pending); ps->irq_ack = 1; - spin_unlock(&ps->inject_lock); + atomic_spin_unlock(&ps->inject_lock); } void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -580,7 +580,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); - spin_lock_init(&pit->pit_state.inject_lock); + atomic_spin_lock_init(&pit->pit_state.inject_lock); /* Initialize PIO device */ pit->dev.read = pit_ioport_read; @@ -672,12 +672,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) /* Try to inject pending interrupts when * last one has been acked. 
*/ - spin_lock(&ps->inject_lock); + atomic_spin_lock(&ps->inject_lock); if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { ps->irq_ack = 0; inject = 1; } - spin_unlock(&ps->inject_lock); + atomic_spin_unlock(&ps->inject_lock); if (inject) __inject_pit_timer_intr(kvm); } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index bbd863f..33e30e3 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -26,7 +26,7 @@ struct kvm_kpit_state { u32 speaker_data_on; struct mutex lock; struct kvm_pit *pit; - spinlock_t inject_lock; + atomic_spinlock_t inject_lock; unsigned long irq_ack; struct kvm_irq_ack_notifier irq_ack_notifier; }; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 1ccb50c..91fcca1 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -34,7 +34,7 @@ static void pic_lock(struct kvm_pic *s) __acquires(&s->lock) { - spin_lock(&s->lock); + atomic_spin_lock(&s->lock); } static void pic_unlock(struct kvm_pic *s) @@ -48,7 +48,7 @@ static void pic_unlock(struct kvm_pic *s) s->pending_acks = 0; s->wakeup_needed = false; - spin_unlock(&s->lock); + atomic_spin_unlock(&s->lock); while (acks) { kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)), @@ -522,7 +522,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); if (!s) return NULL; - spin_lock_init(&s->lock); + atomic_spin_lock_init(&s->lock); s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; s->pics[1].elcr_mask = 0xde; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 9f59318..fdae2ef 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -62,7 +62,7 @@ struct kvm_kpic_state { }; struct kvm_pic { - spinlock_t lock; + atomic_spinlock_t lock; bool wakeup_needed; unsigned pending_acks; struct kvm *kvm; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bfae139..07df264 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -561,6 +561,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) nr = (address - idt_descr.address) >> 3; if (nr == 6) { + zap_rt_locks(); do_invalid_op(regs, 0); return 1; } @@ -1032,7 +1033,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bc..71871c8 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -77,13 +77,13 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, if (write) mask |= _PAGE_RW; - ptep = pte_offset_map(&pmd, addr); + ptep = pte_offset_map_direct(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { - pte_unmap(ptep); + pte_unmap_direct(ptep); return 0; } VM_BUG_ON(!pfn_valid(pte_pfn(pte))); @@ -93,7 +93,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + pte_unmap_direct(ptep - 1); return 1; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 2112ed5..e33ec9f 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -4,9 +4,9 @@ void *kmap(struct page *page) { - might_sleep(); if (!PageHighMem(page)) return page_address(page); + might_sleep(); return kmap_high(page); } @@ -19,6 +19,27 @@ 
void kunmap(struct page *page) kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} +EXPORT_SYMBOL_GPL(kmap_to_page); /* PREEMPT_RT converts some modules to use this */ + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -27,12 +48,13 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) @@ -42,18 +64,23 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - BUG_ON(!pte_none(*(kmap_pte-idx))); + WARN_ON(!pte_none(*(kmap_pte-idx))); set_pte(kmap_pte-idx, mk_pte(page, prot)); return (void *)vaddr; } -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic_direct(struct page *page, enum km_type type) +{ + return __kmap_atomic_prot(page, type, kmap_prot); +} + +void *__kmap_atomic(struct page *page, enum km_type type) { return kmap_atomic_prot(page, type, kmap_prot); } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); @@ -74,19 +101,21 @@ void kunmap_atomic(void *kvaddr, enum km_type type) } pagefault_enable(); + preempt_enable(); } /* * This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. 
*/ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { + preempt_disable(); return kmap_atomic_prot_pfn(pfn, type, kmap_prot); } -EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ +EXPORT_SYMBOL_GPL(__kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -101,9 +130,10 @@ struct page *kmap_atomic_to_page(void *ptr) EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_prot); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_prot); void __init set_highmem_pages_init(void) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 0607119..eab22a7 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -14,8 +14,6 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - unsigned long __initdata e820_table_start; unsigned long __meminitdata e820_table_end; unsigned long __meminitdata e820_table_top; diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84c..bd045c2 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -37,6 +37,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); pagefault_disable(); debug_kmap_atomic(type); @@ -83,5 +84,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) kpte_clear_flush(kmap_pte-idx, vaddr); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fbb46d6..dd6a170 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -862,8 +862,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, baddr = *addr; } +#if 0 /* Must avoid aliasing mappings in the highmem code */ kmap_flush_unused(); +#endif vm_unmap_aliases(); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ed34f5e..c2ea747 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -132,6 +132,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) reserved at the pmd (PDPT) level. */ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); + preempt_disable(); /* * According to Intel App note "TLBs, Paging-Structure Caches, * and Their Invalidation", April 2007, document 317080-001, @@ -140,6 +141,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) */ if (mm == current->active_mm) write_cr3(read_cr3()); + preempt_enable(); } #else /* !CONFIG_X86_PAE */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c814e14..fe5d800 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -40,7 +40,7 @@ union smp_flush_state { struct { struct mm_struct *flush_mm; unsigned long flush_va; - spinlock_t tlbstate_lock; + atomic_spinlock_t tlbstate_lock; DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; @@ -179,7 +179,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is * probably not worth checking this for a cache-hot lock. 
*/ - spin_lock(&f->tlbstate_lock); + atomic_spin_lock(&f->tlbstate_lock); f->flush_mm = mm; f->flush_va = va; @@ -197,7 +197,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, f->flush_mm = NULL; f->flush_va = 0; - spin_unlock(&f->tlbstate_lock); + atomic_spin_unlock(&f->tlbstate_lock); } void native_flush_tlb_others(const struct cpumask *cpumask, @@ -221,7 +221,7 @@ static int __cpuinit init_smp_flush(void) int i; for (i = 0; i < ARRAY_SIZE(flush_state); i++) - spin_lock_init(&flush_state[i].tlbstate_lock); + atomic_spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 89b9a5c..dea0100 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -125,9 +125,9 @@ static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - spin_lock(&oprofilefs_lock); + atomic_spin_lock(&oprofilefs_lock); model->setup_ctrs(msrs); - spin_unlock(&oprofilefs_lock); + atomic_spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2202b62..fc2697c 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -81,7 +81,7 @@ int pcibios_scanned; * This interrupt-safe spinlock protects all accesses to PCI * configuration space. */ -DEFINE_SPINLOCK(pci_config_lock); +DEFINE_ATOMIC_SPINLOCK(pci_config_lock); static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) { diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd13c3e..e76cff3 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -27,7 +27,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, return -EINVAL; } - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); @@ -43,7 +43,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -56,7 +56,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); @@ -72,7 +72,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -108,7 +108,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, if (dev & 0x10) return PCIBIOS_DEVICE_NOT_FOUND; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb((u8)(0xF0 | (fn << 1)), 0xCF8); outb((u8)bus, 0xCFA); @@ -127,7 +127,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, outb(0, 0xCF8); - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -147,7 +147,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, if (dev & 0x10) return PCIBIOS_DEVICE_NOT_FOUND; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb((u8)(0xF0 | (fn << 1)), 0xCF8); outb((u8)bus, 0xCFA); @@ -166,7 +166,7 @@ static int pci_conf2_write(unsigned 
int seg, unsigned int bus, outb(0, 0xCF8); - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -223,16 +223,23 @@ static int __init pci_check_type1(void) unsigned int tmp; int works = 0; - local_irq_save(flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + atomic_spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -242,17 +249,19 @@ static int __init pci_check_type2(void) unsigned long flags; int works = 0; - local_irq_save(flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return works; } diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 8b2d561..7ca333f 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -72,7 +72,7 @@ err: *value = -1; if (!base) goto err; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); pci_exp_set_dev_base(base, bus, devfn); @@ -87,7 +87,7 @@ err: *value = -1; *value = mmio_config_readl(mmcfg_virt_addr + reg); break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -105,7 +105,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, if (!base) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); pci_exp_set_dev_base(base, bus, devfn); @@ -120,7 +120,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, mmio_config_writel(mmcfg_virt_addr + reg, value); break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8eb295e..f2a1f1f 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -41,7 +41,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); write_cf8(bus, devfn, reg); @@ -66,7 +66,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -80,7 +80,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); write_cf8(bus, devfn, reg); @@ -105,7 +105,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int 
bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 1c975cc..ffebd80 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -161,7 +161,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); switch (len) { case 1: @@ -212,7 +212,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return (int)((result & 0xff00) >> 8); } @@ -227,7 +227,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, if ((bus > 255) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); switch (len) { case 1: @@ -268,7 +268,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return (int)((result & 0xff00) >> 8); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78..cd36532 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -47,11 +47,11 @@ notrace static noinline int do_realtime(struct timespec *ts) { unsigned long seq, ns; do { - seq = read_seqbegin(&gtod->lock); + seq = read_atomic_seqbegin(&gtod->lock); ts->tv_sec = gtod->wall_time_sec; ts->tv_nsec = gtod->wall_time_nsec; ns = vgetns(); - } while (unlikely(read_seqretry(&gtod->lock, seq))); + } while (unlikely(read_atomic_seqretry(&gtod->lock, seq))); timespec_add_ns(ts, ns); return 0; } @@ -76,12 +76,12 @@ notrace static noinline int do_monotonic(struct timespec *ts) { unsigned long seq, ns, secs; do { - seq = read_seqbegin(&gtod->lock); + seq = read_atomic_seqbegin(&gtod->lock); secs = gtod->wall_time_sec; ns = gtod->wall_time_nsec + vgetns(); secs += gtod->wall_to_monotonic.tv_sec; ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(&gtod->lock, seq))); + } while (unlikely(read_atomic_seqretry(&gtod->lock, seq))); vset_normalized_timespec(ts, secs, ns); return 0; } diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h index e39edf5..32c5e28 100644 --- a/arch/xtensa/include/asm/rwsem.h +++ b/arch/xtensa/include/asm/rwsem.h @@ -25,7 +25,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 @@ -37,29 +37,37 @@ struct rw_semaphore { struct list_head wait_list; }; -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ LIST_HEAD_INIT((name).wait_list) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore *
+rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); -static inline void init_rwsem(struct rw_semaphore *sem) +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { if (atomic_add_return(1,(atomic_t *)(&sem->count)) > 0) smp_wmb(); @@ -67,7 +75,7 @@ static inline void __down_read(struct rw_semaphore *sem) rwsem_down_read_failed(sem); } -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -84,7 +92,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { int tmp; @@ -96,7 +104,7 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -109,7 +117,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { int tmp; @@ -122,7 +130,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { smp_wmb(); if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, @@ -133,7 +141,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } @@ -141,7 +149,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { int tmp; @@ -154,12 +162,37 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { smp_mb(); return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +}; + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void 
init_rwsem(struct rw_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index a1badb3..bb599c3 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -109,7 +109,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) diff --git a/block/blk-core.c b/block/blk-core.c index e3299a7..620579e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); */ void blk_plug_device(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() @@ -241,7 +241,7 @@ EXPORT_SYMBOL(blk_plug_device_unlocked); */ int blk_remove_plug(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) return 0; @@ -333,7 +333,7 @@ EXPORT_SYMBOL(blk_unplug); **/ void blk_start_queue(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); __blk_run_queue(q); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 3d87362..feee017 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -197,7 +197,12 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; * interrupt level */ ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ + +/* + * Need to be raw because it might be used in acpi_processor_idle(): + */ +ACPI_EXTERN atomic_spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ + #define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock #define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 23d5505..756fe9e 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -85,7 +85,7 @@ acpi_status acpi_hw_clear_acpi_status(void) ACPI_BITMASK_ALL_FIXED_STATUS, ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + atomic_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* Clear the fixed events in PM1 A/B */ @@ -100,7 +100,7 @@ acpi_status acpi_hw_clear_acpi_status(void) status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block, NULL); unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + atomic_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c index 9829979..3898b18 100644 --- a/drivers/acpi/acpica/hwxface.c +++ b/drivers/acpi/acpica/hwxface.c @@ -341,7 +341,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) 
return_ACPI_STATUS(AE_BAD_PARAMETER); } - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + atomic_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* * At this point, we know that the parent register is one of the @@ -402,7 +402,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + atomic_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 80bb651..477de01 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -84,7 +84,7 @@ acpi_status acpi_ut_mutex_initialize(void) /* Create the spinlocks for use at interrupt level */ spin_lock_init(acpi_gbl_gpe_lock); - spin_lock_init(acpi_gbl_hardware_lock); + atomic_spin_lock_init(acpi_gbl_hardware_lock); /* Create the reader/writer lock for namespace access */ @@ -117,11 +117,6 @@ void acpi_ut_mutex_terminate(void) (void)acpi_ut_delete_mutex(i); } - /* Delete the spinlocks */ - - acpi_os_delete_lock(acpi_gbl_gpe_lock); - acpi_os_delete_lock(acpi_gbl_hardware_lock); - /* Delete the reader/writer lock */ acpi_ut_delete_rw_lock(&acpi_gbl_namespace_rw_lock); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 391f331..fe086ca 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -573,8 +573,22 @@ static u32 acpi_ec_gpe_handler(void *data) if (test_bit(EC_FLAGS_GPE_MODE, &ec->flags)) { gpe_transaction(ec, status); if (ec_transaction_done(ec) && - (status & ACPI_EC_FLAG_IBF) == 0) + (status & ACPI_EC_FLAG_IBF) == 0) { + +#ifndef CONFIG_PREEMPT_RT wake_up(&ec->wait); +#else + // hack ... + if (waitqueue_active(&ec->wait)) { + struct task_struct *task; + + task = list_entry(ec->wait.task_list.next, + wait_queue_t, task_list)->private; + if (task) + wake_up_process(task); + } +#endif + } } ec_check_sci(ec, status); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 8dff236..44e7df8 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -833,14 +833,14 @@ void acpi_os_delete_lock(acpi_spinlock handle) acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { - struct semaphore *sem = NULL; + struct anon_semaphore *sem = NULL; - sem = acpi_os_allocate(sizeof(struct semaphore)); + sem = acpi_os_allocate(sizeof(struct anon_semaphore)); if (!sem) return AE_NO_MEMORY; - memset(sem, 0, sizeof(struct semaphore)); + memset(sem, 0, sizeof(struct anon_semaphore)); - sema_init(sem, initial_units); + anon_sema_init(sem, initial_units); *handle = (acpi_handle *) sem; @@ -859,7 +859,7 @@ acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) acpi_status acpi_os_delete_semaphore(acpi_handle handle) { - struct semaphore *sem = (struct semaphore *)handle; + struct anon_semaphore *sem = (struct anon_semaphore *)handle; if (!sem) return AE_BAD_PARAMETER; @@ -879,7 +879,7 @@ acpi_status acpi_os_delete_semaphore(acpi_handle handle) acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) { acpi_status status = AE_OK; - struct semaphore *sem = (struct semaphore *)handle; + struct anon_semaphore *sem = (struct anon_semaphore *)handle; long jiffies; int ret = 0; @@ -897,7 +897,7 @@ acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) else jiffies = msecs_to_jiffies(timeout); - ret = down_timeout(sem, jiffies); + ret = anon_down_timeout(sem, jiffies); if (ret) status = AE_TIME; @@ -920,7 +920,7 @@ 
acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) */ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) { - struct semaphore *sem = (struct semaphore *)handle; + struct anon_semaphore *sem = (struct anon_semaphore *)handle; if (!sem || (units < 1)) return AE_BAD_PARAMETER; @@ -931,7 +931,7 @@ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Signaling semaphore[%p|%d]\n", handle, units)); - up(sem); + anon_up(sem); return AE_OK; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 66393d5..716709b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -921,7 +921,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, } static int c3_cpu_count; -static DEFINE_SPINLOCK(c3_lock); +static DEFINE_ATOMIC_SPINLOCK(c3_lock); /** * acpi_idle_enter_bm - enters C3 with proper BM handling @@ -996,12 +996,12 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, * without doing anything. */ if (pr->flags.bm_check && pr->flags.bm_control) { - spin_lock(&c3_lock); + atomic_spin_lock(&c3_lock); c3_cpu_count++; /* Disable bus master arbitration when all CPUs are in C3 */ if (c3_cpu_count == num_online_cpus()) acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1); - spin_unlock(&c3_lock); + atomic_spin_unlock(&c3_lock); } else if (!pr->flags.bm_check) { ACPI_FLUSH_CPU_CACHE(); } @@ -1010,10 +1010,10 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, /* Re-enable bus master arbitration */ if (pr->flags.bm_check && pr->flags.bm_control) { - spin_lock(&c3_lock); + atomic_spin_lock(&c3_lock); acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0); c3_cpu_count--; - spin_unlock(&c3_lock); + atomic_spin_unlock(&c3_lock); } kt2 = ktime_get_real(); idle_time = ktime_to_us(ktime_sub(kt2, kt1)); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index bbbb1fa..b79d110 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -837,9 +837,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, unsigned long flags; unsigned int consumed; - local_irq_save(flags); + local_irq_save_nort(flags); consumed = ata_sff_data_xfer(dev, buf, buflen, rw); - local_irq_restore(flags); + local_irq_restore_nort(flags); return consumed; } @@ -878,7 +878,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) unsigned long flags; /* FIXME: use a bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page, KM_IRQ0); /* do the actual data transfer */ @@ -886,7 +886,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) do_write); kunmap_atomic(buf, KM_IRQ0); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, @@ -1016,7 +1016,7 @@ next_sg: unsigned long flags; /* FIXME: use bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page, KM_IRQ0); /* do the actual data transfer */ @@ -1024,7 +1024,7 @@ next_sg: count, rw); kunmap_atomic(buf, KM_IRQ0); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); consumed = ap->ops->sff_data_xfer(dev, buf + offset, diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 973bf2a..27ad896 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -173,10 +173,10 @@ static ssize_t driver_unbind(struct device_driver *drv, dev = 
bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == drv) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + mutex_lock(&dev->parent->mutex); device_release_driver(dev); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); err = count; } put_device(dev); @@ -200,12 +200,12 @@ static ssize_t driver_bind(struct device_driver *drv, dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == NULL && driver_match_device(drv, dev)) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + mutex_lock(&dev->parent->mutex); + mutex_lock(&dev->mutex); err = driver_probe_device(drv, dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); if (err > 0) { /* success */ @@ -739,10 +739,10 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, if (!dev->driver) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + mutex_lock(&dev->parent->mutex); ret = device_attach(dev); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); } return ret < 0 ? ret : 0; } @@ -774,10 +774,10 @@ int device_reprobe(struct device *dev) { if (dev->driver) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + mutex_lock(&dev->parent->mutex); device_release_driver(dev); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); } return bus_rescan_devices_helper(dev, NULL); } diff --git a/drivers/base/core.c b/drivers/base/core.c index c34774d..5b91be3 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -550,7 +549,7 @@ void device_initialize(struct device *dev) dev->kobj.kset = devices_kset; kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); - init_MUTEX(&dev->sem); + mutex_init(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); device_init_wakeup(dev, 0); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f010687..c90f82b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -84,7 +84,7 @@ static void driver_sysfs_remove(struct device *dev) * for before calling this. (It is ok to call with no other effort * from a driver's probe() method.) * - * This function must be called with @dev->sem held. + * This function must be called with @dev->mutex held. */ int device_bind_driver(struct device *dev) { @@ -189,8 +189,8 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe); * This function returns -ENODEV if the device is not registered, * 1 if the device is bound sucessfully and 0 otherwise. * - * This function must be called with @dev->sem held. When called for a - * USB interface, @dev->parent->sem must be held as well. + * This function must be called with @dev->mutex held. When called for a + * USB interface, @dev->parent->mutex must be held as well. */ int driver_probe_device(struct device_driver *drv, struct device *dev) { @@ -229,13 +229,13 @@ static int __device_attach(struct device_driver *drv, void *data) * 0 if no matching driver was found; * -ENODEV if the device is not registered. * - * When called for a USB interface, @dev->parent->sem must be held. + * When called for a USB interface, @dev->parent->mutex must be held. 
*/ int device_attach(struct device *dev) { int ret = 0; - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->driver) { ret = device_bind_driver(dev); if (ret == 0) @@ -247,7 +247,7 @@ int device_attach(struct device *dev) } else { ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); } - up(&dev->sem); + mutex_unlock(&dev->mutex); return ret; } EXPORT_SYMBOL_GPL(device_attach); @@ -270,13 +270,13 @@ static int __driver_attach(struct device *dev, void *data) return 0; if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + mutex_lock(&dev->parent->mutex); + mutex_lock(&dev->mutex); if (!dev->driver) driver_probe_device(drv, dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); return 0; } @@ -297,8 +297,8 @@ int driver_attach(struct device_driver *drv) EXPORT_SYMBOL_GPL(driver_attach); /* - * __device_release_driver() must be called with @dev->sem held. - * When called for a USB interface, @dev->parent->sem must be held as well. + * __device_release_driver() must be called with @dev->mutex held. + * When called for a USB interface, @dev->parent->mutex must be held as well. */ static void __device_release_driver(struct device *dev) { @@ -332,7 +332,7 @@ static void __device_release_driver(struct device *dev) * @dev: device. * * Manually detach device from driver. - * When called for a USB interface, @dev->parent->sem must be held. + * When called for a USB interface, @dev->parent->mutex must be held. */ void device_release_driver(struct device *dev) { @@ -341,9 +341,9 @@ void device_release_driver(struct device *dev) * within their ->remove callback for the same device, they * will deadlock right here. */ - down(&dev->sem); + mutex_lock(&dev->mutex); __device_release_driver(dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); } EXPORT_SYMBOL_GPL(device_release_driver); @@ -370,13 +370,13 @@ void driver_detach(struct device_driver *drv) spin_unlock(&drv->p->klist_devices.k_lock); if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + mutex_lock(&dev->parent->mutex); + mutex_lock(&dev->mutex); if (dev->driver == drv) __device_release_driver(dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); put_device(dev); } } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 58a3e57..01d026d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -33,8 +33,8 @@ * because children are guaranteed to be discovered after parents, and * are inserted at the back of the list on discovery. * - * Since device_pm_add() may be called with a device semaphore held, - * we must never try to acquire a device semaphore while holding + * Since device_pm_add() may be called with a device mutex held, + * we must never try to acquire a device mutex while holding * dpm_list_mutex. 
*/ @@ -381,7 +381,7 @@ static int device_resume(struct device *dev, pm_message_t state) TRACE_DEVICE(dev); TRACE_RESUME(0); - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->bus) { if (dev->bus->pm) { @@ -414,7 +414,7 @@ static int device_resume(struct device *dev, pm_message_t state) } } End: - up(&dev->sem); + mutex_unlock(&dev->mutex); TRACE_RESUME(error); return error; @@ -468,7 +468,7 @@ static void dpm_resume(pm_message_t state) */ static void device_complete(struct device *dev, pm_message_t state) { - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->class && dev->class->pm && dev->class->pm->complete) { pm_dev_dbg(dev, state, "completing class "); @@ -485,7 +485,7 @@ static void device_complete(struct device *dev, pm_message_t state) dev->bus->pm->complete(dev); } - up(&dev->sem); + mutex_unlock(&dev->mutex); } /** @@ -619,7 +619,7 @@ static int device_suspend(struct device *dev, pm_message_t state) { int error = 0; - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->class) { if (dev->class->pm) { @@ -654,7 +654,7 @@ static int device_suspend(struct device *dev, pm_message_t state) } } End: - up(&dev->sem); + mutex_unlock(&dev->mutex); return error; } @@ -705,7 +705,7 @@ static int device_prepare(struct device *dev, pm_message_t state) { int error = 0; - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) { pm_dev_dbg(dev, state, "preparing "); @@ -729,7 +729,7 @@ static int device_prepare(struct device *dev, pm_message_t state) suspend_report_result(dev->class->pm->prepare, error); } End: - up(&dev->sem); + mutex_unlock(&dev->mutex); return error; } diff --git a/drivers/block/hd.c b/drivers/block/hd.c index f9d0160..cc71770 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -165,12 +165,12 @@ unsigned long read_timer(void) unsigned long t, flags; int i; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); t = jiffies * 11932; outb_p(0, 0x43); i = inb_p(0x40); i |= inb(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return(t - i); } #endif diff --git a/drivers/block/paride/pseudo.h b/drivers/block/paride/pseudo.h index bc37032..0fbc78c 100644 --- a/drivers/block/paride/pseudo.h +++ b/drivers/block/paride/pseudo.h @@ -43,7 +43,7 @@ static unsigned long ps_timeout; static int ps_tq_active = 0; static int ps_nice = 0; -static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused))); +static __attribute__((unused)) DEFINE_SPINLOCK(ps_spinlock); static DECLARE_DELAYED_WORK(ps_tq, ps_tq_int); diff --git a/drivers/char/random.c b/drivers/char/random.c index 8c74448..91cc9c6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -623,8 +623,11 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ if (input_pool.entropy_count > trickle_thresh && - (__get_cpu_var(trickle_count)++ & 0xfff)) - goto out; + (__get_cpu_var(trickle_count)++ & 0xfff)) { + preempt_enable(); + return; + } + preempt_enable(); sample.jiffies = jiffies; sample.cycles = get_cycles(); @@ -666,8 +669,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) credit_entropy_bits(&input_pool, min_t(int, fls(delta>>1), 11)); } -out: - preempt_enable(); } void add_input_randomness(unsigned int type, unsigned int code, diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index e0d0f8b..d809c4d 100644 --- a/drivers/char/rtc.c 
+++ b/drivers/char/rtc.c @@ -1197,10 +1197,12 @@ static void rtc_dropped_irq(unsigned long data) spin_unlock_irq(&rtc_lock); +#ifndef CONFIG_PREEMPT_RT if (printk_ratelimit()) { printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq); } +#endif /* Now we have new data */ wake_up_interruptible(&rtc_wait); diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c index 0296612..feebaf5 100644 --- a/drivers/char/tty_buffer.c +++ b/drivers/char/tty_buffer.c @@ -492,10 +492,14 @@ void tty_flip_buffer_push(struct tty_struct *tty) tty->buf.tail->commit = tty->buf.tail->used; spin_unlock_irqrestore(&tty->buf.lock, flags); +#ifndef CONFIG_PREEMPT_RT if (tty->low_latency) flush_to_ldisc(&tty->buf.work.work); else schedule_delayed_work(&tty->buf.work, 1); +#else + flush_to_ldisc(&tty->buf.work.work); +#endif } EXPORT_SYMBOL(tty_flip_buffer_push); diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index 414372a..c90dcc1 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -69,7 +69,7 @@ static void put_ldisc(struct tty_ldisc *ld) * We really want an "atomic_dec_and_lock_irqsave()", * but we don't have it, so this does it by hand. */ - local_irq_save(flags); + local_irq_save_nort(flags); if (atomic_dec_and_lock(&ld->users, &tty_ldisc_lock)) { struct tty_ldisc_ops *ldo = ld->ops; @@ -80,7 +80,7 @@ static void put_ldisc(struct tty_ldisc *ld) kfree(ld); return; } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /** diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 404f4c1..dee3f64 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -2537,7 +2537,7 @@ static struct console vt_console_driver = { .write = vt_console_print, .device = vt_console_device, .unblank = unblank_screen, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ATOMIC, .index = -1, }; #endif diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 97e656a..16c0e4a 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -762,9 +762,9 @@ static int update_unit(struct device *dev, void *data) struct fw_driver *driver = (struct fw_driver *)dev->driver; if (is_fw_unit(dev) && driver != NULL && driver->update != NULL) { - down(&dev->sem); + mutex_lock(&dev->mutex); driver->update(unit); - up(&dev->sem); + mutex_unlock(&dev->mutex); } return 0; diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c index c75fd35..ebf9f63 100644 --- a/drivers/gpu/drm/r128/r128_cce.c +++ b/drivers/gpu/drm/r128/r128_cce.c @@ -353,6 +353,11 @@ static int r128_do_init_cce(struct drm_device * dev, drm_r128_init_t * init) DRM_DEBUG("\n"); + if (dev->dev_private) { + DRM_DEBUG("called when already initialized\n"); + return -EINVAL; + } + dev_priv = kzalloc(sizeof(drm_r128_private_t), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; @@ -649,6 +654,8 @@ int r128_cce_start(struct drm_device *dev, void *data, struct drm_file *file_pri LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + if (dev_priv->cce_running || dev_priv->cce_mode == R128_PM4_NONPM4) { DRM_DEBUG("while CCE running\n"); return 0; @@ -671,6 +678,8 @@ int r128_cce_stop(struct drm_device *dev, void *data, struct drm_file *file_priv LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + /* Flush any pending CCE commands. This ensures any outstanding * commands are exectuted by the engine before we turn it off. 
*/ @@ -708,10 +717,7 @@ int r128_cce_reset(struct drm_device *dev, void *data, struct drm_file *file_pri LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_DEBUG("called before init done\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); r128_do_cce_reset(dev_priv); @@ -728,6 +734,8 @@ int r128_cce_idle(struct drm_device *dev, void *data, struct drm_file *file_priv LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + if (dev_priv->cce_running) { r128_do_cce_flush(dev_priv); } @@ -741,6 +749,8 @@ int r128_engine_reset(struct drm_device *dev, void *data, struct drm_file *file_ LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev->dev_private); + return r128_do_engine_reset(dev); } diff --git a/drivers/gpu/drm/r128/r128_drv.h b/drivers/gpu/drm/r128/r128_drv.h index 797a26c..3c60829 100644 --- a/drivers/gpu/drm/r128/r128_drv.h +++ b/drivers/gpu/drm/r128/r128_drv.h @@ -422,6 +422,14 @@ static __inline__ void r128_update_ring_snapshot(drm_r128_private_t * dev_priv) * Misc helper macros */ +#define DEV_INIT_TEST_WITH_RETURN(_dev_priv) \ +do { \ + if (!_dev_priv) { \ + DRM_ERROR("called with no initialization\n"); \ + return -EINVAL; \ + } \ +} while (0) + #define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \ do { \ drm_r128_ring_buffer_t *ring = &dev_priv->ring; int i; \ diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c index 026a48c..af2665c 100644 --- a/drivers/gpu/drm/r128/r128_state.c +++ b/drivers/gpu/drm/r128/r128_state.c @@ -1244,14 +1244,18 @@ static void r128_cce_dispatch_stipple(struct drm_device * dev, u32 * stipple) static int r128_cce_clear(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_r128_private_t *dev_priv = dev->dev_private; - drm_r128_sarea_t *sarea_priv = dev_priv->sarea_priv; + drm_r128_sarea_t *sarea_priv; drm_r128_clear_t *clear = data; DRM_DEBUG("\n"); LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); + sarea_priv = dev_priv->sarea_priv; + if (sarea_priv->nbox > R128_NR_SAREA_CLIPRECTS) sarea_priv->nbox = R128_NR_SAREA_CLIPRECTS; @@ -1312,6 +1316,8 @@ static int r128_cce_flip(struct drm_device *dev, void *data, struct drm_file *fi LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); if (!dev_priv->page_flipping) @@ -1331,6 +1337,8 @@ static int r128_cce_swap(struct drm_device *dev, void *data, struct drm_file *fi LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); if (sarea_priv->nbox > R128_NR_SAREA_CLIPRECTS) @@ -1354,10 +1362,7 @@ static int r128_cce_vertex(struct drm_device *dev, void *data, struct drm_file * LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n", DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard); @@ -1410,10 +1415,7 @@ static int r128_cce_indices(struct drm_device *dev, void *data, struct drm_file LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("pid=%d buf=%d s=%d e=%d d=%d\n", DRM_CURRENTPID, elts->idx, elts->start, elts->end, elts->discard); @@ -1476,6 +1478,8 @@ static int r128_cce_blit(struct drm_device *dev, void *data, struct 
drm_file *fi LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + DRM_DEBUG("pid=%d index=%d\n", DRM_CURRENTPID, blit->idx); if (blit->idx < 0 || blit->idx >= dma->buf_count) { @@ -1501,6 +1505,8 @@ static int r128_cce_depth(struct drm_device *dev, void *data, struct drm_file *f LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + RING_SPACE_TEST_WITH_RETURN(dev_priv); ret = -EINVAL; @@ -1531,6 +1537,8 @@ static int r128_cce_stipple(struct drm_device *dev, void *data, struct drm_file LOCK_TEST_WITH_RETURN(dev, file_priv); + DEV_INIT_TEST_WITH_RETURN(dev_priv); + if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32))) return -EFAULT; @@ -1555,10 +1563,7 @@ static int r128_cce_indirect(struct drm_device *dev, void *data, struct drm_file LOCK_TEST_WITH_RETURN(dev, file_priv); - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("idx=%d s=%d e=%d d=%d\n", indirect->idx, indirect->start, indirect->end, @@ -1620,10 +1625,7 @@ static int r128_getparam(struct drm_device *dev, void *data, struct drm_file *fi drm_r128_getparam_t *param = data; int value; - if (!dev_priv) { - DRM_ERROR("called with no initialization\n"); - return -EINVAL; - } + DEV_INIT_TEST_WITH_RETURN(dev_priv); DRM_DEBUG("pid=%d\n", DRM_CURRENTPID); diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index e59b6de..b35ede8 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -90,7 +90,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) if (r_clc >= 16) r_clc = 0; } - local_irq_save(flags); + local_irq_save_nort(flags); /* * PIO mode => ATA FIFO on, ATAPI FIFO off @@ -112,7 +112,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) pci_write_config_byte(dev, port, s_clc); pci_write_config_byte(dev, port + unit + 2, (a_clc << 4) | r_clc); - local_irq_restore(flags); + local_irq_restore_nort(flags); } /** @@ -223,7 +223,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision < 0xC2) { /* @@ -314,7 +314,7 @@ out: } pci_dev_put(north); pci_dev_put(isa_dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); return 0; } @@ -376,7 +376,7 @@ static u8 ali_cable_detect(ide_hwif_t *hwif) unsigned long flags; u8 cbl = ATA_CBL_PATA40, tmpbyte; - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision >= 0xC2) { /* @@ -397,7 +397,7 @@ static u8 ali_cable_detect(ide_hwif_t *hwif) } } - local_irq_restore(flags); + local_irq_restore_nort(flags); return cbl; } diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index 7ce68ef..26c3a83 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -1302,7 +1302,7 @@ static int __devinit init_dma_hpt366(ide_hwif_t *hwif, dma_old = inb(base + 2); - local_irq_save(flags); + local_irq_save_nort(flags); dma_new = dma_old; pci_read_config_byte(dev, hwif->channel ? 
0x4b : 0x43, &masterdma); @@ -1313,7 +1313,7 @@ static int __devinit init_dma_hpt366(ide_hwif_t *hwif, if (dma_new != dma_old) outb(dma_new, base + 2); - local_irq_restore(flags); + local_irq_restore_nort(flags); printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", hwif->name, base, base + 7); diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index 46721c4..b6f114a 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -174,7 +174,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -185,7 +185,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, insl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; @@ -218,7 +218,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -229,7 +229,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, outsl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index db96138..b8b3ab6 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -667,7 +667,7 @@ void ide_timer_expiry (unsigned long data) /* disable_irq_nosync ?? */ disable_irq(hwif->irq); /* local CPU only, as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwif->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 2892b24..b1765dd 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -129,12 +129,12 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, if ((stat & ATA_BUSY) == 0) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *rstat = stat; return -EBUSY; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. 
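
The ata/ide hunks above and below repeatedly swap local_irq_save()/local_irq_restore() (and local_irq_disable()) for *_nort() variants. The sketch below shows the presumed shape of these helpers as commonly carried in the RT patch set: identical to the plain IRQ primitives on a non-RT kernel, reduced to flags bookkeeping when CONFIG_PREEMPT_RT is set, because the converted code then runs in preemptible process context. The exact definitions are introduced elsewhere in the series and may differ; the macro bodies here are assumptions for illustration only.

	/*
	 * Illustrative sketch, not taken from this series: presumed
	 * semantics of the _nort IRQ helpers used in the conversions
	 * above.  On !PREEMPT_RT they are the ordinary primitives; on
	 * PREEMPT_RT they avoid hard interrupt disabling.
	 */
	#include <linux/irqflags.h>

	#ifdef CONFIG_PREEMPT_RT
	# define local_irq_disable_nort()	do { } while (0)
	# define local_irq_save_nort(flags)	do { local_save_flags(flags); } while (0)
	# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
	#else
	# define local_irq_disable_nort()	local_irq_disable()
	# define local_irq_save_nort(flags)	local_irq_save(flags)
	# define local_irq_restore_nort(flags)	local_irq_restore(flags)
	#endif

Under this reading, the libata-sff.c and ide/ conversions keep their mainline behaviour unchanged while dropping the hard IRQ-off sections on an RT kernel, which is why the replacements are purely mechanical in every hunk.
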
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1bb106f..596f186 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) int bswap = 1; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* read 512 bytes of id info */ hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); - local_irq_restore(flags); + local_irq_restore_nort(flags); drive->dev_flags |= IDE_DFLAG_ID_READ; #ifdef DEBUG diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 75b85a8..bf341a0 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -248,7 +248,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, offset %= PAGE_SIZE; if (PageHighMem(page)) - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; @@ -269,7 +269,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, kunmap_atomic(buf, KM_BIO_SRC_IRQ); if (PageHighMem(page)) - local_irq_restore(flags); + local_irq_restore_nort(flags); len -= nr_bytes; } @@ -406,7 +406,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, } if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 5122b5a..bcc3d32 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -1397,9 +1397,9 @@ static int update_pdrv(struct device *dev, void *data) pdrv = container_of(drv, struct hpsb_protocol_driver, driver); if (pdrv->update) { - down(&ud->device.sem); + mutex_lock(&ud->device.mutex); error = pdrv->update(ud); - up(&ud->device.sem); + mutex_unlock(&ud->device.mutex); } if (error) device_release_driver(&ud->device); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 8c46f22..727776a 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1003,7 +1003,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->ib_dev = device; port->port_num = port_num; - init_MUTEX(&port->sm_sem); + semaphore_init(&port->sm_sem); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index a0e9753..1dc6446 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -773,7 +773,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ipoib_mcast_stop_thread(dev, 0); - local_irq_save(flags); + local_irq_save_nort(flags); netif_addr_lock(dev); spin_lock(&priv->lock); @@ -852,7 +852,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) spin_unlock(&priv->lock); netif_addr_unlock(dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); /* We have to cancel outside of the spinlock */ list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index ac11be0..29577a8 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* HZ */ #include #include @@ -57,11 +58,11 @@ static unsigned int get_time_pit(void) unsigned long flags; unsigned int count; - spin_lock_irqsave(&i8253_lock, 
flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); outb_p(0x00, 0x43); count = inb_p(0x40); count |= inb_p(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return count; } @@ -87,12 +88,12 @@ static int gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -111,11 +112,11 @@ static int gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); rdtscl(t1); for (t = 0; t < 50; t++) gameport_read(gameport); rdtscl(t2); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 1c0b529..2c52c68 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -146,11 +146,11 @@ static unsigned int get_time_pit(void) unsigned long flags; unsigned int count; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); outb_p(0x00, 0x43); count = inb_p(0x40); count |= inb_p(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return count; } diff --git a/drivers/input/keyboard/hil_kbd.c b/drivers/input/keyboard/hil_kbd.c index 6f35670..53c2547 100644 --- a/drivers/input/keyboard/hil_kbd.c +++ b/drivers/input/keyboard/hil_kbd.c @@ -277,7 +277,7 @@ static int hil_kbd_connect(struct serio *serio, struct serio_driver *drv) serio_set_drvdata(serio, kbd); kbd->serio = serio; - init_MUTEX_LOCKED(&kbd->sem); + semaphore_init_locked(&kbd->sem); /* Get device info. MLC driver supplies devid/status/etc. 
*/ serio->write(serio, 0); diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index 216a559..a59c141 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -104,7 +104,7 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm) t.endidx = 91; t.seq = tseq; t.act.semaphore = &tsem; - init_MUTEX_LOCKED(&tsem); + semaphore_init_locked(&tsem); if (hp_sdc_enqueue_transaction(&t)) return -1; @@ -686,7 +686,7 @@ static int __init hp_sdc_rtc_init(void) return -ENODEV; #endif - init_MUTEX(&i8042tregs); + semaphore_init(&i8042tregs); if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) return ret; diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c index 21cb755..5dc0ccd 100644 --- a/drivers/input/misc/pcspkr.c +++ b/drivers/input/misc/pcspkr.c @@ -30,7 +30,7 @@ MODULE_ALIAS("platform:pcspkr"); #include #else #include -static DEFINE_SPINLOCK(i8253_lock); +static DEFINE_ATOMIC_SPINLOCK(i8253_lock); #endif static int pcspkr_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) @@ -50,7 +50,7 @@ static int pcspkr_event(struct input_dev *dev, unsigned int type, unsigned int c if (value > 20 && value < 32767) count = PIT_TICK_RATE / value; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); if (count) { /* set command for counter 2, 2 byte write */ @@ -65,7 +65,7 @@ static int pcspkr_event(struct input_dev *dev, unsigned int type, unsigned int c outb(inb_p(0x61) & 0xFC, 0x61); } - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return 0; } diff --git a/drivers/input/mouse/hil_ptr.c b/drivers/input/mouse/hil_ptr.c index 3263ce0..4fa00a0 100644 --- a/drivers/input/mouse/hil_ptr.c +++ b/drivers/input/mouse/hil_ptr.c @@ -270,7 +270,7 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver) serio_set_drvdata(serio, ptr); ptr->serio = serio; - init_MUTEX_LOCKED(&ptr->sem); + semaphore_init_locked(&ptr->sem); /* Get device info. MLC driver supplies devid/status/etc. 
*/ serio->write(serio, 0); diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index 7ba9f2b..17f3641 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -914,15 +914,15 @@ int hil_mlc_register(hil_mlc *mlc) mlc->ostarted = 0; rwlock_init(&mlc->lock); - init_MUTEX(&mlc->osem); + semaphore_init(&mlc->osem); - init_MUTEX(&mlc->isem); + semaphore_init(&mlc->isem); mlc->icount = -1; mlc->imatch = 0; mlc->opercnt = 0; - init_MUTEX_LOCKED(&(mlc->csem)); + semaphore_init(&(mlc->csem)); hil_mlc_clear_di_scratch(mlc); hil_mlc_clear_di_map(mlc, 0); diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 1c9410d..14b7ccb 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -1039,7 +1039,7 @@ static int __init hp_sdc_register(void) return hp_sdc.dev_err; } - init_MUTEX_LOCKED(&tq_init_sem); + semaphore_init(&tq_init_sem); tq_init.actidx = 0; tq_init.idx = 1; diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 23741ce..4d9b203 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -83,7 +83,7 @@ static struct adb_driver *adb_controller; BLOCKING_NOTIFIER_HEAD(adb_client_list); static int adb_got_sleep; static int adb_inited; -static DECLARE_MUTEX(adb_probe_mutex); +static DEFINE_SEMAPHORE(adb_probe_mutex); static int sleepy_trackpad; static int autopoll_devs; int __adb_probe_sync; diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index f50ca72..28f0e3f 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -101,7 +101,7 @@ struct dvb_frontend_private { struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; - struct semaphore sem; + struct anon_semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; struct task_struct *thread; @@ -189,12 +189,12 @@ static int dvb_frontend_get_event(struct dvb_frontend *fe, if (flags & O_NONBLOCK) return -EWOULDBLOCK; - up(&fepriv->sem); + anon_up(&fepriv->sem); ret = wait_event_interruptible (events->wait_queue, events->eventw != events->eventr); - if (down_interruptible (&fepriv->sem)) + if (anon_down_interruptible (&fepriv->sem)) return -ERESTARTSYS; if (ret < 0) @@ -534,7 +534,7 @@ static int dvb_frontend_thread(void *data) set_freezable(); while (1) { - up(&fepriv->sem); /* is locked when we enter the thread... */ + anon_up(&fepriv->sem); /* is locked when we enter the thread... 
*/ restart: timeout = wait_event_interruptible_timeout(fepriv->wait_queue, dvb_frontend_should_wakeup(fe) || kthread_should_stop() @@ -550,7 +550,7 @@ restart: if (try_to_freeze()) goto restart; - if (down_interruptible(&fepriv->sem)) + if (anon_down_interruptible(&fepriv->sem)) break; if (fepriv->reinitialise) { @@ -678,7 +678,7 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) kthread_stop(fepriv->thread); - init_MUTEX (&fepriv->sem); + anon_semaphore_init(&fepriv->sem); fepriv->state = FESTATE_IDLE; /* paranoia check in case a signal arrived */ @@ -747,7 +747,7 @@ static int dvb_frontend_start(struct dvb_frontend *fe) if (signal_pending(current)) return -EINTR; - if (down_interruptible (&fepriv->sem)) + if (anon_down_interruptible (&fepriv->sem)) return -EINTR; fepriv->state = FESTATE_IDLE; @@ -760,7 +760,7 @@ static int dvb_frontend_start(struct dvb_frontend *fe) if (IS_ERR(fe_thread)) { ret = PTR_ERR(fe_thread); printk("dvb_frontend_start: failed to start kthread (%d)\n", ret); - up(&fepriv->sem); + anon_up(&fepriv->sem); return ret; } fepriv->thread = fe_thread; @@ -1372,7 +1372,7 @@ static int dvb_frontend_ioctl(struct inode *inode, struct file *file, cmd == FE_DISEQC_RECV_SLAVE_REPLY)) return -EPERM; - if (down_interruptible (&fepriv->sem)) + if (anon_down_interruptible (&fepriv->sem)) return -ERESTARTSYS; if ((cmd == FE_SET_PROPERTY) || (cmd == FE_GET_PROPERTY)) @@ -1382,7 +1382,7 @@ static int dvb_frontend_ioctl(struct inode *inode, struct file *file, err = dvb_frontend_ioctl_legacy(inode, file, cmd, parg); } - up(&fepriv->sem); + anon_up(&fepriv->sem); return err; } @@ -1909,7 +1909,7 @@ int dvb_register_frontend(struct dvb_adapter* dvb, } fepriv = fe->frontend_priv; - init_MUTEX (&fepriv->sem); + anon_semaphore_init(&fepriv->sem); init_waitqueue_head (&fepriv->wait_queue); init_waitqueue_head (&fepriv->events.wait_queue); mutex_init(&fepriv->events.mtx); diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 7d43083..c1bc157 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -458,12 +458,12 @@ static void twl4030_sih_do_edge(struct work_struct *work) bytes[byte] &= ~(0x03 << off); - spin_lock_irq(&d->lock); + atomic_spin_lock_irq(&d->lock); if (d->status & IRQ_TYPE_EDGE_RISING) bytes[byte] |= BIT(off + 1); if (d->status & IRQ_TYPE_EDGE_FALLING) bytes[byte] |= BIT(off + 0); - spin_unlock_irq(&d->lock); + atomic_spin_unlock_irq(&d->lock); edge_change &= ~BIT(i); } diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index fea9085..3c59c26 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 68ab39d..d16b79f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -76,6 +76,35 @@ config IBM_ASM information on the specific driver level and support statement for your IBM server. +config HWLAT_DETECTOR + tristate "Testing module to detect hardware-induced latencies" + depends on DEBUG_FS + depends on RING_BUFFER + default m + ---help--- + A simple hardware latency detector. Use this module to detect + large latencies introduced by the behavior of the underlying + system firmware external to Linux. We do this using periodic + use of stop_machine to grab all available CPUs and measure + for unexplainable gaps in the CPU timestamp counter(s). 
By + default, the module is not enabled until the "enable" file + within the "hwlat_detector" debugfs directory is toggled. + + This module is often used to detect SMI (System Management + Interrupts) on x86 systems, though is not x86 specific. To + this end, we default to using a sample window of 1 second, + during which we will sample for 0.5 seconds. If an SMI or + similar event occurs during that time, it is recorded + into an 8K samples global ring buffer until retreived. + + WARNING: This software should never be enabled (it can be built + but should not be turned on after it is loaded) in a production + environment where high latencies are a concern since the + sampling mechanism actually introduces latencies for + regular tasks while the CPU(s) are being held. + + If unsure, say N + config PHANTOM tristate "Sensable PHANToM (PCI)" depends on PCI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 36f733c..854d9ff 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_ISL29003) += isl29003.o obj-$(CONFIG_C2PORT) += c2port/ obj-y += eeprom/ obj-y += cb710/ +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c new file mode 100644 index 0000000..d9549e9 --- /dev/null +++ b/drivers/misc/hwlat_detector.c @@ -0,0 +1,1216 @@ +/* + * hwlat_detector.c - A simple Hardware Latency detector. + * + * Use this module to detect large system latencies induced by the behavior of + * certain underlying system hardware or firmware, independent of Linux itself. + * The code was developed originally to detect the presence of SMIs on Intel + * and AMD systems, although there is no dependency upon x86 herein. + * + * The classical example usage of this module is in detecting the presence of + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a + * somewhat special form of hardware interrupt spawned from earlier CPU debug + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge + * LPC (or other device) to generate a special interrupt under certain + * circumstances, for example, upon expiration of a special SMI timer device, + * due to certain external thermal readings, on certain I/O address accesses, + * and other situations. An SMI hits a special CPU pin, triggers a special + * SMI mode (complete with special memory map), and the OS is unaware. + * + * Although certain hardware-inducing latencies are necessary (for example, + * a modern system often requires an SMI handler for correct thermal control + * and remote management) they can wreak havoc upon any OS-level performance + * guarantees toward low-latency, especially when the OS is not even made + * aware of the presence of these interrupts. For this reason, we need a + * somewhat brute force mechanism to detect these interrupts. In this case, + * we do it by hogging all of the CPU(s) for configurable timer intervals, + * sampling the built-in CPU timer, looking for discontiguous readings. + * + * WARNING: This implementation necessarily introduces latencies. Therefore, + * you should NEVER use this module in a production environment + * requiring any kind of low-latency performance guarantee(s). + * + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. + * + * Includes useful feedback from Clark Williams + * + * This file is licensed under the terms of the GNU General Public + * License version 2. 
This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ +#define U64STR_SIZE 22 /* 20 digits max */ + +#define VERSION "1.0.0" +#define BANNER "hwlat_detector: " +#define DRVNAME "hwlat_detector" +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ + +/* Module metadata */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jon Masters "); +MODULE_DESCRIPTION("A simple hardware latency detector"); +MODULE_VERSION(VERSION); + +/* Module parameters */ + +static int debug; +static int enabled; +static int threshold; + +module_param(debug, int, 0); /* enable debug */ +module_param(enabled, int, 0); /* enable detector */ +module_param(threshold, int, 0); /* latency threshold */ + +/* Buffering and sampling */ + +static struct ring_buffer *ring_buffer; /* sample buffer */ +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ +static unsigned long buf_size = BUF_SIZE_DEFAULT; +static struct task_struct *kthread; /* sampling thread */ + +/* DebugFS filesystem entries */ + +static struct dentry *debug_dir; /* debugfs directory */ +static struct dentry *debug_max; /* maximum TSC delta */ +static struct dentry *debug_count; /* total detect count */ +static struct dentry *debug_sample_width; /* sample width us */ +static struct dentry *debug_sample_window; /* sample window us */ +static struct dentry *debug_sample; /* raw samples us */ +static struct dentry *debug_threshold; /* threshold us */ +static struct dentry *debug_enable; /* enable/disable */ + +/* Individual samples and global state */ + +struct sample; /* latency sample */ +struct data; /* Global state */ + +/* Sampling functions */ +static int __buffer_add_sample(struct sample *sample); +static struct sample *buffer_get_sample(struct sample *sample); +static int get_sample(void *unused); + +/* Threading and state */ +static int kthread_fn(void *unused); +static int start_kthread(void); +static int stop_kthread(void); +static void __reset_stats(void); +static int init_stats(void); + +/* Debugfs interface */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry); +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry); +static int debug_sample_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static int debug_sample_release(struct inode *inode, struct file *filp); +static int debug_enable_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static ssize_t debug_enable_fwrite(struct file *file, + const char __user *user_buffer, + size_t user_size, loff_t *offset); + +/* Initialization functions */ +static int init_debugfs(void); +static void free_debugfs(void); +static int detector_init(void); +static void detector_exit(void); + +/* Individual latency samples are stored here when detected and packed into + * the ring_buffer circular buffer, where they are overwritten when + * more than buf_size/sizeof(sample) samples are received. 
*/ +struct sample { + u64 seqnum; /* unique sequence */ + u64 duration; /* ktime delta */ + struct timespec timestamp; /* wall time */ +}; + +/* keep the global state somewhere. Mostly used under stop_machine. */ +static struct data { + + struct mutex lock; /* protect changes */ + + u64 count; /* total since reset */ + u64 max_sample; /* max hardware latency */ + u64 threshold; /* sample threshold level */ + + u64 sample_window; /* total sampling window (on+off) */ + u64 sample_width; /* active sampling portion of window */ + + atomic_t sample_open; /* whether the sample file is open */ + + wait_queue_head_t wq; /* waitqeue for new sample values */ + +} data; + +/** + * __buffer_add_sample - add a new latency sample recording to the ring buffer + * @sample: The new latency sample value + * + * This receives a new latency sample and records it in a global ring buffer. + * No additional locking is used in this case - suited for stop_machine use. + */ +static int __buffer_add_sample(struct sample *sample) +{ + return ring_buffer_write(ring_buffer, + sizeof(struct sample), sample); +} + +/** + * buffer_get_sample - remove a hardware latency sample from the ring buffer + * @sample: Pre-allocated storage for the sample + * + * This retrieves a hardware latency sample from the global circular buffer + */ +static struct sample *buffer_get_sample(struct sample *sample) +{ + struct ring_buffer_event *e = NULL; + struct sample *s = NULL; + unsigned int cpu = 0; + + if (!sample) + return NULL; + + /* ring_buffers are per-cpu but we just want any value */ + /* so we'll start with this cpu and try others if not */ + /* Steven is planning to add a generic mechanism */ + mutex_lock(&ring_buffer_mutex); + e = ring_buffer_consume(ring_buffer, smp_processor_id(), NULL); + if (!e) { + for_each_online_cpu(cpu) { + e = ring_buffer_consume(ring_buffer, cpu, NULL); + if (e) + break; + } + } + + if (e) { + s = ring_buffer_event_data(e); + memcpy(sample, s, sizeof(struct sample)); + } else + sample = NULL; + mutex_unlock(&ring_buffer_mutex); + + return sample; +} + +/** + * get_sample - sample the CPU TSC and look for likely hardware latencies + * @unused: This is not used but is a part of the stop_machine API + * + * Used to repeatedly capture the CPU TSC (or similar), looking for potential + * hardware-induced latency. Called under stop_machine, with data.lock held. + */ +static int get_sample(void *unused) +{ + ktime_t start, t1, t2; + s64 diff, total = 0; + u64 sample = 0; + int ret = 1; + + start = ktime_get(); /* start timestamp */ + + do { + + t1 = ktime_get(); /* we'll look for a discontinuity */ + t2 = ktime_get(); + + total = ktime_to_us(ktime_sub(t2, start)); /* sample width */ + diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */ + + /* This shouldn't happen */ + if (diff < 0) { + printk(KERN_ERR BANNER "time running backwards\n"); + goto out; + } + + if (diff > sample) + sample = diff; /* only want highest value */ + + } while (total <= data.sample_width); + + /* If we exceed the threshold value, we have found a hardware latency */ + if (sample > data.threshold) { + struct sample s; + + data.count++; + s.seqnum = data.count; + s.duration = sample; + s.timestamp = CURRENT_TIME; + __buffer_add_sample(&s); + + /* Keep a running maximum ever recorded hardware latency */ + if (sample > data.max_sample) + data.max_sample = sample; + } + + ret = 0; +out: + return ret; +} + +/* + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread + * @unused: A required part of the kthread API. 
+ * + * Used to periodically sample the CPU TSC via a call to get_sample. We + * use stop_machine, which does (intentionally) introduce latency since we + * need to ensure nothing else might be running (and thus pre-empting). + * Obviously this should never be used in production environments. + * + * stop_machine will schedule us typically only on CPU0 which is fine for + * almost every real-world hardware latency situation - but we might later + * generalize this if we find there are any actual systems with alternate + * SMI delivery or other non-CPU0 hardware latencies. + */ +static int kthread_fn(void *unused) +{ + int err = 0; + u64 interval = 0; + + while (!kthread_should_stop()) { + + mutex_lock(&data.lock); + + err = stop_machine(get_sample, unused, 0); + if (err) { + /* Houston, we have a problem */ + mutex_unlock(&data.lock); + goto err_out; + } + + wake_up(&data.wq); /* wake up reader(s) */ + + interval = data.sample_window - data.sample_width; + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ + + mutex_unlock(&data.lock); + + if (msleep_interruptible(interval)) + goto out; + } + goto out; +err_out: + printk(KERN_ERR BANNER "could not call stop_machine, disabling\n"); + enabled = 0; +out: + return err; + +} + +/** + * start_kthread - Kick off the hardware latency sampling/detector kthread + * + * This starts a kernel thread that will sit and sample the CPU timestamp + * counter (TSC or similar) and look for potential hardware latencies. + */ +static int start_kthread(void) +{ + kthread = kthread_run(kthread_fn, NULL, + DRVNAME); + if (IS_ERR(kthread)) { + printk(KERN_ERR BANNER "could not start sampling thread\n"); + enabled = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop + * + * This kicks the running hardware latency sampling/detector kernel thread and + * tells it to stop sampling now. Use this on unload and at system shutdown. + */ +static int stop_kthread(void) +{ + int ret; + + ret = kthread_stop(kthread); + + return ret; +} + +/** + * __reset_stats - Reset statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We call this + * function in order to reset those when "enable" is toggled on or off, and + * also at initialization. Should be called with data.lock held. + */ +static void __reset_stats(void) +{ + data.count = 0; + data.max_sample = 0; + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ +} + +/** + * init_stats - Setup global state statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We also use + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware + * induced system latencies. This function initializes these structures and + * allocates the global ring buffer also.
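The arithmetic driving the sampling cadence is worth spelling out: per pass, the kthread holds the machine for sample_width microseconds and then sleeps for the remainder of the window, converted to milliseconds for msleep_interruptible(). A standalone sketch of that calculation, assuming the defaults this series configures (a 1,000,000 us window and a 500,000 us width; the numbers and the program itself are illustrative only, not taken from the hunk above):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* assumed defaults; DEFAULT_SAMPLE_WINDOW/WIDTH are defined elsewhere */
	uint64_t sample_window = 1000000;	/* one full period, in usecs */
	uint64_t sample_width  = 500000;	/* time spent sampling, in usecs */

	/* same operation as kthread_fn(): do_div(interval, USEC_PER_MSEC) */
	uint64_t interval = (sample_window - sample_width) / 1000;

	printf("msleep_interruptible(%llu)\n",
	       (unsigned long long)interval);	/* 500 ms of idle per window */
	return 0;
}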
+ */ +static int init_stats(void) +{ + int ret = -ENOMEM; + + mutex_init(&data.lock); + init_waitqueue_head(&data.wq); + atomic_set(&data.sample_open, 0); + + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); + + if (WARN(!ring_buffer, KERN_ERR BANNER + "failed to allocate ring buffer!\n")) + goto out; + + __reset_stats(); + data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */ + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ + + ret = 0; + +out: + return ret; + +} + +/* + * simple_data_read - Wrapper read function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * @entry: The entry to read from + * + * This function provides a generic read implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_read directly, but we need to make sure that the data.lock + * spinlock is held during the actual read (even though we likely won't ever + * actually race here as the updater runs under a stop_machine context). + */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry) +{ + char buf[U64STR_SIZE]; + u64 val = 0; + int len = 0; + + memset(buf, 0, sizeof(buf)); + + if (!entry) + return -EFAULT; + + mutex_lock(&data.lock); + val = *entry; + mutex_unlock(&data.lock); + + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); + +} + +/* + * simple_data_write - Wrapper write function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to write value from + * @cnt: The maximum number of bytes to write + * @ppos: The current "file" position + * @entry: The entry to write to + * + * This function provides a generic write implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_write directly, but we need to make sure that the data.lock + * spinlock is held during the actual write (even though we likely won't ever + * actually race here as the updater runs under a stop_machine context). + */ +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = strict_strtoull(buf, 10, &val); + if (err) + return -EINVAL; + + mutex_lock(&data.lock); + *entry = val; + mutex_unlock(&data.lock); + + return csize; +} + +/** + * debug_count_fopen - Open function for "count" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "count" debugfs + * interface to the hardware latency detector. 
+ */ +static int debug_count_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_count_fread - Read function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to read the + * number of latency readings exceeding the configured threshold since + * the detector was last reset (e.g. by writing a zero into "count"). + */ +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_count_fwrite - Write function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to write a + * desired value, especially to zero the total count. + */ +static ssize_t debug_count_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_enable_fopen - Dummy open function for "enable" debugfs interface + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "enable" debugfs + * interface to the hardware latency detector. + */ +static int debug_enable_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_enable_fread - Read function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to determine + * whether the detector is currently enabled ("0\n" or "1\n" returned). + */ +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[4]; + + if ((cnt < sizeof(buf)) || (*ppos)) + return 0; + + buf[0] = enabled ? '1' : '0'; + buf[1] = '\n'; + buf[2] = '\0'; + if (copy_to_user(ubuf, buf, strlen(buf))) + return -EFAULT; + return *ppos = strlen(buf); +} + +/** + * debug_enable_fwrite - Write function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to enable or + * disable the detector, which will have the side-effect of possibly + * also resetting the global stats and kicking off the measuring + * kthread (on an enable) or the converse (upon a disable). 
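In other words, writing "1" behaves as a combined reset-and-start, while writing "0" stops the sampling thread and releases any blocked readers. For illustration, a hypothetical userspace helper that drives this file; the helper name and the debugfs path are assumptions (debugfs may be mounted on /debug or /sys/kernel/debug):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* hwlat_set_enabled() is a made-up helper name, not part of this patch */
static int hwlat_set_enabled(int on)
{
	const char *path = "/sys/kernel/debug/hwlat_detector/enable";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open enable");
		return -1;
	}
	/* debug_enable_fwrite() only needs a single '0' or '1' character */
	if (write(fd, on ? "1" : "0", 1) != 1) {
		perror("write enable");
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}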
+ */ +static ssize_t debug_enable_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[4]; + int csize = min(cnt, sizeof(buf)); + long val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[sizeof(buf)-1] = '\0'; /* just in case */ + err = strict_strtoul(buf, 10, &val); + if (0 != err) + return -EINVAL; + + if (val) { + if (enabled) + goto unlock; + enabled = 1; + __reset_stats(); + if (start_kthread()) + return -EFAULT; + } else { + if (!enabled) + goto unlock; + enabled = 0; + err = stop_kthread(); + if (err) { + printk(KERN_ERR BANNER "cannot stop kthread\n"); + return -EFAULT; + } + wake_up(&data.wq); /* reader(s) should return */ + } +unlock: + return csize; +} + +/** + * debug_max_fopen - Open function for "max" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "max" debugfs + * interface to the hardware latency detector. + */ +static int debug_max_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_max_fread - Read function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to determine + * the maximum latency value observed since it was last reset. + */ +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); +} + +/** + * debug_max_fwrite - Write function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to reset the + * maximum or set it to some other desired value - if, then, subsequent + * measurements exceed this value, the maximum will be updated. + */ +static ssize_t debug_max_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); +} + + +/** + * debug_sample_fopen - An open function for "sample" debugfs interface + * @inode: The in-kernel inode representation of this debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function handles opening the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. Can be opened blocking or non-blocking, + * affecting whether it behaves as a buffer read pipe, or does not. + * Implements simple locking to prevent multiple simultaneous use. 
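A minimal consumer of this file might look like the sketch below; the debugfs path is an assumption, and each record read back is the "seconds.nanoseconds<TAB>duration" line that debug_sample_fread() formats. Opening with O_NONBLOCK makes an empty buffer return -EAGAIN instead of blocking.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char line[64];
	ssize_t n;
	/* debugfs mount point is an assumption, adjust as needed */
	int fd = open("/sys/kernel/debug/hwlat_detector/sample", O_RDONLY);

	if (fd < 0) {
		perror("open sample");
		return 1;
	}
	/* each read() blocks until a latency above the threshold is logged;
	 * a return of 0 means the detector was disabled in the meantime */
	while ((n = read(fd, line, sizeof(line) - 1)) > 0) {
		line[n] = '\0';
		fputs(line, stdout);
	}
	close(fd);
	return 0;
}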
+ */ +static int debug_sample_fopen(struct inode *inode, struct file *filp) +{ + if (!atomic_add_unless(&data.sample_open, 1, 1)) + return -EBUSY; + else + return 0; +} + +/** + * debug_sample_fread - A read function for "sample" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that will contain the samples read + * @cnt: The maximum bytes to read from the debugfs "file" + * @ppos: The current position in the debugfs "file" + * + * This function handles reading from the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. By default this will block pending a new + * value written into the sample buffer, unless there are already a + * number of value(s) waiting in the buffer, or the sample file was + * previously opened in a non-blocking mode of operation. + */ +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int len = 0; + char buf[64]; + struct sample *sample = NULL; + + if (!enabled) + return 0; + + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); + if (!sample) + return -ENOMEM; + + while (!buffer_get_sample(sample)) { + + DEFINE_WAIT(wait); + + if (filp->f_flags & O_NONBLOCK) { + len = -EAGAIN; + goto out; + } + + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&data.wq, &wait); + + if (signal_pending(current)) { + len = -EINTR; + goto out; + } + + if (!enabled) { /* enable was toggled */ + len = 0; + goto out; + } + } + + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n", + sample->timestamp.tv_sec, + sample->timestamp.tv_nsec, + sample->duration); + + + /* handling partial reads is more trouble than it's worth */ + if (len > cnt) + goto out; + + if (copy_to_user(ubuf, buf, len)) + len = -EFAULT; + +out: + kfree(sample); + return len; +} + +/** + * debug_sample_release - Release function for "sample" debugfs interface + * @inode: The in-kernel inode represenation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function completes the close of the debugfs interface "sample" file. + * Frees the sample_open "lock" so that other users may open the interface. + */ +static int debug_sample_release(struct inode *inode, struct file *filp) +{ + atomic_dec(&data.sample_open); + + return 0; +} + +/** + * debug_threshold_fopen - Open function for "threshold" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "threshold" debugfs + * interface to the hardware latency detector. + */ +static int debug_threshold_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_threshold_fread - Read function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to determine + * the current threshold level at which a latency will be recorded in the + * global ring buffer, typically on the order of 10us. 
+ */ +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); +} + +/** + * debug_threshold_fwrite - Write function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to configure + * the threshold level at which any subsequently detected latencies will + * be recorded into the global ring buffer. + */ +static ssize_t debug_threshold_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + int ret; + + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); + + if (enabled) + wake_up_process(kthread); + + return ret; +} + +/** + * debug_width_fopen - Open function for "width" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "width" debugfs + * interface to the hardware latency detector. + */ +static int debug_width_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_width_fread - Read function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to determine + * for how many us of the total window we will actively sample for any + * hardware-induced latency periods. Obviously, it is not possible to + * sample constantly and still have the system respond to a sample reader + * without appearing to have gone out to lunch. + */ +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); +} + +/** + * debug_width_fwrite - Write function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to configure + * for how many us of the total window we will actively sample for any + * hardware-induced latency periods. Obviously, it is not possible to + * sample constantly and still have the system respond to a sample reader + * without appearing to have gone out to lunch. It + * is enforced that the width is less than the total window size.
+ */ +static ssize_t debug_width_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = strict_strtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (val < data.sample_window) + data.sample_width = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + if (enabled) + wake_up_process(kthread); + + return csize; +} + +/** + * debug_window_fopen - Open function for "window" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. + */ +static int debug_window_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_window_fread - Read function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to read the total window size. + */ +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); +} + +/** + * debug_window_fwrite - Write function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to write a new total window size. It + * is enforced that any value written must be greater than the sample width + * size, or an error results.
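Taken together, the width and window writers enforce a single invariant. Expressed as one predicate (schematic only, not code from the patch):

/* schematic: both debug_width_fwrite() and debug_window_fwrite()
 * reject any configuration for which this is false */
static inline int hwlat_config_valid(unsigned long long width,
				     unsigned long long window)
{
	return width < window;
}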
+ */ +static ssize_t debug_window_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = strict_strtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (data.sample_width < val) + data.sample_window = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + return csize; +} + +/* + * Function pointers for the "count" debugfs file operations + */ +static const struct file_operations count_fops = { + .open = debug_count_fopen, + .read = debug_count_fread, + .write = debug_count_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "enable" debugfs file operations + */ +static const struct file_operations enable_fops = { + .open = debug_enable_fopen, + .read = debug_enable_fread, + .write = debug_enable_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "max" debugfs file operations + */ +static const struct file_operations max_fops = { + .open = debug_max_fopen, + .read = debug_max_fread, + .write = debug_max_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "sample" debugfs file operations + */ +static const struct file_operations sample_fops = { + .open = debug_sample_fopen, + .read = debug_sample_fread, + .release = debug_sample_release, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "threshold" debugfs file operations + */ +static const struct file_operations threshold_fops = { + .open = debug_threshold_fopen, + .read = debug_threshold_fread, + .write = debug_threshold_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "width" debugfs file operations + */ +static const struct file_operations width_fops = { + .open = debug_width_fopen, + .read = debug_width_fread, + .write = debug_width_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "window" debugfs file operations + */ +static const struct file_operations window_fops = { + .open = debug_window_fopen, + .read = debug_window_fread, + .write = debug_window_fwrite, + .owner = THIS_MODULE, +}; + +/** + * init_debugfs - A function to initialize the debugfs interface files + * + * This function creates entries in debugfs for "hwlat_detector", including + * files to read values from the detector, current samples, and the + * maximum sample that has been captured since the hardware latency + * dectector was started. 
+ */ +static int init_debugfs(void) +{ + int ret = -ENOMEM; + + debug_dir = debugfs_create_dir(DRVNAME, NULL); + if (!debug_dir) + goto err_debug_dir; + + debug_sample = debugfs_create_file("sample", 0444, + debug_dir, NULL, + &sample_fops); + if (!debug_sample) + goto err_sample; + + debug_count = debugfs_create_file("count", 0444, + debug_dir, NULL, + &count_fops); + if (!debug_count) + goto err_count; + + debug_max = debugfs_create_file("max", 0444, + debug_dir, NULL, + &max_fops); + if (!debug_max) + goto err_max; + + debug_sample_window = debugfs_create_file("window", 0644, + debug_dir, NULL, + &window_fops); + if (!debug_sample_window) + goto err_window; + + debug_sample_width = debugfs_create_file("width", 0644, + debug_dir, NULL, + &width_fops); + if (!debug_sample_width) + goto err_width; + + debug_threshold = debugfs_create_file("threshold", 0644, + debug_dir, NULL, + &threshold_fops); + if (!debug_threshold) + goto err_threshold; + + debug_enable = debugfs_create_file("enable", 0644, + debug_dir, &enabled, + &enable_fops); + if (!debug_enable) + goto err_enable; + + else { + ret = 0; + goto out; + } + +err_enable: + debugfs_remove(debug_threshold); +err_threshold: + debugfs_remove(debug_sample_width); +err_width: + debugfs_remove(debug_sample_window); +err_window: + debugfs_remove(debug_max); +err_max: + debugfs_remove(debug_count); +err_count: + debugfs_remove(debug_sample); +err_sample: + debugfs_remove(debug_dir); +err_debug_dir: +out: + return ret; +} + +/** + * free_debugfs - A function to cleanup the debugfs file interface + */ +static void free_debugfs(void) +{ + /* could also use a debugfs_remove_recursive */ + debugfs_remove(debug_enable); + debugfs_remove(debug_threshold); + debugfs_remove(debug_sample_width); + debugfs_remove(debug_sample_window); + debugfs_remove(debug_max); + debugfs_remove(debug_count); + debugfs_remove(debug_sample); + debugfs_remove(debug_dir); +} + +/** + * detector_init - Standard module initialization code + */ +static int detector_init(void) +{ + int ret = -ENOMEM; + + printk(KERN_INFO BANNER "version %s\n", VERSION); + + ret = init_stats(); + if (0 != ret) + goto out; + + ret = init_debugfs(); + if (0 != ret) + goto err_stats; + + if (enabled) + ret = start_kthread(); + + goto out; + +err_stats: + ring_buffer_free(ring_buffer); +out: + return ret; + +} + +/** + * detector_exit - Standard module cleanup code + */ +static void detector_exit(void) +{ + int err; + + if (enabled) { + enabled = 0; + err = stop_kthread(); + if (err) + printk(KERN_ERR BANNER "cannot stop kthread\n"); + } + + free_debugfs(); + ring_buffer_free(ring_buffer); /* free up the ring buffer */ + +} + +module_init(detector_init); +module_exit(detector_exit); diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 49e5823..8df561b 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -194,7 +194,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock sg_init_table(mq->sg, host->max_phys_segs); } - init_MUTEX(&mq->thread_sem); + semaphore_init(&mq->thread_sem); mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); if (IS_ERR(mq->thread)) { diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index aaa8a9f..1b28ef5 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c @@ -179,7 +179,7 @@ struct mc32_local u16 rx_ring_tail; /* index to rx de-queue end */ - struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct anon_semaphore cmd_mutex; /* Serialises issuing of execute commands */ 
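The driver hunks from here on repeat one mechanical conversion: semaphores used purely for mutual exclusion or completion-style waiting are switched to the anon_semaphore API introduced earlier in this series, with init_MUTEX()/init_MUTEX_LOCKED() becoming semaphore_init()/anon_semaphore_init_locked() and down()/up() becoming anon_down()/anon_up(). A schematic of the before/after shape, using placeholder names rather than any one driver's code:

struct foo_priv {
	struct anon_semaphore cmd_mutex;	/* was: struct semaphore */
};

static void foo_init(struct foo_priv *p)
{
	/* was: init_MUTEX_LOCKED(&p->cmd_mutex); */
	anon_semaphore_init_locked(&p->cmd_mutex);
}

static int foo_command(struct foo_priv *p)
{
	anon_down(&p->cmd_mutex);		/* was: down() */
	/* ... issue the command and wait for it to complete ... */
	anon_up(&p->cmd_mutex);			/* was: up() */
	return 0;
}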
struct completion execution_cmd; /* Card has completed an execute command */ struct completion xceiver_cmd; /* Card has completed a tx or rx command */ }; @@ -521,7 +521,7 @@ static int __init mc32_probe1(struct net_device *dev, int slot) lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ - init_MUTEX_LOCKED(&lp->cmd_mutex); + anon_semaphore_init_locked(&lp->cmd_mutex); init_completion(&lp->execution_cmd); init_completion(&lp->xceiver_cmd); @@ -580,7 +580,7 @@ static int mc32_command_nowait(struct net_device *dev, u16 cmd, void *data, int int ioaddr = dev->base_addr; int ret = -1; - if (down_trylock(&lp->cmd_mutex) == 0) + if (anon_down_trylock(&lp->cmd_mutex) == 0) { lp->cmd_nonblocking=1; lp->exec_box->mbox=0; @@ -626,7 +626,7 @@ static int mc32_command(struct net_device *dev, u16 cmd, void *data, int len) int ioaddr = dev->base_addr; int ret = 0; - down(&lp->cmd_mutex); + anon_down(&lp->cmd_mutex); /* * My Turn @@ -646,7 +646,7 @@ static int mc32_command(struct net_device *dev, u16 cmd, void *data, int len) if(lp->exec_box->mbox&(1<<13)) ret = -1; - up(&lp->cmd_mutex); + anon_up(&lp->cmd_mutex); /* * A multicast set got blocked - try it now @@ -916,7 +916,7 @@ static int mc32_open(struct net_device *dev) * Allow ourselves to issue commands */ - up(&lp->cmd_mutex); + anon_up(&lp->cmd_mutex); /* @@ -1384,7 +1384,7 @@ static irqreturn_t mc32_interrupt(int irq, void *dev_id) */ if (lp->cmd_nonblocking) { - up(&lp->cmd_mutex); + anon_up(&lp->cmd_mutex); if (lp->mc_reload_wait) mc32_reset_multicast_list(dev); } @@ -1461,7 +1461,7 @@ static int mc32_close(struct net_device *dev) /* Ensure we issue no more commands beyond this point */ - down(&lp->cmd_mutex); + anon_down(&lp->cmd_mutex); /* Ok the card is now stopping */ diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 4567588..38e920f 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -795,9 +795,9 @@ static void poll_vortex(struct net_device *dev) { struct vortex_private *vp = netdev_priv(dev); unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -1766,6 +1766,7 @@ vortex_timer(unsigned long data) int next_tick = 60*HZ; int ok = 0; int media_status, old_window; + unsigned long flags; if (vortex_debug > 2) { pr_debug("%s: Media selection timer tick happened, %s.\n", @@ -1773,7 +1774,7 @@ vortex_timer(unsigned long data) pr_debug("dev->watchdog_timeo=%d\n", dev->watchdog_timeo); } - disable_irq_lockdep(dev->irq); + spin_lock_irqsave(&vp->lock, flags); old_window = ioread16(ioaddr + EL3_CMD) >> 13; EL3WINDOW(4); media_status = ioread16(ioaddr + Wn4_Media); @@ -1796,10 +1797,7 @@ vortex_timer(unsigned long data) case XCVR_MII: case XCVR_NWAY: { ok = 1; - /* Interrupts are already disabled */ - spin_lock(&vp->lock); vortex_check_media(dev, 0); - spin_unlock(&vp->lock); } break; default: /* Other media types handled by Tx timeouts. 
*/ @@ -1853,7 +1851,7 @@ leave_media_alone: dev->name, media_tbl[dev->if_port].name); EL3WINDOW(old_window); - enable_irq_lockdep(dev->irq); + spin_unlock_irqrestore(&vp->lock, flags); mod_timer(&vp->timer, RUN_AT(next_tick)); if (vp->deferred) iowrite16(FakeIntr, ioaddr + EL3_CMD); @@ -1887,12 +1885,12 @@ static void vortex_tx_timeout(struct net_device *dev) * Block interrupts because vortex_interrupt does a bare spin_lock() */ unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev); else vortex_interrupt(dev->irq, dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } } diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index 0e2ba21..560c233 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -2195,7 +2195,11 @@ static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance) */ static void rtl8139_poll_controller(struct net_device *dev) { - disable_irq(dev->irq); + /* + * use _nosync() variant - might be used by netconsole + * from atomic contexts: + */ + disable_irq_nosync(dev->irq); rtl8139_interrupt(dev->irq, dev); enable_irq(dev->irq); } diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index a383122..74cf8f5 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -2069,11 +2069,8 @@ static int atl1c_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } tpd_req = atl1c_cal_tpd_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) { - if (netif_msg_pktdata(adapter)) - dev_info(&adapter->pdev->dev, "tx locked\n"); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&adapter->tx_lock, flags); + if (skb->mark == 0x01) type = atl1c_trans_high; else diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index 9fc6d6d..4f6df51 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -1856,8 +1856,7 @@ static int atl1e_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } tpd_req = atl1e_cal_tdp_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) - return NETDEV_TX_LOCKED; + spin_lock_irqsave(&adapter->tx_lock, flags); if (atl1e_tpd_avail(adapter) < tpd_req) { /* no enough descriptor, just stop queue */ diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 06b9011..ae014d1 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -2838,7 +2838,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) if (unlikely(netif_tx_queue_stopped(txq)) && (bnx2_tx_avail(bp, txr) > bp->tx_wake_thresh)) { - __netif_tx_lock(txq, smp_processor_id()); + __netif_tx_lock(txq); if ((netif_tx_queue_stopped(txq)) && (bnx2_tx_avail(bp, txr) > bp->tx_wake_thresh)) netif_tx_wake_queue(txq); diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index c36a5f3..2922e00 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -926,7 +926,7 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp) /* TBD need a thresh? */ if (unlikely(netif_tx_queue_stopped(txq))) { - __netif_tx_lock(txq, smp_processor_id()); + __netif_tx_lock(txq); /* Need to make the tx_bd_cons update visible to start_xmit() * before checking for netif_tx_queue_stopped(). 
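Several of the network drivers in this section get the same treatment as atl1c and atl1e above: the start_xmit trylock, which bailed out with NETDEV_TX_LOCKED and left the core to retry the packet, is replaced by an unconditional spin_lock_irqsave(), so the transmit path simply waits for the lock instead. A schematic of the change, with placeholder names:

struct foo_adapter {
	spinlock_t tx_lock;
};

static int foo_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct foo_adapter *adapter = netdev_priv(dev);
	unsigned long flags;

	/* before:
	 *	if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
	 *		return NETDEV_TX_LOCKED;
	 */
	spin_lock_irqsave(&adapter->tx_lock, flags);

	/* ... place the skb on the hardware ring ... */

	spin_unlock_irqrestore(&adapter->tx_lock, flags);
	return NETDEV_TX_OK;
}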
Without the diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 3711d64..e6e5abd 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1671,8 +1671,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, struct cmdQ *q = &sge->cmdQ[qid]; unsigned int credits, pidx, genbit, count, use_sched_skb = 0; - if (!spin_trylock(&q->lock)) - return NETDEV_TX_LOCKED; + spin_lock(&q->lock); reclaim_completed_tx(sge, q); diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 981ab53..fcc67e0 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -120,7 +120,7 @@ struct sixpack { struct timer_list tx_t; struct timer_list resync_t; atomic_t refcnt; - struct semaphore dead_sem; + struct anon_semaphore dead_sem; spinlock_t lock; }; @@ -412,7 +412,7 @@ static struct sixpack *sp_get(struct tty_struct *tty) static void sp_put(struct sixpack *sp) { if (atomic_dec_and_test(&sp->refcnt)) - up(&sp->dead_sem); + anon_up(&sp->dead_sem); } /* @@ -606,7 +606,7 @@ static int sixpack_open(struct tty_struct *tty) spin_lock_init(&sp->lock); atomic_set(&sp->refcnt, 1); - init_MUTEX_LOCKED(&sp->dead_sem); + anon_semaphore_init_locked(&sp->dead_sem); /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ @@ -702,7 +702,7 @@ static void sixpack_close(struct tty_struct *tty) * we have to wait for all existing users to finish. */ if (!atomic_dec_and_test(&sp->refcnt)) - down(&sp->dead_sem); + anon_down(&sp->dead_sem); unregister_netdev(sp->dev); diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index fda2fc8..dd897b0 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -82,7 +82,7 @@ struct mkiss { #define CRC_MODE_SMACK_TEST 4 atomic_t refcnt; - struct semaphore dead_sem; + struct anon_semaphore dead_sem; }; /*---------------------------------------------------------------------------*/ @@ -718,7 +718,7 @@ static struct mkiss *mkiss_get(struct tty_struct *tty) static void mkiss_put(struct mkiss *ax) { if (atomic_dec_and_test(&ax->refcnt)) - up(&ax->dead_sem); + anon_up(&ax->dead_sem); } static int crc_force = 0; /* Can be overridden with insmod */ @@ -745,7 +745,7 @@ static int mkiss_open(struct tty_struct *tty) spin_lock_init(&ax->buflock); atomic_set(&ax->refcnt, 1); - init_MUTEX_LOCKED(&ax->dead_sem); + anon_semaphore_init_locked(&ax->dead_sem); ax->tty = tty; tty->disc_data = ax; @@ -824,7 +824,7 @@ static void mkiss_close(struct tty_struct *tty) * we have to wait for all existing users to finish. 
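The 6pack, mkiss and (later) ppp_async hunks all convert the same teardown idiom: a refcount paired with a semaphore that starts out locked, so that close() can sleep until the last outstanding user drops its reference. Roughly, with placeholder names:

struct foo_ldisc {
	atomic_t refcnt;
	struct anon_semaphore dead_sem;
};

static void foo_open_init(struct foo_ldisc *p)
{
	atomic_set(&p->refcnt, 1);
	/* starts locked: only the final foo_put() releases it */
	anon_semaphore_init_locked(&p->dead_sem);
}

static void foo_put(struct foo_ldisc *p)
{
	if (atomic_dec_and_test(&p->refcnt))
		anon_up(&p->dead_sem);
}

static void foo_close(struct foo_ldisc *p)
{
	/* drop our own reference; if users remain, wait for the last put */
	if (!atomic_dec_and_test(&p->refcnt))
		anon_down(&p->dead_sem);
	/* now safe to unregister and free */
}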
*/ if (!atomic_dec_and_test(&ax->refcnt)) - down(&ax->dead_sem); + anon_down(&ax->dead_sem); unregister_netdev(ax->dev); diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c index fd0796c..aa04bbd 100644 --- a/drivers/net/irda/sir_dev.c +++ b/drivers/net/irda/sir_dev.c @@ -908,7 +908,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n dev->tx_skb = NULL; spin_lock_init(&dev->tx_lock); - init_MUTEX(&dev->fsm.sem); + semaphore_init(&dev->fsm.sem); dev->drv = drv; dev->netdev = ndev; diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index da472c6..15c6599 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -65,6 +65,14 @@ struct pcpu_lstats { unsigned long drops; }; +#ifdef CONFIG_PREEMPT_RT +# define xmit_get_cpu() get_cpu() +# define xmit_put_cpu() put_cpu() +#else +# define xmit_get_cpu() smp_processor_id() +# define xmit_put_cpu() do { } while (0) +#endif + /* * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). @@ -72,22 +80,23 @@ struct pcpu_lstats { static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_lstats *pcpu_lstats, *lb_stats; - int len; + int len, res; skb_orphan(skb); skb->protocol = eth_type_trans(skb, dev); + len = skb->len; + res = netif_rx_ni(skb); - /* it's OK to use per_cpu_ptr() because BHs are off */ pcpu_lstats = dev->ml_priv; - lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id()); + lb_stats = per_cpu_ptr(pcpu_lstats, xmit_get_cpu()); - len = skb->len; - if (likely(netif_rx(skb) == NET_RX_SUCCESS)) { + if (likely(res == NET_RX_SUCCESS)) { lb_stats->bytes += len; lb_stats->packets++; } else lb_stats->drops++; + xmit_put_cpu(); return 0; } diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index bc72d6e..13343e8 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index 0f32db3..ee2c56d 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -508,7 +508,7 @@ static void txq_maybe_wake(struct tx_queue *txq) struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index); if (netif_tx_queue_stopped(nq)) { - __netif_tx_lock(nq, smp_processor_id()); + __netif_tx_lock(nq); if (txq->tx_ring_size - txq->tx_desc_count >= MAX_SKB_FRAGS + 1) netif_tx_wake_queue(nq); __netif_tx_unlock(nq); @@ -899,7 +899,7 @@ static void txq_kick(struct tx_queue *txq) u32 hw_desc_ptr; u32 expected_ptr; - __netif_tx_lock(nq, smp_processor_id()); + __netif_tx_lock(nq); if (rdlp(mp, TXQ_COMMAND) & (1 << txq->index)) goto out; @@ -923,7 +923,7 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force) struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index); int reclaimed; - __netif_tx_lock(nq, smp_processor_id()); + __netif_tx_lock(nq); reclaimed = 0; while (reclaimed < budget && txq->tx_desc_count > 0) { diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index 5d3343e..3fb47ee 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -1401,7 +1401,7 @@ int netxen_process_cmd_ring(struct netxen_adapter *adapter) smp_mb(); if (netif_queue_stopped(netdev) && netif_carrier_ok(netdev)) { - __netif_tx_lock(tx_ring->txq, smp_processor_id()); + __netif_tx_lock(tx_ring->txq); if (netxen_tx_avail(tx_ring) > TX_STOP_THRESH) netif_wake_queue(netdev); 
__netif_tx_unlock(tx_ring->txq); diff --git a/drivers/net/niu.c b/drivers/net/niu.c index d2146d4..4b6d8ce 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -3681,7 +3681,7 @@ static void niu_tx_work(struct niu *np, struct tx_ring_info *rp) out: if (unlikely(netif_tx_queue_stopped(txq) && (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp)))) { - __netif_tx_lock(txq, smp_processor_id()); + __netif_tx_lock(txq); if (netif_tx_queue_stopped(txq) && (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp))) netif_tx_wake_queue(txq); diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 6de8399..1a29a1c 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -67,7 +67,7 @@ struct asyncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct anon_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; @@ -145,7 +145,7 @@ static struct asyncppp *ap_get(struct tty_struct *tty) static void ap_put(struct asyncppp *ap) { if (atomic_dec_and_test(&ap->refcnt)) - up(&ap->dead_sem); + anon_up(&ap->dead_sem); } /* @@ -183,7 +183,7 @@ ppp_asynctty_open(struct tty_struct *tty) tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); atomic_set(&ap->refcnt, 1); - init_MUTEX_LOCKED(&ap->dead_sem); + anon_semaphore_init_locked(&ap->dead_sem); ap->chan.private = ap; ap->chan.ops = &async_ops; @@ -232,7 +232,7 @@ ppp_asynctty_close(struct tty_struct *tty) * by the time it returns. */ if (!atomic_dec_and_test(&ap->refcnt)) - down(&ap->dead_sem); + anon_down(&ap->dead_sem); tasklet_kill(&ap->tsk); ppp_unregister_channel(&ap->chan); diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 8702e7a..f4703ac 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -180,11 +180,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) u16 destid; unsigned long flags; - local_irq_save(flags); - if (!spin_trylock(&rnet->tx_lock)) { - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&rnet->tx_lock, flags); if ((rnet->tx_cnt + 1) > RIONET_TX_RING_SIZE) { netif_stop_queue(ndev); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 458daa0..60913bf 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -4161,12 +4161,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) [skb->priority & (MAX_TX_FIFOS - 1)]; fifo = &mac_control->fifos[queue]; - if (do_spin_lock) - spin_lock_irqsave(&fifo->tx_lock, flags); - else { - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags))) - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&fifo->tx_lock, flags); if (sp->config.multiq) { if (__netif_subqueue_stopped(dev, fifo->fifo_no)) { diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index d2dfe0a..d47ce6c 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -1032,12 +1032,8 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) (csum_stuff_off << 21)); } - local_irq_save(flags); - if (!spin_trylock(&gp->tx_lock)) { - /* Tell upper layer to requeue */ - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&gp->tx_lock, flags); + /* We raced with gem_do_stop() */ if (!gp->running) { spin_unlock_irqrestore(&gp->tx_lock, flags); diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 3c2679c..ce65846 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1638,13 +1638,8 @@ static int bdx_tx_transmit(struct sk_buff *skb, struct net_device *ndev) unsigned long 
flags; ENTER; - local_irq_save(flags); - if (!spin_trylock(&priv->tx_lock)) { - local_irq_restore(flags); - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n", - BDX_DRV_NAME, ndev->name); - return NETDEV_TX_LOCKED; - } + + spin_lock_irqsave(&priv->tx_lock, flags); /* build tx descriptor */ BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */ diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index 4cf9a65..c5cf990 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -1817,6 +1817,7 @@ static void __devexit tulip_remove_one (struct pci_dev *pdev) pci_iounmap(pdev, tp->base_addr); free_netdev (dev); pci_release_regions (pdev); + pci_disable_device (pdev); pci_set_drvdata (pdev, NULL); /* pci_power_off (pdev, -1); */ diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 61581ee..40d64ce 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -574,7 +574,7 @@ static int cosa_probe(int base, int irq, int dma) /* Initialize the chardev data structures */ mutex_init(&chan->rlock); - init_MUTEX(&chan->wsem); + semaphore_init(&chan->wsem); /* Register the network interface */ if (!(chan->netdev = alloc_hdlcdev(chan))) { diff --git a/drivers/of/base.c b/drivers/of/base.c index 69f85c0..fc0c206 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -25,7 +25,7 @@ struct device_node *allnodes; /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. */ -DEFINE_RWLOCK(devtree_lock); +DEFINE_ATOMIC_SPINLOCK(devtree_lock); int of_n_addr_cells(struct device_node *np) { @@ -59,16 +59,14 @@ int of_n_size_cells(struct device_node *np) } EXPORT_SYMBOL(of_n_size_cells); -struct property *of_find_property(const struct device_node *np, - const char *name, - int *lenp) +static struct property *__of_find_property(const struct device_node *np, + const char *name, int *lenp) { struct property *pp; if (!np) return NULL; - read_lock(&devtree_lock); for (pp = np->properties; pp != 0; pp = pp->next) { if (of_prop_cmp(pp->name, name) == 0) { if (lenp != 0) @@ -76,7 +74,20 @@ struct property *of_find_property(const struct device_node *np, break; } } - read_unlock(&devtree_lock); + + return pp; +} + +struct property *of_find_property(const struct device_node *np, + const char *name, + int *lenp) +{ + struct property *pp; + unsigned long flags; + + atomic_spin_lock_irqsave(&devtree_lock, flags); + pp = __of_find_property(np, name, lenp); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return pp; } @@ -86,8 +97,20 @@ EXPORT_SYMBOL(of_find_property); * Find a property with a given name for a given node * and return the value. */ +static const void *__of_get_property(const struct device_node *np, + const char *name, int *lenp) +{ + struct property *pp = __of_find_property(np, name, lenp); + + return pp ? pp->value : NULL; +} + +/* + * Find a property with a given name for a given node + * and return the value. 
+ */ const void *of_get_property(const struct device_node *np, const char *name, - int *lenp) + int *lenp) { struct property *pp = of_find_property(np, name, lenp); @@ -98,13 +121,13 @@ EXPORT_SYMBOL(of_get_property); /** Checks if the given "compat" string matches one of the strings in * the device's "compatible" property */ -int of_device_is_compatible(const struct device_node *device, - const char *compat) +static int __of_device_is_compatible(const struct device_node *device, + const char *compat) { const char* cp; - int cplen, l; + int uninitialized_var(cplen), l; - cp = of_get_property(device, "compatible", &cplen); + cp = __of_get_property(device, "compatible", &cplen); if (cp == NULL) return 0; while (cplen > 0) { @@ -117,6 +140,21 @@ int of_device_is_compatible(const struct device_node *device, return 0; } + +/** Checks if the given "compat" string matches one of the strings in + * the device's "compatible" property + */ +int of_device_is_compatible(const struct device_node *device, + const char *compat) +{ + unsigned long flags; + int res; + + atomic_spin_lock_irqsave(&devtree_lock, flags); + res = __of_device_is_compatible(device, compat); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); + return res; +} EXPORT_SYMBOL(of_device_is_compatible); /** @@ -155,13 +193,14 @@ EXPORT_SYMBOL(of_device_is_available); struct device_node *of_get_parent(const struct device_node *node) { struct device_node *np; + unsigned long flags; if (!node) return NULL; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = of_node_get(node->parent); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_get_parent); @@ -180,14 +219,15 @@ EXPORT_SYMBOL(of_get_parent); struct device_node *of_get_next_parent(struct device_node *node) { struct device_node *parent; + unsigned long flags; if (!node) return NULL; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); parent = of_node_get(node->parent); of_node_put(node); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return parent; } @@ -203,14 +243,15 @@ struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev) { struct device_node *next; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = prev ? prev->sibling : node->child; for (; next; next = next->sibling) if (of_node_get(next)) break; of_node_put(prev); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return next; } EXPORT_SYMBOL(of_get_next_child); @@ -225,14 +266,15 @@ EXPORT_SYMBOL(of_get_next_child); struct device_node *of_find_node_by_path(const char *path) { struct device_node *np = allnodes; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); for (; np; np = np->allnext) { if (np->full_name && (of_node_cmp(np->full_name, path) == 0) && of_node_get(np)) break; } - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_path); @@ -252,15 +294,16 @@ struct device_node *of_find_node_by_name(struct device_node *from, const char *name) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? 
from->allnext : allnodes; for (; np; np = np->allnext) if (np->name && (of_node_cmp(np->name, name) == 0) && of_node_get(np)) break; of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_name); @@ -281,15 +324,16 @@ struct device_node *of_find_node_by_type(struct device_node *from, const char *type) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) if (np->type && (of_node_cmp(np->type, type) == 0) && of_node_get(np)) break; of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_type); @@ -312,18 +356,20 @@ struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compatible) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) { if (type && !(np->type && (of_node_cmp(np->type, type) == 0))) continue; - if (of_device_is_compatible(np, compatible) && of_node_get(np)) + if (__of_device_is_compatible(np, compatible) && + of_node_get(np)) break; } of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_compatible_node); @@ -345,8 +391,9 @@ struct device_node *of_find_node_with_property(struct device_node *from, { struct device_node *np; struct property *pp; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) { for (pp = np->properties; pp != 0; pp = pp->next) { @@ -358,20 +405,14 @@ struct device_node *of_find_node_with_property(struct device_node *from, } out: of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_with_property); -/** - * of_match_node - Tell if an device_node has a matching of_match structure - * @matches: array of of device match structures to search in - * @node: the of device structure to match against - * - * Low level utility function used by device matching. - */ -const struct of_device_id *of_match_node(const struct of_device_id *matches, - const struct device_node *node) +static const struct of_device_id * +__of_match_node(const struct of_device_id *matches, + const struct device_node *node) { while (matches->name[0] || matches->type[0] || matches->compatible[0]) { int match = 1; @@ -382,14 +423,33 @@ const struct of_device_id *of_match_node(const struct of_device_id *matches, match &= node->type && !strcmp(matches->type, node->type); if (matches->compatible[0]) - match &= of_device_is_compatible(node, - matches->compatible); + match &= __of_device_is_compatible(node, + matches->compatible); if (match) return matches; matches++; } return NULL; } + +/** + * of_match_node - Tell if an device_node has a matching of_match structure + * @matches: array of of device match structures to search in + * @node: the of device structure to match against + * + * Low level utility function used by device matching. 
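The split into lock-free __of_* helpers is what lets the composite lookups take devtree_lock exactly once. With the old rwlock, of_find_compatible_node() could call of_device_is_compatible() while already holding the read lock, because read_lock() nests; a plain spinlock does not, so the inner call has to use the unlocked helper. Roughly (this mirrors the of_find_compatible_node() hunk above, with added commentary; the function name is illustrative):

struct device_node *find_compatible_sketch(struct device_node *from,
					   const char *compatible)
{
	struct device_node *np;
	unsigned long flags;

	atomic_spin_lock_irqsave(&devtree_lock, flags);
	for (np = from ? from->allnext : allnodes; np; np = np->allnext) {
		/* must not call of_device_is_compatible() here: that
		 * wrapper takes devtree_lock itself and would deadlock
		 * against the lock we already hold */
		if (__of_device_is_compatible(np, compatible) &&
		    of_node_get(np))
			break;
	}
	of_node_put(from);
	atomic_spin_unlock_irqrestore(&devtree_lock, flags);
	return np;
}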
+ */ +const struct of_device_id *of_match_node(const struct of_device_id *matches, + const struct device_node *node) +{ + const struct of_device_id *match; + unsigned long flags; + + atomic_spin_lock_irqsave(&devtree_lock, flags); + match = __of_match_node(matches, node); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); + return match; +} EXPORT_SYMBOL(of_match_node); /** @@ -408,15 +468,16 @@ struct device_node *of_find_matching_node(struct device_node *from, const struct of_device_id *matches) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) { - if (of_match_node(matches, np) && of_node_get(np)) + if (__of_match_node(matches, np) && of_node_get(np)) break; } of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_matching_node); diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 2b7ae36..284814b 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -72,10 +72,10 @@ int alloc_event_buffer(void) int err = -ENOMEM; unsigned long flags; - spin_lock_irqsave(&oprofilefs_lock, flags); + atomic_spin_lock_irqsave(&oprofilefs_lock, flags); buffer_size = oprofile_buffer_size; buffer_watershed = oprofile_buffer_watershed; - spin_unlock_irqrestore(&oprofilefs_lock, flags); + atomic_spin_unlock_irqrestore(&oprofilefs_lock, flags); if (buffer_watershed >= buffer_size) return -EINVAL; diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index b7e4cee..7cbf76d 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -21,7 +21,7 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_ATOMIC_SPINLOCK(oprofilefs_lock); static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) { @@ -75,9 +75,9 @@ int oprofilefs_ulong_from_user(unsigned long *val, char const __user *buf, size_ if (copy_from_user(tmpbuf, buf, count)) return -EFAULT; - spin_lock_irqsave(&oprofilefs_lock, flags); + atomic_spin_lock_irqsave(&oprofilefs_lock, flags); *val = simple_strtoul(tmpbuf, NULL, 0); - spin_unlock_irqrestore(&oprofilefs_lock, flags); + atomic_spin_unlock_irqrestore(&oprofilefs_lock, flags); return 0; } diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index 8901ecf..6e2f206 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -41,7 +41,7 @@ * It will be useful to call this from an interrupt handler. */ static void parport_ieee1284_wakeup (struct parport *port) { - up (&port->physport->ieee1284.irq); + anon_up (&port->physport->ieee1284.irq); } static struct parport *port_from_cookie[PARPORT_MAX]; @@ -83,7 +83,7 @@ int parport_wait_event (struct parport *port, signed long timeout) timer.data = port->number; add_timer (&timer); - ret = down_interruptible (&port->physport->ieee1284.irq); + ret = anon_down_interruptible (&port->physport->ieee1284.irq); if (!del_timer_sync(&timer) && !ret) /* Timed out. 
*/ ret = 1; diff --git a/drivers/parport/share.c b/drivers/parport/share.c index dffa5d4..228942f 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -306,7 +306,7 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma, spin_lock_init(&tmp->pardevice_lock); tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; - init_MUTEX_LOCKED (&tmp->ieee1284.irq); /* actually a semaphore at 0 */ + anon_semaphore_init_locked(&tmp->ieee1284.irq); tmp->spintime = parport_default_spintime; atomic_set (&tmp->ref_count, 1); INIT_LIST_HEAD(&tmp->full_list); diff --git a/drivers/pci/access.c b/drivers/pci/access.c index db23200..fddeb63 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -12,7 +12,7 @@ * configuration space. */ -static DEFINE_SPINLOCK(pci_lock); +static DEFINE_ATOMIC_SPINLOCK(pci_lock); /* * Wrappers for all PCI configuration access functions. They just check @@ -32,10 +32,10 @@ int pci_bus_read_config_##size \ unsigned long flags; \ u32 data = 0; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irqsave(&pci_lock, flags); \ + atomic_spin_lock_irqsave(&pci_lock, flags); \ res = bus->ops->read(bus, devfn, pos, len, &data); \ *value = (type)data; \ - spin_unlock_irqrestore(&pci_lock, flags); \ + atomic_spin_unlock_irqrestore(&pci_lock, flags); \ return res; \ } @@ -46,9 +46,9 @@ int pci_bus_write_config_##size \ int res; \ unsigned long flags; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irqsave(&pci_lock, flags); \ + atomic_spin_lock_irqsave(&pci_lock, flags); \ res = bus->ops->write(bus, devfn, pos, len, value); \ - spin_unlock_irqrestore(&pci_lock, flags); \ + atomic_spin_unlock_irqrestore(&pci_lock, flags); \ return res; \ } @@ -78,10 +78,10 @@ struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops) struct pci_ops *old_ops; unsigned long flags; - spin_lock_irqsave(&pci_lock, flags); + atomic_spin_lock_irqsave(&pci_lock, flags); old_ops = bus->ops; bus->ops = ops; - spin_unlock_irqrestore(&pci_lock, flags); + atomic_spin_unlock_irqrestore(&pci_lock, flags); return old_ops; } EXPORT_SYMBOL(pci_bus_set_ops); @@ -135,9 +135,9 @@ static noinline void pci_wait_ucfg(struct pci_dev *dev) __add_wait_queue(&pci_ucfg_wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&pci_lock); + atomic_spin_unlock_irq(&pci_lock); schedule(); - spin_lock_irq(&pci_lock); + atomic_spin_lock_irq(&pci_lock); } while (dev->block_ucfg_access); __remove_wait_queue(&pci_ucfg_wait, &wait); } @@ -149,11 +149,11 @@ int pci_user_read_config_##size \ int ret = 0; \ u32 data = -1; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irq(&pci_lock); \ + atomic_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->read(dev->bus, dev->devfn, \ pos, sizeof(type), &data); \ - spin_unlock_irq(&pci_lock); \ + atomic_spin_unlock_irq(&pci_lock); \ *val = (type)data; \ return ret; \ } @@ -164,11 +164,11 @@ int pci_user_write_config_##size \ { \ int ret = -EIO; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irq(&pci_lock); \ + atomic_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->write(dev->bus, dev->devfn, \ pos, sizeof(type), val); \ - spin_unlock_irq(&pci_lock); \ + atomic_spin_unlock_irq(&pci_lock); \ return ret; \ } @@ -395,10 +395,10 @@ void pci_block_user_cfg_access(struct pci_dev *dev) 
unsigned long flags; int was_blocked; - spin_lock_irqsave(&pci_lock, flags); + atomic_spin_lock_irqsave(&pci_lock, flags); was_blocked = dev->block_ucfg_access; dev->block_ucfg_access = 1; - spin_unlock_irqrestore(&pci_lock, flags); + atomic_spin_unlock_irqrestore(&pci_lock, flags); /* If we BUG() inside the pci_lock, we're guaranteed to hose * the machine */ @@ -416,7 +416,7 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev) { unsigned long flags; - spin_lock_irqsave(&pci_lock, flags); + atomic_spin_lock_irqsave(&pci_lock, flags); /* This indicates a problem in the caller, but we don't need * to kill them, unlike a double-block above. */ @@ -424,6 +424,6 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev) dev->block_ucfg_access = 0; wake_up_all(&pci_ucfg_wait); - spin_unlock_irqrestore(&pci_lock, flags); + atomic_spin_unlock_irqrestore(&pci_lock, flags); } EXPORT_SYMBOL_GPL(pci_unblock_user_cfg_access); diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index cef28a7..caa6295 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -240,9 +240,9 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), next = dev->bus_list.next; /* Run device routines with the device locked */ - down(&dev->dev.sem); + mutex_lock(&dev->dev.mutex); retval = cb(dev, userdata); - up(&dev->dev.sem); + mutex_unlock(&dev->dev.mutex); if (retval) break; } diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c index 83f337c..d120da6 100644 --- a/drivers/pci/hotplug/ibmphp_hpc.c +++ b/drivers/pci/hotplug/ibmphp_hpc.c @@ -104,7 +104,7 @@ static int to_debug = 0; static struct mutex sem_hpcaccess; // lock access to HPC static struct semaphore semOperations; // lock all operations and // access to data structures -static struct semaphore sem_exit; // make sure polling thread goes away +static struct anon_semaphore sem_exit; // make sure polling thread goes away static struct task_struct *ibmphp_poll_thread; //---------------------------------------------------------------------------- // local function prototypes @@ -132,8 +132,8 @@ void __init ibmphp_hpc_initvars (void) debug ("%s - Entry\n", __func__); mutex_init(&sem_hpcaccess); - init_MUTEX (&semOperations); - init_MUTEX_LOCKED (&sem_exit); + semaphore_init(&semOperations); + anon_semaphore_init_locked(&sem_exit); to_debug = 0; debug ("%s - Exit\n", __func__); @@ -906,7 +906,7 @@ static int poll_hpc(void *data) /* sleep for a short time just for good measure */ msleep(100); } - up (&sem_exit); + anon_up (&sem_exit); debug ("%s - Exit\n", __func__); return 0; } @@ -1076,7 +1076,7 @@ void __exit ibmphp_hpc_stop_poll_thread (void) // wait for poll thread to exit debug ("before sem_exit down \n"); - down (&sem_exit); + anon_down (&sem_exit); debug ("after sem_exit down \n"); // cleanup @@ -1085,7 +1085,7 @@ void __exit ibmphp_hpc_stop_poll_thread (void) debug ("after free_hpc_access \n"); ibmphp_unlock_operations (); debug ("after unlock operations \n"); - up (&sem_exit); + anon_up (&sem_exit); debug ("after sem exit up\n"); debug ("%s - Exit\n", __func__); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7b70312..662b5ce 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2213,7 +2213,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe) if (!probe) { pci_block_user_cfg_access(dev); /* block PM suspend, driver probe, etc. 
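 *
 * (A minimal sketch of the reset locking pattern after this series,
 *  which replaces the struct device semaphore with dev->mutex; the
 *  reset body itself is elided:)
 *
 *   pci_block_user_cfg_access(dev);
 *   mutex_lock(&dev->dev.mutex);
 *   ... perform the reset ...
 *   mutex_unlock(&dev->dev.mutex);
 *   pci_unblock_user_cfg_access(dev);
 *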
*/ - down(&dev->dev.sem); + mutex_lock(&dev->dev.mutex); } rc = pcie_flr(dev, probe); @@ -2231,7 +2231,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe) rc = pci_parent_bus_reset(dev, probe); done: if (!probe) { - up(&dev->dev.sem); + mutex_unlock(&dev->dev.mutex); pci_unblock_user_cfg_access(dev); } diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 304ff6d..a5bcb5c 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -1082,9 +1082,9 @@ static int runtime_suspend(struct device *dev) { int rc; - down(&dev->sem); + mutex_lock(&dev->mutex); rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); - up(&dev->sem); + mutex_unlock(&dev->mutex); return rc; } @@ -1092,9 +1092,9 @@ static void runtime_resume(struct device *dev) { int rc; - down(&dev->sem); + mutex_lock(&dev->mutex); rc = pcmcia_dev_resume(dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); } /************************ per-device sysfs output ***************************/ diff --git a/drivers/s390/cio/crw.c b/drivers/s390/cio/crw.c index d157665..dde3d10 100644 --- a/drivers/s390/cio/crw.c +++ b/drivers/s390/cio/crw.c @@ -137,7 +137,7 @@ void crw_handle_channel_report(void) */ static int __init crw_init_semaphore(void) { - init_MUTEX_LOCKED(&crw_semaphore); + semaphore_init_locked(&crw_semaphore); return 0; } pure_initcall(crw_init_semaphore); diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index cdbdec9..5ccaa8d 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -719,7 +719,7 @@ struct aac_fib_context { u32 unique; // unique value representing this context ulong jiffies; // used for cleanup - dmb changed to ulong struct list_head next; // used to link context's into a linked list - struct semaphore wait_sem; // this is used to wait for the next fib to arrive. + struct anon_semaphore wait_sem; // this is used to wait for the next fib to arrive. int wait; // Set to true when thread is in WaitForSingleObject unsigned long count; // total number of FIBs on FibList struct list_head fib_list; // this holds fibs and their attachd hw_fibs @@ -789,7 +789,7 @@ struct fib { * This is the event the sendfib routine will wait on if the * caller did not pass one and this is synch io. */ - struct semaphore event_wait; + struct anon_semaphore event_wait; spinlock_t event_lock; u32 done; /* gets set to 1 when fib is complete */ diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 0391d75..ab39bfc 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -190,7 +190,7 @@ static int open_getadapter_fib(struct aac_dev * dev, void __user *arg) /* * Initialize the mutex used to wait for the next AIF. 
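 *
 * (Sketch of the init-locked pattern used here with the anon_semaphore
 *  API from this series: the semaphore starts at 0, so the reader blocks
 *  in anon_down_interruptible() until the AIF path calls anon_up():)
 *
 *   anon_semaphore_init_locked(&fibctx->wait_sem);
 *   ...
 *   if (anon_down_interruptible(&fibctx->wait_sem) < 0)
 *           status = -EINTR;
 *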
*/ - init_MUTEX_LOCKED(&fibctx->wait_sem); + anon_semaphore_init_locked(&fibctx->wait_sem); fibctx->wait = 0; /* * Initialize the fibs and set the count of fibs on @@ -321,7 +321,7 @@ return_fib: ssleep(1); } if (f.wait) { - if(down_interruptible(&fibctx->wait_sem) < 0) { + if(anon_down_interruptible(&fibctx->wait_sem) < 0) { status = -EINTR; } else { /* Lock again and retry */ diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 956261f..b5bda58 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -124,7 +124,7 @@ int aac_fib_setup(struct aac_dev * dev) fibptr->hw_fib_va = hw_fib; fibptr->data = (void *) fibptr->hw_fib_va->data; fibptr->next = fibptr+1; /* Forward chain the fibs */ - init_MUTEX_LOCKED(&fibptr->event_wait); + anon_semaphore_init_locked(&fibptr->event_wait); spin_lock_init(&fibptr->event_lock); hw_fib->header.XferState = cpu_to_le32(0xffffffff); hw_fib->header.SenderSize = cpu_to_le16(dev->max_fib_size); @@ -490,7 +490,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, * hardware failure has occurred. */ unsigned long count = 36000000L; /* 3 minutes */ - while (down_trylock(&fibptr->event_wait)) { + while (anon_down_trylock(&fibptr->event_wait)) { int blink; if (--count == 0) { struct aac_queue * q = &dev->queues->queue[AdapNormCmdQueue]; @@ -515,9 +515,9 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } udelay(5); } - } else if (down_interruptible(&fibptr->event_wait)) { + } else if (anon_down_interruptible(&fibptr->event_wait)) { fibptr->done = 2; - up(&fibptr->event_wait); + anon_up(&fibptr->event_wait); } spin_lock_irqsave(&fibptr->event_lock, flags); if ((fibptr->done == 0) || (fibptr->done == 2)) { @@ -1177,7 +1177,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) (fib->hw_fib_va->header.XferState & cpu_to_le32(ResponseExpected))) { unsigned long flagv; spin_lock_irqsave(&fib->event_lock, flagv); - up(&fib->event_wait); + anon_up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); schedule(); retval = 0; @@ -1460,7 +1460,7 @@ int aac_check_health(struct aac_dev * aac) * Set the event to wake up the * thread that will waiting. */ - up(&fibctx->wait_sem); + anon_up(&fibctx->wait_sem); } else { printk(KERN_WARNING "aifd: didn't allocate NewFib.\n"); kfree(fib); @@ -1691,7 +1691,7 @@ int aac_command_thread(void *data) * Set the event to wake up the * thread that is waiting. 
*/ - up(&fibctx->wait_sem); + anon_up(&fibctx->wait_sem); } else { printk(KERN_WARNING "aifd: didn't allocate NewFib.\n"); } diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c index abc9ef5..0e29b5f 100644 --- a/drivers/scsi/aacraid/dpcsup.c +++ b/drivers/scsi/aacraid/dpcsup.c @@ -127,7 +127,7 @@ unsigned int aac_response_normal(struct aac_queue * q) spin_lock_irqsave(&fib->event_lock, flagv); if (!fib->done) fib->done = 1; - up(&fib->event_wait); + anon_up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); FIB_COUNTER_INCREMENT(aac_config.NormalRecved); if (fib->done == 2) { @@ -322,7 +322,7 @@ unsigned int aac_intr_normal(struct aac_dev * dev, u32 index) spin_lock_irqsave(&fib->event_lock, flagv); if (!fib->done) fib->done = 1; - up(&fib->event_wait); + anon_up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); FIB_COUNTER_INCREMENT(aac_config.NormalRecved); } diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index fb867a9..d406333 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -1595,7 +1595,12 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id) l = l->next; - if (l == i->head && pass_counter++ > PASS_LIMIT) { + /* + * On preempt-rt we can be preempted and run in our + * own thread. + */ + if (!preempt_rt() && l == i->head && + pass_counter++ > PASS_LIMIT) { /* If we hit this, we're dead. */ printk(KERN_ERR "serial8250: too much work for " "irq%d\n", irq); @@ -2729,14 +2734,10 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count) touch_nmi_watchdog(); - local_irq_save(flags); - if (up->port.sysrq) { - /* serial8250_handle_port() already took the lock */ - locked = 0; - } else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); + if (up->port.sysrq || oops_in_progress || preempt_rt()) + locked = spin_trylock_irqsave(&up->port.lock, flags); + else + spin_lock_irqsave(&up->port.lock, flags); /* * First save the IER then disable the interrupts @@ -2768,8 +2769,7 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count) check_modem_status(up); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } static int __init serial8250_console_setup(struct console *co, char *options) diff --git a/drivers/staging/comedi/drivers/dt9812.c b/drivers/staging/comedi/drivers/dt9812.c index cc4c046..aac9a65 100644 --- a/drivers/staging/comedi/drivers/dt9812.c +++ b/drivers/staging/comedi/drivers/dt9812.c @@ -262,7 +262,7 @@ struct dt9812_usb_cmd { #define DT9812_NUM_SLOTS 16 -static DECLARE_MUTEX(dt9812_mutex); +static DEFINE_SEMAPHORE(dt9812_mutex); static struct usb_device_id dt9812_table[] = { {USB_DEVICE(0x0867, 0x9812)}, @@ -1121,7 +1121,7 @@ static int __init usb_dt9812_init(void) /* Initialize all driver slots */ for (i = 0; i < DT9812_NUM_SLOTS; i++) { - init_MUTEX(&dt9812[i].mutex); + semaphore_init(&dt9812[i].mutex); dt9812[i].serial = 0; dt9812[i].usb = NULL; dt9812[i].comedi = NULL; diff --git a/drivers/staging/comedi/drivers/usbdux.c b/drivers/staging/comedi/drivers/usbdux.c index 171a6f2..7c84121 100644 --- a/drivers/staging/comedi/drivers/usbdux.c +++ b/drivers/staging/comedi/drivers/usbdux.c @@ -307,7 +307,7 @@ struct usbduxsub { */ static struct usbduxsub usbduxsub[NUMUSBDUX]; -static DECLARE_MUTEX(start_stop_sem); +static DEFINE_SEMAPHORE(start_stop_sem); /* * Stops the data acquision @@ -2349,7 +2349,7 @@ static int 
usbduxsub_probe(struct usb_interface *uinterf, dev_dbg(dev, "comedi_: usbdux: " "usbduxsub[%d] is ready to connect to comedi.\n", index); - init_MUTEX(&(usbduxsub[index].sem)); + semaphore_init(&(usbduxsub[index].sem)); /* save a pointer to the usb device */ usbduxsub[index].usbdev = udev; diff --git a/drivers/staging/comedi/drivers/usbduxfast.c b/drivers/staging/comedi/drivers/usbduxfast.c index 939b53f..3a465ba 100644 --- a/drivers/staging/comedi/drivers/usbduxfast.c +++ b/drivers/staging/comedi/drivers/usbduxfast.c @@ -201,7 +201,7 @@ struct usbduxfastsub_s { */ static struct usbduxfastsub_s usbduxfastsub[NUMUSBDUXFAST]; -static DECLARE_MUTEX(start_stop_sem); +static DEFINE_SEMAPHORE(start_stop_sem); /* * bulk transfers to usbduxfast @@ -1500,7 +1500,7 @@ static int usbduxfastsub_probe(struct usb_interface *uinterf, "connect to comedi.\n", index); #endif - init_MUTEX(&(usbduxfastsub[index].sem)); + semaphore_init(&(usbduxfastsub[index].sem)); /* save a pointer to the usb device */ usbduxfastsub[index].usbdev = udev; diff --git a/drivers/staging/cpc-usb/cpc-usb_drv.c b/drivers/staging/cpc-usb/cpc-usb_drv.c index 9bf3f98..9a51965 100644 --- a/drivers/staging/cpc-usb/cpc-usb_drv.c +++ b/drivers/staging/cpc-usb/cpc-usb_drv.c @@ -83,7 +83,7 @@ static CPC_USB_T *CPCUSB_Table[CPC_USB_CARD_CNT] = { 0 }; static unsigned int CPCUsbCnt; /* prevent races between open() and disconnect() */ -static DECLARE_MUTEX(disconnect_sem); +static DEFINE_SEMAPHORE(disconnect_sem); /* local function prototypes */ static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count, @@ -903,7 +903,7 @@ static int cpcusb_probe(struct usb_interface *interface, memset(chan, 0, sizeof(CPC_CHAN_T)); ResetBuffer(chan); - init_MUTEX(&card->sem); + semaphore_init(&card->sem); spin_lock_init(&card->slock); card->udev = udev; diff --git a/drivers/staging/frontier/alphatrack.c b/drivers/staging/frontier/alphatrack.c index 15aed87..d4d801e 100644 --- a/drivers/staging/frontier/alphatrack.c +++ b/drivers/staging/frontier/alphatrack.c @@ -678,7 +678,7 @@ static int usb_alphatrack_probe(struct usb_interface *intf, dev_err(&intf->dev, "Out of memory\n"); goto exit; } - init_MUTEX(&dev->sem); + semaphore_init(&dev->sem); dev->intf = intf; init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); diff --git a/drivers/staging/frontier/tranzport.c b/drivers/staging/frontier/tranzport.c index ef8fcc8..81db34b 100644 --- a/drivers/staging/frontier/tranzport.c +++ b/drivers/staging/frontier/tranzport.c @@ -800,7 +800,7 @@ static int usb_tranzport_probe(struct usb_interface *intf, dev_err(&intf->dev, "Out of memory\n"); goto exit; } - init_MUTEX(&dev->sem); + semaphore_init(&dev->sem); dev->intf = intf; init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); diff --git a/drivers/staging/go7007/go7007-driver.c b/drivers/staging/go7007/go7007-driver.c index 77b1e76..6efcd79 100644 --- a/drivers/staging/go7007/go7007-driver.c +++ b/drivers/staging/go7007/go7007-driver.c @@ -604,7 +604,7 @@ struct go7007 *go7007_alloc(struct go7007_board_info *board, struct device *dev) go->tuner_type = -1; go->channel_number = 0; go->name[0] = 0; - init_MUTEX(&go->hw_lock); + semaphore_init(&go->hw_lock); init_waitqueue_head(&go->frame_waitq); spin_lock_init(&go->spinlock); go->video_dev = NULL; diff --git a/drivers/staging/go7007/go7007-i2c.c b/drivers/staging/go7007/go7007-i2c.c index c82867f..f9d9d71 100644 --- a/drivers/staging/go7007/go7007-i2c.c +++ b/drivers/staging/go7007/go7007-i2c.c @@ -48,7 +48,7 @@ /* 
There is only one I2C port on the TW2804 that feeds all four GO7007 VIPs * on the Adlink PCI-MPG24, so access is shared between all of them. */ -static DECLARE_MUTEX(adlink_mpg24_i2c_lock); +static DEFINE_SEMAPHORE(adlink_mpg24_i2c_lock); static int go7007_i2c_xfer(struct go7007 *go, u16 addr, int read, u16 command, int flags, u8 *data) diff --git a/drivers/staging/go7007/go7007-usb.c b/drivers/staging/go7007/go7007-usb.c index aa4a9e0..d988d05 100644 --- a/drivers/staging/go7007/go7007-usb.c +++ b/drivers/staging/go7007/go7007-usb.c @@ -1065,7 +1065,7 @@ static int go7007_usb_probe(struct usb_interface *intf, if (board->flags & GO7007_USB_EZUSB_I2C) { memcpy(&go->i2c_adapter, &go7007_usb_adap_templ, sizeof(go7007_usb_adap_templ)); - init_MUTEX(&usb->i2c_lock); + semaphore_init(&usb->i2c_lock); go->i2c_adapter.dev.parent = go->dev; i2c_set_adapdata(&go->i2c_adapter, go); if (i2c_add_adapter(&go->i2c_adapter) < 0) { diff --git a/drivers/staging/go7007/go7007-v4l2.c b/drivers/staging/go7007/go7007-v4l2.c index 06cacd3..daf6b73 100644 --- a/drivers/staging/go7007/go7007-v4l2.c +++ b/drivers/staging/go7007/go7007-v4l2.c @@ -101,7 +101,7 @@ static int go7007_open(struct file *file) return -ENOMEM; ++go->ref_count; gofh->go = go; - init_MUTEX(&gofh->lock); + semaphore_init(&gofh->lock); gofh->buf_count = 0; file->private_data = gofh; return 0; diff --git a/drivers/staging/go7007/s2250-loader.c b/drivers/staging/go7007/s2250-loader.c index bb22347..5031bbc 100644 --- a/drivers/staging/go7007/s2250-loader.c +++ b/drivers/staging/go7007/s2250-loader.c @@ -35,7 +35,7 @@ typedef struct device_extension_s { #define MAX_DEVICES 256 static pdevice_extension_t s2250_dev_table[MAX_DEVICES]; -static DECLARE_MUTEX(s2250_dev_table_mutex); +static DEFINE_SEMAPHORE(s2250_dev_table_mutex); #define to_s2250loader_dev_common(d) container_of(d, device_extension_t, kref) static void s2250loader_delete(struct kref *kref) diff --git a/drivers/staging/mimio/mimio.c b/drivers/staging/mimio/mimio.c index 1ba8103..63bf2db 100644 --- a/drivers/staging/mimio/mimio.c +++ b/drivers/staging/mimio/mimio.c @@ -160,7 +160,7 @@ static struct usb_driver mimio_driver = { .id_table = mimio_table, }; -static DECLARE_MUTEX(disconnect_sem); +static DEFINE_SEMAPHORE(disconnect_sem); static void mimio_close(struct input_dev *idev) { diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index 93cab0a..6c7dd49 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -39,7 +39,7 @@ #include "cvmx-smix-defs.h" -DECLARE_MUTEX(mdio_sem); +DEFINE_SEMAPHORE(mdio_sem); /** * Perform an MII read. Called by the generic MII routines diff --git a/drivers/staging/otus/wwrap.c b/drivers/staging/otus/wwrap.c index 4db8f6e..1de941e 100644 --- a/drivers/staging/otus/wwrap.c +++ b/drivers/staging/otus/wwrap.c @@ -1066,7 +1066,7 @@ u8_t zfLnxCreateThread(zdev_t *dev) /* Create Mutex and keventd */ INIT_WORK(&macp->kevent, kevent); - init_MUTEX(&macp->ioctl_sem); + semaphore_init(&macp->ioctl_sem); return 0; } diff --git a/drivers/staging/p9auth/p9auth.c b/drivers/staging/p9auth/p9auth.c index 9111dcb..b23e201 100644 --- a/drivers/staging/p9auth/p9auth.c +++ b/drivers/staging/p9auth/p9auth.c @@ -388,7 +388,7 @@ static int cap_init_module(void) /* Initialize each device. 
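 *
 * (Note on the conversion below, as assumed throughout this series:
 *  semaphore_init() sets up a counting semaphore with count 1, i.e. the
 *  old init_MUTEX(); statically allocated instances use
 *  DEFINE_SEMAPHORE(), as in the other drivers touched above:)
 *
 *   old: init_MUTEX(&cap_devices[i].sem);
 *   new: semaphore_init(&cap_devices[i].sem);
 *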
*/ for (i = 0; i < cap_nr_devs; i++) { cap_devices[i].node_size = cap_node_size; - init_MUTEX(&cap_devices[i].sem); + semaphore_init(&cap_devices[i].sem); cap_setup_cdev(&cap_devices[i], i); } diff --git a/drivers/staging/rspiusb/rspiusb.c b/drivers/staging/rspiusb/rspiusb.c index 04e2f92..ca9688f 100644 --- a/drivers/staging/rspiusb/rspiusb.c +++ b/drivers/staging/rspiusb/rspiusb.c @@ -63,7 +63,7 @@ static int debug; #endif /* prevent races between open() and disconnect() */ -static DECLARE_MUTEX(disconnect_sem); +static DEFINE_SEMAPHORE(disconnect_sem); /* Structure to hold all of our device specific stuff */ struct device_extension { diff --git a/drivers/staging/rt2870/common/2870_rtmp_init.c b/drivers/staging/rt2870/common/2870_rtmp_init.c index 80909e9..114bdc7 100644 --- a/drivers/staging/rt2870/common/2870_rtmp_init.c +++ b/drivers/staging/rt2870/common/2870_rtmp_init.c @@ -751,13 +751,13 @@ NDIS_STATUS CreateThreads( //init_MUTEX(&(pAd->usbdev_semaphore)); - init_MUTEX_LOCKED(&(pAd->mlme_semaphore)); + semaphore_init_locked(&(pAd->mlme_semaphore)); init_completion (&pAd->mlmeComplete); - init_MUTEX_LOCKED(&(pAd->RTUSBCmd_semaphore)); + semaphore_init_locked(&(pAd->RTUSBCmd_semaphore)); init_completion (&pAd->CmdQComplete); - init_MUTEX_LOCKED(&(pAd->RTUSBTimer_semaphore)); + semaphore_init_locked(&(pAd->RTUSBTimer_semaphore)); init_completion (&pAd->TimerQComplete); // Creat MLME Thread diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 4247ecc..57ba3b1 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -330,8 +330,9 @@ static void async_completed(struct urb *urb) uid_t euid = 0; u32 secid = 0; int signr; + unsigned long flags; - spin_lock(&ps->lock); + spin_lock_irqsave(&ps->lock, flags); list_move_tail(&as->asynclist, &ps->async_completed); as->status = urb->status; signr = as->signr; @@ -347,7 +348,7 @@ static void async_completed(struct urb *urb) } snoop(&urb->dev->dev, "urb complete\n"); snoop_urb(urb, as->userurb); - spin_unlock(&ps->lock); + spin_unlock_irqrestore(&ps->lock, flags); if (signr) kill_pid_info_as_uid(sinfo.si_signo, &sinfo, pid, uid, diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 69e5773..e0dacaf 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -391,10 +391,10 @@ void usb_driver_release_interface(struct usb_driver *driver, if (device_is_registered(dev)) { device_release_driver(dev); } else { - down(&dev->sem); + mutex_lock(&dev->mutex); usb_unbind_interface(dev); dev->driver = NULL; - up(&dev->sem); + mutex_unlock(&dev->mutex); } } EXPORT_SYMBOL_GPL(usb_driver_release_interface); diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 95ccfa0..167548a 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1880,7 +1880,7 @@ irqreturn_t usb_hcd_irq (int irq, void *__hcd) * when the first handler doesn't use it. So let's just * assume it's never used. 
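 *
 * (Sketch of the _nort convention assumed by the change below: on a
 *  non-RT kernel local_irq_save_nort() behaves like local_irq_save(),
 *  while on PREEMPT_RT it is expected to leave interrupts enabled so
 *  the now-threaded handler stays preemptible:)
 *
 *   local_irq_save_nort(flags);
 *   ... invoke the HCD's interrupt handler ...
 *   local_irq_restore_nort(flags);
 *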
*/ - local_irq_save(flags); + local_irq_save_nort(flags); if (unlikely(hcd->state == HC_STATE_HALT || !test_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags))) { @@ -1895,7 +1895,7 @@ irqreturn_t usb_hcd_irq (int irq, void *__hcd) rc = IRQ_HANDLED; } - local_irq_restore(flags); + local_irq_restore_nort(flags); return rc; } diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 9720e69..b529a76 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -269,8 +269,9 @@ static void sg_complete(struct urb *urb) { struct usb_sg_request *io = urb->context; int status = urb->status; + unsigned long flags; - spin_lock(&io->lock); + spin_lock_irqsave (&io->lock, flags); /* In 2.5 we require hcds' endpoint queues not to progress after fault * reports, until the completion callback (this!) returns. That lets @@ -304,7 +305,7 @@ static void sg_complete(struct urb *urb) * unlink pending urbs so they won't rx/tx bad data. * careful: unlink can sometimes be synchronous... */ - spin_unlock(&io->lock); + spin_unlock_irqrestore (&io->lock, flags); for (i = 0, found = 0; i < io->entries; i++) { if (!io->urbs [i] || !io->urbs [i]->dev) continue; @@ -319,7 +320,7 @@ static void sg_complete(struct urb *urb) } else if (urb == io->urbs [i]) found = 1; } - spin_lock(&io->lock); + spin_lock_irqsave (&io->lock, flags); } urb->dev = NULL; @@ -329,7 +330,7 @@ static void sg_complete(struct urb *urb) if (!io->count) complete(&io->complete); - spin_unlock(&io->lock); + spin_unlock_irqrestore (&io->lock, flags); } @@ -643,7 +644,7 @@ void usb_sg_cancel(struct usb_sg_request *io) int i; io->status = -ECONNRESET; - spin_unlock(&io->lock); + spin_unlock_irqrestore(&io->lock, flags); for (i = 0; i < io->entries; i++) { int retval; @@ -654,7 +655,7 @@ void usb_sg_cancel(struct usb_sg_request *io) dev_warn(&io->dev->dev, "%s, unlink --> %d\n", __func__, retval); } - spin_lock(&io->lock); + spin_lock_irqsave(&io->lock, flags); } spin_unlock_irqrestore(&io->lock, flags); } diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 7d33f50..2e2ae7f 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -193,7 +193,7 @@ enum ep_state { }; struct ep_data { - struct semaphore lock; + struct mutex lock; enum ep_state state; atomic_t count; struct dev_data *dev; @@ -297,10 +297,10 @@ get_ready_ep (unsigned f_flags, struct ep_data *epdata) int val; if (f_flags & O_NONBLOCK) { - if (down_trylock (&epdata->lock) != 0) + if (mutex_trylock(&epdata->lock) != 0) goto nonblock; if (epdata->state != STATE_EP_ENABLED) { - up (&epdata->lock); + mutex_unlock(&epdata->lock); nonblock: val = -EAGAIN; } else @@ -308,7 +308,8 @@ nonblock: return val; } - if ((val = down_interruptible (&epdata->lock)) < 0) + val = mutex_lock_interruptible(&epdata->lock); + if (val < 0) return val; switch (epdata->state) { @@ -322,7 +323,7 @@ nonblock: // FALLTHROUGH case STATE_EP_UNBOUND: /* clean disconnect */ val = -ENODEV; - up (&epdata->lock); + mutex_unlock(&epdata->lock); } return val; } @@ -392,7 +393,7 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) if (likely (data->ep != NULL)) usb_ep_set_halt (data->ep); spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return -EBADMSG; } @@ -410,7 +411,7 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) value = -EFAULT; free1: - up (&data->lock); + mutex_unlock(&data->lock); kfree (kbuf); return value; } @@ -435,7 +436,7 @@ ep_write (struct file *fd, const char __user *buf, 
size_t len, loff_t *ptr) if (likely (data->ep != NULL)) usb_ep_set_halt (data->ep); spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return -EBADMSG; } @@ -454,7 +455,7 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) VDEBUG (data->dev, "%s write %zu IN, status %d\n", data->name, len, (int) value); free1: - up (&data->lock); + mutex_unlock(&data->lock); kfree (kbuf); return value; } @@ -465,7 +466,8 @@ ep_release (struct inode *inode, struct file *fd) struct ep_data *data = fd->private_data; int value; - if ((value = down_interruptible(&data->lock)) < 0) + value = mutex_lock_interruptible(&data->lock); + if (value < 0) return value; /* clean up if this can be reopened */ @@ -475,7 +477,7 @@ ep_release (struct inode *inode, struct file *fd) data->hs_desc.bDescriptorType = 0; usb_ep_disable(data->ep); } - up (&data->lock); + mutex_unlock(&data->lock); put_ep (data); return 0; } @@ -506,7 +508,7 @@ static long ep_ioctl(struct file *fd, unsigned code, unsigned long value) } else status = -ENODEV; spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return status; } @@ -672,7 +674,7 @@ fail: value = -ENODEV; spin_unlock_irq(&epdata->dev->lock); - up(&epdata->lock); + mutex_unlock(&epdata->lock); if (unlikely(value)) { kfree(priv); @@ -764,7 +766,8 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) u32 tag; int value, length = len; - if ((value = down_interruptible (&data->lock)) < 0) + value = mutex_lock_interruptible(&data->lock); + if (value < 0) return value; if (data->state != STATE_EP_READY) { @@ -853,7 +856,7 @@ fail: data->desc.bDescriptorType = 0; data->hs_desc.bDescriptorType = 0; } - up (&data->lock); + mutex_unlock(&data->lock); return value; fail0: value = -EINVAL; @@ -869,7 +872,7 @@ ep_open (struct inode *inode, struct file *fd) struct ep_data *data = inode->i_private; int value = -EBUSY; - if (down_interruptible (&data->lock) != 0) + if (mutex_lock_interruptible(&data->lock) != 0) return -EINTR; spin_lock_irq (&data->dev->lock); if (data->dev->state == STATE_DEV_UNBOUND) @@ -884,7 +887,7 @@ ep_open (struct inode *inode, struct file *fd) DBG (data->dev, "%s state %d\n", data->name, data->state); spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return value; } @@ -1630,7 +1633,7 @@ static int activate_ep_files (struct dev_data *dev) if (!data) goto enomem0; data->state = STATE_EP_DISABLED; - init_MUTEX (&data->lock); + mutex_init(&data->lock); init_waitqueue_head (&data->wait); strncpy (data->name, ep->name, sizeof (data->name) - 1); diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 9d0675e..d20cb67 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -2766,7 +2766,7 @@ static int ftdi_elan_probe(struct usb_interface *interface, ftdi->sequence_num = ++ftdi_instances; mutex_unlock(&ftdi_module_lock); ftdi_elan_init_kref(ftdi); - init_MUTEX(&ftdi->sw_lock); + semaphore_init(&ftdi->sw_lock); ftdi->udev = usb_get_dev(interface_to_usbdev(interface)); ftdi->interface = interface; mutex_init(&ftdi->u132_lock); diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 5ad3616..125d19e 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -62,12 +62,12 @@ int umc_controller_reset(struct umc_dev *umc) struct device *parent = umc->dev.parent; int ret = 0; - if(down_trylock(&parent->sem)) + if (mutex_trylock(&parent->mutex)) return -EAGAIN; ret = 
device_for_each_child(parent, parent, umc_bus_pre_reset_helper); if (ret >= 0) device_for_each_child(parent, parent, umc_bus_post_reset_helper); - up(&parent->sem); + mutex_unlock(&parent->mutex); return ret; } diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h index d5bcfc1..17b10b9 100644 --- a/drivers/uwb/uwb-internal.h +++ b/drivers/uwb/uwb-internal.h @@ -366,12 +366,12 @@ struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal); static inline void uwb_dev_lock(struct uwb_dev *uwb_dev) { - down(&uwb_dev->dev.sem); + mutex_lock(&uwb_dev->dev.mutex); } static inline void uwb_dev_unlock(struct uwb_dev *uwb_dev) { - up(&uwb_dev->dev.sem); + mutex_unlock(&uwb_dev->dev.mutex); } #endif /* #ifndef __UWB_INTERNAL_H__ */ diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 29ff5ea..d951790 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -1206,7 +1206,6 @@ static void fbcon_clear(struct vc_data *vc, int sy, int sx, int height, { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_ops *ops = info->fbcon_par; - struct display *p = &fb_display[vc->vc_num]; u_int y_break; @@ -1238,10 +1237,11 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s, struct display *p = &fb_display[vc->vc_num]; struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_is_inactive(vc, info)) + if (!fbcon_is_inactive(vc, info)) { ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, get_color(vc, info, scr_readw(s), 1), get_color(vc, info, scr_readw(s), 0)); + } } static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos) @@ -3235,6 +3235,7 @@ static const struct consw fb_con = { .con_screen_pos = fbcon_screen_pos, .con_getxy = fbcon_getxy, .con_resize = fbcon_resize, + .con_preemptible = 1, }; static struct notifier_block fbcon_event_notifier = { diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 59d7d5e..41cc04c 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -51,7 +51,7 @@ #include