author	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2011-11-09 17:11:07 -0800
committer	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2011-11-09 17:11:07 -0800
commit	0ceb8e096b264d8a915bcf1c54a69d4250d88815 (patch)
tree	c91c41a93f86d34713f20cef90a62e8e504a1a94
parent	cf55b8419d76ce015ea5ec1ad2a002a546ad48f2 (diff)
parent	1bdd09ebef189a8433056af84dea44eaada5e935 (diff)
download	xen-0ceb8e096b264d8a915bcf1c54a69d4250d88815.tar.gz
Merge remote-tracking branch 'konrad-xen/testing' into xen/next-3.2
* konrad-xen/testing: (123 commits)
  pvhvm: compile error.
  x86/microcode: check proper return code.
  xen/v86d: Fix /dev/mem to access memory below 1MB
  xen/pvhvm: Use default_idle on pm_idle call.
  xen/pm_idle: Make pm_idle call default_idle
  cpuidle: If pm_idle is not set and cpuidle is turned off, use the default_idle
  xen: document balloon driver sysfs files
  ttm:dma: Add 'ttm_dma' module to radeon and nouveau to force enable the TTM DMA
  drm/nouveau: enable the TTM DMA pool on 32-bit DMA only device V2
  drm/radeon/kms: Enable the TTM DMA pool if swiotlb is on V2
  swiotlb: Expose swiotlb_nr_tlb function to modules
  ttm: Provide DMA aware TTM page pool code. V5
  drm/ttm: introduce callback for ttm_tt populate & unpopulate V2
  drm/ttm: merge ttm_backend and ttm_tt V2
  drm/ttm: test for dma_address array allocation failure
  drm/ttm: convert page allocation to use page ptr array instead of list V4
  drm/ttm: overhaul memory accounting
  drm/ttm: use ttm put pages function to properly restore cache attribute
  drm/ttm: remove unused backend flags field
  drm/ttm: remove split btw highmen and lowmem page
  ...
-rw-r--r--	Documentation/ABI/stable/sysfs-devices-system-xen_memory	77
-rw-r--r--	Documentation/DocBook/drm.tmpl	308
-rw-r--r--	Documentation/cgroups/freezer-subsystem.txt	4
-rw-r--r--	arch/sparc/include/asm/unistd.h	4
-rw-r--r--	arch/sparc/kernel/systbls_32.S	2
-rw-r--r--	arch/sparc/kernel/systbls_64.S	4
-rw-r--r--	arch/x86/include/asm/microcode.h	9
-rw-r--r--	arch/x86/include/asm/pgtable.h	5
-rw-r--r--	arch/x86/kernel/Makefile	1
-rw-r--r--	arch/x86/kernel/microcode_core.c	5
-rw-r--r--	arch/x86/kernel/microcode_xen.c	198
-rw-r--r--	arch/x86/kernel/process.c	5
-rw-r--r--	arch/x86/mm/pageattr-test.c	6
-rw-r--r--	arch/x86/mm/pageattr.c	2
-rw-r--r--	arch/x86/xen/Kconfig	5
-rw-r--r--	arch/x86/xen/enlighten.c	8
-rw-r--r--	arch/x86/xen/grant-table.c	2
-rw-r--r--	arch/x86/xen/setup.c	2
-rw-r--r--	drivers/base/power/opp.c	2
-rw-r--r--	drivers/block/xen-blkback/blkback.c	69
-rw-r--r--	drivers/block/xen-blkback/common.h	67
-rw-r--r--	drivers/block/xen-blkback/xenbus.c	12
-rw-r--r--	drivers/block/xen-blkfront.c	77
-rw-r--r--	drivers/char/mem.c	11
-rw-r--r--	drivers/gpu/drm/i915/i915_drv.c	4
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_bo.c	86
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_debugfs.c	1
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_drv.c	4
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_drv.h	6
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_mem.c	6
-rw-r--r--	drivers/gpu/drm/nouveau/nouveau_sgdma.c	170
-rw-r--r--	drivers/gpu/drm/radeon/radeon.h	2
-rw-r--r--	drivers/gpu/drm/radeon/radeon_device.c	6
-rw-r--r--	drivers/gpu/drm/radeon/radeon_drv.c	4
-rw-r--r--	drivers/gpu/drm/radeon/radeon_gart.c	29
-rw-r--r--	drivers/gpu/drm/radeon/radeon_ttm.c	304
-rw-r--r--	drivers/gpu/drm/ttm/Makefile	4
-rw-r--r--	drivers/gpu/drm/ttm/ttm_agp_backend.c	88
-rw-r--r--	drivers/gpu/drm/ttm/ttm_bo.c	33
-rw-r--r--	drivers/gpu/drm/ttm/ttm_bo_util.c	31
-rw-r--r--	drivers/gpu/drm/ttm/ttm_bo_vm.c	13
-rw-r--r--	drivers/gpu/drm/ttm/ttm_memory.c	534
-rw-r--r--	drivers/gpu/drm/ttm/ttm_object.c	3
-rw-r--r--	drivers/gpu/drm/ttm/ttm_page_alloc.c	135
-rw-r--r--	drivers/gpu/drm/ttm/ttm_page_alloc_dma.c	1235
-rw-r--r--	drivers/gpu/drm/ttm/ttm_tt.c	281
-rw-r--r--	drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c	69
-rw-r--r--	drivers/gpu/drm/vmwgfx/vmwgfx_fence.c	8
-rw-r--r--	drivers/gpu/drm/vmwgfx/vmwgfx_resource.c	8
-rw-r--r--	drivers/hwspinlock/u8500_hsem.c	7
-rw-r--r--	drivers/md/raid5.c	16
-rw-r--r--	drivers/media/video/s5k6aa.c	1
-rw-r--r--	drivers/mfd/ab5500-core.c	1
-rw-r--r--	drivers/mfd/ab5500-debugfs.c	1
-rw-r--r--	drivers/mtd/maps/bcm963xx-flash.c	1
-rw-r--r--	drivers/platform/x86/Kconfig	4
-rw-r--r--	drivers/scsi/Kconfig	16
-rw-r--r--	drivers/scsi/Makefile	2
-rw-r--r--	drivers/scsi/xen-scsiback/Makefile	4
-rw-r--r--	drivers/scsi/xen-scsiback/common.h	187
-rw-r--r--	drivers/scsi/xen-scsiback/emulate.c	470
-rw-r--r--	drivers/scsi/xen-scsiback/interface.c	141
-rw-r--r--	drivers/scsi/xen-scsiback/scsiback.c	757
-rw-r--r--	drivers/scsi/xen-scsiback/translate.c	168
-rw-r--r--	drivers/scsi/xen-scsiback/xenbus.c	374
-rw-r--r--	drivers/scsi/xen-scsifront/Makefile	4
-rw-r--r--	drivers/scsi/xen-scsifront/common.h	137
-rw-r--r--	drivers/scsi/xen-scsifront/scsifront.c	469
-rw-r--r--	drivers/scsi/xen-scsifront/xenbus.c	413
-rw-r--r--	drivers/tty/n_gsm.c	12
-rw-r--r--	drivers/xen/balloon.c	4
-rw-r--r--	drivers/xen/gntalloc.c	4
-rw-r--r--	drivers/xen/gntdev.c	10
-rw-r--r--	drivers/xen/swiotlb-xen.c	2
-rw-r--r--	drivers/xen/xenbus/xenbus_client.c	11
-rw-r--r--	include/drm/ttm/ttm_bo_api.h	5
-rw-r--r--	include/drm/ttm/ttm_bo_driver.h	178
-rw-r--r--	include/drm/ttm/ttm_memory.h	34
-rw-r--r--	include/drm/ttm/ttm_page_alloc.h	71
-rw-r--r--	include/linux/cpuidle.h	2
-rw-r--r--	include/linux/devfreq.h	2
-rw-r--r--	include/linux/hwspinlock.h	1
-rw-r--r--	include/linux/swiotlb.h	2
-rw-r--r--	include/linux/vmalloc.h	2
-rw-r--r--	include/xen/interface/grant_table.h	4
-rw-r--r--	include/xen/interface/io/blkif.h	40
-rw-r--r--	include/xen/interface/io/vscsiif.h	105
-rw-r--r--	include/xen/platform_pci.h	6
-rw-r--r--	kernel/power/qos.c	1
-rw-r--r--	lib/swiotlb.c	5
-rw-r--r--	mm/vmalloc.c	27
-rwxr-xr-x	tools/testing/ktest/ktest.pl	515
-rw-r--r--	tools/testing/ktest/sample.conf	146
93 files changed, 6623 insertions, 1688 deletions
diff --git a/Documentation/ABI/stable/sysfs-devices-system-xen_memory b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
new file mode 100644
index 00000000000000..caa311d59ac1d2
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
@@ -0,0 +1,77 @@
+What: /sys/devices/system/xen_memory/xen_memory0/max_retry_count
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The maximum number of times the balloon driver will
+ attempt to increase the balloon before giving up. See
+ also 'retry_count' below.
+ A value of zero (the default) means retry forever.
+
+What: /sys/devices/system/xen_memory/xen_memory0/max_schedule_delay
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The limit that 'schedule_delay' (see below) will be
+ increased to. The default value is 32 seconds.
+
+What: /sys/devices/system/xen_memory/xen_memory0/retry_count
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The current number of times that the balloon driver
+ has attempted to increase the size of the balloon.
+ The default value is one. With 'max_retry_count' being
+ zero (unlimited), the driver retries indefinitely, waiting
+ 'schedule_delay' seconds between attempts.
+
+What: /sys/devices/system/xen_memory/xen_memory0/schedule_delay
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The time (in seconds) to wait between attempts to
+ increase the balloon. Each time the balloon cannot be
+ increased, 'schedule_delay' is increased (until
+ 'max_schedule_delay' is reached, at which point the
+ maximum value is used).
+
+What: /sys/devices/system/xen_memory/xen_memory0/target
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The target number of pages to adjust this domain's
+ memory reservation to.
+
+What: /sys/devices/system/xen_memory/xen_memory0/target_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ As target above, except the value is in KiB.
+
+What: /sys/devices/system/xen_memory/xen_memory0/info/current_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Current size (in KiB) of this domain's memory
+ reservation.
+
+What: /sys/devices/system/xen_memory/xen_memory0/info/high_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Amount (in KiB) of high memory in the balloon.
+
+What: /sys/devices/system/xen_memory/xen_memory0/info/low_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Amount (in KiB) of low (or normal) memory in the
+ balloon.
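
The files documented above can be driven with plain file I/O. A minimal
userspace sketch in C, using only the paths documented above; the 64 MiB
shrink amount is an arbitrary illustration and this program is not part
of the patch:

#include <stdio.h>

#define XM "/sys/devices/system/xen_memory/xen_memory0"

int main(void)
{
	unsigned long current_kb, target_kb;
	FILE *f;

	/* read the current reservation, in KiB */
	f = fopen(XM "/info/current_kb", "r");
	if (!f || fscanf(f, "%lu", &current_kb) != 1) {
		perror("current_kb");
		return 1;
	}
	fclose(f);

	/* ask the balloon driver to shrink the reservation by 64 MiB;
	 * the driver retries per 'schedule_delay'/'retry_count' above */
	target_kb = current_kb - 64 * 1024;

	f = fopen(XM "/target_kb", "w");
	if (!f) {
		perror("target_kb");
		return 1;
	}
	fprintf(f, "%lu\n", target_kb);
	fclose(f);
	return 0;
}
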
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index c2791589397479..196b8b9dba1112 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -32,7 +32,7 @@
The Linux DRM layer contains code intended to support the needs
of complex graphics devices, usually containing programmable
pipelines well suited to 3D graphics acceleration. Graphics
- drivers in the kernel can make use of DRM functions to make
+ drivers in the kernel may make use of DRM functions to make
tasks like memory management, interrupt handling and DMA easier,
and provide a uniform interface to applications.
</para>
@@ -57,10 +57,10 @@
existing drivers.
</para>
<para>
- First, we'll go over some typical driver initialization
+ First, we go over some typical driver initialization
requirements, like setting up command buffers, creating an
initial output configuration, and initializing core services.
- Subsequent sections will cover core internals in more detail,
+ Subsequent sections cover core internals in more detail,
providing implementation notes and examples.
</para>
<para>
@@ -74,7 +74,7 @@
</para>
<para>
The core of every DRM driver is struct drm_driver. Drivers
- will typically statically initialize a drm_driver structure,
+ typically statically initialize a drm_driver structure,
then pass it to drm_init() at load time.
</para>
@@ -88,8 +88,8 @@
</para>
<programlisting>
static struct drm_driver driver = {
- /* don't use mtrr's here, the Xserver or user space app should
- * deal with them for intel hardware.
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
*/
.driver_features =
DRIVER_USE_AGP | DRIVER_REQUIRE_AGP |
@@ -154,8 +154,8 @@
</programlisting>
<para>
In the example above, taken from the i915 DRM driver, the driver
- sets several flags indicating what core features it supports.
- We'll go over the individual callbacks in later sections. Since
+ sets several flags indicating what core features it supports;
+ we go over the individual callbacks in later sections. Since
flags indicate which features your driver supports to the DRM
core, you need to set most of them prior to calling drm_init(). Some,
like DRIVER_MODESET can be set later based on user supplied parameters,
@@ -203,8 +203,8 @@
<term>DRIVER_HAVE_IRQ</term><term>DRIVER_IRQ_SHARED</term>
<listitem>
<para>
- DRIVER_HAVE_IRQ indicates whether the driver has a IRQ
- handler, DRIVER_IRQ_SHARED indicates whether the device &amp;
+ DRIVER_HAVE_IRQ indicates whether the driver has an IRQ
+ handler. DRIVER_IRQ_SHARED indicates whether the device &amp;
handler support shared IRQs (note that this is required of
PCI drivers).
</para>
@@ -214,8 +214,8 @@
<term>DRIVER_DMA_QUEUE</term>
<listitem>
<para>
- If the driver queues DMA requests and completes them
- asynchronously, this flag should be set. Deprecated.
+ Should be set if the driver queues DMA requests and completes them
+ asynchronously. Deprecated.
</para>
</listitem>
</varlistentry>
@@ -238,7 +238,7 @@
</variablelist>
<para>
In this specific case, the driver requires AGP and supports
- IRQs. DMA, as we'll see, is handled by device specific ioctls
+ IRQs. DMA, as discussed later, is handled by device-specific ioctls
in this case. It also supports the kernel mode setting APIs, though
unlike in the actual i915 driver source, this example unconditionally
exports KMS capability.
@@ -269,36 +269,34 @@
initial output configuration.
</para>
<para>
- Note that the tasks performed at driver load time must not
- conflict with DRM client requirements. For instance, if user
+ If compatibility is a concern (e.g. with drivers converted over
+ to the new interfaces from the old ones), care must be taken to
+ prevent device initialization and control that is incompatible with
+ currently active userspace drivers. For instance, if user
level mode setting drivers are in use, it would be problematic
to perform output discovery &amp; configuration at load time.
- Likewise, if pre-memory management aware user level drivers are
+ Likewise, if user-level drivers unaware of memory management are
in use, memory management and command buffer setup may need to
- be omitted. These requirements are driver specific, and care
+ be omitted. These requirements are driver-specific, and care
needs to be taken to keep both old and new applications and
libraries working. The i915 driver supports the "modeset"
module parameter to control whether advanced features are
- enabled at load time or in legacy fashion. If compatibility is
- a concern (e.g. with drivers converted over to the new interfaces
- from the old ones), care must be taken to prevent incompatible
- device initialization and control with the currently active
- userspace drivers.
+ enabled at load time or in legacy fashion.
</para>
<sect2>
<title>Driver private &amp; performance counters</title>
<para>
The driver private hangs off the main drm_device structure and
- can be used for tracking various device specific bits of
+ can be used for tracking various device-specific bits of
information, like register offsets, command buffer status,
register state for suspend/resume, etc. At load time, a
- driver can simply allocate one and set drm_device.dev_priv
- appropriately; at unload the driver can free it and set
- drm_device.dev_priv to NULL.
+ driver may simply allocate one and set drm_device.dev_priv
+ appropriately; it should be freed and drm_device.dev_priv set
+ to NULL when the driver is unloaded.
</para>
<para>
- The DRM supports several counters which can be used for rough
+ The DRM supports several counters which may be used for rough
performance characterization. Note that the DRM stat counter
system is not often used by applications, and supporting
additional counters is completely optional.
@@ -307,15 +305,15 @@
These interfaces are deprecated and should not be used. If performance
monitoring is desired, the developer should investigate and
potentially enhance the kernel perf and tracing infrastructure to export
- GPU related performance information to performance monitoring
- tools and applications.
+ GPU related performance information for consumption by performance
+ monitoring tools and applications.
</para>
</sect2>
<sect2>
<title>Configuring the device</title>
<para>
- Obviously, device configuration will be device specific.
+ Obviously, device configuration is device-specific.
However, there are several common operations: finding a
device's PCI resources, mapping them, and potentially setting
up an IRQ handler.
@@ -323,10 +321,10 @@
<para>
Finding &amp; mapping resources is fairly straightforward. The
DRM wrapper functions, drm_get_resource_start() and
- drm_get_resource_len() can be used to find BARs on the given
+ drm_get_resource_len(), may be used to find BARs on the given
drm_device struct. Once those values have been retrieved, the
driver load function can call drm_addmap() to create a new
- mapping for the BAR in question. Note you'll probably want a
+ mapping for the BAR in question. Note that you probably want a
drm_local_map_t in your driver private structure to track any
mappings you create.
<!-- !Fdrivers/gpu/drm/drm_bufs.c drm_get_resource_* -->
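
A hedged sketch of the BAR-mapping flow just described, using the wrapper
calls named in the text; the mydrv names and the driver-private layout are
illustrative assumptions, not part of this patch:

#include "drmP.h"

struct mydrv_private {
	drm_local_map_t *mmio;	/* tracks the mapping, as suggested above */
};

static int mydrv_map_registers(struct drm_device *dev,
			       struct mydrv_private *dev_priv)
{
	resource_size_t base = drm_get_resource_start(dev, 0);	/* BAR 0 */
	resource_size_t size = drm_get_resource_len(dev, 0);

	/* create a register mapping and record it in the driver private */
	return drm_addmap(dev, base, size, _DRM_REGISTERS,
			  _DRM_KERNEL | _DRM_DRIVER, &dev_priv->mmio);
}
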
@@ -335,20 +333,20 @@
<para>
if compatibility with other operating systems isn't a concern
(DRM drivers can run under various BSD variants and OpenSolaris),
- native Linux calls can be used for the above, e.g. pci_resource_*
+ native Linux calls may be used for the above, e.g. pci_resource_*
and iomap*/iounmap. See the Linux device driver book for more
info.
</para>
<para>
- Once you have a register map, you can use the DRM_READn() and
+ Once you have a register map, you may use the DRM_READn() and
DRM_WRITEn() macros to access the registers on your device, or
- use driver specific versions to offset into your MMIO space
- relative to a driver specific base pointer (see I915_READ for
- example).
+ use driver-specific versions to offset into your MMIO space
+ relative to a driver-specific base pointer (see I915_READ for
+ an example).
</para>
<para>
If your device supports interrupt generation, you may want to
- setup an interrupt handler at driver load time as well. This
+ set up an interrupt handler when the driver is loaded. This
is done using the drm_irq_install() function. If your device
supports vertical blank interrupts, it should call
drm_vblank_init() to initialize the core vblank handling code before
@@ -357,7 +355,7 @@
</para>
<!--!Fdrivers/char/drm/drm_irq.c drm_irq_install-->
<para>
- Once your interrupt handler is registered (it'll use your
+ Once your interrupt handler is registered (it uses your
drm_driver.irq_handler as the actual interrupt handling
function), you can safely enable interrupts on your device,
assuming any other state your interrupt handler uses is also
@@ -371,10 +369,10 @@
using the pci_map_rom() call, a convenience function that
takes care of mapping the actual ROM, whether it has been
shadowed into memory (typically at address 0xc0000) or exists
- on the PCI device in the ROM BAR. Note that once you've
- mapped the ROM and extracted any necessary information, be
- sure to unmap it; on many devices the ROM address decoder is
- shared with other BARs, so leaving it mapped can cause
+ on the PCI device in the ROM BAR. Note that after the ROM
+ has been mapped and any necessary information has been extracted,
+ it should be unmapped; on many devices, the ROM address decoder is
+ shared with other BARs, so leaving it mapped could cause
undesired behavior like hangs or memory corruption.
<!--!Fdrivers/pci/rom.c pci_map_rom-->
</para>
@@ -389,9 +387,9 @@
should support a memory manager.
</para>
<para>
- If your driver supports memory management (it should!), you'll
+ If your driver supports memory management (it should!), you
need to set that up at load time as well. How you initialize
- it depends on which memory manager you're using, TTM or GEM.
+ it depends on which memory manager you're using: TTM or GEM.
</para>
<sect3>
<title>TTM initialization</title>
@@ -401,7 +399,7 @@
and devices with dedicated video RAM (VRAM), i.e. most discrete
graphics devices. If your device has dedicated RAM, supporting
TTM is desirable. TTM also integrates tightly with your
- driver specific buffer execution function. See the radeon
+ driver-specific buffer execution function. See the radeon
driver for examples.
</para>
<para>
@@ -429,21 +427,21 @@
created by the memory manager at runtime. Your global TTM should
have a type of TTM_GLOBAL_TTM_MEM. The size field for the global
object should be sizeof(struct ttm_mem_global), and the init and
- release hooks should point at your driver specific init and
- release routines, which will probably eventually call
- ttm_mem_global_init and ttm_mem_global_release respectively.
+ release hooks should point at your driver-specific init and
+ release routines, which probably eventually call
+ ttm_mem_global_init and ttm_mem_global_release, respectively.
</para>
<para>
Once your global TTM accounting structure is set up and initialized
- (done by calling ttm_global_item_ref on the global object you
- just created), you'll need to create a buffer object TTM to
+ by calling ttm_global_item_ref() on it,
+ you need to create a buffer object TTM to
provide a pool for buffer object allocation by clients and the
kernel itself. The type of this object should be TTM_GLOBAL_TTM_BO,
and its size should be sizeof(struct ttm_bo_global). Again,
- driver specific init and release functions can be provided,
- likely eventually calling ttm_bo_global_init and
- ttm_bo_global_release, respectively. Also like the previous
- object, ttm_global_item_ref is used to create an initial reference
+ driver-specific init and release functions may be provided,
+ likely eventually calling ttm_bo_global_init() and
+ ttm_bo_global_release(), respectively. Also, like the previous
+ object, ttm_global_item_ref() is used to create an initial reference
count for the TTM, which will call your initialization function.
</para>
</sect3>
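
A hedged sketch of the init/release hook wiring described above, in the
style of drivers such as radeon; the mydrv names are illustrative, and the
exact type and function names vary by kernel version (later kernels use
the drm_global_* equivalents of the ttm_global_* calls named in the text):

struct mydrv_device {
	struct ttm_global_reference mem_global_ref;
};

static int mydrv_ttm_mem_global_init(struct ttm_global_reference *ref)
{
	return ttm_mem_global_init(ref->object);
}

static void mydrv_ttm_mem_global_release(struct ttm_global_reference *ref)
{
	ttm_mem_global_release(ref->object);
}

static int mydrv_ttm_global_init(struct mydrv_device *mdev)
{
	struct ttm_global_reference *ref = &mdev->mem_global_ref;

	ref->global_type = TTM_GLOBAL_TTM_MEM;
	ref->size = sizeof(struct ttm_mem_global);
	ref->init = &mydrv_ttm_mem_global_init;
	ref->release = &mydrv_ttm_mem_global_release;
	/* takes the initial reference and invokes our init hook */
	return ttm_global_item_ref(ref);
}
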
@@ -453,27 +451,26 @@
GEM is an alternative to TTM, designed specifically for UMA
devices. It has simpler initialization and execution requirements
than TTM, but has no VRAM management capability. Core GEM
- initialization is comprised of a basic drm_mm_init call to create
+ is initialized by calling drm_mm_init() to create
a GTT DRM MM object, which provides an address space pool for
- object allocation. In a KMS configuration, the driver will
- need to allocate and initialize a command ring buffer following
- basic GEM initialization. Most UMA devices have a so-called
+ object allocation. In a KMS configuration, the driver
+ needs to allocate and initialize a command ring buffer following
+ core GEM initialization. A UMA device usually has what is called a
"stolen" memory region, which provides space for the initial
framebuffer and large, contiguous memory regions required by the
- device. This space is not typically managed by GEM, and must
+ device. This space is not typically managed by GEM, and it must
be initialized separately into its own DRM MM object.
</para>
<para>
- Initialization will be driver specific, and will depend on
- the architecture of the device. In the case of Intel
+ Initialization is driver-specific. In the case of Intel
integrated graphics chips like 965GM, GEM initialization can
be done by calling the internal GEM init function,
i915_gem_do_init(). Since the 965GM is a UMA device
- (i.e. it doesn't have dedicated VRAM), GEM will manage
+ (i.e. it doesn't have dedicated VRAM), GEM manages
making regular RAM available for GPU operations. Memory set
aside by the BIOS (called "stolen" memory by the i915
- driver) will be managed by the DRM memrange allocator; the
- rest of the aperture will be managed by GEM.
+ driver) is managed by the DRM memrange allocator; the
+ rest of the aperture is managed by GEM.
<programlisting>
/* Basic memrange allocator for stolen space (aka vram) */
drm_memrange_init(&amp;dev_priv->vram, 0, prealloc_size);
@@ -483,7 +480,7 @@
<!--!Edrivers/char/drm/drm_memrange.c-->
</para>
<para>
- Once the memory manager has been set up, we can allocate the
+ Once the memory manager has been set up, we may allocate the
command buffer. In the i915 case, this is also done with a
GEM function, i915_gem_init_ringbuffer().
</para>
@@ -493,16 +490,25 @@
<sect2>
<title>Output configuration</title>
<para>
- The final initialization task is output configuration. This involves
- finding and initializing the CRTCs, encoders and connectors
- for your device, creating an initial configuration and
- registering a framebuffer console driver.
+ The final initialization task is output configuration. This involves:
+ <itemizedlist>
+ <listitem>
+ Finding and initializing the CRTCs, encoders, and connectors
+ for the device.
+ </listitem>
+ <listitem>
+ Creating an initial configuration.
+ </listitem>
+ <listitem>
+ Registering a framebuffer console driver.
+ </listitem>
+ </itemizedlist>
</para>
<sect3>
<title>Output discovery and initialization</title>
<para>
- Several core functions exist to create CRTCs, encoders and
- connectors, namely drm_crtc_init(), drm_connector_init() and
+ Several core functions exist to create CRTCs, encoders, and
+ connectors, namely: drm_crtc_init(), drm_connector_init(), and
drm_encoder_init(), along with several "helper" functions to
perform common tasks.
</para>
@@ -555,10 +561,10 @@ void intel_crt_init(struct drm_device *dev)
</programlisting>
<para>
In the example above (again, taken from the i915 driver), a
- CRT connector and encoder combination is created. A device
- specific i2c bus is also created, for fetching EDID data and
+ CRT connector and encoder combination is created. A device-specific
+ i2c bus is also created for fetching EDID data and
performing monitor detection. Once the process is complete,
- the new connector is registered with sysfs, to make its
+ the new connector is registered with sysfs to make its
properties available to applications.
</para>
<sect4>
@@ -567,12 +573,12 @@ void intel_crt_init(struct drm_device *dev)
Since many PC-class graphics devices have similar display output
designs, the DRM provides a set of helper functions to make
output management easier. The core helper routines handle
- encoder re-routing and disabling of unused functions following
- mode set. Using the helpers is optional, but recommended for
+ encoder re-routing and the disabling of unused functions following
+ mode setting. Using the helpers is optional, but recommended for
devices with PC-style architectures (i.e. a set of display planes
for feeding pixels to encoders which are in turn routed to
connectors). Devices with more complex requirements needing
- finer grained management can opt to use the core callbacks
+ finer grained management may opt to use the core callbacks
directly.
</para>
<para>
@@ -580,17 +586,25 @@ void intel_crt_init(struct drm_device *dev)
</para>
</sect4>
<para>
- For each encoder, CRTC and connector, several functions must
- be provided, depending on the object type. Encoder objects
- need to provide a DPMS (basically on/off) function, mode fixup
- (for converting requested modes into native hardware timings),
- and prepare, set and commit functions for use by the core DRM
- helper functions. Connector helpers need to provide mode fetch and
- validity functions as well as an encoder matching function for
- returning an ideal encoder for a given connector. The core
- connector functions include a DPMS callback, (deprecated)
- save/restore routines, detection, mode probing, property handling,
- and cleanup functions.
+ Each encoder object needs to provide:
+ <itemizedlist>
+ <listitem>
+ A DPMS (basically on/off) function.
+ </listitem>
+ <listitem>
+ A mode-fixup function (for converting requested modes into
+ native hardware timings).
+ </listitem>
+ <listitem>
+ Functions (prepare, set, and commit) for use by the core DRM
+ helper functions.
+ </listitem>
+ </itemizedlist>
+ Connector helpers need to provide functions (mode-fetch, validity,
+ and encoder-matching) for returning an ideal encoder for a given
+ connector. The core connector functions include a DPMS callback,
+ save/restore routines (deprecated), detection, mode probing,
+ property handling, and cleanup functions.
</para>
<!--!Edrivers/char/drm/drm_crtc.h-->
<!--!Edrivers/char/drm/drm_crtc.c-->
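
A hedged sketch of the encoder callbacks itemized above, wired into the
DRM helper layer; the mydrv stubs are illustrative (a real driver programs
hardware in each) and are not part of this patch:

#include "drmP.h"
#include "drm_crtc_helper.h"

static void mydrv_encoder_dpms(struct drm_encoder *encoder, int mode)
{
	/* switch the encoder on or off per the requested DPMS mode */
}

static bool mydrv_encoder_mode_fixup(struct drm_encoder *encoder,
				     struct drm_display_mode *mode,
				     struct drm_display_mode *adjusted_mode)
{
	return true;	/* accept the requested timings unchanged */
}

static void mydrv_encoder_prepare(struct drm_encoder *encoder) { }
static void mydrv_encoder_commit(struct drm_encoder *encoder) { }

static void mydrv_encoder_mode_set(struct drm_encoder *encoder,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *adjusted_mode)
{
	/* program the native hardware timings computed in mode_fixup */
}

static const struct drm_encoder_helper_funcs mydrv_encoder_helper_funcs = {
	.dpms		= mydrv_encoder_dpms,
	.mode_fixup	= mydrv_encoder_mode_fixup,
	.prepare	= mydrv_encoder_prepare,
	.mode_set	= mydrv_encoder_mode_set,
	.commit		= mydrv_encoder_commit,
};

The structure is attached with drm_encoder_helper_add(), as in the
intel_crt_init() example earlier.
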
@@ -605,23 +619,34 @@ void intel_crt_init(struct drm_device *dev)
<title>VBlank event handling</title>
<para>
The DRM core exposes two vertical blank related ioctls:
- DRM_IOCTL_WAIT_VBLANK and DRM_IOCTL_MODESET_CTL.
+ <variablelist>
+ <varlistentry>
+ <term>DRM_IOCTL_WAIT_VBLANK</term>
+ <listitem>
+ <para>
+ This takes a struct drm_wait_vblank structure as its argument,
+ and it is used to block or request a signal when a specified
+ vblank event occurs.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_IOCTL_MODESET_CTL</term>
+ <listitem>
+ <para>
+ This should be called by application level drivers before and
+ after mode setting, since on many devices the vertical blank
+ counter is reset at that time. Internally, the DRM snapshots
+ the last vblank count when the ioctl is called with the
+ _DRM_PRE_MODESET command, so that the counter won't go backwards
+ (which is dealt with when _DRM_POST_MODESET is used).
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
<!--!Edrivers/char/drm/drm_irq.c-->
</para>
<para>
- DRM_IOCTL_WAIT_VBLANK takes a struct drm_wait_vblank structure
- as its argument, and is used to block or request a signal when a
- specified vblank event occurs.
- </para>
- <para>
- DRM_IOCTL_MODESET_CTL should be called by application level
- drivers before and after mode setting, since on many devices the
- vertical blank counter will be reset at that time. Internally,
- the DRM snapshots the last vblank count when the ioctl is called
- with the _DRM_PRE_MODESET command so that the counter won't go
- backwards (which is dealt with when _DRM_POST_MODESET is used).
- </para>
- <para>
To support the functions above, the DRM core provides several
helper functions for tracking vertical blank counters, and
requires drivers to provide several callbacks:
@@ -632,24 +657,24 @@ void intel_crt_init(struct drm_device *dev)
register. The enable and disable vblank callbacks should enable
and disable vertical blank interrupts, respectively. In the
absence of DRM clients waiting on vblank events, the core DRM
- code will use the disable_vblank() function to disable
- interrupts, which saves power. They'll be re-enabled again when
+ code uses the disable_vblank() function to disable
+ interrupts, which saves power. They are re-enabled again when
a client calls the vblank wait ioctl above.
</para>
<para>
- Devices that don't provide a count register can simply use an
+ A device that doesn't provide a count register may simply use an
internal atomic counter incremented on every vertical blank
- interrupt, and can make their enable and disable vblank
- functions into no-ops.
+ interrupt (and then treat the enable_vblank() and disable_vblank()
+ callbacks as no-ops).
</para>
</sect1>
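
A hedged sketch of the counter-less pattern just described; the mydrv
names and two-CRTC assumption are illustrative, while drm_handle_vblank()
is the core helper a driver calls from its interrupt path:

#include "drmP.h"

#define MYDRV_NUM_CRTCS 2	/* illustrative */

static atomic_t mydrv_vblank_count[MYDRV_NUM_CRTCS];

static u32 mydrv_get_vblank_counter(struct drm_device *dev, int crtc)
{
	return atomic_read(&mydrv_vblank_count[crtc]);
}

static int mydrv_enable_vblank(struct drm_device *dev, int crtc)
{
	return 0;	/* vblank interrupt stays enabled; nothing to do */
}

static void mydrv_disable_vblank(struct drm_device *dev, int crtc)
{
	/* no-op, for the same reason */
}

/* called from the driver's interrupt handler on each vertical blank */
static void mydrv_vblank_irq(struct drm_device *dev, int crtc)
{
	atomic_inc(&mydrv_vblank_count[crtc]);
	drm_handle_vblank(dev, crtc);
}
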
<sect1>
<title>Memory management</title>
<para>
- The memory manager lies at the heart of many DRM operations, and
- is also required to support advanced client features like OpenGL
- pbuffers. The DRM currently contains two memory managers, TTM
+ The memory manager lies at the heart of many DRM operations; it
+ is required to support advanced client features like OpenGL
+ pbuffers. The DRM currently contains two memory managers: TTM
and GEM.
</para>
@@ -679,41 +704,46 @@ void intel_crt_init(struct drm_device *dev)
<para>
GEM-enabled drivers must provide gem_init_object() and
gem_free_object() callbacks to support the core memory
- allocation routines. They should also provide several driver
- specific ioctls to support command execution, pinning, buffer
+ allocation routines. They should also provide several driver-specific
+ ioctls to support command execution, pinning, buffer
read &amp; write, mapping, and domain ownership transfers.
</para>
<para>
- On a fundamental level, GEM involves several operations: memory
- allocation and freeing, command execution, and aperture management
- at command execution time. Buffer object allocation is relatively
+ On a fundamental level, GEM involves several operations:
+ <itemizedlist>
+ <listitem>Memory allocation and freeing</listitem>
+ <listitem>Command execution</listitem>
+ <listitem>Aperture management at command execution time</listitem>
+ </itemizedlist>
+ Buffer object allocation is relatively
straightforward and largely provided by Linux's shmem layer, which
provides memory to back each object. When mapped into the GTT
or used in a command buffer, the backing pages for an object are
flushed to memory and marked write combined so as to be coherent
- with the GPU. Likewise, when the GPU finishes rendering to an object,
- if the CPU accesses it, it must be made coherent with the CPU's view
+ with the GPU. Likewise, if the CPU accesses an object after the GPU
+ has finished rendering to the object, then the object must be made
+ coherent with the CPU's view
of memory, usually involving GPU cache flushing of various kinds.
- This core CPU&lt;-&gt;GPU coherency management is provided by the GEM
- set domain function, which evaluates an object's current domain and
+ This core CPU&lt;-&gt;GPU coherency management is provided by a
+ device-specific ioctl, which evaluates an object's current domain and
performs any necessary flushing or synchronization to put the object
into the desired coherency domain (note that the object may be busy,
- i.e. an active render target; in that case the set domain function
- will block the client and wait for rendering to complete before
+ i.e. an active render target; in that case, setting the domain
+ blocks the client and waits for rendering to complete before
performing any necessary flushing operations).
</para>
<para>
Perhaps the most important GEM function is providing a command
execution interface to clients. Client programs construct command
- buffers containing references to previously allocated memory objects
- and submit them to GEM. At that point, GEM will take care to bind
+ buffers containing references to previously allocated memory objects,
+ and then submit them to GEM. At that point, GEM takes care to bind
all the objects into the GTT, execute the buffer, and provide
necessary synchronization between clients accessing the same buffers.
This often involves evicting some objects from the GTT and re-binding
others (a fairly expensive operation), and providing relocation
support which hides fixed GTT offsets from clients. Clients must
take care not to submit command buffers that reference more objects
- than can fit in the GTT or GEM will reject them and no rendering
+ than can fit in the GTT; otherwise, GEM will reject them and no rendering
will occur. Similarly, if several objects in the buffer require
fence registers to be allocated for correct rendering (e.g. 2D blits
on pre-965 chips), care must be taken not to require more fence
@@ -729,7 +759,7 @@ void intel_crt_init(struct drm_device *dev)
<title>Output management</title>
<para>
At the core of the DRM output management code is a set of
- structures representing CRTCs, encoders and connectors.
+ structures representing CRTCs, encoders, and connectors.
</para>
<para>
A CRTC is an abstraction representing a part of the chip that
@@ -765,21 +795,19 @@ void intel_crt_init(struct drm_device *dev)
<sect1>
<title>Framebuffer management</title>
<para>
- In order to set a mode on a given CRTC, encoder and connector
- configuration, clients need to provide a framebuffer object which
- will provide a source of pixels for the CRTC to deliver to the encoder(s)
- and ultimately the connector(s) in the configuration. A framebuffer
- is fundamentally a driver specific memory object, made into an opaque
- handle by the DRM addfb function. Once an fb has been created this
- way it can be passed to the KMS mode setting routines for use in
- a configuration.
+ Clients need to provide a framebuffer object which provides a source
+ of pixels for a CRTC to deliver to the encoder(s) and ultimately the
+ connector(s). A framebuffer is fundamentally a driver-specific memory
+ object, made into an opaque handle by the DRM's addfb() function.
+ Once a framebuffer has been created this way, it may be passed to the
+ KMS mode setting routines for use in a completed configuration.
</para>
</sect1>
<sect1>
<title>Command submission &amp; fencing</title>
<para>
- This should cover a few device specific command submission
+ This should cover a few device-specific command submission
implementations.
</para>
</sect1>
@@ -789,7 +817,7 @@ void intel_crt_init(struct drm_device *dev)
<para>
The DRM core provides some suspend/resume code, but drivers
wanting full suspend/resume support should provide save() and
- restore() functions. These will be called at suspend,
+ restore() functions. These are called at suspend,
hibernate, or resume time, and should perform any state save or
restore required by your device across suspend or hibernate
states.
@@ -812,8 +840,8 @@ void intel_crt_init(struct drm_device *dev)
<para>
The DRM core exports several interfaces to applications,
generally intended to be used through corresponding libdrm
- wrapper functions. In addition, drivers export device specific
- interfaces for use by userspace drivers &amp; device aware
+ wrapper functions. In addition, drivers export device-specific
+ interfaces for use by userspace drivers &amp; device-aware
applications through ioctls and sysfs files.
</para>
<para>
@@ -822,8 +850,8 @@ void intel_crt_init(struct drm_device *dev)
management, memory management, and output management.
</para>
<para>
- Cover generic ioctls and sysfs layout here. Only need high
- level info, since man pages will cover the rest.
+ Cover generic ioctls and sysfs layout here. We only need high-level
+ info, since man pages should cover the rest.
</para>
</chapter>
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
index c21d77742a0799..7e62de1e59ff03 100644
--- a/Documentation/cgroups/freezer-subsystem.txt
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -33,9 +33,9 @@ demonstrate this problem using nested bash shells:
From a second, unrelated bash shell:
$ kill -SIGSTOP 16690
- $ kill -SIGCONT 16990
+ $ kill -SIGCONT 16690
- <at this point 16990 exits and causes 16644 to exit too>
+ <at this point 16690 exits and causes 16644 to exit too>
This happens because bash can observe both signals and choose how it
responds to them.
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 6260d5deeabca0..c7cb0af0eb59cb 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -406,8 +406,10 @@
#define __NR_syncfs 335
#define __NR_sendmmsg 336
#define __NR_setns 337
+#define __NR_process_vm_readv 338
+#define __NR_process_vm_writev 339
-#define NR_syscalls 338
+#define NR_syscalls 340
#ifdef __32bit_syscall_numbers__
/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 09d8ec454450bc..63402f9e9f51f7 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns
+/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index edbec45d46884c..db86b1a0e9a9ff 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
- .word sys_syncfs, compat_sys_sendmmsg, sys_setns
+ .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
#endif /* CONFIG_COMPAT */
@@ -162,4 +162,4 @@ sys_call_table:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
- .word sys_syncfs, sys_sendmmsg, sys_setns
+ .word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 24215072d0e1e5..22677d67dffbe0 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -61,4 +61,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
}
#endif
+#ifdef CONFIG_MICROCODE_XEN
+extern struct microcode_ops * __init init_xen_microcode(void);
+#else
+static inline struct microcode_ops * __init init_xen_microcode(void)
+{
+ return NULL;
+}
+#endif
+
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18601c86fab187..34027b0b76207d 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -349,6 +349,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
return __pgprot(preservebits | addbits);
}
+static inline pgprot_t pte_attrs(pte_t pte)
+{
+ return __pgprot(pte_val(pte) & PTE_FLAGS_MASK);
+}
+
#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8baca3c4871c7f..fab5ac407649a3 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
+microcode-$(CONFIG_MICROCODE_XEN) += microcode_xen.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index f2d2a664e7975a..ef2c583173146a 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -84,6 +84,7 @@
#include <linux/mm.h>
#include <linux/syscore_ops.h>
+#include <xen/xen.h>
#include <asm/microcode.h>
#include <asm/processor.h>
@@ -507,7 +508,9 @@ static int __init microcode_init(void)
struct cpuinfo_x86 *c = &cpu_data(0);
int error;
- if (c->x86_vendor == X86_VENDOR_INTEL)
+ if (xen_pv_domain())
+ microcode_ops = init_xen_microcode();
+ else if (c->x86_vendor == X86_VENDOR_INTEL)
microcode_ops = init_intel_microcode();
else if (c->x86_vendor == X86_VENDOR_AMD)
microcode_ops = init_amd_microcode();
diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c
new file mode 100644
index 00000000000000..6a73957dd18758
--- /dev/null
+++ b/arch/x86/kernel/microcode_xen.c
@@ -0,0 +1,198 @@
+/*
+ * Xen microcode update driver
+ *
+ * Xen does most of the work here. We just pass the whole blob into
+ * Xen, and it will apply it to all CPUs as appropriate. Xen will
+ * worry about how different CPU models are actually updated.
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+
+#include <asm/microcode.h>
+
+#include <xen/xen.h>
+#include <xen/interface/platform.h>
+#include <xen/interface/xen.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+MODULE_DESCRIPTION("Xen microcode update driver");
+MODULE_LICENSE("GPL");
+
+struct xen_microcode {
+ size_t len;
+ char data[0];
+};
+
+static int xen_microcode_update(int cpu)
+{
+ int err;
+ struct xen_platform_op op;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct xen_microcode *uc = uci->mc;
+
+ if (uc == NULL || uc->len == 0) {
+ /*
+ * We do all cpus at once, so we don't need to do
+ * other cpus explicitly (besides, these vcpu numbers
+ * have no relationship to underlying physical cpus).
+ */
+ return 0;
+ }
+
+ op.cmd = XENPF_microcode_update;
+ set_xen_guest_handle(op.u.microcode.data, uc->data);
+ op.u.microcode.length = uc->len;
+
+ err = HYPERVISOR_dom0_op(&op);
+
+ if (err != 0)
+ printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err);
+
+ return err;
+}
+
+static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device)
+{
+ char name[30];
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ const struct firmware *firmware;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ enum ucode_state ret;
+ struct xen_microcode *uc;
+ size_t size;
+ int err;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
+ c->x86, c->x86_model, c->x86_mask);
+ break;
+
+ case X86_VENDOR_AMD:
+ snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin");
+ break;
+
+ default:
+ return UCODE_NFOUND;
+ }
+
+ err = request_firmware(&firmware, name, device);
+ if (err) {
+ pr_debug("microcode: data file %s load failed\n", name);
+ return UCODE_NFOUND;
+ }
+
+ /*
+ * Only bother getting real firmware for cpu 0; the others get
+ * dummy placeholders.
+ */
+ if (cpu == 0)
+ size = firmware->size;
+ else
+ size = 0;
+
+ if (uci->mc != NULL) {
+ vfree(uci->mc);
+ uci->mc = NULL;
+ }
+
+ ret = UCODE_ERROR;
+ uc = vmalloc(sizeof(*uc) + size);
+ if (uc == NULL)
+ goto out;
+
+ ret = UCODE_OK;
+ uc->len = size;
+ memcpy(uc->data, firmware->data, uc->len);
+
+ uci->mc = uc;
+
+out:
+ release_firmware(firmware);
+
+ return ret;
+}
+
+static enum ucode_state xen_request_microcode_user(int cpu,
+ const void __user *buf, size_t size)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct xen_microcode *uc;
+ enum ucode_state ret;
+ size_t unread;
+
+ if (cpu != 0) {
+ /* No real firmware for non-zero cpus; just store a
+ placeholder */
+ size = 0;
+ }
+
+ if (uci->mc != NULL) {
+ vfree(uci->mc);
+ uci->mc = NULL;
+ }
+
+ ret = UCODE_ERROR;
+ uc = vmalloc(sizeof(*uc) + size);
+ if (uc == NULL)
+ goto out;
+
+ uc->len = size;
+
+ ret = UCODE_NFOUND;
+
+ unread = copy_from_user(uc->data, buf, size);
+
+ if (unread != 0) {
+ printk(KERN_WARNING "failed to read %zd of %zd bytes at %p -> %p\n",
+ unread, size, buf, uc->data);
+ goto out;
+ }
+
+ ret = UCODE_OK;
+
+out:
+ if (ret == UCODE_OK)
+ uci->mc = uc;
+ else
+ vfree(uc);
+
+ return ret;
+}
+
+static void xen_microcode_fini_cpu(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ vfree(uci->mc);
+ uci->mc = NULL;
+}
+
+static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig)
+{
+ sig->sig = 0;
+ sig->pf = 0;
+ sig->rev = 0;
+
+ return 0;
+}
+
+static struct microcode_ops microcode_xen_ops = {
+ .request_microcode_user = xen_request_microcode_user,
+ .request_microcode_fw = xen_request_microcode_fw,
+ .collect_cpu_info = xen_collect_cpu_info,
+ .apply_microcode = xen_microcode_update,
+ .microcode_fini_cpu = xen_microcode_fini_cpu,
+};
+
+struct microcode_ops * __init init_xen_microcode(void)
+{
+ if (!xen_initial_domain())
+ return NULL;
+ return &microcode_xen_ops;
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b9b3b1a5164393..7430e7783cd113 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -14,6 +14,7 @@
#include <linux/utsname.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
+#include <linux/cpuidle.h>
#include <asm/cpu.h>
#include <asm/system.h>
#include <asm/apic.h>
@@ -587,6 +588,10 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
if (pm_idle)
return;
+ if (cpuidle_disabled()) {
+ pm_idle = default_idle;
+ return;
+ }
if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
/*
* One CPU supports mwait => All CPUs supports mwait
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index b0086567271c99..da853e3309ffad 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -38,6 +38,10 @@ static int pte_testbit(pte_t pte)
{
return pte_flags(pte) & _PAGE_UNUSED1;
}
+static int pte_testhuge(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PSE;
+}
struct split_state {
long lpg, gpg, spg, exec;
@@ -180,7 +184,7 @@ static int pageattr_test(void)
}
pte = lookup_address(addr[i], &level);
- if (!pte || !pte_testbit(*pte) || pte_huge(*pte)) {
+ if (!pte || !pte_testbit(*pte) || pte_testhuge(*pte)) {
printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i],
pte ? (u64)pte_val(*pte) : 0ULL);
failed++;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f9e526742fa181..efa80409deb0ef 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -651,7 +651,7 @@ repeat:
if (level == PG_LEVEL_4K) {
pte_t new_pte;
- pgprot_t new_prot = pte_pgprot(old_pte);
+ pgprot_t new_prot = pte_attrs(old_pte);
unsigned long pfn = pte_pfn(old_pte);
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 26c731a106afd1..eefea2700a843c 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,8 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+
+
+config MICROCODE_XEN
+ def_bool y
+ depends on XEN_DOM0 && MICROCODE
\ No newline at end of file
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index da8afd576a6b7d..bd1a58483b3fc1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/cpuidle.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
@@ -1356,7 +1357,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
int cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ xen_vcpu_setup(cpu);
if (xen_have_vector_callback)
xen_init_lock_cpu(cpu);
break;
@@ -1386,10 +1387,13 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
- have_vcpu_info_placement = 0;
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
+ /* And use the HLT to trap */
+ disable_cpuidle();
+ boot_option_idle_override = IDLE_HALT;
+ pm_idle = default_idle;
}
static bool __init xen_hvm_platform(void)
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 6bbfd7ac5e814b..5a40d24ba3316b 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -71,7 +71,7 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
if (shared == NULL) {
struct vm_struct *area =
- alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+ alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
BUG_ON(area == NULL);
shared = area->addr;
*__shared = shared;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 38d0af4fefec19..9e88e6c7914115 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -410,6 +410,6 @@ void __init xen_arch_setup(void)
#endif
disable_cpuidle();
boot_option_idle_override = IDLE_HALT;
-
+ pm_idle = default_idle;
fiddle_vdso();
}
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 434a6c01167521..95706fa24c7355 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -669,7 +669,7 @@ struct srcu_notifier_head *opp_get_notifier(struct device *dev)
struct device_opp *dev_opp = find_device_opp(dev);
if (IS_ERR(dev_opp))
- return ERR_PTR(PTR_ERR(dev_opp)); /* matching type */
+ return ERR_CAST(dev_opp); /* matching type */
return &dev_opp->head;
}
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 15ec4db194d1bb..b058de7825f5f9 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -362,7 +362,7 @@ static int xen_blkbk_map(struct blkif_request *req,
{
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int i;
- int nseg = req->nr_segments;
+ int nseg = req->u.rw.nr_segments;
int ret = 0;
/*
@@ -416,19 +416,26 @@ static int xen_blkbk_map(struct blkif_request *req,
return ret;
}
-static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+static int dispatch_discard_io(struct xen_blkif *blkif,
+ struct blkif_request *req)
{
int err = 0;
int status = BLKIF_RSP_OKAY;
struct block_device *bdev = blkif->vbd.bdev;
- if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+ blkif->st_ds_req++;
+
+ xen_blkif_get(blkif);
+ if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) {
+ unsigned long secure = (blkif->vbd.discard_secure &&
+ (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
+ BLKDEV_DISCARD_SECURE : 0;
/* just forward the discard request */
err = blkdev_issue_discard(bdev,
req->u.discard.sector_number,
req->u.discard.nr_sectors,
- GFP_KERNEL, 0);
- else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+ GFP_KERNEL, secure);
+ } else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
/* punch a hole in the backing file */
struct loop_device *lo = bdev->bd_disk->private_data;
struct file *file = lo->lo_backing_file;
@@ -449,7 +456,9 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
} else if (err)
status = BLKIF_RSP_ERROR;
- make_response(blkif, req->id, req->operation, status);
+ make_response(blkif, req->u.discard.id, req->operation, status);
+ xen_blkif_put(blkif);
+ return err;
}
static void xen_blk_drain_io(struct xen_blkif *blkif)
@@ -573,8 +582,11 @@ __do_block_io_op(struct xen_blkif *blkif)
/* Apply all sanity checks to /private copy/ of request. */
barrier();
-
- if (dispatch_rw_block_io(blkif, &req, pending_req))
+ if (unlikely(req.operation == BLKIF_OP_DISCARD)) {
+ free_req(pending_req);
+ if (dispatch_discard_io(blkif, &req))
+ break;
+ } else if (dispatch_rw_block_io(blkif, &req, pending_req))
break;
/* Yield point for this unbounded loop. */
@@ -633,10 +645,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
blkif->st_f_req++;
operation = WRITE_FLUSH;
break;
- case BLKIF_OP_DISCARD:
- blkif->st_ds_req++;
- operation = REQ_DISCARD;
- break;
default:
operation = 0; /* make gcc happy */
goto fail_response;
@@ -644,9 +652,9 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
}
/* Check that the number of segments is sane. */
- nseg = req->nr_segments;
- if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
- operation != REQ_DISCARD) ||
+ nseg = req->u.rw.nr_segments;
+
+ if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
nseg);
@@ -654,12 +662,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
goto fail_response;
}
- preq.dev = req->handle;
+ preq.dev = req->u.rw.handle;
preq.sector_number = req->u.rw.sector_number;
preq.nr_sects = 0;
pending_req->blkif = blkif;
- pending_req->id = req->id;
+ pending_req->id = req->u.rw.id;
pending_req->operation = req->operation;
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
@@ -707,7 +715,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
* the hypercall to unmap the grants - that is all done in
* xen_blkbk_unmap.
*/
- if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
+ if (xen_blkbk_map(req, pending_req, seg))
goto fail_flush;
/*
@@ -739,23 +747,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
/* This will be hit if the operation was a flush or discard. */
if (!bio) {
- BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
+ BUG_ON(operation != WRITE_FLUSH);
- if (operation == WRITE_FLUSH) {
- bio = bio_alloc(GFP_KERNEL, 0);
- if (unlikely(bio == NULL))
- goto fail_put_bio;
+ bio = bio_alloc(GFP_KERNEL, 0);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
- biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
- bio->bi_private = pending_req;
- bio->bi_end_io = end_block_io_op;
- } else if (operation == REQ_DISCARD) {
- xen_blk_discard(blkif, req);
- xen_blkif_put(blkif);
- free_req(pending_req);
- return 0;
- }
+ biolist[nbio++] = bio;
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
}
/*
@@ -784,7 +785,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
xen_blkbk_unmap(pending_req);
fail_response:
/* Haven't submitted any bio's yet. */
- make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
free_req(pending_req);
msleep(1); /* back off a bit */
return -EIO;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index dfb1b3a43a5ddc..d0ee7edc9be8ad 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -60,58 +60,66 @@ struct blkif_common_response {
char dummy;
};
-/* i386 protocol version */
-#pragma pack(push, 4)
-
struct blkif_x86_32_request_rw {
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+} __attribute__((__packed__));
struct blkif_x86_32_request_discard {
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
+ blkif_vdev_t _pad1; /* was "handle" for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
- uint64_t nr_sectors;
-};
+ uint64_t nr_sectors;
+} __attribute__((__packed__));
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
- uint8_t nr_segments; /* number of segments */
- blkif_vdev_t handle; /* only for read/write requests */
- uint64_t id; /* private guest value, echoed in resp */
union {
struct blkif_x86_32_request_rw rw;
struct blkif_x86_32_request_discard discard;
} u;
-};
+} __attribute__((__packed__));
+
+/* i386 protocol version */
+#pragma pack(push, 4)
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
#pragma pack(pop)
-
/* x86_64 protocol version */
struct blkif_x86_64_request_rw {
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint32_t _pad1; /* offsetof(blkif_request..,u.rw.id)==8 */
+ uint64_t id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+} __attribute__((__packed__));
struct blkif_x86_64_request_discard {
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
+ blkif_vdev_t _pad1; /* was "handle" for read/write requests */
+ uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */
+ uint64_t id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
- uint64_t nr_sectors;
-};
+ uint64_t nr_sectors;
+} __attribute__((__packed__));
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
- uint8_t nr_segments; /* number of segments */
- blkif_vdev_t handle; /* only for read/write requests */
- uint64_t __attribute__((__aligned__(8))) id;
union {
struct blkif_x86_64_request_rw rw;
struct blkif_x86_64_request_discard discard;
} u;
-};
+} __attribute__((__packed__));
+
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
uint8_t operation; /* copied from request */
@@ -156,6 +164,7 @@ struct xen_vbd {
/* Cached size parameter. */
sector_t size;
bool flush_support;
+ bool discard_secure;
};
struct backend_info;
@@ -237,22 +246,23 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
dst->operation = src->operation;
- dst->nr_segments = src->nr_segments;
- dst->handle = src->handle;
- dst->id = src->id;
switch (src->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.nr_segments = src->u.rw.nr_segments;
+ dst->u.rw.handle = src->u.rw.handle;
+ dst->u.rw.id = src->u.rw.id;
dst->u.rw.sector_number = src->u.rw.sector_number;
barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
+ if (n > dst->u.rw.nr_segments)
+ n = dst->u.rw.nr_segments;
for (i = 0; i < n; i++)
dst->u.rw.seg[i] = src->u.rw.seg[i];
break;
case BLKIF_OP_DISCARD:
+ dst->u.discard.flag = src->u.discard.flag;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
@@ -266,22 +276,23 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
dst->operation = src->operation;
- dst->nr_segments = src->nr_segments;
- dst->handle = src->handle;
- dst->id = src->id;
switch (src->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.nr_segments = src->u.rw.nr_segments;
+ dst->u.rw.handle = src->u.rw.handle;
+ dst->u.rw.id = src->u.rw.id;
dst->u.rw.sector_number = src->u.rw.sector_number;
barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
+ if (n > dst->u.rw.nr_segments)
+ n = dst->u.rw.nr_segments;
for (i = 0; i < n; i++)
dst->u.rw.seg[i] = src->u.rw.seg[i];
break;
case BLKIF_OP_DISCARD:
+ dst->u.discard.flag = src->u.discard.flag;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
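The _pad1/_pad2 fields above encode a single layout invariant: id lands at byte offset 8 in both 64-bit request variants (and at offset 4 in the packed 32-bit ones), so the backend can pick the id out of a ring slot without first decoding the operation. A minimal user-space sketch of the 64-bit case, with simplified stand-in types rather than the real blkif headers:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint16_t vdev_t;            /* stand-in for blkif_vdev_t */

    struct rw_part {
            uint8_t  nr_segments;
            vdev_t   handle;
            uint32_t _pad1;             /* pushes id to offset 8 */
            uint64_t id;
    } __attribute__((__packed__));

    struct discard_part {
            uint8_t  flag;
            vdev_t   _pad1;
            uint32_t _pad2;             /* same invariant for discards */
            uint64_t id;
    } __attribute__((__packed__));

    struct request {
            uint8_t operation;
            union {
                    struct rw_part rw;
                    struct discard_part discard;
            } u;
    } __attribute__((__packed__));

    int main(void)
    {
            assert(offsetof(struct request, u.rw.id) == 8);
            assert(offsetof(struct request, u.discard.id) == 8);
            printf("id offset is stable across request types\n");
            return 0;
    }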
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index f759ad4584c306..187fd2c1a15d07 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -338,6 +338,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
if (q && q->flush_flags)
vbd->flush_support = true;
+ if (q && blk_queue_secdiscard(q))
+ vbd->discard_secure = true;
+
DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
handle, blkif->domid);
return 0;
@@ -420,6 +423,15 @@ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
state = 1;
blkif->blk_backend_type = BLKIF_BACKEND_PHY;
}
+ /* Optional. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "discard-secure", "%d",
+ blkif->vbd.discard_secure);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "writting discard-secure");
+ goto kfree;
+ }
}
} else {
err = PTR_ERR(type);
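With the hunk above, the backend advertises secure-discard support through an optional "discard-secure" xenstore key written next to the existing discard properties; a frontend that does not know the key simply ignores it, and the matching read on the frontend side appears in the blkfront changes below.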
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 7b2ec5908413da..351ddeffd430bf 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,7 +98,8 @@ struct blkfront_info
unsigned long shadow_free;
unsigned int feature_flush;
unsigned int flush_op;
- unsigned int feature_discard;
+ unsigned int feature_discard:1;
+ unsigned int feature_secdiscard:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
int is_ready;
@@ -135,15 +136,15 @@ static int get_id_from_freelist(struct blkfront_info *info)
{
unsigned long free = info->shadow_free;
BUG_ON(free >= BLK_RING_SIZE);
- info->shadow_free = info->shadow[free].req.id;
- info->shadow[free].req.id = 0x0fffffee; /* debug */
+ info->shadow_free = info->shadow[free].req.u.rw.id;
+ info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
return free;
}
static void add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
- info->shadow[id].req.id = info->shadow_free;
+ info->shadow[id].req.u.rw.id = info->shadow_free;
info->shadow[id].request = NULL;
info->shadow_free = id;
}
@@ -287,9 +288,9 @@ static int blkif_queue_request(struct request *req)
id = get_id_from_freelist(info);
info->shadow[id].request = req;
- ring_req->id = id;
+ ring_req->u.rw.id = id;
ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
- ring_req->handle = info->handle;
+ ring_req->u.rw.handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
@@ -305,16 +306,21 @@ static int blkif_queue_request(struct request *req)
ring_req->operation = info->flush_op;
}
- if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+ if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
/* id, sector_number and handle are set above. */
ring_req->operation = BLKIF_OP_DISCARD;
- ring_req->nr_segments = 0;
ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+ if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
+ ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
+ else
+ ring_req->u.discard.flag = 0;
} else {
- ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
- BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
+ info->sg);
+ BUG_ON(ring_req->u.rw.nr_segments >
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
- for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+ for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
fsect = sg->offset >> 9;
lsect = fsect + (sg->length >> 9) - 1;
@@ -424,6 +430,8 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
blk_queue_max_discard_sectors(rq, get_capacity(gd));
rq->limits.discard_granularity = info->discard_granularity;
rq->limits.discard_alignment = info->discard_alignment;
+ if (info->feature_secdiscard)
+ queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -705,7 +713,9 @@ static void blkif_free(struct blkfront_info *info, int suspend)
static void blkif_completion(struct blk_shadow *s)
{
int i;
- for (i = 0; i < s->req.nr_segments; i++)
+ /* Do not use nr_segments for BLKIF_OP_DISCARD: it occupies the
* same storage in the request union as the discard flag. */
+ for (i = 0; i < s->req.u.rw.nr_segments; i++)
gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
}
@@ -736,7 +746,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
id = bret->id;
req = info->shadow[id].request;
- blkif_completion(&info->shadow[id]);
+ if (bret->operation != BLKIF_OP_DISCARD)
+ blkif_completion(&info->shadow[id]);
add_id_to_freelist(info, id);
@@ -749,7 +760,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
info->gd->disk_name);
error = -EOPNOTSUPP;
info->feature_discard = 0;
+ info->feature_secdiscard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+ queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
}
__blk_end_request_all(req, error);
break;
@@ -763,7 +776,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
- info->shadow[id].req.nr_segments == 0)) {
+ info->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
info->flush_op == BLKIF_OP_WRITE_BARRIER ?
"barrier" : "flush disk cache",
@@ -984,8 +997,8 @@ static int blkfront_probe(struct xenbus_device *dev,
INIT_WORK(&info->work, blkif_restart_queue);
for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.id = i+1;
- info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+ info->shadow[i].req.u.rw.id = i+1;
+ info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
@@ -1019,9 +1032,9 @@ static int blkif_recover(struct blkfront_info *info)
/* Stage 2: Set up free list. */
memset(&info->shadow, 0, sizeof(info->shadow));
for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.id = i+1;
+ info->shadow[i].req.u.rw.id = i+1;
info->shadow_free = info->ring.req_prod_pvt;
- info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+ info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
/* Stage 3: Find pending requests and requeue them. */
for (i = 0; i < BLK_RING_SIZE; i++) {
@@ -1034,17 +1047,19 @@ static int blkif_recover(struct blkfront_info *info)
*req = copy[i].req;
/* We get a new request id, and must reset the shadow state. */
- req->id = get_id_from_freelist(info);
- memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+ req->u.rw.id = get_id_from_freelist(info);
+ memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
+ if (req->operation != BLKIF_OP_DISCARD) {
/* Rewrite any grant references invalidated by susp/resume. */
- for (j = 0; j < req->nr_segments; j++)
- gnttab_grant_foreign_access_ref(
- req->u.rw.seg[j].gref,
- info->xbdev->otherend_id,
- pfn_to_mfn(info->shadow[req->id].frame[j]),
- rq_data_dir(info->shadow[req->id].request));
- info->shadow[req->id].req = *req;
+ for (j = 0; j < req->u.rw.nr_segments; j++)
+ gnttab_grant_foreign_access_ref(
+ req->u.rw.seg[j].gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
+ rq_data_dir(info->shadow[req->u.rw.id].request));
+ }
+ info->shadow[req->u.rw.id].req = *req;
info->ring.req_prod_pvt++;
}
@@ -1135,11 +1150,13 @@ static void blkfront_setup_discard(struct blkfront_info *info)
char *type;
unsigned int discard_granularity;
unsigned int discard_alignment;
+ unsigned int discard_secure;
type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
if (IS_ERR(type))
return;
+ info->feature_secdiscard = 0;
if (strncmp(type, "phy", 3) == 0) {
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"discard-granularity", "%u", &discard_granularity,
@@ -1150,6 +1167,12 @@ static void blkfront_setup_discard(struct blkfront_info *info)
info->discard_granularity = discard_granularity;
info->discard_alignment = discard_alignment;
}
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "discard-secure", "%d", &discard_secure,
+ NULL);
+ if (!err)
+ info->feature_secdiscard = discard_secure;
+
} else if (strncmp(type, "file", 4) == 0)
info->feature_discard = 1;
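The feature_secdiscard bit and the now-conditional blkif_completion() call both follow from the same layout fact: in the shadowed request, u.rw.nr_segments and u.discard.flag occupy the same storage, so reading a discard's first union byte as a segment count would actually fetch the secure-discard flag. A tiny stand-alone sketch of that aliasing, with stand-in types rather than the real ring structs:

    #include <stdint.h>
    #include <stdio.h>

    union op_body {
            struct { uint8_t nr_segments; } rw;
            struct { uint8_t flag; } discard;  /* e.g. BLKIF_DISCARD_SECURE */
    };

    int main(void)
    {
            union op_body u;

            u.discard.flag = 1;                /* mark a secure discard */
            /* Misreading the same byte as a segment count: */
            printf("bogus nr_segments = %u\n", u.rw.nr_segments);
            return 0;
    }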
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 1451790337160f..5f96303fbbe441 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -314,9 +314,14 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
&vma->vm_page_prot))
return -EINVAL;
- vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
- size,
- vma->vm_page_prot);
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+ vma->vm_page_prot = __pgprot(
+ pgprot_val(vm_get_page_prot(vma->vm_flags)) |
+ _PAGE_IOMAP |
+ pgprot_val(phys_mem_access_prot(file,
+ vma->vm_pgoff,
+ size,
+ vma->vm_page_prot)));
vma->vm_ops = &mmap_mem_ops;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index cc531bb59c26f7..e9c2cfe45daae0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -789,8 +789,8 @@ static struct vm_operations_struct i915_gem_vm_ops = {
};
static struct drm_driver driver = {
- /* don't use mtrr's here, the Xserver or user space app should
- * deal with them for intel hardware.
+ /* Don't use MTRRs here; the Xserver or userspace app should
+ * deal with them for Intel hardware.
*/
.driver_features =
DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | /* DRIVER_USE_MTRR |*/
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7226f419e178b6..0c9c5db3ef8c2a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -28,6 +28,7 @@
*/
#include "drmP.h"
+#include "ttm/ttm_page_alloc.h"
#include "nouveau_drm.h"
#include "nouveau_drv.h"
@@ -343,8 +344,10 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
*mem = val;
}
-static struct ttm_backend *
-nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device *bdev)
+static struct ttm_tt *
+nouveau_ttm_tt_create(struct ttm_bo_device *bdev,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
struct drm_device *dev = dev_priv->dev;
@@ -352,11 +355,13 @@ nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device *bdev)
switch (dev_priv->gart_info.type) {
#if __OS_HAS_AGP
case NOUVEAU_GART_AGP:
- return ttm_agp_backend_init(bdev, dev->agp->bridge);
+ return ttm_agp_tt_create(bdev, dev->agp->bridge,
+ size, page_flags, dummy_read_page);
#endif
case NOUVEAU_GART_PDMA:
case NOUVEAU_GART_HW:
- return nouveau_sgdma_init_ttm(dev);
+ return nouveau_sgdma_create_ttm(bdev, size, page_flags,
+ dummy_read_page);
default:
NV_ERROR(dev, "Unknown GART type %d\n",
dev_priv->gart_info.type);
@@ -1044,8 +1049,79 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
nouveau_fence_unref(&old_fence);
}
+static int
+nouveau_ttm_tt_populate(struct ttm_tt *ttm)
+{
+ struct drm_nouveau_private *dev_priv;
+ struct drm_device *dev;
+ unsigned i;
+ int r;
+
+ if (ttm->state != tt_unpopulated)
+ return 0;
+
+ dev_priv = nouveau_bdev(ttm->bdev);
+ dev = dev_priv->dev;
+
+#ifdef CONFIG_SWIOTLB
+ if (((dma_get_mask(dev->dev) <= DMA_BIT_MASK(32)) && swiotlb_nr_tbl()) || nouveau_ttm_dma) {
+ return ttm_dma_populate(ttm, dev->dev);
+ }
+#endif
+
+ r = ttm_page_alloc_ttm_tt_populate(ttm);
+ if (r) {
+ return r;
+ }
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ ttm->dma_address[i] = pci_map_page(dev->pdev, ttm->pages[i],
+ 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(dev->pdev, ttm->dma_address[i])) {
+ while (i--) {
+ pci_unmap_page(dev->pdev, ttm->dma_address[i],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ ttm->dma_address[i] = 0;
+ }
+ ttm_page_alloc_ttm_tt_unpopulate(ttm);
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+static void
+nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
+{
+ struct drm_nouveau_private *dev_priv;
+ struct drm_device *dev;
+ unsigned i;
+
+ dev_priv = nouveau_bdev(ttm->bdev);
+ dev = dev_priv->dev;
+
+#ifdef CONFIG_SWIOTLB
+ if (((dma_get_mask(dev->dev) <= DMA_BIT_MASK(32)) && swiotlb_nr_tbl()) || nouveau_ttm_dma) {
+ ttm_dma_unpopulate(ttm, dev->dev);
+ return;
+ }
+#endif
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->dma_address[i]) {
+ pci_unmap_page(dev->pdev, ttm->dma_address[i],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ }
+ }
+
+ ttm_page_alloc_ttm_tt_unpopulate(ttm);
+}
+
struct ttm_bo_driver nouveau_bo_driver = {
- .create_ttm_backend_entry = nouveau_bo_create_ttm_backend_entry,
+ .ttm_tt_create = &nouveau_ttm_tt_create,
+ .ttm_tt_populate = &nouveau_ttm_tt_populate,
+ .ttm_tt_unpopulate = &nouveau_ttm_tt_unpopulate,
.invalidate_caches = nouveau_bo_invalidate_caches,
.init_mem_type = nouveau_bo_init_mem_type,
.evict_flags = nouveau_bo_evict_flags,
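Both populate callbacks above select a page pool with the same predicate: take the coherent DMA pool when the device is restricted to 32-bit DMA while swiotlb is active, or when the user forces it via the new ttm_dma module parameter, and otherwise fall back to the regular pool plus explicit pci_map_page() calls. A compact sketch of that decision; swiotlb_active() and force_ttm_dma are illustrative stand-ins, not kernel APIs:

    #include <stdbool.h>
    #include <stdint.h>

    #define DMA_BIT_MASK_32 ((1ULL << 32) - 1)

    static int force_ttm_dma;              /* ~ the ttm_dma=1 parameter */

    static bool swiotlb_active(void)       /* ~ swiotlb_nr_tbl() != 0 */
    {
            return true;
    }

    /* True when the DMA-aware TTM pool should back this device. */
    static bool use_ttm_dma_pool(uint64_t dev_dma_mask)
    {
            return (dev_dma_mask <= DMA_BIT_MASK_32 && swiotlb_active())
                    || force_ttm_dma;
    }

    int main(void)
    {
            return use_ttm_dma_pool(DMA_BIT_MASK_32) ? 0 : 1;
    }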
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 8e1592368cce19..f52c2db3529efd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -178,6 +178,7 @@ static struct drm_info_list nouveau_debugfs_list[] = {
{ "memory", nouveau_debugfs_memory_info, 0, NULL },
{ "vbios.rom", nouveau_debugfs_vbios_image, 0, NULL },
{ "ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL },
+ { "ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL },
};
#define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index 9f7bb12952623b..6325908fe9902b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -37,6 +37,10 @@
#include "drm_pciids.h"
+MODULE_PARM_DESC(ttm_dma, "Enable TTM DMA mode irregardless if DMA32 is set");
+int nouveau_ttm_dma;
+module_param_named(ttm_dma, nouveau_ttm_dma, int, 0444);
+
MODULE_PARM_DESC(agpmode, "AGP mode (0 to disable AGP)");
int nouveau_agpmode = -1;
module_param_named(agpmode, nouveau_agpmode, int, 0400);
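As a usage note, loading the driver with "modprobe nouveau ttm_dma=1" forces the DMA pool even on devices that are not limited to 32-bit DMA; the 0444 permissions make the parameter read-only once the module is loaded.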
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 29837da1098b3a..cfdcd5de11106d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -829,6 +829,7 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
}
/* nouveau_drv.c */
+extern int nouveau_ttm_dma;
extern int nouveau_modeset;
extern int nouveau_agpmode;
extern int nouveau_duallink;
@@ -1000,7 +1001,10 @@ extern int nouveau_sgdma_init(struct drm_device *);
extern void nouveau_sgdma_takedown(struct drm_device *);
extern uint32_t nouveau_sgdma_get_physical(struct drm_device *,
uint32_t offset);
-extern struct ttm_backend *nouveau_sgdma_init_ttm(struct drm_device *);
+extern struct ttm_tt *nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev,
+ unsigned long size,
+ uint32_t page_flags,
+ struct page *dummy_read_page);
/* nouveau_debugfs.c */
#if defined(CONFIG_DRM_NOUVEAU_DEBUG)
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 36bec4807701b2..37fcaa260e9857 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -407,6 +407,12 @@ nouveau_mem_vram_init(struct drm_device *dev)
ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(dma_bits));
if (ret)
return ret;
+ ret = pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(dma_bits));
+ if (ret) {
+ /* Reset to default value. */
+ pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(32));
+ }
+
ret = nouveau_ttm_global_init(dev_priv);
if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index b75258a9fe44d5..ee1eb7cba798ff 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -8,88 +8,26 @@
#define NV_CTXDMA_PAGE_MASK (NV_CTXDMA_PAGE_SIZE - 1)
struct nouveau_sgdma_be {
- struct ttm_backend backend;
+ struct ttm_tt ttm;
struct drm_device *dev;
-
- dma_addr_t *pages;
- unsigned nr_pages;
- bool unmap_pages;
-
u64 offset;
- bool bound;
};
-static int
-nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
- struct page **pages, struct page *dummy_read_page,
- dma_addr_t *dma_addrs)
-{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
- struct drm_device *dev = nvbe->dev;
- int i;
-
- NV_DEBUG(nvbe->dev, "num_pages = %ld\n", num_pages);
-
- nvbe->pages = dma_addrs;
- nvbe->nr_pages = num_pages;
- nvbe->unmap_pages = true;
-
- /* this code path isn't called and is incorrect anyways */
- if (0) { /* dma_addrs[0] != DMA_ERROR_CODE) { */
- nvbe->unmap_pages = false;
- return 0;
- }
-
- for (i = 0; i < num_pages; i++) {
- nvbe->pages[i] = pci_map_page(dev->pdev, pages[i], 0,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->pdev, nvbe->pages[i])) {
- nvbe->nr_pages = --i;
- be->func->clear(be);
- return -EFAULT;
- }
- }
-
- return 0;
-}
-
-static void
-nouveau_sgdma_clear(struct ttm_backend *be)
-{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
- struct drm_device *dev = nvbe->dev;
-
- if (nvbe->bound)
- be->func->unbind(be);
-
- if (nvbe->unmap_pages) {
- while (nvbe->nr_pages--) {
- pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- }
- }
-}
-
static void
-nouveau_sgdma_destroy(struct ttm_backend *be)
+nouveau_sgdma_destroy(struct ttm_tt *ttm)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
- if (be) {
+ if (ttm) {
NV_DEBUG(nvbe->dev, "\n");
-
- if (nvbe) {
- if (nvbe->pages)
- be->func->clear(be);
- kfree(nvbe);
- }
+ kfree(nvbe);
}
}
static int
-nv04_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_device *dev = nvbe->dev;
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma;
@@ -99,8 +37,8 @@ nv04_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
nvbe->offset = mem->start << PAGE_SHIFT;
pte = (nvbe->offset >> NV_CTXDMA_PAGE_SHIFT) + 2;
- for (i = 0; i < nvbe->nr_pages; i++) {
- dma_addr_t dma_offset = nvbe->pages[i];
+ for (i = 0; i < ttm->num_pages; i++) {
+ dma_addr_t dma_offset = ttm->dma_address[i];
uint32_t offset_l = lower_32_bits(dma_offset);
for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++, pte++) {
@@ -109,14 +47,13 @@ nv04_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
}
}
- nvbe->bound = true;
return 0;
}
static int
-nv04_sgdma_unbind(struct ttm_backend *be)
+nv04_sgdma_unbind(struct ttm_tt *ttm)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_device *dev = nvbe->dev;
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma;
@@ -124,22 +61,19 @@ nv04_sgdma_unbind(struct ttm_backend *be)
NV_DEBUG(dev, "\n");
- if (!nvbe->bound)
+ if (ttm->state != tt_bound)
return 0;
pte = (nvbe->offset >> NV_CTXDMA_PAGE_SHIFT) + 2;
- for (i = 0; i < nvbe->nr_pages; i++) {
+ for (i = 0; i < ttm->num_pages; i++) {
for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++, pte++)
nv_wo32(gpuobj, (pte * 4) + 0, 0x00000000);
}
- nvbe->bound = false;
return 0;
}
static struct ttm_backend_func nv04_sgdma_backend = {
- .populate = nouveau_sgdma_populate,
- .clear = nouveau_sgdma_clear,
.bind = nv04_sgdma_bind,
.unbind = nv04_sgdma_unbind,
.destroy = nouveau_sgdma_destroy
@@ -158,14 +92,14 @@ nv41_sgdma_flush(struct nouveau_sgdma_be *nvbe)
}
static int
-nv41_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+nv41_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
- dma_addr_t *list = nvbe->pages;
+ dma_addr_t *list = ttm->dma_address;
u32 pte = mem->start << 2;
- u32 cnt = nvbe->nr_pages;
+ u32 cnt = ttm->num_pages;
nvbe->offset = mem->start << PAGE_SHIFT;
@@ -175,18 +109,17 @@ nv41_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
}
nv41_sgdma_flush(nvbe);
- nvbe->bound = true;
return 0;
}
static int
-nv41_sgdma_unbind(struct ttm_backend *be)
+nv41_sgdma_unbind(struct ttm_tt *ttm)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
u32 pte = (nvbe->offset >> 12) << 2;
- u32 cnt = nvbe->nr_pages;
+ u32 cnt = ttm->num_pages;
while (cnt--) {
nv_wo32(pgt, pte, 0x00000000);
@@ -194,24 +127,22 @@ nv41_sgdma_unbind(struct ttm_backend *be)
}
nv41_sgdma_flush(nvbe);
- nvbe->bound = false;
return 0;
}
static struct ttm_backend_func nv41_sgdma_backend = {
- .populate = nouveau_sgdma_populate,
- .clear = nouveau_sgdma_clear,
.bind = nv41_sgdma_bind,
.unbind = nv41_sgdma_unbind,
.destroy = nouveau_sgdma_destroy
};
static void
-nv44_sgdma_flush(struct nouveau_sgdma_be *nvbe)
+nv44_sgdma_flush(struct ttm_tt *ttm)
{
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_device *dev = nvbe->dev;
- nv_wr32(dev, 0x100814, (nvbe->nr_pages - 1) << 12);
+ nv_wr32(dev, 0x100814, (ttm->num_pages - 1) << 12);
nv_wr32(dev, 0x100808, nvbe->offset | 0x20);
if (!nv_wait(dev, 0x100808, 0x00000001, 0x00000001))
NV_ERROR(dev, "gart flush timeout: 0x%08x\n",
@@ -270,14 +201,14 @@ nv44_sgdma_fill(struct nouveau_gpuobj *pgt, dma_addr_t *list, u32 base, u32 cnt)
}
static int
-nv44_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+nv44_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
- dma_addr_t *list = nvbe->pages;
+ dma_addr_t *list = ttm->dma_address;
u32 pte = mem->start << 2, tmp[4];
- u32 cnt = nvbe->nr_pages;
+ u32 cnt = ttm->num_pages;
int i;
nvbe->offset = mem->start << PAGE_SHIFT;
@@ -305,19 +236,18 @@ nv44_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
if (cnt)
nv44_sgdma_fill(pgt, list, pte, cnt);
- nv44_sgdma_flush(nvbe);
- nvbe->bound = true;
+ nv44_sgdma_flush(ttm);
return 0;
}
static int
-nv44_sgdma_unbind(struct ttm_backend *be)
+nv44_sgdma_unbind(struct ttm_tt *ttm)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+ struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_nouveau_private *dev_priv = nvbe->dev->dev_private;
struct nouveau_gpuobj *pgt = dev_priv->gart_info.sg_ctxdma;
u32 pte = (nvbe->offset >> 12) << 2;
- u32 cnt = nvbe->nr_pages;
+ u32 cnt = ttm->num_pages;
if (pte & 0x0000000c) {
u32 max = 4 - ((pte >> 2) & 0x3);
@@ -339,55 +269,46 @@ nv44_sgdma_unbind(struct ttm_backend *be)
if (cnt)
nv44_sgdma_fill(pgt, NULL, pte, cnt);
- nv44_sgdma_flush(nvbe);
- nvbe->bound = false;
+ nv44_sgdma_flush(ttm);
return 0;
}
static struct ttm_backend_func nv44_sgdma_backend = {
- .populate = nouveau_sgdma_populate,
- .clear = nouveau_sgdma_clear,
.bind = nv44_sgdma_bind,
.unbind = nv44_sgdma_unbind,
.destroy = nouveau_sgdma_destroy
};
static int
-nv50_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
+nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
struct nouveau_mem *node = mem->mm_node;
+
/* noop: bound in move_notify() */
- node->pages = nvbe->pages;
- nvbe->pages = (dma_addr_t *)node;
- nvbe->bound = true;
+ node->pages = ttm->dma_address;
return 0;
}
static int
-nv50_sgdma_unbind(struct ttm_backend *be)
+nv50_sgdma_unbind(struct ttm_tt *ttm)
{
- struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
- struct nouveau_mem *node = (struct nouveau_mem *)nvbe->pages;
/* noop: unbound in move_notify() */
- nvbe->pages = node->pages;
- node->pages = NULL;
- nvbe->bound = false;
return 0;
}
static struct ttm_backend_func nv50_sgdma_backend = {
- .populate = nouveau_sgdma_populate,
- .clear = nouveau_sgdma_clear,
.bind = nv50_sgdma_bind,
.unbind = nv50_sgdma_unbind,
.destroy = nouveau_sgdma_destroy
};
-struct ttm_backend *
-nouveau_sgdma_init_ttm(struct drm_device *dev)
+struct ttm_tt *
+nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page)
{
- struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
+ struct drm_device *dev = dev_priv->dev;
struct nouveau_sgdma_be *nvbe;
nvbe = kzalloc(sizeof(*nvbe), GFP_KERNEL);
@@ -395,9 +316,12 @@ nouveau_sgdma_init_ttm(struct drm_device *dev)
return NULL;
nvbe->dev = dev;
+ nvbe->ttm.func = dev_priv->gart_info.func;
- nvbe->backend.func = dev_priv->gart_info.func;
- return &nvbe->backend;
+ if (ttm_tt_init(&nvbe->ttm, bdev, size, page_flags, dummy_read_page)) {
+ return NULL;
+ }
+ return &nvbe->ttm;
}
int
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b316b301152ff2..abe1297b621bcc 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -95,6 +95,7 @@ extern int radeon_hw_i2c;
extern int radeon_pcie_gen2;
extern int radeon_msi;
+extern int radeon_ttm_dma;
/*
* Copy from radeon_drv.h so we don't have to include both and have conflicting
* symbol;
@@ -320,7 +321,6 @@ struct radeon_gart {
unsigned table_size;
struct page **pages;
dma_addr_t *pages_addr;
- bool *ttm_alloced;
bool ready;
};
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index c33bc914d93df9..7c31321df45b1e 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -765,8 +765,14 @@ int radeon_device_init(struct radeon_device *rdev,
r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
if (r) {
rdev->need_dma32 = true;
+ dma_bits = 32;
printk(KERN_WARNING "radeon: No suitable DMA available.\n");
}
+ r = pci_set_consistent_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
+ if (r) {
+ pci_set_consistent_dma_mask(rdev->pdev, DMA_BIT_MASK(32));
+ printk(KERN_WARNING "radeon: No coherent DMA available.\n");
+ }
/* Registers mapping */
/* TODO: block userspace mapping of io register */
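Both nouveau (earlier in this series) and radeon now set the coherent DMA mask alongside the streaming mask, falling back to a 32-bit mask when the wider one is rejected. The DMA-aware TTM pool hands out coherent memory, so the coherent mask has to reflect the device's real addressing limits rather than the PCI default.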
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index a0b35e9094896c..bf917684ddda49 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -120,6 +120,10 @@ int radeon_disp_priority = 0;
int radeon_hw_i2c = 0;
int radeon_pcie_gen2 = 0;
int radeon_msi = -1;
+int radeon_ttm_dma = 0;
+
+MODULE_PARM_DESC(ttm_dma, "Enable TTM DMA page pool always irregardless of DMA32 flag");
+module_param_named(ttm_dma, radeon_ttm_dma, int, 0444);
MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
module_param_named(no_wb, radeon_no_wb, int, 0444);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index ba7ab79e12c196..a4d981608580ad 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -157,9 +157,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
for (i = 0; i < pages; i++, p++) {
if (rdev->gart.pages[p]) {
- if (!rdev->gart.ttm_alloced[p])
- pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
rdev->gart.pages[p] = NULL;
rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
page_base = rdev->gart.pages_addr[p];
@@ -191,23 +188,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
for (i = 0; i < pages; i++, p++) {
- /* we reverted the patch using dma_addr in TTM for now but this
- * code stops building on alpha so just comment it out for now */
- if (0) { /*dma_addr[i] != DMA_ERROR_CODE) */
- rdev->gart.ttm_alloced[p] = true;
- rdev->gart.pages_addr[p] = dma_addr[i];
- } else {
- /* we need to support large memory configurations */
- /* assume that unbind have already been call on the range */
- rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
- 0, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
- /* FIXME: failed to map page (return -ENOMEM?) */
- radeon_gart_unbind(rdev, offset, pages);
- return -ENOMEM;
- }
- }
+ rdev->gart.pages_addr[p] = dma_addr[i];
rdev->gart.pages[p] = pagelist[i];
if (rdev->gart.ptr) {
page_base = rdev->gart.pages_addr[p];
@@ -274,12 +255,6 @@ int radeon_gart_init(struct radeon_device *rdev)
radeon_gart_fini(rdev);
return -ENOMEM;
}
- rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
- rdev->gart.num_cpu_pages, GFP_KERNEL);
- if (rdev->gart.ttm_alloced == NULL) {
- radeon_gart_fini(rdev);
- return -ENOMEM;
- }
/* set GART entry to point to the dummy page by default */
for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
@@ -296,10 +271,8 @@ void radeon_gart_fini(struct radeon_device *rdev)
rdev->gart.ready = false;
kfree(rdev->gart.pages);
kfree(rdev->gart.pages_addr);
- kfree(rdev->gart.ttm_alloced);
rdev->gart.pages = NULL;
rdev->gart.pages_addr = NULL;
- rdev->gart.ttm_alloced = NULL;
radeon_dummy_page_fini(rdev);
}
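In effect, DMA-mapping responsibility moves wholly into the TTM populate/unpopulate callbacks: populate fills ttm->dma_address[], radeon_gart_bind() consumes those addresses without mapping anything itself, radeon_gart_unbind() only clears GART entries, and unpopulate undoes the mappings. That is why the per-page ttm_alloced[] bookkeeping can be deleted outright.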
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 0b5468bfaf5448..0dc0048f565c0a 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -114,24 +114,6 @@ static void radeon_ttm_global_fini(struct radeon_device *rdev)
}
}
-struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev);
-
-static struct ttm_backend*
-radeon_create_ttm_backend_entry(struct ttm_bo_device *bdev)
-{
- struct radeon_device *rdev;
-
- rdev = radeon_get_rdev(bdev);
-#if __OS_HAS_AGP
- if (rdev->flags & RADEON_IS_AGP) {
- return ttm_agp_backend_init(bdev, rdev->ddev->agp->bridge);
- } else
-#endif
- {
- return radeon_ttm_backend_create(rdev);
- }
-}
-
static int radeon_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
return 0;
@@ -515,8 +497,157 @@ static bool radeon_sync_obj_signaled(void *sync_obj, void *sync_arg)
return radeon_fence_signaled((struct radeon_fence *)sync_obj);
}
+/*
+ * TTM backend functions.
+ */
+struct radeon_ttm_tt {
+ struct ttm_tt ttm;
+ struct radeon_device *rdev;
+ u64 offset;
+};
+
+static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
+ struct ttm_mem_reg *bo_mem)
+{
+ struct radeon_ttm_tt *gtt;
+ int r;
+
+ gtt = container_of(ttm, struct radeon_ttm_tt, ttm);
+ gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
+ if (!ttm->num_pages) {
+ WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
+ ttm->num_pages, bo_mem, ttm);
+ }
+ r = radeon_gart_bind(gtt->rdev, gtt->offset,
+ ttm->num_pages, ttm->pages, ttm->dma_address);
+ if (r) {
+ DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
+ ttm->num_pages, (unsigned)gtt->offset);
+ return r;
+ }
+ return 0;
+}
+
+static int radeon_ttm_backend_unbind(struct ttm_tt *ttm)
+{
+ struct radeon_ttm_tt *gtt;
+
+ gtt = container_of(ttm, struct radeon_ttm_tt, ttm);
+ radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages);
+ return 0;
+}
+
+static void radeon_ttm_backend_destroy(struct ttm_tt *ttm)
+{
+ struct radeon_ttm_tt *gtt;
+
+ gtt = container_of(ttm, struct radeon_ttm_tt, ttm);
+ if (ttm->state == tt_bound) {
+ radeon_ttm_backend_unbind(ttm);
+ }
+ kfree(gtt);
+}
+
+static struct ttm_backend_func radeon_backend_func = {
+ .bind = &radeon_ttm_backend_bind,
+ .unbind = &radeon_ttm_backend_unbind,
+ .destroy = &radeon_ttm_backend_destroy,
+};
+
+struct ttm_tt *radeon_ttm_tt_create(struct ttm_bo_device *bdev,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page)
+{
+ struct radeon_device *rdev;
+ struct radeon_ttm_tt *gtt;
+
+ rdev = radeon_get_rdev(bdev);
+#if __OS_HAS_AGP
+ if (rdev->flags & RADEON_IS_AGP) {
+ return ttm_agp_tt_create(bdev, rdev->ddev->agp->bridge,
+ size, page_flags, dummy_read_page);
+ }
+#endif
+
+ gtt = kzalloc(sizeof(struct radeon_ttm_tt), GFP_KERNEL);
+ if (gtt == NULL) {
+ return NULL;
+ }
+ gtt->ttm.func = &radeon_backend_func;
+ gtt->rdev = rdev;
+ if (ttm_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
+ return NULL;
+ }
+ return &gtt->ttm;
+}
+
+static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
+{
+ struct radeon_device *rdev;
+ unsigned i;
+ int r;
+
+ if (ttm->state != tt_unpopulated)
+ return 0;
+
+ rdev = radeon_get_rdev(ttm->bdev);
+
+#ifdef CONFIG_SWIOTLB
+ if ((rdev->need_dma32 && swiotlb_nr_tbl()) || radeon_ttm_dma) {
+ return ttm_dma_populate(ttm, rdev->dev);
+ }
+#endif
+
+ r = ttm_page_alloc_ttm_tt_populate(ttm);
+ if (r) {
+ return r;
+ }
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ ttm->dma_address[i] = pci_map_page(rdev->pdev, ttm->pages[i],
+ 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(rdev->pdev, ttm->dma_address[i])) {
+ while (i--) {
+ pci_unmap_page(rdev->pdev, ttm->dma_address[i],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ ttm->dma_address[i] = 0;
+ }
+ ttm_page_alloc_ttm_tt_unpopulate(ttm);
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
+{
+ struct radeon_device *rdev;
+ unsigned i;
+
+ rdev = radeon_get_rdev(ttm->bdev);
+
+#ifdef CONFIG_SWIOTLB
+ if ((rdev->need_dma32 && swiotlb_nr_tbl()) || radeon_ttm_dma) {
+ ttm_dma_unpopulate(ttm, rdev->dev);
+ return;
+ }
+#endif
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->dma_address[i]) {
+ pci_unmap_page(rdev->pdev, ttm->dma_address[i],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ }
+ }
+
+ ttm_page_alloc_ttm_tt_unpopulate(ttm);
+}
+
static struct ttm_bo_driver radeon_bo_driver = {
- .create_ttm_backend_entry = &radeon_create_ttm_backend_entry,
+ .ttm_tt_create = &radeon_ttm_tt_create,
+ .ttm_tt_populate = &radeon_ttm_tt_populate,
+ .ttm_tt_unpopulate = &radeon_ttm_tt_unpopulate,
.invalidate_caches = &radeon_invalidate_caches,
.init_mem_type = &radeon_init_mem_type,
.evict_flags = &radeon_evict_flags,
@@ -680,124 +811,6 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma)
}
-/*
- * TTM backend functions.
- */
-struct radeon_ttm_backend {
- struct ttm_backend backend;
- struct radeon_device *rdev;
- unsigned long num_pages;
- struct page **pages;
- struct page *dummy_read_page;
- dma_addr_t *dma_addrs;
- bool populated;
- bool bound;
- unsigned offset;
-};
-
-static int radeon_ttm_backend_populate(struct ttm_backend *backend,
- unsigned long num_pages,
- struct page **pages,
- struct page *dummy_read_page,
- dma_addr_t *dma_addrs)
-{
- struct radeon_ttm_backend *gtt;
-
- gtt = container_of(backend, struct radeon_ttm_backend, backend);
- gtt->pages = pages;
- gtt->dma_addrs = dma_addrs;
- gtt->num_pages = num_pages;
- gtt->dummy_read_page = dummy_read_page;
- gtt->populated = true;
- return 0;
-}
-
-static void radeon_ttm_backend_clear(struct ttm_backend *backend)
-{
- struct radeon_ttm_backend *gtt;
-
- gtt = container_of(backend, struct radeon_ttm_backend, backend);
- gtt->pages = NULL;
- gtt->dma_addrs = NULL;
- gtt->num_pages = 0;
- gtt->dummy_read_page = NULL;
- gtt->populated = false;
- gtt->bound = false;
-}
-
-
-static int radeon_ttm_backend_bind(struct ttm_backend *backend,
- struct ttm_mem_reg *bo_mem)
-{
- struct radeon_ttm_backend *gtt;
- int r;
-
- gtt = container_of(backend, struct radeon_ttm_backend, backend);
- gtt->offset = bo_mem->start << PAGE_SHIFT;
- if (!gtt->num_pages) {
- WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
- gtt->num_pages, bo_mem, backend);
- }
- r = radeon_gart_bind(gtt->rdev, gtt->offset,
- gtt->num_pages, gtt->pages, gtt->dma_addrs);
- if (r) {
- DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
- gtt->num_pages, gtt->offset);
- return r;
- }
- gtt->bound = true;
- return 0;
-}
-
-static int radeon_ttm_backend_unbind(struct ttm_backend *backend)
-{
- struct radeon_ttm_backend *gtt;
-
- gtt = container_of(backend, struct radeon_ttm_backend, backend);
- radeon_gart_unbind(gtt->rdev, gtt->offset, gtt->num_pages);
- gtt->bound = false;
- return 0;
-}
-
-static void radeon_ttm_backend_destroy(struct ttm_backend *backend)
-{
- struct radeon_ttm_backend *gtt;
-
- gtt = container_of(backend, struct radeon_ttm_backend, backend);
- if (gtt->bound) {
- radeon_ttm_backend_unbind(backend);
- }
- kfree(gtt);
-}
-
-static struct ttm_backend_func radeon_backend_func = {
- .populate = &radeon_ttm_backend_populate,
- .clear = &radeon_ttm_backend_clear,
- .bind = &radeon_ttm_backend_bind,
- .unbind = &radeon_ttm_backend_unbind,
- .destroy = &radeon_ttm_backend_destroy,
-};
-
-struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev)
-{
- struct radeon_ttm_backend *gtt;
-
- gtt = kzalloc(sizeof(struct radeon_ttm_backend), GFP_KERNEL);
- if (gtt == NULL) {
- return NULL;
- }
- gtt->backend.bdev = &rdev->mman.bdev;
- gtt->backend.flags = 0;
- gtt->backend.func = &radeon_backend_func;
- gtt->rdev = rdev;
- gtt->pages = NULL;
- gtt->num_pages = 0;
- gtt->dummy_read_page = NULL;
- gtt->populated = false;
- gtt->bound = false;
- return &gtt->backend;
-}
-
#define RADEON_DEBUGFS_MEM_TYPES 2
#if defined(CONFIG_DEBUG_FS)
@@ -820,8 +833,8 @@ static int radeon_mm_dump_table(struct seq_file *m, void *data)
static int radeon_ttm_debugfs_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
- static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES+1];
- static char radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES+1][32];
+ static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES+2];
+ static char radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES+2][32];
unsigned i;
for (i = 0; i < RADEON_DEBUGFS_MEM_TYPES; i++) {
@@ -843,8 +856,17 @@ static int radeon_ttm_debugfs_init(struct radeon_device *rdev)
radeon_mem_types_list[i].name = radeon_mem_types_names[i];
radeon_mem_types_list[i].show = &ttm_page_alloc_debugfs;
radeon_mem_types_list[i].driver_features = 0;
- radeon_mem_types_list[i].data = NULL;
- return radeon_debugfs_add_files(rdev, radeon_mem_types_list, RADEON_DEBUGFS_MEM_TYPES+1);
+ radeon_mem_types_list[i++].data = NULL;
+#ifdef CONFIG_SWIOTLB
+ if ((rdev->need_dma32 && swiotlb_nr_tbl()) || radeon_ttm_dma) {
+ sprintf(radeon_mem_types_names[i], "ttm_dma_page_pool");
+ radeon_mem_types_list[i].name = radeon_mem_types_names[i];
+ radeon_mem_types_list[i].show = &ttm_dma_page_alloc_debugfs;
+ radeon_mem_types_list[i].driver_features = 0;
+ radeon_mem_types_list[i++].data = NULL;
+ }
+#endif
+ return radeon_debugfs_add_files(rdev, radeon_mem_types_list, i);
#endif
return 0;
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index f3cf6f02c9970d..b2b33dde2afb06 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -7,4 +7,8 @@ ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o \
ttm_bo_manager.o
+ifeq ($(CONFIG_SWIOTLB),y)
+ttm-y += ttm_page_alloc_dma.o
+endif
+
obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index 1c4a72f681c16c..14ebd3650aa9d6 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -40,45 +40,33 @@
#include <asm/agp.h>
struct ttm_agp_backend {
- struct ttm_backend backend;
+ struct ttm_tt ttm;
struct agp_memory *mem;
struct agp_bridge_data *bridge;
};
-static int ttm_agp_populate(struct ttm_backend *backend,
- unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page,
- dma_addr_t *dma_addrs)
+static int ttm_agp_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
{
- struct ttm_agp_backend *agp_be =
- container_of(backend, struct ttm_agp_backend, backend);
- struct page **cur_page, **last_page = pages + num_pages;
+ struct ttm_agp_backend *agp_be = container_of(ttm, struct ttm_agp_backend, ttm);
+ struct drm_mm_node *node = bo_mem->mm_node;
struct agp_memory *mem;
+ int ret, cached = (bo_mem->placement & TTM_PL_FLAG_CACHED);
+ unsigned i;
- mem = agp_allocate_memory(agp_be->bridge, num_pages, AGP_USER_MEMORY);
+ mem = agp_allocate_memory(agp_be->bridge, ttm->num_pages, AGP_USER_MEMORY);
if (unlikely(mem == NULL))
return -ENOMEM;
mem->page_count = 0;
- for (cur_page = pages; cur_page < last_page; ++cur_page) {
- struct page *page = *cur_page;
+ for (i = 0; i < ttm->num_pages; i++) {
+ struct page *page = ttm->pages[i];
+
if (!page)
- page = dummy_read_page;
+ page = ttm->dummy_read_page;
mem->pages[mem->page_count++] = page;
}
agp_be->mem = mem;
- return 0;
-}
-
-static int ttm_agp_bind(struct ttm_backend *backend, struct ttm_mem_reg *bo_mem)
-{
- struct ttm_agp_backend *agp_be =
- container_of(backend, struct ttm_agp_backend, backend);
- struct drm_mm_node *node = bo_mem->mm_node;
- struct agp_memory *mem = agp_be->mem;
- int cached = (bo_mem->placement & TTM_PL_FLAG_CACHED);
- int ret;
mem->is_flushed = 1;
mem->type = (cached) ? AGP_USER_CACHED_MEMORY : AGP_USER_MEMORY;
@@ -90,50 +78,38 @@ static int ttm_agp_bind(struct ttm_backend *backend, struct ttm_mem_reg *bo_mem)
return ret;
}
-static int ttm_agp_unbind(struct ttm_backend *backend)
+static int ttm_agp_unbind(struct ttm_tt *ttm)
{
- struct ttm_agp_backend *agp_be =
- container_of(backend, struct ttm_agp_backend, backend);
-
- if (agp_be->mem->is_bound)
- return agp_unbind_memory(agp_be->mem);
- else
- return 0;
-}
+ struct ttm_agp_backend *agp_be = container_of(ttm, struct ttm_agp_backend, ttm);
-static void ttm_agp_clear(struct ttm_backend *backend)
-{
- struct ttm_agp_backend *agp_be =
- container_of(backend, struct ttm_agp_backend, backend);
- struct agp_memory *mem = agp_be->mem;
-
- if (mem) {
- ttm_agp_unbind(backend);
- agp_free_memory(mem);
+ if (agp_be->mem) {
+ if (agp_be->mem->is_bound)
+ return agp_unbind_memory(agp_be->mem);
+ agp_free_memory(agp_be->mem);
+ agp_be->mem = NULL;
}
- agp_be->mem = NULL;
+ return 0;
}
-static void ttm_agp_destroy(struct ttm_backend *backend)
+static void ttm_agp_destroy(struct ttm_tt *ttm)
{
- struct ttm_agp_backend *agp_be =
- container_of(backend, struct ttm_agp_backend, backend);
+ struct ttm_agp_backend *agp_be = container_of(ttm, struct ttm_agp_backend, ttm);
if (agp_be->mem)
- ttm_agp_clear(backend);
+ ttm_agp_unbind(ttm);
kfree(agp_be);
}
static struct ttm_backend_func ttm_agp_func = {
- .populate = ttm_agp_populate,
- .clear = ttm_agp_clear,
.bind = ttm_agp_bind,
.unbind = ttm_agp_unbind,
.destroy = ttm_agp_destroy,
};
-struct ttm_backend *ttm_agp_backend_init(struct ttm_bo_device *bdev,
- struct agp_bridge_data *bridge)
+struct ttm_tt *ttm_agp_tt_create(struct ttm_bo_device *bdev,
+ struct agp_bridge_data *bridge,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page)
{
struct ttm_agp_backend *agp_be;
@@ -143,10 +119,14 @@ struct ttm_backend *ttm_agp_backend_init(struct ttm_bo_device *bdev,
agp_be->mem = NULL;
agp_be->bridge = bridge;
- agp_be->backend.func = &ttm_agp_func;
- agp_be->backend.bdev = bdev;
- return &agp_be->backend;
+ agp_be->ttm.func = &ttm_agp_func;
+
+ if (ttm_tt_init(&agp_be->ttm, bdev, size, page_flags, dummy_read_page)) {
+ return NULL;
+ }
+
+ return &agp_be->ttm;
}
-EXPORT_SYMBOL(ttm_agp_backend_init);
+EXPORT_SYMBOL(ttm_agp_tt_create);
#endif
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 617b64678fc62f..c31f40f0a1741c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -337,27 +337,11 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
if (zero_alloc)
page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC;
case ttm_bo_type_kernel:
- bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
- page_flags, glob->dummy_read_page);
+ bo->ttm = bdev->driver->ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
+ page_flags, glob->dummy_read_page);
if (unlikely(bo->ttm == NULL))
ret = -ENOMEM;
break;
- case ttm_bo_type_user:
- bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
- page_flags | TTM_PAGE_FLAG_USER,
- glob->dummy_read_page);
- if (unlikely(bo->ttm == NULL)) {
- ret = -ENOMEM;
- break;
- }
-
- ret = ttm_tt_set_user(bo->ttm, current,
- bo->buffer_start, bo->num_pages);
- if (unlikely(ret != 0)) {
- ttm_tt_destroy(bo->ttm);
- bo->ttm = NULL;
- }
- break;
default:
printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
ret = -EINVAL;
@@ -907,16 +891,12 @@ static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
}
static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
- bool disallow_fixed,
uint32_t mem_type,
uint32_t proposed_placement,
uint32_t *masked_placement)
{
uint32_t cur_flags = ttm_bo_type_flags(mem_type);
- if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && disallow_fixed)
- return false;
-
if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0)
return false;
@@ -961,7 +941,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
man = &bdev->man[mem_type];
type_ok = ttm_bo_mt_compatible(man,
- bo->type == ttm_bo_type_user,
mem_type,
placement->placement[i],
&cur_flags);
@@ -1009,7 +988,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
if (!man->has_type)
continue;
if (!ttm_bo_mt_compatible(man,
- bo->type == ttm_bo_type_user,
mem_type,
placement->busy_placement[i],
&cur_flags))
@@ -1274,7 +1252,7 @@ int ttm_bo_create(struct ttm_bo_device *bdev,
size_t acc_size =
ttm_bo_size(bdev->glob, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
- ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
+ ret = ttm_mem_global_alloc(mem_glob, acc_size, false);
if (unlikely(ret != 0))
return ret;
@@ -1459,10 +1437,7 @@ int ttm_bo_global_init(struct drm_global_reference *ref)
goto out_no_shrink;
}
- glob->ttm_bo_extra_size =
- ttm_round_pot(sizeof(struct ttm_tt)) +
- ttm_round_pot(sizeof(struct ttm_backend));
-
+ glob->ttm_bo_extra_size = ttm_round_pot(sizeof(struct ttm_tt));
glob->ttm_bo_size = glob->ttm_bo_extra_size +
ttm_round_pot(sizeof(struct ttm_buffer_object));
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 082fcaea583fb0..60f204d67dbb62 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -244,7 +244,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src,
unsigned long page,
pgprot_t prot)
{
- struct page *d = ttm_tt_get_page(ttm, page);
+ struct page *d = ttm->pages[page];
void *dst;
if (!d)
@@ -281,7 +281,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
unsigned long page,
pgprot_t prot)
{
- struct page *s = ttm_tt_get_page(ttm, page);
+ struct page *s = ttm->pages[page];
void *src;
if (!s)
@@ -342,6 +342,12 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
if (old_iomap == NULL && ttm == NULL)
goto out2;
+ if (ttm->state == tt_unpopulated) {
+ ret = ttm->bdev->driver->ttm_tt_populate(ttm);
+ if (ret)
+ goto out1;
+ }
+
add = 0;
dir = 1;
@@ -502,10 +508,16 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
{
struct ttm_mem_reg *mem = &bo->mem; pgprot_t prot;
struct ttm_tt *ttm = bo->ttm;
- struct page *d;
- int i;
+ int ret;
BUG_ON(!ttm);
+
+ if (ttm->state == tt_unpopulated) {
+ ret = ttm->bdev->driver->ttm_tt_populate(ttm);
+ if (ret)
+ return ret;
+ }
+
if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) {
/*
* We're mapping a single page, and the desired
@@ -513,18 +525,9 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
*/
map->bo_kmap_type = ttm_bo_map_kmap;
- map->page = ttm_tt_get_page(ttm, start_page);
+ map->page = ttm->pages[start_page];
map->virtual = kmap(map->page);
} else {
- /*
- * Populate the part we're mapping;
- */
- for (i = start_page; i < start_page + num_pages; ++i) {
- d = ttm_tt_get_page(ttm, i);
- if (!d)
- return -ENOMEM;
- }
-
/*
* We need to use vmap to get the desired page protection
* or to make the buffer object look contiguous.
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 221b924acebe27..bc1d751cbc1e20 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -174,18 +174,19 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
vm_get_page_prot(vma->vm_flags) :
ttm_io_prot(bo->mem.placement, vma->vm_page_prot);
- }
- /*
- * Speculatively prefault a number of pages. Only error on
- * first page.
- */
+ /* Allocate all pages at once, the most common usage. */
+ if (ttm->bdev->driver->ttm_tt_populate(ttm)) {
+ retval = VM_FAULT_OOM;
+ goto out_io_unlock;
+ }
+ }
for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
if (bo->mem.bus.is_iomem)
pfn = ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT) + page_offset;
else {
- page = ttm_tt_get_page(ttm, page_offset);
+ page = ttm->pages[page_offset];
if (unlikely(!page && i == 0)) {
retval = VM_FAULT_OOM;
goto out_io_unlock;
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index e70ddd82dc022e..3b12f05cdcde30 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -35,21 +35,10 @@
#include <linux/module.h>
#include <linux/slab.h>
-#define TTM_MEMORY_ALLOC_RETRIES 4
-
-struct ttm_mem_zone {
- struct kobject kobj;
- struct ttm_mem_global *glob;
- const char *name;
- uint64_t zone_mem;
- uint64_t emer_mem;
- uint64_t max_mem;
- uint64_t swap_limit;
- uint64_t used_mem;
-};
+#define TTM_MEMORY_RETRIES 4
static struct attribute ttm_mem_sys = {
- .name = "zone_memory",
+ .name = "memory",
.mode = S_IRUGO
};
static struct attribute ttm_mem_emer = {
@@ -64,140 +53,141 @@ static struct attribute ttm_mem_swap = {
.name = "swap_limit",
.mode = S_IRUGO | S_IWUSR
};
+static struct attribute ttm_mem_dma32_swap = {
+ .name = "swap_dma32_limit",
+ .mode = S_IRUGO | S_IWUSR
+};
static struct attribute ttm_mem_used = {
.name = "used_memory",
.mode = S_IRUGO
};
+static struct attribute ttm_mem_dma32_used = {
+ .name = "used_dma32_memory",
+ .mode = S_IRUGO
+};
-static void ttm_mem_zone_kobj_release(struct kobject *kobj)
-{
- struct ttm_mem_zone *zone =
- container_of(kobj, struct ttm_mem_zone, kobj);
-
- printk(KERN_INFO TTM_PFX
- "Zone %7s: Used memory at exit: %llu kiB.\n",
- zone->name, (unsigned long long) zone->used_mem >> 10);
- kfree(zone);
-}
-
-static ssize_t ttm_mem_zone_show(struct kobject *kobj,
- struct attribute *attr,
- char *buffer)
+static ssize_t ttm_mem_global_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buffer)
{
- struct ttm_mem_zone *zone =
- container_of(kobj, struct ttm_mem_zone, kobj);
- uint64_t val = 0;
+ struct ttm_mem_global *glob =
+ container_of(kobj, struct ttm_mem_global, kobj);
+ unsigned long val = 0;
- spin_lock(&zone->glob->lock);
+ spin_lock(&glob->lock);
if (attr == &ttm_mem_sys)
- val = zone->zone_mem;
+ val = glob->mem;
else if (attr == &ttm_mem_emer)
- val = zone->emer_mem;
+ val = glob->emer_mem;
else if (attr == &ttm_mem_max)
- val = zone->max_mem;
+ val = glob->max_mem;
else if (attr == &ttm_mem_swap)
- val = zone->swap_limit;
+ val = glob->swap_limit;
else if (attr == &ttm_mem_used)
- val = zone->used_mem;
- spin_unlock(&zone->glob->lock);
+ val = glob->used_mem;
+ else if (attr == &ttm_mem_dma32_used)
+ val = glob->used_dma32_mem;
+ else if (attr == &ttm_mem_dma32_swap)
+ val = glob->swap_dma32_limit;
+ spin_unlock(&glob->lock);
- return snprintf(buffer, PAGE_SIZE, "%llu\n",
- (unsigned long long) val >> 10);
+ return snprintf(buffer, PAGE_SIZE, "%lu\n", val >> 10);
}
static void ttm_check_swapping(struct ttm_mem_global *glob);
-static ssize_t ttm_mem_zone_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t size)
+static ssize_t ttm_mem_global_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t size)
{
- struct ttm_mem_zone *zone =
- container_of(kobj, struct ttm_mem_zone, kobj);
- int chars;
+ struct ttm_mem_global *glob =
+ container_of(kobj, struct ttm_mem_global, kobj);
unsigned long val;
- uint64_t val64;
+ int chars;
chars = sscanf(buffer, "%lu", &val);
if (chars == 0)
return size;
- val64 = val;
- val64 <<= 10;
+ val <<= 10;
+
+ spin_lock(&glob->lock);
+ /* limit to maximum memory */
+ if (val > glob->mem)
+ val = glob->mem;
- spin_lock(&zone->glob->lock);
- if (val64 > zone->zone_mem)
- val64 = zone->zone_mem;
if (attr == &ttm_mem_emer) {
- zone->emer_mem = val64;
- if (zone->max_mem > val64)
- zone->max_mem = val64;
+ glob->emer_mem = val;
+ if (glob->max_mem > val)
+ glob->max_mem = val;
} else if (attr == &ttm_mem_max) {
- zone->max_mem = val64;
- if (zone->emer_mem < val64)
- zone->emer_mem = val64;
- } else if (attr == &ttm_mem_swap)
- zone->swap_limit = val64;
- spin_unlock(&zone->glob->lock);
+ glob->max_mem = val;
+ if (glob->emer_mem < val)
+ glob->emer_mem = val;
+ } else if (attr == &ttm_mem_swap) {
+ glob->swap_limit = val;
+ } else if (attr == &ttm_mem_dma32_swap) {
+ glob->swap_dma32_limit = val;
+ }
+ spin_unlock(&glob->lock);
+ ttm_check_swapping(glob);
+ return size;
+}
- ttm_check_swapping(zone->glob);
+static void ttm_mem_global_kobj_release(struct kobject *kobj)
+{
+ struct ttm_mem_global *glob =
+ container_of(kobj, struct ttm_mem_global, kobj);
- return size;
+ kfree(glob);
}
-static struct attribute *ttm_mem_zone_attrs[] = {
+static struct attribute *ttm_mem_global_attrs[] = {
&ttm_mem_sys,
&ttm_mem_emer,
&ttm_mem_max,
&ttm_mem_swap,
+ &ttm_mem_dma32_swap,
&ttm_mem_used,
+ &ttm_mem_dma32_used,
NULL
};
-static const struct sysfs_ops ttm_mem_zone_ops = {
- .show = &ttm_mem_zone_show,
- .store = &ttm_mem_zone_store
+static const struct sysfs_ops ttm_mem_global_ops = {
+ .show = &ttm_mem_global_show,
+ .store = &ttm_mem_global_store
};
-static struct kobj_type ttm_mem_zone_kobj_type = {
- .release = &ttm_mem_zone_kobj_release,
- .sysfs_ops = &ttm_mem_zone_ops,
- .default_attrs = ttm_mem_zone_attrs,
-};
-
-static void ttm_mem_global_kobj_release(struct kobject *kobj)
-{
- struct ttm_mem_global *glob =
- container_of(kobj, struct ttm_mem_global, kobj);
-
- kfree(glob);
-}
-
static struct kobj_type ttm_mem_glob_kobj_type = {
.release = &ttm_mem_global_kobj_release,
+ .sysfs_ops = &ttm_mem_global_ops,
+ .default_attrs = ttm_mem_global_attrs,
};
-static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob,
- bool from_wq, uint64_t extra)
+static bool ttm_above_swap_target(struct ttm_mem_global *glob,
+ bool from_wq, uint64_t extra)
{
- unsigned int i;
- struct ttm_mem_zone *zone;
- uint64_t target;
+ unsigned long target;
- for (i = 0; i < glob->num_zones; ++i) {
- zone = glob->zones[i];
-
- if (from_wq)
- target = zone->swap_limit;
- else if (capable(CAP_SYS_ADMIN))
- target = zone->emer_mem;
+ if (from_wq) {
+ if (glob->used_mem > glob->swap_limit) {
+ return true;
+ }
+ if (glob->used_dma32_mem > glob->swap_dma32_limit) {
+ return true;
+ }
+ } else {
+ if (capable(CAP_SYS_ADMIN))
+ target = glob->emer_mem;
else
- target = zone->max_mem;
-
- target = (extra > target) ? 0ULL : target;
-
- if (zone->used_mem > target)
+ target = glob->max_mem;
+ if (extra > target) {
return true;
+ }
+ if ((glob->used_mem + glob->used_dma32_mem) > target) {
+ return true;
+ }
}
return false;
}
@@ -208,31 +198,31 @@ static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob,
* Note that this function is reentrant:
* many threads may try to swap out at any given time.
*/
-
-static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
+static void ttm_shrink(struct ttm_mem_global *glob,
+ bool from_wq,
uint64_t extra)
{
- int ret;
struct ttm_mem_shrink *shrink;
+ int ret, nretries = TTM_MEMORY_RETRIES;
spin_lock(&glob->lock);
if (glob->shrink == NULL)
goto out;
- while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
+ while (ttm_above_swap_target(glob, from_wq, extra)) {
shrink = glob->shrink;
spin_unlock(&glob->lock);
ret = shrink->do_shrink(shrink);
spin_lock(&glob->lock);
if (unlikely(ret != 0))
goto out;
+ if (--nretries < 0)
+ goto out;
}
out:
spin_unlock(&glob->lock);
}
-
-
static void ttm_shrink_work(struct work_struct *work)
{
struct ttm_mem_global *glob =
@@ -241,127 +231,10 @@ static void ttm_shrink_work(struct work_struct *work)
ttm_shrink(glob, true, 0ULL);
}
-static int ttm_mem_init_kernel_zone(struct ttm_mem_global *glob,
- const struct sysinfo *si)
-{
- struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL);
- uint64_t mem;
- int ret;
-
- if (unlikely(!zone))
- return -ENOMEM;
-
- mem = si->totalram - si->totalhigh;
- mem *= si->mem_unit;
-
- zone->name = "kernel";
- zone->zone_mem = mem;
- zone->max_mem = mem >> 1;
- zone->emer_mem = (mem >> 1) + (mem >> 2);
- zone->swap_limit = zone->max_mem - (mem >> 3);
- zone->used_mem = 0;
- zone->glob = glob;
- glob->zone_kernel = zone;
- ret = kobject_init_and_add(
- &zone->kobj, &ttm_mem_zone_kobj_type, &glob->kobj, zone->name);
- if (unlikely(ret != 0)) {
- kobject_put(&zone->kobj);
- return ret;
- }
- glob->zones[glob->num_zones++] = zone;
- return 0;
-}
-
-#ifdef CONFIG_HIGHMEM
-static int ttm_mem_init_highmem_zone(struct ttm_mem_global *glob,
- const struct sysinfo *si)
-{
- struct ttm_mem_zone *zone;
- uint64_t mem;
- int ret;
-
- if (si->totalhigh == 0)
- return 0;
-
- zone = kzalloc(sizeof(*zone), GFP_KERNEL);
- if (unlikely(!zone))
- return -ENOMEM;
-
- mem = si->totalram;
- mem *= si->mem_unit;
-
- zone->name = "highmem";
- zone->zone_mem = mem;
- zone->max_mem = mem >> 1;
- zone->emer_mem = (mem >> 1) + (mem >> 2);
- zone->swap_limit = zone->max_mem - (mem >> 3);
- zone->used_mem = 0;
- zone->glob = glob;
- glob->zone_highmem = zone;
- ret = kobject_init_and_add(
- &zone->kobj, &ttm_mem_zone_kobj_type, &glob->kobj, zone->name);
- if (unlikely(ret != 0)) {
- kobject_put(&zone->kobj);
- return ret;
- }
- glob->zones[glob->num_zones++] = zone;
- return 0;
-}
-#else
-static int ttm_mem_init_dma32_zone(struct ttm_mem_global *glob,
- const struct sysinfo *si)
-{
- struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL);
- uint64_t mem;
- int ret;
-
- if (unlikely(!zone))
- return -ENOMEM;
-
- mem = si->totalram;
- mem *= si->mem_unit;
-
- /**
- * No special dma32 zone needed.
- */
-
- if (mem <= ((uint64_t) 1ULL << 32)) {
- kfree(zone);
- return 0;
- }
-
- /*
- * Limit max dma32 memory to 4GB for now
- * until we can figure out how big this
- * zone really is.
- */
-
- mem = ((uint64_t) 1ULL << 32);
- zone->name = "dma32";
- zone->zone_mem = mem;
- zone->max_mem = mem >> 1;
- zone->emer_mem = (mem >> 1) + (mem >> 2);
- zone->swap_limit = zone->max_mem - (mem >> 3);
- zone->used_mem = 0;
- zone->glob = glob;
- glob->zone_dma32 = zone;
- ret = kobject_init_and_add(
- &zone->kobj, &ttm_mem_zone_kobj_type, &glob->kobj, zone->name);
- if (unlikely(ret != 0)) {
- kobject_put(&zone->kobj);
- return ret;
- }
- glob->zones[glob->num_zones++] = zone;
- return 0;
-}
-#endif
-
int ttm_mem_global_init(struct ttm_mem_global *glob)
{
struct sysinfo si;
int ret;
- int i;
- struct ttm_mem_zone *zone;
spin_lock_init(&glob->lock);
glob->swap_queue = create_singlethread_workqueue("ttm_swap");
@@ -374,50 +247,31 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
return ret;
}
+ /* compute limit */
si_meminfo(&si);
-
- ret = ttm_mem_init_kernel_zone(glob, &si);
- if (unlikely(ret != 0))
- goto out_no_zone;
-#ifdef CONFIG_HIGHMEM
- ret = ttm_mem_init_highmem_zone(glob, &si);
- if (unlikely(ret != 0))
- goto out_no_zone;
-#else
- ret = ttm_mem_init_dma32_zone(glob, &si);
- if (unlikely(ret != 0))
- goto out_no_zone;
-#endif
- for (i = 0; i < glob->num_zones; ++i) {
- zone = glob->zones[i];
- printk(KERN_INFO TTM_PFX
- "Zone %7s: Available graphics memory: %llu kiB.\n",
- zone->name, (unsigned long long) zone->max_mem >> 10);
- }
- ttm_page_alloc_init(glob, glob->zone_kernel->max_mem/(2*PAGE_SIZE));
+ glob->mem = si.totalram;
+ glob->mem *= si.mem_unit;
+ glob->used_mem = 0;
+ glob->used_dma32_mem = 0;
+ glob->max_mem = glob->mem >> 1;
+ glob->emer_mem = (glob->mem >> 1) + (glob->mem >> 2);
+ glob->swap_limit = glob->max_mem - (glob->mem >> 3);
+ glob->swap_dma32_limit = ((1ULL << 32) >> 1) - (((1ULL << 32) >> 3));
+ ttm_page_alloc_init(glob, glob->max_mem/(2*PAGE_SIZE));
+ ttm_dma_page_alloc_init(glob, glob->max_mem/(2*PAGE_SIZE));
return 0;
-out_no_zone:
- ttm_mem_global_release(glob);
- return ret;
}
EXPORT_SYMBOL(ttm_mem_global_init);
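For a sense of scale, the single set of global thresholds above works out as follows on a hypothetical machine with 8 GiB of RAM (an illustrative calculation only, not part of the patch):

    uint64_t mem = 8ULL << 30;                      /* 8 GiB of RAM */
    uint64_t max_mem    = mem >> 1;                 /* 4 GiB */
    uint64_t emer_mem   = (mem >> 1) + (mem >> 2);  /* 6 GiB */
    uint64_t swap_limit = max_mem - (mem >> 3);     /* 3 GiB */
    /* swap_dma32_limit is fixed: (4 GiB >> 1) - (4 GiB >> 3) = 1.5 GiB */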
void ttm_mem_global_release(struct ttm_mem_global *glob)
{
- unsigned int i;
- struct ttm_mem_zone *zone;
-
/* let the page allocator first stop the shrink work. */
ttm_page_alloc_fini();
+ ttm_dma_page_alloc_fini();
flush_workqueue(glob->swap_queue);
destroy_workqueue(glob->swap_queue);
glob->swap_queue = NULL;
- for (i = 0; i < glob->num_zones; ++i) {
- zone = glob->zones[i];
- kobject_del(&zone->kobj);
- kobject_put(&zone->kobj);
- }
kobject_del(&glob->kobj);
kobject_put(&glob->kobj);
}
@@ -426,18 +280,14 @@ EXPORT_SYMBOL(ttm_mem_global_release);
static void ttm_check_swapping(struct ttm_mem_global *glob)
{
bool needs_swapping = false;
- unsigned int i;
- struct ttm_mem_zone *zone;
spin_lock(&glob->lock);
- for (i = 0; i < glob->num_zones; ++i) {
- zone = glob->zones[i];
- if (zone->used_mem > zone->swap_limit) {
- needs_swapping = true;
- break;
- }
+ if ((glob->used_mem + glob->used_dma32_mem) > glob->swap_limit) {
+ needs_swapping = true;
+ }
+ if (glob->used_dma32_mem > glob->swap_dma32_limit) {
+ needs_swapping = true;
}
-
spin_unlock(&glob->lock);
if (unlikely(needs_swapping))
@@ -445,141 +295,81 @@ static void ttm_check_swapping(struct ttm_mem_global *glob)
}
-static void ttm_mem_global_free_zone(struct ttm_mem_global *glob,
- struct ttm_mem_zone *single_zone,
- uint64_t amount)
-{
- unsigned int i;
- struct ttm_mem_zone *zone;
-
- spin_lock(&glob->lock);
- for (i = 0; i < glob->num_zones; ++i) {
- zone = glob->zones[i];
- if (single_zone && zone != single_zone)
- continue;
- zone->used_mem -= amount;
- }
- spin_unlock(&glob->lock);
-}
-
void ttm_mem_global_free(struct ttm_mem_global *glob,
uint64_t amount)
{
- return ttm_mem_global_free_zone(glob, NULL, amount);
-}
-EXPORT_SYMBOL(ttm_mem_global_free);
-
-static int ttm_mem_global_reserve(struct ttm_mem_global *glob,
- struct ttm_mem_zone *single_zone,
- uint64_t amount, bool reserve)
-{
- uint64_t limit;
- int ret = -ENOMEM;
- unsigned int i;
- struct ttm_mem_zone *zone;
-
spin_lock(&glob->lock);
- for (i = 0; i < glob->num_zones; ++i) {
- zone = glob->zones[i];
- if (single_zone && zone != single_zone)
- continue;
-
- limit = (capable(CAP_SYS_ADMIN)) ?
- zone->emer_mem : zone->max_mem;
-
- if (zone->used_mem > limit)
- goto out_unlock;
- }
-
- if (reserve) {
- for (i = 0; i < glob->num_zones; ++i) {
- zone = glob->zones[i];
- if (single_zone && zone != single_zone)
- continue;
- zone->used_mem += amount;
- }
- }
-
- ret = 0;
-out_unlock:
+ glob->used_mem -= amount;
spin_unlock(&glob->lock);
- ttm_check_swapping(glob);
-
- return ret;
}
+EXPORT_SYMBOL(ttm_mem_global_free);
-
-static int ttm_mem_global_alloc_zone(struct ttm_mem_global *glob,
- struct ttm_mem_zone *single_zone,
- uint64_t memory,
- bool no_wait, bool interruptible)
+int ttm_mem_global_alloc(struct ttm_mem_global *glob,
+ uint64_t memory,
+ bool no_wait)
{
- int count = TTM_MEMORY_ALLOC_RETRIES;
+ unsigned long limit;
+ int i;
- while (unlikely(ttm_mem_global_reserve(glob,
- single_zone,
- memory, true)
- != 0)) {
+ for (i = 0; i < TTM_MEMORY_RETRIES; i++) {
+ spin_lock(&glob->lock);
+ limit = (capable(CAP_SYS_ADMIN)) ? glob->emer_mem : glob->max_mem;
+ if ((glob->used_mem + glob->used_dma32_mem + memory) < limit) {
+ glob->used_mem += memory;
+ spin_unlock(&glob->lock);
+ return 0;
+ }
+ spin_unlock(&glob->lock);
if (no_wait)
return -ENOMEM;
- if (unlikely(count-- == 0))
- return -ENOMEM;
ttm_shrink(glob, false, memory + (memory >> 2) + 16);
}
-
- return 0;
+ return -ENOMEM;
}
+EXPORT_SYMBOL(ttm_mem_global_alloc);
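Note that the retry loop asks the shrinker for roughly 25% more than the failed request (memory + (memory >> 2) + 16): for example, a 1 MiB allocation that misses its limit triggers a shrink target of 1 MiB + 256 KiB + 16 bytes before the next attempt.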
-int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
- bool no_wait, bool interruptible)
+int ttm_mem_global_alloc_pages(struct ttm_mem_global *glob,
+ unsigned npages,
+ bool no_wait)
{
- /**
- * Normal allocations of kernel memory are registered in
- * all zones.
- */
-
- return ttm_mem_global_alloc_zone(glob, NULL, memory, no_wait,
- interruptible);
+ if (ttm_mem_global_alloc(glob, PAGE_SIZE * npages, no_wait))
+ return -ENOMEM;
+ ttm_check_swapping(glob);
+ return 0;
}
-EXPORT_SYMBOL(ttm_mem_global_alloc);
-int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
- struct page *page,
- bool no_wait, bool interruptible)
+void ttm_mem_global_account_pages(struct ttm_mem_global *glob,
+ struct page **pages,
+ unsigned npages)
{
+ unsigned i;
- struct ttm_mem_zone *zone = NULL;
-
- /**
- * Page allocations may be registed in a single zone
- * only if highmem or !dma32.
- */
-
-#ifdef CONFIG_HIGHMEM
- if (PageHighMem(page) && glob->zone_highmem != NULL)
- zone = glob->zone_highmem;
-#else
- if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL)
- zone = glob->zone_kernel;
-#endif
- return ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait,
- interruptible);
+ /* check which pages are dma32 and re-account them */
+ spin_lock(&glob->lock);
+ for (i = 0; i < npages; i++) {
+ if (page_to_pfn(pages[i]) > 0x00100000UL) {
+ glob->used_mem -= PAGE_SIZE;
+ glob->used_dma32_mem += PAGE_SIZE;
+ }
+ }
+ spin_unlock(&glob->lock);
}
-void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page)
+void ttm_mem_global_free_pages(struct ttm_mem_global *glob,
+ struct page **pages, unsigned npages)
{
- struct ttm_mem_zone *zone = NULL;
-
-#ifdef CONFIG_HIGHMEM
- if (PageHighMem(page) && glob->zone_highmem != NULL)
- zone = glob->zone_highmem;
-#else
- if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL)
- zone = glob->zone_kernel;
-#endif
- ttm_mem_global_free_zone(glob, zone, PAGE_SIZE);
-}
+ unsigned i;
+ spin_lock(&glob->lock);
+ for (i = 0; i < npages; i++) {
+ if (page_to_pfn(pages[i]) > 0x00100000UL) {
+ glob->used_dma32_mem -= PAGE_SIZE;
+ } else {
+ glob->used_mem -= PAGE_SIZE;
+ }
+ }
+ spin_unlock(&glob->lock);
+}
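The magic pfn in both loops marks the 4 GiB boundary: with 4 KiB pages, 0x00100000 * 4096 = 2^20 * 2^12 = 2^32 bytes. A hedged sketch of the predicate the two loops share (the helper name is hypothetical, and it assumes PAGE_SHIFT == 12):

    /* Hypothetical helper: true when the page sits above the first 4 GiB. */
    static inline bool ttm_page_above_4gib(struct page *page)
    {
            return page_to_pfn(page) > 0x00100000UL;
    }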
size_t ttm_round_pot(size_t size)
{
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index 93577f2e2954d4..662c6fc9dd14f1 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -267,8 +267,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
}
read_unlock(&tfile->lock);
- ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref),
- false, false);
+ ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref), false);
if (unlikely(ret != 0))
return ret;
ref = kmalloc(sizeof(*ref), GFP_KERNEL);
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 727e93daac3b04..ada2cad9b2b798 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -619,8 +619,10 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool *pool,
* @return count of pages still required to fulfill the request.
*/
static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool,
- struct list_head *pages, int ttm_flags,
- enum ttm_caching_state cstate, unsigned count)
+ struct list_head *pages,
+ int ttm_flags,
+ enum ttm_caching_state cstate,
+ unsigned count)
{
unsigned long irq_flags;
struct list_head *p;
@@ -664,13 +666,14 @@ out:
* On success pages list will hold count number of correctly
* cached pages.
*/
-int ttm_get_pages(struct list_head *pages, int flags,
- enum ttm_caching_state cstate, unsigned count,
- dma_addr_t *dma_address)
+int ttm_get_pages(struct page **pages, unsigned npages, int flags,
+ enum ttm_caching_state cstate, dma_addr_t *dma_address)
{
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
struct page *p = NULL;
+ struct list_head plist;
gfp_t gfp_flags = GFP_USER;
+ unsigned count = 0;
int r;
/* set zero flag for page allocation if required */
@@ -684,94 +687,107 @@ int ttm_get_pages(struct list_head *pages, int flags,
else
gfp_flags |= GFP_HIGHUSER;
- for (r = 0; r < count; ++r) {
- p = alloc_page(gfp_flags);
- if (!p) {
-
+ for (count = 0; count < npages; ++count) {
+ pages[count] = alloc_page(gfp_flags);
+ if (pages[count] == NULL) {
printk(KERN_ERR TTM_PFX
"Unable to allocate page.");
return -ENOMEM;
}
-
- list_add(&p->lru, pages);
}
return 0;
}
-
/* combine zero flag to pool flags */
gfp_flags |= pool->gfp_flags;
/* First we take pages from the pool */
- count = ttm_page_pool_get_pages(pool, pages, flags, cstate, count);
+ INIT_LIST_HEAD(&plist);
+ npages = ttm_page_pool_get_pages(pool, &plist, flags, cstate, npages);
/* clear the pages coming from the pool if requested */
+ count = 0;
+ list_for_each_entry(p, &plist, lru) {
+ pages[count++] = p;
+ }
if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
- list_for_each_entry(p, pages, lru) {
+ list_for_each_entry(p, &plist, lru) {
clear_page(page_address(p));
}
}
/* If pool didn't have enough pages allocate new one. */
- if (count > 0) {
+ if (npages > 0) {
/* ttm_alloc_new_pages doesn't reference pool so we can run
* multiple requests in parallel.
**/
- r = ttm_alloc_new_pages(pages, gfp_flags, flags, cstate, count);
+ INIT_LIST_HEAD(&plist);
+ r = ttm_alloc_new_pages(&plist, gfp_flags, flags, cstate, npages);
+ list_for_each_entry(p, &plist, lru) {
+ pages[count++] = p;
+ }
if (r) {
/* If there is any pages in the list put them back to
* the pool. */
printk(KERN_ERR TTM_PFX
"Failed to allocate extra pages "
"for large request.");
- ttm_put_pages(pages, 0, flags, cstate, NULL);
+ ttm_put_pages(pages, count, flags, cstate, NULL);
return r;
}
}
-
return 0;
}
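The allocator now works on a caller-supplied page-pointer array rather than a list. A minimal calling sketch against the new signatures (the count and flags are illustrative):

    struct page *pages[16] = { NULL };
    dma_addr_t dma_address[16];
    int r;

    r = ttm_get_pages(pages, 16, TTM_PAGE_FLAG_ZERO_ALLOC,
                      tt_cached, dma_address);
    if (r == 0)
            ttm_put_pages(pages, 16, TTM_PAGE_FLAG_ZERO_ALLOC,
                          tt_cached, dma_address);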
/* Put all pages in pages list to correct pool to wait for reuse */
-void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
+void ttm_put_pages(struct page **pages, unsigned npages, int flags,
enum ttm_caching_state cstate, dma_addr_t *dma_address)
{
unsigned long irq_flags;
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
- struct page *p, *tmp;
+ unsigned i;
if (pool == NULL) {
/* No pool for this memory type so free the pages */
- list_for_each_entry_safe(p, tmp, pages, lru) {
- __free_page(p);
+ for (i = 0; i < npages; i++) {
+ if (pages[i]) {
+ if (page_count(pages[i]) != 1)
+ printk(KERN_ERR TTM_PFX
+ "Erroneous page count. "
+ "Leaking pages.\n");
+ __free_page(pages[i]);
+ pages[i] = NULL;
+ }
}
- /* Make the pages list empty */
- INIT_LIST_HEAD(pages);
return;
}
- if (page_count == 0) {
- list_for_each_entry_safe(p, tmp, pages, lru) {
- ++page_count;
- }
- }
spin_lock_irqsave(&pool->lock, irq_flags);
- list_splice_init(pages, &pool->list);
- pool->npages += page_count;
+ for (i = 0; i < npages; i++) {
+ if (pages[i]) {
+ if (page_count(pages[i]) != 1)
+ printk(KERN_ERR TTM_PFX
+ "Erroneous page count. "
+ "Leaking pages.\n");
+ list_add_tail(&pages[i]->lru, &pool->list);
+ pages[i] = NULL;
+ pool->npages++;
+ }
+ }
/* Check that we don't go over the pool limit */
- page_count = 0;
+ npages = 0;
if (pool->npages > _manager->options.max_size) {
- page_count = pool->npages - _manager->options.max_size;
+ npages = pool->npages - _manager->options.max_size;
/* free at least NUM_PAGES_TO_ALLOC number of pages
* to reduce calls to set_memory_wb */
- if (page_count < NUM_PAGES_TO_ALLOC)
- page_count = NUM_PAGES_TO_ALLOC;
+ if (npages < NUM_PAGES_TO_ALLOC)
+ npages = NUM_PAGES_TO_ALLOC;
}
spin_unlock_irqrestore(&pool->lock, irq_flags);
- if (page_count)
- ttm_page_pool_free(pool, page_count);
+ if (npages)
+ ttm_page_pool_free(pool, npages);
}
static void ttm_page_pool_init_locked(struct ttm_page_pool *pool, int flags,
@@ -836,6 +852,51 @@ void ttm_page_alloc_fini(void)
_manager = NULL;
}
+int ttm_page_alloc_ttm_tt_populate(struct ttm_tt *ttm)
+{
+ struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
+ int ret;
+
+ if (ttm->state != tt_unpopulated)
+ return 0;
+
+ ret = ttm_mem_global_alloc_pages(mem_glob, ttm->num_pages, false);
+ if (unlikely(ret != 0))
+ return -ENOMEM;
+
+ ret = ttm_get_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
+ ttm->caching_state, ttm->dma_address);
+ if (ret != 0) {
+ ttm_mem_global_free(mem_glob, ttm->num_pages * PAGE_SIZE);
+ return -ENOMEM;
+ }
+
+ ttm_mem_global_account_pages(mem_glob, ttm->pages, ttm->num_pages);
+
+ if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
+ ret = ttm_tt_swapin(ttm);
+ if (unlikely(ret != 0)) {
+ ttm_page_alloc_ttm_tt_unpopulate(ttm);
+ return ret;
+ }
+ }
+
+ ttm->state = tt_unbound;
+ return 0;
+}
+EXPORT_SYMBOL(ttm_page_alloc_ttm_tt_populate);
+
+void ttm_page_alloc_ttm_tt_unpopulate(struct ttm_tt *ttm)
+{
+ struct ttm_mem_global *glob = ttm->glob->mem_glob;
+
+ ttm_mem_global_free_pages(glob, ttm->pages, ttm->num_pages);
+ ttm_put_pages(ttm->pages, ttm->num_pages, ttm->page_flags,
+ ttm->caching_state, ttm->dma_address);
+ ttm->state = tt_unpopulated;
+}
+EXPORT_SYMBOL(ttm_page_alloc_ttm_tt_unpopulate);
+
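Drivers that stay on the plain page allocator can plug these helpers straight into the new ttm_bo_driver callbacks introduced by this series; a hedged sketch (the driver name is hypothetical, and the field names follow the ttm_tt_unpopulate call seen in ttm_tt.c below):

    static struct ttm_bo_driver foo_bo_driver = {
            /* ... other callbacks ... */
            .ttm_tt_populate   = ttm_page_alloc_ttm_tt_populate,
            .ttm_tt_unpopulate = ttm_page_alloc_ttm_tt_unpopulate,
    };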
int ttm_page_alloc_debugfs(struct seq_file *m, void *data)
{
struct ttm_page_pool *p;
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
new file mode 100644
index 00000000000000..0850836e669bef
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -0,0 +1,1235 @@
+/*
+ * Copyright 2011 (c) Oracle Corp.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ */
+
+/*
+ * A simple DMA pool loosely based on dmapool.c. It has certain advantages
+ * over the DMA pools:
+ * - Pool collects recently freed pages for reuse (and hooks up to
+ * the shrinker).
+ * - Tracks currently in use pages
+ * - Tracks whether the page is UC, WB or cached (and reverts to WB
+ * when freed).
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/list.h>
+#include <linux/seq_file.h> /* for seq_printf */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/highmem.h>
+#include <linux/mm_types.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <linux/device.h>
+#include <linux/kthread.h>
+#include "ttm/ttm_bo_driver.h"
+#include "ttm/ttm_page_alloc.h"
+#ifdef TTM_HAS_AGP
+#include <asm/agp.h>
+#endif
+
+#define NUM_PAGES_TO_ALLOC (PAGE_SIZE/sizeof(struct page *))
+#define SMALL_ALLOCATION 16
+#define FREE_ALL_PAGES (~0U)
+/* times are in msecs */
+#define IS_UNDEFINED (0)
+#define IS_WC (1<<1)
+#define IS_UC (1<<2)
+#define IS_CACHED (1<<3)
+#define IS_DMA32 (1<<4)
+
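On a 64-bit build with 4 KiB pages, NUM_PAGES_TO_ALLOC comes out to 4096 / 8 = 512 pages (2 MiB) per batch - the largest page-pointer scratch array that still fits in a single page.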
+enum pool_type {
+ POOL_IS_UNDEFINED,
+ POOL_IS_WC = IS_WC,
+ POOL_IS_UC = IS_UC,
+ POOL_IS_CACHED = IS_CACHED,
+ POOL_IS_WC_DMA32 = IS_WC | IS_DMA32,
+ POOL_IS_UC_DMA32 = IS_UC | IS_DMA32,
+ POOL_IS_CACHED_DMA32 = IS_CACHED | IS_DMA32,
+};
+/*
+ * The pool structure. There are usually six pools:
+ * - generic (not restricted to DMA32):
+ * - write combined, uncached, cached.
+ * - dma32 (up to 2^32 - so up to 4GB):
+ * - write combined, uncached, cached.
+ * for each 'struct device'. The 'cached' is for pages that are actively used.
+ * The other ones can be shrunk by the shrinker API if necessary.
+ * @pools: The 'struct device->dma_pools' link.
+ * @type: Type of the pool
+ * @lock: Protects the inuse_list and free_list from concurrent access. Must be
+ * used with irqsave/irqrestore variants because the pool allocator may be
+ * called from delayed work.
+ * @inuse_list: Pool of pages that are in use. The order is very important and
+ * it matches the order in which the TTM pages are put back.
+ * @free_list: Pool of pages that are free to be used. No order requirements.
+ * @dev: The device that is associated with these pools.
+ * @size: Size used during DMA allocation.
+ * @npages_free: Count of available pages for re-use.
+ * @npages_in_use: Count of pages that are in use.
+ * @nfrees: Stats when pool is shrinking.
+ * @nrefills: Stats when the pool is grown.
+ * @gfp_flags: Flags to pass for alloc_page.
+ * @fill_lock: Allows only one pool fill operation at time.
+ * @name: Name of the pool.
+ * @dev_name: Name derived from dev - similar to how dev_info works.
+ * Used during shutdown as the dev_info during release is unavailable.
+ */
+struct dma_pool {
+ struct list_head pools; /* The 'struct device->dma_pools link */
+ enum pool_type type;
+ spinlock_t lock;
+ struct list_head inuse_list;
+ struct list_head free_list;
+ struct device *dev;
+ unsigned size;
+ unsigned npages_free;
+ unsigned npages_in_use;
+ unsigned long nfrees; /* Stats when shrunk. */
+ unsigned long nrefills; /* Stats when grown. */
+ gfp_t gfp_flags;
+ bool fill_lock;
+ char name[13]; /* "cached dma32" */
+ char dev_name[64]; /* Constructed from dev */
+};
+
+/*
+ * The accounting page keeping track of the allocated page along with
+ * the DMA address.
+ * @page_list: The link to the 'page_list' in 'struct dma_pool'.
+ * @vaddr: The virtual address of the page
+ * @dma: The bus address of the page. If the page is not allocated
+ * via the DMA API, it will be -1.
+ */
+struct dma_page {
+ struct list_head page_list;
+ void *vaddr;
+ struct page *p;
+ dma_addr_t dma;
+};
+
+/*
+ * Limits for the pool. They are handled without locks because the only place
+ * where they may change is the sysfs store. They won't have immediate effect anyway
+ * so forcing serialization to access them is pointless.
+ */
+
+struct ttm_pool_opts {
+ unsigned alloc_size;
+ unsigned max_size;
+ unsigned small;
+};
+
+/*
+ * Contains the list of all of the 'struct device' and their corresponding
+ * DMA pools. Guarded by _manager->lock.
+ * @pools: The link to 'struct ttm_pool_manager->pools'
+ * @dev: The 'struct device' associated with the 'pool'
+ * @pool: The 'struct dma_pool' associated with the 'dev'
+ */
+struct device_pools {
+ struct list_head pools;
+ struct device *dev;
+ struct dma_pool *pool;
+};
+
+/*
+ * struct ttm_pool_manager - Holds memory pools for fast allocation
+ *
+ * @lock: Lock used when adding/removing from pools
+ * @pools: List of 'struct device' and 'struct dma_pool' tuples.
+ * @options: Limits for the pool.
+ * @npools: Total amount of pools in existence.
+ * @shrinker: The structure used by [un|]register_shrinker
+ */
+struct ttm_pool_manager {
+ struct mutex lock;
+ struct list_head pools;
+ struct ttm_pool_opts options;
+ unsigned npools;
+ struct shrinker mm_shrink;
+ struct kobject kobj;
+};
+
+static struct ttm_pool_manager *_manager;
+
+static struct attribute ttm_page_pool_max = {
+ .name = "pool_max_size",
+ .mode = S_IRUGO | S_IWUSR
+};
+static struct attribute ttm_page_pool_small = {
+ .name = "pool_small_allocation",
+ .mode = S_IRUGO | S_IWUSR
+};
+static struct attribute ttm_page_pool_alloc_size = {
+ .name = "pool_allocation_size",
+ .mode = S_IRUGO | S_IWUSR
+};
+
+static struct attribute *ttm_pool_attrs[] = {
+ &ttm_page_pool_max,
+ &ttm_page_pool_small,
+ &ttm_page_pool_alloc_size,
+ NULL
+};
+
+static void ttm_pool_kobj_release(struct kobject *kobj)
+{
+ struct ttm_pool_manager *m =
+ container_of(kobj, struct ttm_pool_manager, kobj);
+ kfree(m);
+}
+
+static ssize_t ttm_pool_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t size)
+{
+ struct ttm_pool_manager *m =
+ container_of(kobj, struct ttm_pool_manager, kobj);
+ int chars;
+ unsigned val;
+ chars = sscanf(buffer, "%u", &val);
+ if (chars == 0)
+ return size;
+
+ /* Convert kb to number of pages */
+ val = val / (PAGE_SIZE >> 10);
+
+ if (attr == &ttm_page_pool_max)
+ m->options.max_size = val;
+ else if (attr == &ttm_page_pool_small)
+ m->options.small = val;
+ else if (attr == &ttm_page_pool_alloc_size) {
+ if (val > NUM_PAGES_TO_ALLOC*8) {
+ printk(KERN_ERR TTM_PFX
+ "Setting allocation size to %lu "
+ "is not allowed. Recommended size is "
+ "%lu\n",
+ NUM_PAGES_TO_ALLOC*(PAGE_SIZE >> 7),
+ NUM_PAGES_TO_ALLOC*(PAGE_SIZE >> 10));
+ return size;
+ } else if (val > NUM_PAGES_TO_ALLOC) {
+ printk(KERN_WARNING TTM_PFX
+ "Setting allocation size to "
+ "larger than %lu is not recommended.\n",
+ NUM_PAGES_TO_ALLOC*(PAGE_SIZE >> 10));
+ }
+ m->options.alloc_size = val;
+ }
+
+ return size;
+}
+
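The store path interprets the written value as KiB and converts it to pages: with 4 KiB pages, writing 8192 to pool_max_size yields 8192 / 4 = 2048 pages, and the show path converts back with the same factor.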
+static ssize_t ttm_pool_show(struct kobject *kobj, struct attribute *attr,
+ char *buffer)
+{
+ struct ttm_pool_manager *m =
+ container_of(kobj, struct ttm_pool_manager, kobj);
+ unsigned val = 0;
+
+ if (attr == &ttm_page_pool_max)
+ val = m->options.max_size;
+ else if (attr == &ttm_page_pool_small)
+ val = m->options.small;
+ else if (attr == &ttm_page_pool_alloc_size)
+ val = m->options.alloc_size;
+
+ val = val * (PAGE_SIZE >> 10);
+
+ return snprintf(buffer, PAGE_SIZE, "%u\n", val);
+}
+
+static const struct sysfs_ops ttm_pool_sysfs_ops = {
+ .show = &ttm_pool_show,
+ .store = &ttm_pool_store,
+};
+
+static struct kobj_type ttm_pool_kobj_type = {
+ .release = &ttm_pool_kobj_release,
+ .sysfs_ops = &ttm_pool_sysfs_ops,
+ .default_attrs = ttm_pool_attrs,
+};
+
+#ifndef CONFIG_X86
+static int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+ int i;
+
+ for (i = 0; i < addrinarray; i++)
+ unmap_page_from_agp(pages[i]);
+#endif
+ return 0;
+}
+
+static int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+ int i;
+
+ for (i = 0; i < addrinarray; i++)
+ map_page_into_agp(pages[i]);
+#endif
+ return 0;
+}
+
+static int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+ int i;
+
+ for (i = 0; i < addrinarray; i++)
+ map_page_into_agp(pages[i]);
+#endif
+ return 0;
+}
+#endif /* for !CONFIG_X86 */
+
+static int ttm_set_pages_caching(struct dma_pool *pool,
+ struct page **pages, unsigned cpages)
+{
+ int r = 0;
+ /* Set page caching */
+ if (pool->type & IS_UC) {
+ r = set_pages_array_uc(pages, cpages);
+ if (r)
+ pr_err(TTM_PFX
+ "%s: Failed to set %d pages to uc!\n",
+ pool->dev_name, cpages);
+ }
+ if (pool->type & IS_WC) {
+ r = set_pages_array_wc(pages, cpages);
+ if (r)
+ pr_err(TTM_PFX
+ "%s: Failed to set %d pages to wc!\n",
+ pool->dev_name, cpages);
+ }
+ return r;
+}
+
+static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
+{
+ dma_addr_t dma = d_page->dma;
+ dma_free_coherent(pool->dev, pool->size, d_page->vaddr, dma);
+
+ kfree(d_page);
+ d_page = NULL;
+}
+static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
+{
+ struct dma_page *d_page;
+
+ d_page = kmalloc(sizeof(struct dma_page), GFP_KERNEL);
+ if (!d_page)
+ return NULL;
+
+ d_page->vaddr = dma_alloc_coherent(pool->dev, pool->size,
+ &d_page->dma,
+ pool->gfp_flags);
+ if (d_page->vaddr)
+ d_page->p = virt_to_page(d_page->vaddr);
+ else {
+ kfree(d_page);
+ d_page = NULL;
+ }
+ return d_page;
+}
+static enum pool_type ttm_to_type(int flags, enum ttm_caching_state cstate)
+{
+ enum pool_type type = IS_UNDEFINED;
+
+ if (flags & TTM_PAGE_FLAG_DMA32)
+ type |= IS_DMA32;
+ if (cstate == tt_cached)
+ type |= IS_CACHED;
+ else if (cstate == tt_uncached)
+ type |= IS_UC;
+ else
+ type |= IS_WC;
+
+ return type;
+}
+
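A pool's identity is just the OR of the caching and DMA32 bits; an illustrative use of the mapping above:

    /* A DMA32-restricted, uncached TTM selects the "uc dma32" pool: */
    enum pool_type t = ttm_to_type(TTM_PAGE_FLAG_DMA32, tt_uncached);
    /* t == (IS_UC | IS_DMA32) == POOL_IS_UC_DMA32 */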
+static void ttm_pool_update_free_locked(struct dma_pool *pool,
+ unsigned freed_pages)
+{
+ pool->npages_free -= freed_pages;
+ pool->nfrees += freed_pages;
+
+}
+
+/* set memory back to wb and free the pages. */
+static void ttm_dma_pages_put(struct dma_pool *pool, struct list_head *d_pages,
+ struct page *pages[], unsigned npages)
+{
+ struct dma_page *d_page, *tmp;
+
+ if (npages && set_pages_array_wb(pages, npages))
+ pr_err(TTM_PFX "%s: Failed to set %d pages to wb!\n",
+ pool->dev_name, npages);
+
+ if (npages > 1) {
+ pr_debug("%s: (%s:%d) Freeing %d pages at once (lockless).\n",
+ pool->dev_name, pool->name, current->pid, npages);
+ }
+
+ list_for_each_entry_safe(d_page, tmp, d_pages, page_list) {
+ list_del(&d_page->page_list);
+ __ttm_dma_free_page(pool, d_page);
+ }
+}
+
+/*
+ * Free pages from pool.
+ *
+ * To prevent hogging the ttm_swap process we only free NUM_PAGES_TO_ALLOC
+ * number of pages in one go.
+ *
+ * @pool: to free the pages from
+ * @nr_free: Number of pages to free; FREE_ALL_PAGES frees every page in the pool
+ **/
+static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free)
+{
+ unsigned long irq_flags;
+ struct dma_page *dma_p, *tmp;
+ struct page **pages_to_free;
+ struct list_head d_pages;
+ unsigned freed_pages = 0,
+ npages_to_free = nr_free;
+
+ if (NUM_PAGES_TO_ALLOC < nr_free)
+ npages_to_free = NUM_PAGES_TO_ALLOC;
+#if 0
+ if (nr_free > 1) {
+ pr_debug("%s: (%s:%d) Attempting to free %d (%d) pages\n",
+ pool->dev_name, pool->name, current->pid,
+ npages_to_free, nr_free);
+ }
+#endif
+ pages_to_free = kmalloc(npages_to_free * sizeof(struct page *),
+ GFP_KERNEL);
+
+ if (!pages_to_free) {
+ pr_err(TTM_PFX
+ "%s: Failed to allocate memory for pool free operation.\n",
+ pool->dev_name);
+ return 0;
+ }
+ INIT_LIST_HEAD(&d_pages);
+restart:
+ spin_lock_irqsave(&pool->lock, irq_flags);
+
+ /* We pick the oldest ones off the list */
+ list_for_each_entry_safe_reverse(dma_p, tmp, &pool->free_list,
+ page_list) {
+ if (freed_pages >= npages_to_free)
+ break;
+
+ /* Move the dma_page from one list to another. */
+ list_move(&dma_p->page_list, &d_pages);
+
+ pages_to_free[freed_pages++] = dma_p->p;
+ /* We can only remove NUM_PAGES_TO_ALLOC at a time. */
+ if (freed_pages >= NUM_PAGES_TO_ALLOC) {
+
+ ttm_pool_update_free_locked(pool, freed_pages);
+ /**
+ * Because changing page caching is costly
+ * we unlock the pool to prevent stalling.
+ */
+ spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+ ttm_dma_pages_put(pool, &d_pages, pages_to_free,
+ freed_pages);
+
+ INIT_LIST_HEAD(&d_pages);
+
+ if (likely(nr_free != FREE_ALL_PAGES))
+ nr_free -= freed_pages;
+
+ if (NUM_PAGES_TO_ALLOC >= nr_free)
+ npages_to_free = nr_free;
+ else
+ npages_to_free = NUM_PAGES_TO_ALLOC;
+
+ freed_pages = 0;
+
+ /* if there are still pages to free, restart the processing */
+ if (nr_free)
+ goto restart;
+
+ /* Not allowed to fall through or break because the
+ * following context is inside the spinlock while we
+ * are outside of it here.
+ */
+ goto out;
+
+ }
+ }
+
+ /* remove range of pages from the pool */
+ if (freed_pages) {
+ ttm_pool_update_free_locked(pool, freed_pages);
+ nr_free -= freed_pages;
+ }
+
+ spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+ if (freed_pages)
+ ttm_dma_pages_put(pool, &d_pages, pages_to_free, freed_pages);
+out:
+ kfree(pages_to_free);
+ return nr_free;
+}
+
+static void ttm_dma_free_pool(struct device *dev, enum pool_type type)
+{
+ struct device_pools *p;
+ struct dma_pool *pool;
+
+ if (!dev)
+ return;
+
+ mutex_lock(&_manager->lock);
+ list_for_each_entry_reverse(p, &_manager->pools, pools) {
+ if (p->dev != dev)
+ continue;
+ pool = p->pool;
+ if (pool->type != type)
+ continue;
+
+ list_del(&p->pools);
+ kfree(p);
+ _manager->npools--;
+ break;
+ }
+ list_for_each_entry_reverse(pool, &dev->dma_pools, pools) {
+ if (pool->type != type)
+ continue;
+ /* Takes a spinlock.. */
+ ttm_dma_page_pool_free(pool, FREE_ALL_PAGES);
+ WARN_ON(((pool->npages_in_use + pool->npages_free) != 0));
+ /* This code path is called after _all_ references to the
+ * struct device have been dropped - so nobody should be
+ * touching it. In case somebody is trying to _add_ we are
+ * guarded by the mutex. */
+ list_del(&pool->pools);
+ kfree(pool);
+ break;
+ }
+ mutex_unlock(&_manager->lock);
+}
+
+/*
+ * When the 'struct device' is freed this destructor is run,
+ * though the pool might have already been freed earlier.
+ */
+static void ttm_dma_pool_release(struct device *dev, void *res)
+{
+ struct dma_pool *pool = *(struct dma_pool **)res;
+
+ if (pool)
+ ttm_dma_free_pool(dev, pool->type);
+}
+
+static int ttm_dma_pool_match(struct device *dev, void *res, void *match_data)
+{
+ return *(struct dma_pool **)res == match_data;
+}
+
+static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
+ enum pool_type type)
+{
+ char *n[] = {"wc", "uc", "cached", " dma32", "unknown",};
+ enum pool_type t[] = {IS_WC, IS_UC, IS_CACHED, IS_DMA32, IS_UNDEFINED};
+ struct device_pools *sec_pool = NULL;
+ struct dma_pool *pool = NULL, **ptr;
+ unsigned i;
+ int ret = -ENODEV;
+ char *p;
+
+ if (!dev)
+ return NULL;
+
+ ptr = devres_alloc(ttm_dma_pool_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ ret = -ENOMEM;
+
+ pool = kmalloc_node(sizeof(struct dma_pool), GFP_KERNEL,
+ dev_to_node(dev));
+ if (!pool)
+ goto err_mem;
+
+ sec_pool = kmalloc_node(sizeof(struct device_pools), GFP_KERNEL,
+ dev_to_node(dev));
+ if (!sec_pool)
+ goto err_mem;
+
+ INIT_LIST_HEAD(&sec_pool->pools);
+ sec_pool->dev = dev;
+ sec_pool->pool = pool;
+
+ INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->inuse_list);
+ INIT_LIST_HEAD(&pool->pools);
+ spin_lock_init(&pool->lock);
+ pool->dev = dev;
+ pool->npages_free = pool->npages_in_use = 0;
+ pool->nfrees = 0;
+ pool->gfp_flags = flags;
+ pool->size = PAGE_SIZE;
+ pool->type = type;
+ pool->nrefills = 0;
+ pool->fill_lock = false;
+ p = pool->name;
+ for (i = 0; i < 5; i++) {
+ if (type & t[i]) {
+ p += snprintf(p, sizeof(pool->name) - (p - pool->name),
+ "%s", n[i]);
+ }
+ }
+ *p = 0;
+ /* We copy the name for pr_ calls because by the time dma_pool_destroy
+ * is called the kobj->name has already been deallocated. */
+ snprintf(pool->dev_name, sizeof(pool->dev_name), "%s %s",
+ dev_driver_string(dev), dev_name(dev));
+ mutex_lock(&_manager->lock);
+ /* You can get the dma_pool from either the global: */
+ list_add(&sec_pool->pools, &_manager->pools);
+ _manager->npools++;
+ /* or from 'struct device': */
+ list_add(&pool->pools, &dev->dma_pools);
+ mutex_unlock(&_manager->lock);
+
+ *ptr = pool;
+ devres_add(dev, ptr);
+
+ return pool;
+err_mem:
+ devres_free(ptr);
+ kfree(sec_pool);
+ kfree(pool);
+ return ERR_PTR(ret);
+}
+
+static struct dma_pool *ttm_dma_find_pool(struct device *dev,
+ enum pool_type type)
+{
+ struct dma_pool *pool, *tmp, *found = NULL;
+
+ if (type == IS_UNDEFINED)
+ return found;
+
+ /* NB: We iterate on the 'struct device' which has no spinlock, but
+ * it does have a kref which we have taken. The kref is taken during
+ * graphic driver loading - in the drm_pci_init it calls either
+ * pci_dev_get or pci_register_driver which both end up taking a kref
+ * on 'struct device'.
+ *
+ * On teardown, the graphic drivers end up quiescing the TTM (put_pages)
+ * and calls the dev_res destructors: ttm_dma_pool_release. The nice
+ * thing is that at that point in time there are no pages associated with
+ * the driver, so this function will not be called.
+ */
+ list_for_each_entry_safe(pool, tmp, &dev->dma_pools, pools) {
+ if (pool->type != type)
+ continue;
+ found = pool;
+ break;
+ }
+ return found;
+}
+
+/*
+ * Free the pages that failed to change their caching state. Pages that
+ * did change their caching state are put back in the pool.
+ */
+static void ttm_dma_handle_caching_state_failure(struct dma_pool *pool,
+ struct list_head *d_pages,
+ struct page **failed_pages,
+ unsigned cpages)
+{
+ struct dma_page *d_page, *tmp;
+ struct page *p;
+ unsigned i = 0;
+
+ p = failed_pages[0];
+ if (!p)
+ return;
+ /* Find the failed page. */
+ list_for_each_entry_safe(d_page, tmp, d_pages, page_list) {
+ if (d_page->p != p)
+ continue;
+ /* .. and then progress over the full list. */
+ list_del(&d_page->page_list);
+ __ttm_dma_free_page(pool, d_page);
+ if (++i < cpages)
+ p = failed_pages[i];
+ else
+ break;
+ }
+
+}
+
+/*
+ * Allocate 'count' pages and hang each of them, via its 'struct dma_page',
+ * off the 'd_pages' list.
+ * We return zero for success, and negative numbers as errors.
+ */
+static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
+ struct list_head *d_pages,
+ unsigned count)
+{
+ struct page **caching_array;
+ struct dma_page *dma_p;
+ struct page *p;
+ int r = 0;
+ unsigned i, cpages;
+ unsigned max_cpages = min(count,
+ (unsigned)(PAGE_SIZE/sizeof(struct page *)));
+
+ /* allocate array for page caching change */
+ caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
+
+ if (!caching_array) {
+ pr_err(TTM_PFX
+ "%s: Unable to allocate table for new pages.",
+ pool->dev_name);
+ return -ENOMEM;
+ }
+
+ if (count > 1) {
+ pr_debug("%s: (%s:%d) Getting %d pages\n",
+ pool->dev_name, pool->name, current->pid,
+ count);
+ }
+
+ for (i = 0, cpages = 0; i < count; ++i) {
+ dma_p = __ttm_dma_alloc_page(pool);
+ if (!dma_p) {
+ pr_err(TTM_PFX "%s: Unable to get page %u.\n",
+ pool->dev_name, i);
+
+ /* store already allocated pages in the pool after
+ * setting the caching state */
+ if (cpages) {
+ r = ttm_set_pages_caching(pool, caching_array,
+ cpages);
+ if (r)
+ ttm_dma_handle_caching_state_failure(
+ pool, d_pages, caching_array,
+ cpages);
+ }
+ r = -ENOMEM;
+ goto out;
+ }
+ p = dma_p->p;
+#ifdef CONFIG_HIGHMEM
+ /* gfp flags of a highmem page should never include dma32, so
+ * we should be fine in that case
+ */
+ if (!PageHighMem(p))
+#endif
+ {
+ caching_array[cpages++] = p;
+ if (cpages == max_cpages) {
+ /* Note: Cannot hold the spinlock */
+ r = ttm_set_pages_caching(pool, caching_array,
+ cpages);
+ if (r) {
+ ttm_dma_handle_caching_state_failure(
+ pool, d_pages, caching_array,
+ cpages);
+ goto out;
+ }
+ cpages = 0;
+ }
+ }
+ list_add(&dma_p->page_list, d_pages);
+ }
+
+ if (cpages) {
+ r = ttm_set_pages_caching(pool, caching_array, cpages);
+ if (r)
+ ttm_dma_handle_caching_state_failure(pool, d_pages,
+ caching_array, cpages);
+ }
+out:
+ kfree(caching_array);
+ return r;
+}
+
+static int ttm_dma_check_alloc_list(struct ttm_tt *ttm)
+{
+ struct dma_page *d_page;
+ unsigned i = 0;
+
+ list_for_each_entry(d_page, &ttm->alloc_list, page_list) {
+ if (d_page->p != ttm->pages[i++]) {
+ pr_err(TTM_PFX "dma page list & array mismatch %d", i);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/*
+ * @return count of pages still required to fulfill the request.
+ */
+static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool,
+ unsigned count,
+ unsigned long *irq_flags)
+{
+ int r = count;
+
+ if (pool->fill_lock)
+ return r;
+
+ pool->fill_lock = true;
+ if (count < _manager->options.small &&
+ count > pool->npages_free) {
+ struct list_head d_pages;
+ unsigned alloc_size = _manager->options.alloc_size;
+
+ INIT_LIST_HEAD(&d_pages);
+
+ spin_unlock_irqrestore(&pool->lock, *irq_flags);
+
+ /* Returns how many more are necessary to fulfill the
+ * request. */
+ r = ttm_dma_pool_alloc_new_pages(pool, &d_pages, alloc_size);
+
+ spin_lock_irqsave(&pool->lock, *irq_flags);
+ if (!r) {
+ /* Add the fresh to the end.. */
+ list_splice(&d_pages, &pool->free_list);
+ ++pool->nrefills;
+ pool->npages_free += alloc_size;
+ } else {
+ struct dma_page *d_page;
+ unsigned cpages = 0;
+
+ pr_err(TTM_PFX "%s: Failed to fill %s pool (r:%d)!\n",
+ pool->dev_name, pool->name, r);
+
+ list_for_each_entry(d_page, &d_pages, page_list) {
+ cpages++;
+ }
+ list_splice_tail(&d_pages, &pool->free_list);
+ pool->npages_free += cpages;
+ }
+ }
+ pool->fill_lock = false;
+ return r;
+
+}
+
+/*
+ * @return count of pages still required to fulfill the request.
+ * The populate list is actually a stack (not that it matters, as TTM
+ * allocates one page at a time).
+ */
+static int ttm_dma_pool_get_pages(struct dma_pool *pool,
+ struct ttm_tt *ttm)
+{
+ struct dma_page *d_page, *tmp;
+ unsigned long irq_flags;
+ unsigned i, count = ttm->num_pages;
+ int r;
+
+ spin_lock_irqsave(&pool->lock, irq_flags);
+ r = ttm_dma_page_pool_fill_locked(pool, count, &irq_flags);
+ if (r < 0) {
+ pr_debug("%s: (%s:%d) Asked for %d, got %d %s.\n",
+ pool->dev_name, pool->name, current->pid, count, r,
+ (r < 0) ? "err:" : "pages");
+ goto out;
+ }
+ if (!pool->npages_free)
+ goto out;
+ if (count > 1) {
+ pr_debug("%s: (%s:%d) Looking in free list for %d pages. "\
+ "(have %d pages free)\n",
+ pool->dev_name, pool->name, current->pid, count,
+ pool->npages_free);
+ }
+ i = 0;
+ /* We are holding the spinlock.. */
+ /* Note: 'pages' (and the inuse_list) is expected to be a stack,
+ * so we put the entries in the right order (and on the inuse list
+ * in the reverse order to compensate for freeing - which inverts the
+ * 'pages' order).
+ */
+ list_for_each_entry_safe(d_page, tmp, &pool->free_list, page_list) {
+ ttm->pages[i] = d_page->p;
+ ttm->dma_address[i] = d_page->dma;
+ i++;
+ list_move_tail(&d_page->page_list, &ttm->alloc_list);
+ if (i == count)
+ break;
+ }
+ count -= i;
+ pool->npages_in_use += i;
+ pool->npages_free -= i;
+out:
+ spin_unlock_irqrestore(&pool->lock, irq_flags);
+ if (count)
+ pr_debug("%s: (%s:%d) Need %d more.\n",
+ pool->dev_name, pool->name, current->pid, count);
+ return count;
+}
+
+/*
+ * On success pages list will hold count number of correctly
+ * cached pages. On failure will hold the negative return value (-ENOMEM, etc).
+ */
+int ttm_dma_populate(struct ttm_tt *ttm, struct device *dev)
+{
+ struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
+ struct dma_pool *pool;
+ gfp_t gfp_flags;
+ enum pool_type type;
+ unsigned i;
+ int r;
+
+ r = ttm_mem_global_alloc_pages(mem_glob, ttm->num_pages, false);
+ if (r)
+ return -ENOMEM;
+
+ type = ttm_to_type(ttm->page_flags, ttm->caching_state);
+
+ if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
+ gfp_flags = GFP_USER | GFP_DMA32;
+ else
+ gfp_flags = GFP_HIGHUSER;
+
+ if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+ gfp_flags |= __GFP_ZERO;
+
+ pool = ttm_dma_find_pool(dev, type);
+ if (!pool) {
+ pool = ttm_dma_pool_init(dev, gfp_flags, type);
+ if (IS_ERR_OR_NULL(pool)) {
+ ttm_mem_global_free(mem_glob, ttm->num_pages * PAGE_SIZE);
+ return -ENOMEM;
+ }
+ }
+
+ /* ?? we might want to check whether the pages are already
+ * allocated; for the moment just trust the caller
+ */
+ INIT_LIST_HEAD(&ttm->alloc_list);
+ /* Take pages out of a pool (if applicable) */
+ r = ttm_dma_pool_get_pages(pool, ttm);
+ /* clear the pages coming from the pool if requested */
+ if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
+ for (i = 0; i < (ttm->num_pages - r); i++) {
+ clear_page(page_address(ttm->pages[i]));
+ }
+ }
+ /* If pool didn't have enough pages allocate new one. */
+ if (r > 0) {
+ struct list_head d_pages;
+ unsigned long irq_flags;
+ unsigned pages_need = r;
+
+ INIT_LIST_HEAD(&d_pages);
+ i = ttm->num_pages - pages_need;
+
+ /* Note: we are running without locking here,
+ * and we have to manually add the stack to the inuse pool. */
+ r = ttm_dma_pool_alloc_new_pages(pool, &d_pages, pages_need);
+
+ if (r == 0) {
+ struct dma_page *d_page;
+
+ /* Since the pages are directly going to the inuse_list
+ * which is stack based, lets treat it as a stack.
+ */
+ list_for_each_entry(d_page, &d_pages, page_list) {
+ ttm->pages[i] = d_page->p;
+ ttm->dma_address[i] = d_page->dma;
+ i++;
+ }
+ list_splice_tail(&d_pages, &ttm->alloc_list);
+
+ spin_lock_irqsave(&pool->lock, irq_flags);
+ pool->npages_in_use += pages_need;
+ spin_unlock_irqrestore(&pool->lock, irq_flags);
+ } else {
+ struct dma_page *d_page;
+ unsigned cpages = 0;
+
+ /* If there are any pages in the list, put them back in
+ * the pool. */
+ pr_err(TTM_PFX
+ "%s: Failed to allocate extra pages "
+ "for large request.",
+ pool->dev_name);
+
+ list_for_each_entry(d_page, &d_pages, page_list) {
+ cpages++;
+ }
+ spin_lock_irqsave(&pool->lock, irq_flags);
+ pool->npages_free += cpages;
+ /* We don't care about ordering on the free_list. */
+ list_splice(&d_pages, &pool->free_list);
+ spin_unlock_irqrestore(&pool->lock, irq_flags);
+ ttm_mem_global_free(mem_glob, ttm->num_pages * PAGE_SIZE);
+ return -ENOMEM;
+ }
+ }
+ ttm_mem_global_account_pages(mem_glob, ttm->pages, ttm->num_pages);
+
+ if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
+ r = ttm_tt_swapin(ttm);
+ if (unlikely(r != 0)) {
+ ttm_dma_unpopulate(ttm, dev);
+ return r;
+ }
+ }
+
+ ttm->state = tt_unbound;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ttm_dma_populate);
+
+/* Get a good estimate of how many pages are free in the pools */
+static int ttm_dma_pool_get_num_unused_pages(void)
+{
+ struct device_pools *p;
+ unsigned total = 0;
+
+ mutex_lock(&_manager->lock);
+ list_for_each_entry(p, &_manager->pools, pools) {
+ if (p)
+ total += p->pool->npages_free;
+ }
+ mutex_unlock(&_manager->lock);
+ return total;
+}
+
+/* Put all pages in the pages list into the correct pool to wait for reuse */
+void ttm_dma_unpopulate(struct ttm_tt *ttm, struct device *dev)
+{
+ struct ttm_mem_global *glob = ttm->glob->mem_glob;
+ struct dma_pool *pool;
+ enum pool_type type;
+ bool is_cached = false;
+ unsigned count = 0, i;
+ unsigned long irq_flags;
+
+ ttm_mem_global_free_pages(glob, ttm->pages, ttm->num_pages);
+
+ type = ttm_to_type(ttm->page_flags, ttm->caching_state);
+ pool = ttm_dma_find_pool(dev, type);
+ if (!pool) {
+ WARN_ON(!pool);
+ return;
+ }
+ is_cached = (ttm_dma_find_pool(pool->dev,
+ ttm_to_type(ttm->page_flags, tt_cached)) == pool);
+
+ if (ttm->num_pages > 1) {
+ dev_dbg(pool->dev, "(%s:%d) Attempting to %s %ld pages.\n",
+ pool->name, current->pid,
+ (is_cached) ? "destroy" : "recycle", ttm->num_pages);
+ }
+
+ /* This should never happen; should we panic here? */
+ if (ttm_dma_check_alloc_list(ttm)) {
+ pr_err(TTM_PFX "Could not free the other ttm tt pages %ld!\n",
+ ttm->num_pages);
+ return;
+ }
+
+ spin_lock_irqsave(&pool->lock, irq_flags);
+ count = 0;
+ pool->npages_in_use -= ttm->num_pages;
+ if (is_cached) {
+ pool->nfrees += ttm->num_pages;
+ } else {
+ pool->npages_free += ttm->num_pages;
+ list_splice(&ttm->alloc_list, &pool->free_list);
+ if (pool->npages_free > _manager->options.max_size) {
+ count = pool->npages_free - _manager->options.max_size;
+ }
+ }
+ spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+ if (is_cached) {
+ ttm_dma_pages_put(pool, &ttm->alloc_list,
+ ttm->pages, ttm->num_pages);
+ }
+
+ INIT_LIST_HEAD(&ttm->alloc_list);
+ for (i = 0; i < ttm->num_pages; i++) {
+ ttm->pages[i] = NULL;
+ ttm->dma_address[i] = 0;
+ }
+
+ /* shrink pool if necessary */
+ if (count)
+ ttm_dma_page_pool_free(pool, count);
+ ttm->state = tt_unpopulated;
+}
+EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
+
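A device-backed driver routes the new populate callbacks here, passing its own struct device; a hedged sketch in which foo_device and foo_from_bdev are hypothetical placeholders:

    static int foo_ttm_tt_populate(struct ttm_tt *ttm)
    {
            struct foo_device *fdev = foo_from_bdev(ttm->bdev); /* hypothetical */

            return ttm_dma_populate(ttm, fdev->dev);
    }

    static void foo_ttm_tt_unpopulate(struct ttm_tt *ttm)
    {
            ttm_dma_unpopulate(ttm, foo_from_bdev(ttm->bdev)->dev);
    }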
+/**
+ * Callback for mm to request the pool to reduce the number of pages held.
+ */
+static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ static atomic_t start_pool = ATOMIC_INIT(0);
+ unsigned idx = 0;
+ unsigned pool_offset = atomic_add_return(1, &start_pool);
+ unsigned shrink_pages = sc->nr_to_scan;
+ struct device_pools *p;
+
+ if (list_empty(&_manager->pools))
+ return 0;
+
+ mutex_lock(&_manager->lock);
+ pool_offset = pool_offset % _manager->npools;
+ list_for_each_entry(p, &_manager->pools, pools) {
+ unsigned nr_free;
+
+ if (!p || !p->dev)
+ continue;
+ if (shrink_pages == 0)
+ break;
+ /* Do it in round-robin fashion. */
+ if (++idx < pool_offset)
+ continue;
+ nr_free = shrink_pages;
+ shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free);
+ pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n",
+ p->pool->dev_name, p->pool->name, current->pid, nr_free,
+ shrink_pages);
+ }
+ mutex_unlock(&_manager->lock);
+ /* return estimated number of unused pages in pool */
+ return ttm_dma_pool_get_num_unused_pages();
+}
+
+static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager)
+{
+ manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink;
+ manager->mm_shrink.seeks = 1;
+ register_shrinker(&manager->mm_shrink);
+}
+
+static void ttm_dma_pool_mm_shrink_fini(struct ttm_pool_manager *manager)
+{
+ unregister_shrinker(&manager->mm_shrink);
+}
+
+int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
+{
+ int ret = -ENOMEM;
+
+ WARN_ON(_manager);
+
+ printk(KERN_INFO TTM_PFX "Initializing DMA pool allocator.\n");
+
+ _manager = kzalloc(sizeof(*_manager), GFP_KERNEL);
+ if (!_manager)
+ goto err_manager;
+
+ mutex_init(&_manager->lock);
+ INIT_LIST_HEAD(&_manager->pools);
+
+ _manager->options.max_size = max_pages;
+ _manager->options.small = SMALL_ALLOCATION;
+ _manager->options.alloc_size = NUM_PAGES_TO_ALLOC;
+
+ /* This takes care of auto-freeing the _manager */
+ ret = kobject_init_and_add(&_manager->kobj, &ttm_pool_kobj_type,
+ &glob->kobj, "dma_pool");
+ if (unlikely(ret != 0)) {
+ kobject_put(&_manager->kobj);
+ goto err;
+ }
+ ttm_dma_pool_mm_shrink_init(_manager);
+ return 0;
+err_manager:
+ kfree(_manager);
+ _manager = NULL;
+err:
+ return ret;
+}
+
+void ttm_dma_page_alloc_fini(void)
+{
+ struct device_pools *p, *t;
+
+ printk(KERN_INFO TTM_PFX "Finalizing DMA pool allocator.\n");
+ ttm_dma_pool_mm_shrink_fini(_manager);
+
+ list_for_each_entry_safe_reverse(p, t, &_manager->pools, pools) {
+ dev_dbg(p->dev, "(%s:%d) Freeing.\n", p->pool->name,
+ current->pid);
+ WARN_ON(devres_destroy(p->dev, ttm_dma_pool_release,
+ ttm_dma_pool_match, p->pool));
+ ttm_dma_free_pool(p->dev, p->pool->type);
+ }
+ kobject_put(&_manager->kobj);
+ _manager = NULL;
+}
+
+int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data)
+{
+ struct device_pools *p;
+ struct dma_pool *pool = NULL;
+ char *h[] = {"pool", "refills", "pages freed", "inuse", "available",
+ "name", "virt", "busaddr"};
+
+ if (!_manager) {
+ seq_printf(m, "No pool allocator running.\n");
+ return 0;
+ }
+ seq_printf(m, "%13s %12s %13s %8s %8s %8s\n",
+ h[0], h[1], h[2], h[3], h[4], h[5]);
+ mutex_lock(&_manager->lock);
+ list_for_each_entry(p, &_manager->pools, pools) {
+ struct device *dev = p->dev;
+ if (!dev)
+ continue;
+ pool = p->pool;
+ seq_printf(m, "%13s %12ld %13ld %8d %8d %8s\n",
+ pool->name, pool->nrefills,
+ pool->nfrees, pool->npages_in_use,
+ pool->npages_free,
+ pool->dev_name);
+ }
+ mutex_unlock(&_manager->lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ttm_dma_page_alloc_debugfs);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index f9cc548d6d98cd..920d12e66e55bb 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -43,8 +43,6 @@
#include "ttm/ttm_placement.h"
#include "ttm/ttm_page_alloc.h"
-static int ttm_tt_swapin(struct ttm_tt *ttm);
-
/**
* Allocates storage for pointers to the pages that back the ttm.
*/
@@ -63,120 +61,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
ttm->dma_address = NULL;
}
-static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
-{
- int write;
- int dirty;
- struct page *page;
- int i;
- struct ttm_backend *be = ttm->be;
-
- BUG_ON(!(ttm->page_flags & TTM_PAGE_FLAG_USER));
- write = ((ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0);
- dirty = ((ttm->page_flags & TTM_PAGE_FLAG_USER_DIRTY) != 0);
-
- if (be)
- be->func->clear(be);
-
- for (i = 0; i < ttm->num_pages; ++i) {
- page = ttm->pages[i];
- if (page == NULL)
- continue;
-
- if (page == ttm->dummy_read_page) {
- BUG_ON(write);
- continue;
- }
-
- if (write && dirty && !PageReserved(page))
- set_page_dirty_lock(page);
-
- ttm->pages[i] = NULL;
- ttm_mem_global_free(ttm->glob->mem_glob, PAGE_SIZE);
- put_page(page);
- }
- ttm->state = tt_unpopulated;
- ttm->first_himem_page = ttm->num_pages;
- ttm->last_lomem_page = -1;
-}
-
-static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
-{
- struct page *p;
- struct list_head h;
- struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
- int ret;
-
- while (NULL == (p = ttm->pages[index])) {
-
- INIT_LIST_HEAD(&h);
-
- ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
- &ttm->dma_address[index]);
-
- if (ret != 0)
- return NULL;
-
- p = list_first_entry(&h, struct page, lru);
-
- ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
- if (unlikely(ret != 0))
- goto out_err;
-
- if (PageHighMem(p))
- ttm->pages[--ttm->first_himem_page] = p;
- else
- ttm->pages[++ttm->last_lomem_page] = p;
- }
- return p;
-out_err:
- put_page(p);
- return NULL;
-}
-
-struct page *ttm_tt_get_page(struct ttm_tt *ttm, int index)
-{
- int ret;
-
- if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
- ret = ttm_tt_swapin(ttm);
- if (unlikely(ret != 0))
- return NULL;
- }
- return __ttm_tt_get_page(ttm, index);
-}
-
-int ttm_tt_populate(struct ttm_tt *ttm)
-{
- struct page *page;
- unsigned long i;
- struct ttm_backend *be;
- int ret;
-
- if (ttm->state != tt_unpopulated)
- return 0;
-
- if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
- ret = ttm_tt_swapin(ttm);
- if (unlikely(ret != 0))
- return ret;
- }
-
- be = ttm->be;
-
- for (i = 0; i < ttm->num_pages; ++i) {
- page = __ttm_tt_get_page(ttm, i);
- if (!page)
- return -ENOMEM;
- }
-
- be->func->populate(be, ttm->num_pages, ttm->pages,
- ttm->dummy_read_page, ttm->dma_address);
- ttm->state = tt_unbound;
- return 0;
-}
-EXPORT_SYMBOL(ttm_tt_populate);
-
#ifdef CONFIG_X86
static inline int ttm_tt_set_page_caching(struct page *p,
enum ttm_caching_state c_old,
@@ -278,59 +162,13 @@ int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
}
EXPORT_SYMBOL(ttm_tt_set_placement_caching);
-static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
-{
- int i;
- unsigned count = 0;
- struct list_head h;
- struct page *cur_page;
- struct ttm_backend *be = ttm->be;
-
- INIT_LIST_HEAD(&h);
-
- if (be)
- be->func->clear(be);
- for (i = 0; i < ttm->num_pages; ++i) {
-
- cur_page = ttm->pages[i];
- ttm->pages[i] = NULL;
- if (cur_page) {
- if (page_count(cur_page) != 1)
- printk(KERN_ERR TTM_PFX
- "Erroneous page count. "
- "Leaking pages.\n");
- ttm_mem_global_free_page(ttm->glob->mem_glob,
- cur_page);
- list_add(&cur_page->lru, &h);
- count++;
- }
- }
- ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
- ttm->dma_address);
- ttm->state = tt_unpopulated;
- ttm->first_himem_page = ttm->num_pages;
- ttm->last_lomem_page = -1;
-}
-
void ttm_tt_destroy(struct ttm_tt *ttm)
{
- struct ttm_backend *be;
-
if (unlikely(ttm == NULL))
return;
- be = ttm->be;
- if (likely(be != NULL)) {
- be->func->destroy(be);
- ttm->be = NULL;
- }
-
if (likely(ttm->pages != NULL)) {
- if (ttm->page_flags & TTM_PAGE_FLAG_USER)
- ttm_tt_free_user_pages(ttm);
- else
- ttm_tt_free_alloced_pages(ttm);
-
+ ttm->bdev->driver->ttm_tt_unpopulate(ttm);
ttm_tt_free_page_directory(ttm);
}
@@ -338,93 +176,38 @@ void ttm_tt_destroy(struct ttm_tt *ttm)
ttm->swap_storage)
fput(ttm->swap_storage);
- kfree(ttm);
-}
-
-int ttm_tt_set_user(struct ttm_tt *ttm,
- struct task_struct *tsk,
- unsigned long start, unsigned long num_pages)
-{
- struct mm_struct *mm = tsk->mm;
- int ret;
- int write = (ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0;
- struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
-
- BUG_ON(num_pages != ttm->num_pages);
- BUG_ON((ttm->page_flags & TTM_PAGE_FLAG_USER) == 0);
-
- /**
- * Account user pages as lowmem pages for now.
- */
-
- ret = ttm_mem_global_alloc(mem_glob, num_pages * PAGE_SIZE,
- false, false);
- if (unlikely(ret != 0))
- return ret;
-
- down_read(&mm->mmap_sem);
- ret = get_user_pages(tsk, mm, start, num_pages,
- write, 0, ttm->pages, NULL);
- up_read(&mm->mmap_sem);
-
- if (ret != num_pages && write) {
- ttm_tt_free_user_pages(ttm);
- ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE);
- return -ENOMEM;
- }
-
- ttm->tsk = tsk;
- ttm->start = start;
- ttm->state = tt_unbound;
-
- return 0;
+ ttm->swap_storage = NULL;
+ ttm->func->destroy(ttm);
}
-struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size,
- uint32_t page_flags, struct page *dummy_read_page)
+int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page)
{
- struct ttm_bo_driver *bo_driver = bdev->driver;
- struct ttm_tt *ttm;
-
- if (!bo_driver)
- return NULL;
-
- ttm = kzalloc(sizeof(*ttm), GFP_KERNEL);
- if (!ttm)
- return NULL;
-
+ ttm->bdev = bdev;
ttm->glob = bdev->glob;
ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- ttm->first_himem_page = ttm->num_pages;
- ttm->last_lomem_page = -1;
ttm->caching_state = tt_cached;
ttm->page_flags = page_flags;
-
ttm->dummy_read_page = dummy_read_page;
+ ttm->state = tt_unpopulated;
ttm_tt_alloc_page_directory(ttm);
- if (!ttm->pages) {
+ if (!ttm->pages || !ttm->dma_address) {
ttm_tt_destroy(ttm);
printk(KERN_ERR TTM_PFX "Failed allocating page table\n");
- return NULL;
- }
- ttm->be = bo_driver->create_ttm_backend_entry(bdev);
- if (!ttm->be) {
- ttm_tt_destroy(ttm);
- printk(KERN_ERR TTM_PFX "Failed creating ttm backend entry\n");
- return NULL;
+ return -ENOMEM;
}
- ttm->state = tt_unpopulated;
- return ttm;
+ return 0;
}
+EXPORT_SYMBOL(ttm_tt_init);
void ttm_tt_unbind(struct ttm_tt *ttm)
{
int ret;
- struct ttm_backend *be = ttm->be;
if (ttm->state == tt_bound) {
- ret = be->func->unbind(be);
+ ret = ttm->func->unbind(ttm);
BUG_ON(ret);
ttm->state = tt_unbound;
}
@@ -433,7 +216,6 @@ void ttm_tt_unbind(struct ttm_tt *ttm)
int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
{
int ret = 0;
- struct ttm_backend *be;
if (!ttm)
return -EINVAL;
@@ -441,25 +223,21 @@ int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
if (ttm->state == tt_bound)
return 0;
- be = ttm->be;
-
- ret = ttm_tt_populate(ttm);
+ ret = ttm->bdev->driver->ttm_tt_populate(ttm);
if (ret)
return ret;
- ret = be->func->bind(be, bo_mem);
+ ret = ttm->func->bind(ttm, bo_mem);
if (unlikely(ret != 0))
return ret;
ttm->state = tt_bound;
- if (ttm->page_flags & TTM_PAGE_FLAG_USER)
- ttm->page_flags |= TTM_PAGE_FLAG_USER_DIRTY;
return 0;
}
EXPORT_SYMBOL(ttm_tt_bind);
-static int ttm_tt_swapin(struct ttm_tt *ttm)
+int ttm_tt_swapin(struct ttm_tt *ttm)
{
struct address_space *swap_space;
struct file *swap_storage;
@@ -470,16 +248,6 @@ static int ttm_tt_swapin(struct ttm_tt *ttm)
int i;
int ret = -ENOMEM;
- if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
- ret = ttm_tt_set_user(ttm, ttm->tsk, ttm->start,
- ttm->num_pages);
- if (unlikely(ret != 0))
- return ret;
-
- ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED;
- return 0;
- }
-
swap_storage = ttm->swap_storage;
BUG_ON(swap_storage == NULL);
@@ -491,7 +259,7 @@ static int ttm_tt_swapin(struct ttm_tt *ttm)
ret = PTR_ERR(from_page);
goto out_err;
}
- to_page = __ttm_tt_get_page(ttm, i);
+ to_page = ttm->pages[i];
if (unlikely(to_page == NULL))
goto out_err;
@@ -512,7 +280,6 @@ static int ttm_tt_swapin(struct ttm_tt *ttm)
return 0;
out_err:
- ttm_tt_free_alloced_pages(ttm);
return ret;
}
@@ -530,18 +297,6 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
BUG_ON(ttm->caching_state != tt_cached);
- /*
- * For user buffers, just unpin the pages, as there should be
- * vma references.
- */
-
- if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
- ttm_tt_free_user_pages(ttm);
- ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
- ttm->swap_storage = NULL;
- return 0;
- }
-
if (!persistent_swap_storage) {
swap_storage = shmem_file_setup("ttm swap",
ttm->num_pages << PAGE_SHIFT,
@@ -576,7 +331,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
page_cache_release(to_page);
}
- ttm_tt_free_alloced_pages(ttm);
+ ttm->bdev->driver->ttm_tt_unpopulate(ttm);
ttm->swap_storage = swap_storage;
ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
if (persistent_swap_storage)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 5a72ed9082321e..383993b845b13f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -28,6 +28,7 @@
#include "vmwgfx_drv.h"
#include "ttm/ttm_bo_driver.h"
#include "ttm/ttm_placement.h"
+#include "ttm/ttm_page_alloc.h"
static uint32_t vram_placement_flags = TTM_PL_FLAG_VRAM |
TTM_PL_FLAG_CACHED;
@@ -139,85 +140,61 @@ struct ttm_placement vmw_srf_placement = {
.busy_placement = gmr_vram_placement_flags
};
-struct vmw_ttm_backend {
- struct ttm_backend backend;
- struct page **pages;
- unsigned long num_pages;
+struct vmw_ttm_tt {
+ struct ttm_tt ttm;
struct vmw_private *dev_priv;
int gmr_id;
};
-static int vmw_ttm_populate(struct ttm_backend *backend,
- unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page,
- dma_addr_t *dma_addrs)
+static int vmw_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
{
- struct vmw_ttm_backend *vmw_be =
- container_of(backend, struct vmw_ttm_backend, backend);
-
- vmw_be->pages = pages;
- vmw_be->num_pages = num_pages;
-
- return 0;
-}
-
-static int vmw_ttm_bind(struct ttm_backend *backend, struct ttm_mem_reg *bo_mem)
-{
- struct vmw_ttm_backend *vmw_be =
- container_of(backend, struct vmw_ttm_backend, backend);
+ struct vmw_ttm_tt *vmw_be = container_of(ttm, struct vmw_ttm_tt, ttm);
vmw_be->gmr_id = bo_mem->start;
- return vmw_gmr_bind(vmw_be->dev_priv, vmw_be->pages,
- vmw_be->num_pages, vmw_be->gmr_id);
+ return vmw_gmr_bind(vmw_be->dev_priv, ttm->pages,
+ ttm->num_pages, vmw_be->gmr_id);
}
-static int vmw_ttm_unbind(struct ttm_backend *backend)
+static int vmw_ttm_unbind(struct ttm_tt *ttm)
{
- struct vmw_ttm_backend *vmw_be =
- container_of(backend, struct vmw_ttm_backend, backend);
+ struct vmw_ttm_tt *vmw_be = container_of(ttm, struct vmw_ttm_tt, ttm);
vmw_gmr_unbind(vmw_be->dev_priv, vmw_be->gmr_id);
return 0;
}
-static void vmw_ttm_clear(struct ttm_backend *backend)
+static void vmw_ttm_destroy(struct ttm_tt *ttm)
{
- struct vmw_ttm_backend *vmw_be =
- container_of(backend, struct vmw_ttm_backend, backend);
-
- vmw_be->pages = NULL;
- vmw_be->num_pages = 0;
-}
-
-static void vmw_ttm_destroy(struct ttm_backend *backend)
-{
- struct vmw_ttm_backend *vmw_be =
- container_of(backend, struct vmw_ttm_backend, backend);
+ struct vmw_ttm_tt *vmw_be = container_of(ttm, struct vmw_ttm_tt, ttm);
kfree(vmw_be);
}
static struct ttm_backend_func vmw_ttm_func = {
- .populate = vmw_ttm_populate,
- .clear = vmw_ttm_clear,
.bind = vmw_ttm_bind,
.unbind = vmw_ttm_unbind,
.destroy = vmw_ttm_destroy,
};
-struct ttm_backend *vmw_ttm_backend_init(struct ttm_bo_device *bdev)
+struct ttm_tt *vmw_ttm_tt_create(struct ttm_bo_device *bdev,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page)
{
- struct vmw_ttm_backend *vmw_be;
+ struct vmw_ttm_tt *vmw_be;
vmw_be = kmalloc(sizeof(*vmw_be), GFP_KERNEL);
if (!vmw_be)
return NULL;
- vmw_be->backend.func = &vmw_ttm_func;
+ vmw_be->ttm.func = &vmw_ttm_func;
vmw_be->dev_priv = container_of(bdev, struct vmw_private, bdev);
- return &vmw_be->backend;
+ if (ttm_tt_init(&vmw_be->ttm, bdev, size, page_flags, dummy_read_page)) {
+ return NULL;
+ }
+
+ return &vmw_be->ttm;
}
int vmw_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
@@ -357,7 +334,9 @@ static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg,
}
struct ttm_bo_driver vmw_bo_driver = {
- .create_ttm_backend_entry = vmw_ttm_backend_init,
+ .ttm_tt_create = &vmw_ttm_tt_create,
+ .ttm_tt_populate = &ttm_page_alloc_ttm_tt_populate,
+ .ttm_tt_unpopulate = &ttm_page_alloc_ttm_tt_unpopulate,
.invalidate_caches = vmw_invalidate_caches,
.init_mem_type = vmw_init_mem_type,
.evict_flags = vmw_evict_flags,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 15fb26088d68e7..8fd1e6bcb3b38c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -503,8 +503,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman,
struct vmw_fence_obj *fence;
int ret;
- ret = ttm_mem_global_alloc(mem_glob, fman->fence_size,
- false, false);
+ ret = ttm_mem_global_alloc(mem_glob, fman->fence_size, false);
if (unlikely(ret != 0))
return ret;
@@ -573,8 +572,7 @@ int vmw_user_fence_create(struct drm_file *file_priv,
* be created by a user-space request.
*/
- ret = ttm_mem_global_alloc(mem_glob, fman->user_fence_size,
- false, false);
+ ret = ttm_mem_global_alloc(mem_glob, fman->user_fence_size, false);
if (unlikely(ret != 0))
return ret;
@@ -966,7 +964,7 @@ int vmw_event_fence_action_create(struct drm_file *file_priv,
* event size itself.
*/
- ret = ttm_mem_global_alloc(mem_glob, size, false, interruptible);
+ ret = ttm_mem_global_alloc(mem_glob, size, false);
if (unlikely(ret != 0))
return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 86c5e4cceb31ef..3c43ac046d5fd9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -430,7 +430,7 @@ int vmw_context_define_ioctl(struct drm_device *dev, void *data,
ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
vmw_user_context_size,
- false, true);
+ false);
if (unlikely(ret != 0)) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Out of graphics memory for context"
@@ -1298,7 +1298,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
return ret;
ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
- size, false, true);
+ size, false);
if (unlikely(ret != 0)) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Out of graphics memory for surface"
@@ -1560,7 +1560,7 @@ int vmw_dmabuf_init(struct vmw_private *dev_priv,
vmw_dmabuf_acc_size(bdev->glob,
(size + PAGE_SIZE - 1) >> PAGE_SHIFT);
- ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
+ ret = ttm_mem_global_alloc(mem_glob, acc_size, false);
if (unlikely(ret != 0)) {
/* we must free the bo here as
* ttm_buffer_object_init does so as well */
@@ -1851,7 +1851,7 @@ int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
vmw_user_stream_size,
- false, true);
+ false);
if (unlikely(ret != 0)) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Out of graphics memory for stream"
diff --git a/drivers/hwspinlock/u8500_hsem.c b/drivers/hwspinlock/u8500_hsem.c
index 143461a95ae4ad..86980fe041179e 100644
--- a/drivers/hwspinlock/u8500_hsem.c
+++ b/drivers/hwspinlock/u8500_hsem.c
@@ -21,6 +21,7 @@
* General Public License for more details.
*/
+#include <linux/module.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/pm_runtime.h>
@@ -108,10 +109,8 @@ static int __devinit u8500_hsem_probe(struct platform_device *pdev)
return -ENODEV;
io_base = ioremap(res->start, resource_size(res));
- if (!io_base) {
- ret = -ENOMEM;
- goto free_state;
- }
+ if (!io_base)
+ return -ENOMEM;
/* make sure protocol 1 is selected */
val = readl(io_base + HSEM_CTRL_REG);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 472aedfb07cf35..297e260921787f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3110,7 +3110,7 @@ static void handle_stripe(struct stripe_head *sh)
struct r5dev *pdev, *qdev;
clear_bit(STRIPE_HANDLE, &sh->state);
- if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
+ if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) {
/* already being handled, ensure it gets handled
* again when current action finishes */
set_bit(STRIPE_HANDLE, &sh->state);
@@ -3159,10 +3159,14 @@ static void handle_stripe(struct stripe_head *sh)
/* check if the array has lost more than max_degraded devices and,
* if so, some requests might need to be failed.
*/
- if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
- handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
- if (s.failed > conf->max_degraded && s.syncing)
- handle_failed_sync(conf, sh, &s);
+ if (s.failed > conf->max_degraded) {
+ sh->check_state = 0;
+ sh->reconstruct_state = 0;
+ if (s.to_read+s.to_write+s.written)
+ handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
+ if (s.syncing)
+ handle_failed_sync(conf, sh, &s);
+ }
/*
* might be able to return some write requests if the parity blocks
@@ -3371,7 +3375,7 @@ finish:
return_io(s.return_bi);
- clear_bit(STRIPE_ACTIVE, &sh->state);
+ clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
}
static void raid5_activate_delayed(struct r5conf *conf)
diff --git a/drivers/media/video/s5k6aa.c b/drivers/media/video/s5k6aa.c
index 2446736b787115..0df7f2a4181404 100644
--- a/drivers/media/video/s5k6aa.c
+++ b/drivers/media/video/s5k6aa.c
@@ -19,6 +19,7 @@
#include <linux/gpio.h>
#include <linux/i2c.h>
#include <linux/media.h>
+#include <linux/module.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
diff --git a/drivers/mfd/ab5500-core.c b/drivers/mfd/ab5500-core.c
index 4175544b491b51..ec10629a0b0b15 100644
--- a/drivers/mfd/ab5500-core.c
+++ b/drivers/mfd/ab5500-core.c
@@ -13,6 +13,7 @@
* TODO: Event handling with irq_chip. Waiting for PRCMU fw support.
*/
+#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/err.h>
#include <linux/platform_device.h>
diff --git a/drivers/mfd/ab5500-debugfs.c b/drivers/mfd/ab5500-debugfs.c
index 6be1fe6b5f9ac7..43c0ebb8195618 100644
--- a/drivers/mfd/ab5500-debugfs.c
+++ b/drivers/mfd/ab5500-debugfs.c
@@ -4,6 +4,7 @@
* Debugfs support for the AB5500 MFD driver
*/
+#include <linux/export.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/mfd/ab5500/ab5500.h>
diff --git a/drivers/mtd/maps/bcm963xx-flash.c b/drivers/mtd/maps/bcm963xx-flash.c
index 608967fe74c63b..736ca10ca9f11c 100644
--- a/drivers/mtd/maps/bcm963xx-flash.c
+++ b/drivers/mtd/maps/bcm963xx-flash.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/mtd/map.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index f4e3d82379d786..7f43cf86d77602 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -83,8 +83,10 @@ config DELL_LAPTOP
depends on EXPERIMENTAL
depends on BACKLIGHT_CLASS_DEVICE
depends on RFKILL || RFKILL = n
- depends on POWER_SUPPLY
depends on SERIO_I8042
+ select POWER_SUPPLY
+ select LEDS_CLASS
+ select NEW_LEDS
default n
---help---
This driver adds support for rfkill and backlight control to Dell
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 06ea3bcfdd2a30..047bcea67f47ea 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1902,6 +1902,22 @@ config SCSI_BFA_FC
To compile this driver as a module, choose M here. The module will
be called bfa.
+config XEN_SCSI_FRONTEND
+ tristate "Xen PVSCSI frontend driver"
+ depends on XEN && SCSI
+ default m
+ help
+	  The SCSI frontend driver allows the kernel to access SCSI devices
+	  exported by another guest OS.
+
+config XEN_SCSI_BACKEND
+ tristate "Xen PVSCSI backend driver"
+ depends on XEN_BACKEND && SCSI
+ default m
+ help
+	  The PVSCSI backend driver allows the kernel to export its SCSI devices
+	  to other Xen guests via a high-performance shared-memory interface.
+
endif # SCSI_LOWLEVEL
source "drivers/scsi/pcmcia/Kconfig"
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 2b887498be5013..f772dc1cbb91bd 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -141,6 +141,8 @@ obj-$(CONFIG_SCSI_CXGB4_ISCSI) += libiscsi.o libiscsi_tcp.o cxgbi/
obj-$(CONFIG_SCSI_BNX2_ISCSI) += libiscsi.o bnx2i/
obj-$(CONFIG_BE2ISCSI) += libiscsi.o be2iscsi/
obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o
+obj-$(CONFIG_XEN_SCSI_FRONTEND) += xen-scsifront/
+obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback/
obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o
obj-$(CONFIG_ARM) += arm/
diff --git a/drivers/scsi/xen-scsiback/Makefile b/drivers/scsi/xen-scsiback/Makefile
new file mode 100644
index 00000000000000..94926dc90315f1
--- /dev/null
+++ b/drivers/scsi/xen-scsiback/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_XEN_SCSI_BACKEND) := xen-scsiback.o
+
+xen-scsiback-y := interface.o scsiback.o xenbus.o translate.o emulate.o
+
diff --git a/drivers/scsi/xen-scsiback/common.h b/drivers/scsi/xen-scsiback/common.h
new file mode 100644
index 00000000000000..dafa79e2b7520c
--- /dev/null
+++ b/drivers/scsi/xen-scsiback/common.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * Based on the blkback driver code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __SCSIIF__BACKEND__COMMON_H__
+#define __SCSIIF__BACKEND__COMMON_H__
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/blkdev.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_eh.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm/delay.h>
+#include <xen/evtchn.h>
+#include <asm/hypervisor.h>
+#include <xen/xen.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/io/ring.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/io/vscsiif.h>
+
+
+#define DPRINTK(_f, _a...) \
+ pr_debug("(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+
+struct ids_tuple {
+ unsigned int hst; /* host */
+ unsigned int chn; /* channel */
+ unsigned int tgt; /* target */
+ unsigned int lun; /* LUN */
+};
+
+struct v2p_entry {
+ struct ids_tuple v; /* translate from */
+ struct scsi_device *sdev; /* translate to */
+ struct list_head l;
+};
+
+struct vscsibk_info {
+ struct xenbus_device *dev;
+
+ domid_t domid;
+ unsigned int evtchn;
+ unsigned int irq;
+
+ int feature;
+
+ struct vscsiif_back_ring ring;
+ void *ring_area;
+
+ spinlock_t ring_lock;
+ atomic_t nr_unreplied_reqs;
+
+ spinlock_t v2p_lock;
+ struct list_head v2p_entry_lists;
+
+ struct task_struct *kthread;
+ wait_queue_head_t waiting_to_free;
+ wait_queue_head_t wq;
+ unsigned int waiting_reqs;
+ struct page **mmap_pages;
+
+};
+
+typedef struct {
+ unsigned char act;
+ struct vscsibk_info *info;
+ struct scsi_device *sdev;
+
+ uint16_t rqid;
+
+ uint16_t v_chn, v_tgt;
+
+ uint8_t nr_segments;
+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
+ uint8_t cmd_len;
+
+ uint8_t sc_data_direction;
+ uint16_t timeout_per_command;
+
+ uint32_t request_bufflen;
+ struct scatterlist *sgl;
+ grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
+
+ int32_t rslt;
+ uint32_t resid;
+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
+
+ struct list_head free_list;
+} pending_req_t;
+
+
+
+#define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs))
+#define scsiback_put(_b) \
+ do { \
+ if (atomic_dec_and_test(&(_b)->nr_unreplied_reqs)) \
+ wake_up(&(_b)->waiting_to_free);\
+ } while (0)
+
+#define VSCSIIF_TIMEOUT (900*HZ)
+
+#define VSCSI_TYPE_HOST 1
+
+irqreturn_t scsiback_intr(int, void *);
+int scsiback_init_sring(struct vscsibk_info *info,
+ unsigned long ring_ref, unsigned int evtchn);
+int scsiback_schedule(void *data);
+
+
+struct vscsibk_info *vscsibk_info_alloc(domid_t domid);
+void scsiback_free(struct vscsibk_info *info);
+void scsiback_disconnect(struct vscsibk_info *info);
+int __init scsiback_interface_init(void);
+void scsiback_interface_exit(void);
+int scsiback_xenbus_init(void);
+void scsiback_xenbus_unregister(void);
+
+void scsiback_init_translation_table(struct vscsibk_info *info);
+
+int scsiback_add_translation_entry(struct vscsibk_info *info,
+ struct scsi_device *sdev, struct ids_tuple *v);
+
+int scsiback_del_translation_entry(struct vscsibk_info *info,
+ struct ids_tuple *v);
+struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
+ struct ids_tuple *v);
+void scsiback_release_translation_entry(struct vscsibk_info *info);
+
+
+void scsiback_cmd_exec(pending_req_t *pending_req);
+void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
+ uint32_t resid, pending_req_t *pending_req);
+void scsiback_fast_flush_area(pending_req_t *req);
+
+void scsiback_rsp_emulation(pending_req_t *pending_req);
+void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req);
+void scsiback_emulation_init(void);
+
+
+#endif /* __SCSIIF__BACKEND__COMMON_H__ */
diff --git a/drivers/scsi/xen-scsiback/emulate.c b/drivers/scsi/xen-scsiback/emulate.c
new file mode 100644
index 00000000000000..2b35e2a0776d18
--- /dev/null
+++ b/drivers/scsi/xen-scsiback/emulate.c
@@ -0,0 +1,470 @@
+/*
+ * Xen SCSI backend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include "common.h"
+
+/* The following SCSI commands are not defined in scsi/scsi.h */
+#define EXTENDED_COPY 0x83 /* EXTENDED COPY command */
+#define REPORT_ALIASES 0xa3 /* REPORT ALIASES command */
+#define CHANGE_ALIASES 0xa4 /* CHANGE ALIASES command */
+#define SET_PRIORITY 0xa4 /* SET PRIORITY command */
+
+
+/*
+ Bitmap controlling emulation.
+ (Bits 3 to 7 are reserved for future use.)
+*/
+#define VSCSIIF_NEED_CMD_EXEC 0x01 /* If this bit is set, cmd exec */
+ /* is required. */
+#define VSCSIIF_NEED_EMULATE_REQBUF 0x02 /* If this bit is set, need */
+ /* emulation request buff before */
+ /* cmd exec. */
+#define VSCSIIF_NEED_EMULATE_RSPBUF 0x04 /* If this bit is set, need */
+ /* emulation resp buff after */
+ /* cmd exec. */
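+/*
+ Illustration (mirroring scsiback_emulation_init() below):
+ bitmap[INQUIRY] = VSCSIIF_NEED_CMD_EXEC;
+ -> passed straight to the native driver, no emulation.
+ bitmap[REPORT_LUNS] = VSCSIIF_NEED_EMULATE_REQBUF |
+ VSCSIIF_NEED_EMULATE_RSPBUF;
+ -> fully emulated; the native driver is never called.
+*/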
+
+/* Additional Sense Code (ASC) used */
+#define NO_ADDITIONAL_SENSE 0x0
+#define LOGICAL_UNIT_NOT_READY 0x4
+#define UNRECOVERED_READ_ERR 0x11
+#define PARAMETER_LIST_LENGTH_ERR 0x1a
+#define INVALID_OPCODE 0x20
+#define ADDR_OUT_OF_RANGE 0x21
+#define INVALID_FIELD_IN_CDB 0x24
+#define INVALID_FIELD_IN_PARAM_LIST 0x26
+#define POWERON_RESET 0x29
+#define SAVING_PARAMS_UNSUP 0x39
+#define THRESHOLD_EXCEEDED 0x5d
+#define LOW_POWER_COND_ON 0x5e
+
+
+
+/* Number of SCSI op_codes */
+#define VSCSI_MAX_SCSI_OP_CODE 256
+static unsigned char bitmap[VSCSI_MAX_SCSI_OP_CODE];
+
+#define NO_EMULATE(cmd) \
+ do { \
+ bitmap[cmd] = VSCSIIF_NEED_CMD_EXEC; \
+ pre_function[cmd] = NULL; \
+ post_function[cmd] = NULL; \
+ } while (0)
+
+
+
+/*
+ Emulation routines for each SCSI op_code.
+*/
+static void (*pre_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
+static void (*post_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
+
+
+static const int check_condition_result =
+ (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+
+static void scsiback_mk_sense_buffer(uint8_t *data, uint8_t key,
+ uint8_t asc, uint8_t asq)
+{
+ data[0] = 0x70; /* fixed, current */
+ data[2] = key;
+ data[7] = 0xa; /* implies 18 byte sense buffer */
+ data[12] = asc;
+ data[13] = asq;
+}
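+/*
+ Worked example: resp_not_supported_cmd() below yields an
+ ILLEGAL_REQUEST/INVALID_OPCODE sense of
+ data[0]=0x70 (fixed format, current error), data[2]=0x05 (key),
+ data[7]=0x0a (10 additional bytes, 18 valid bytes total),
+ data[12]=0x20 (ASC), data[13]=0x00 (ASCQ).
+*/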
+
+static void resp_not_supported_cmd(pending_req_t *pending_req, void *data)
+{
+ scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST,
+ INVALID_OPCODE, 0);
+ pending_req->resid = 0;
+ pending_req->rslt = check_condition_result;
+}
+
+
+static int __copy_to_sg(struct scatterlist *sgl, unsigned int nr_sg,
+ void *buf, unsigned int buflen)
+{
+ struct scatterlist *sg;
+ void *from = buf;
+ void *to;
+ unsigned int from_rest = buflen;
+ unsigned int to_capa;
+ unsigned int copy_size = 0;
+ unsigned int i;
+ unsigned long pfn;
+
+ for_each_sg (sgl, sg, nr_sg, i) {
+ if (sg_page(sg) == NULL) {
+ printk(KERN_WARNING "%s: inconsistent length field in "
+ "scatterlist\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+
+ to_capa = sg->length;
+ copy_size = min_t(unsigned int, to_capa, from_rest);
+
+ pfn = page_to_pfn(sg_page(sg));
+ to = pfn_to_kaddr(pfn) + (sg->offset);
+ memcpy(to, from, copy_size);
+
+ from_rest -= copy_size;
+ if (from_rest == 0) {
+ return 0;
+ }
+
+ from += copy_size;
+ }
+
+ printk(KERN_WARNING "%s: no space in scatterlist\n",
+ __FUNCTION__);
+ return -ENOMEM;
+}
+#if 0
+static int __copy_from_sg(struct scatterlist *sgl, unsigned int nr_sg,
+ void *buf, unsigned int buflen)
+{
+ struct scatterlist *sg;
+ void *from;
+ void *to = buf;
+ unsigned int from_rest;
+ unsigned int to_capa = buflen;
+ unsigned int copy_size;
+ unsigned int i;
+ unsigned long pfn;
+
+ for_each_sg (sgl, sg, nr_sg, i) {
+ if (sg_page(sg) == NULL) {
+ printk(KERN_WARNING "%s: inconsistent length field in "
+ "scatterlist\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+
+ from_rest = sg->length;
+ if ((from_rest > 0) && (to_capa < from_rest)) {
+ printk(KERN_WARNING
+ "%s: no space in destination buffer\n",
+ __FUNCTION__);
+ return -ENOMEM;
+ }
+ copy_size = from_rest;
+
+ pfn = page_to_pfn(sg_page(sg));
+ from = pfn_to_kaddr(pfn) + (sg->offset);
+ memcpy(to, from, copy_size);
+
+ to_capa -= copy_size;
+ to += copy_size;
+ }
+
+ return 0;
+}
+#endif
+static int __nr_luns_under_host(struct vscsibk_info *info)
+{
+ struct v2p_entry *entry;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+ int lun_cnt = 0;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ list_for_each_entry(entry, head, l) {
+ lun_cnt++;
+ }
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+
+ return (lun_cnt);
+}
+
+
+/* REPORT LUNS Define*/
+#define VSCSI_REPORT_LUNS_HEADER 8
+#define VSCSI_REPORT_LUNS_RETRY 3
+
+/* adapted from scsi_debug.c/resp_report_luns() */
+static void __report_luns(pending_req_t *pending_req, void *data)
+{
+ struct vscsibk_info *info = pending_req->info;
+ unsigned int channel = pending_req->v_chn;
+ unsigned int target = pending_req->v_tgt;
+ unsigned int nr_seg = pending_req->nr_segments;
+ unsigned char *cmd = (unsigned char *)pending_req->cmnd;
+
+ unsigned char *buff = NULL;
+ unsigned int alloc_len;
+ unsigned int alloc_luns = 0;
+ unsigned int req_bufflen = 0;
+ unsigned int actual_len = 0;
+ unsigned int retry_cnt = 0;
+ int select_report = (int)cmd[2];
+ int i, lun_cnt = 0, lun, upper, err = 0;
+
+ struct v2p_entry *entry;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+
+ struct scsi_lun *one_lun;
+
+ req_bufflen = cmd[9] + (cmd[8] << 8) + (cmd[7] << 16) + (cmd[6] << 24);
+ if ((req_bufflen < 4) || (select_report != 0))
+ goto fail;
+
+ alloc_luns = __nr_luns_under_host(info);
+ alloc_len = sizeof(struct scsi_lun) * alloc_luns
+ + VSCSI_REPORT_LUNS_HEADER;
+retry:
+ if ((buff = kzalloc(alloc_len, GFP_KERNEL)) == NULL) {
+ printk(KERN_ERR "scsiback:%s kmalloc err\n", __FUNCTION__);
+ goto fail;
+ }
+
+ one_lun = (struct scsi_lun *) &buff[8];
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ list_for_each_entry(entry, head, l) {
+ if ((entry->v.chn == channel) &&
+ (entry->v.tgt == target)) {
+ /* check overflow */
+ if (lun_cnt >= alloc_luns) {
+ spin_unlock_irqrestore(&info->v2p_lock,
+ flags);
+
+ if (retry_cnt < VSCSI_REPORT_LUNS_RETRY) {
+ retry_cnt++;
+ kfree(buff);
+ goto retry;
+ }
+
+ goto fail;
+ }
+
+ lun = entry->v.lun;
+ upper = (lun >> 8) & 0x3f;
+ if (upper)
+ one_lun[lun_cnt].scsi_lun[0] = upper;
+ one_lun[lun_cnt].scsi_lun[1] = lun & 0xff;
+ lun_cnt++;
+ }
+ }
+
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+
+ buff[2] = ((sizeof(struct scsi_lun) * lun_cnt) >> 8) & 0xff;
+ buff[3] = (sizeof(struct scsi_lun) * lun_cnt) & 0xff;
+
+ actual_len = lun_cnt * sizeof(struct scsi_lun)
+ + VSCSI_REPORT_LUNS_HEADER;
+ req_bufflen = 0;
+ for (i = 0; i < nr_seg; i++)
+ req_bufflen += pending_req->sgl[i].length;
+
+ err = __copy_to_sg(pending_req->sgl, nr_seg, buff,
+ min(req_bufflen, actual_len));
+ if (err)
+ goto fail;
+
+ memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
+ pending_req->rslt = 0x00;
+ pending_req->resid = req_bufflen - min(req_bufflen, actual_len);
+
+ kfree(buff);
+ return;
+
+fail:
+ scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST,
+ INVALID_FIELD_IN_CDB, 0);
+ pending_req->rslt = check_condition_result;
+ pending_req->resid = 0;
+ kfree(buff);
+ return;
+}
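+/*
+ Sizing example (hypothetical): with 3 LUNs under the requested
+ channel/target, the LUN list takes 3 * sizeof(struct scsi_lun) = 24
+ bytes, so buff[2]=0x00 and buff[3]=0x18 (big-endian list length)
+ and actual_len = 24 + VSCSI_REPORT_LUNS_HEADER = 32 bytes.
+*/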
+
+
+
+int __pre_do_emulation(pending_req_t *pending_req, void *data)
+{
+ uint8_t op_code = pending_req->cmnd[0];
+
+ if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_REQBUF) &&
+ pre_function[op_code] != NULL) {
+ pre_function[op_code](pending_req, data);
+ }
+
+ /*
+ 0: no native driver call needed; respond immediately.
+ 1: no emulation needed, or call the native driver
+ after modifying the request buffer.
+ */
+ return !!(bitmap[op_code] & VSCSIIF_NEED_CMD_EXEC);
+}
+
+void scsiback_rsp_emulation(pending_req_t *pending_req)
+{
+ uint8_t op_code = pending_req->cmnd[0];
+
+ if ((bitmap[op_code] & VSCSIIF_NEED_EMULATE_RSPBUF) &&
+ post_function[op_code] != NULL) {
+ post_function[op_code](pending_req, NULL);
+ }
+
+ return;
+}
+
+
+void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req)
+{
+ if (__pre_do_emulation(pending_req, NULL)) {
+ scsiback_cmd_exec(pending_req);
+ }
+ else {
+ scsiback_fast_flush_area(pending_req);
+ scsiback_do_resp_with_sense(pending_req->sense_buffer,
+ pending_req->rslt, pending_req->resid, pending_req);
+ }
+}
+
+
+/*
+ Following are not customizable functions.
+*/
+void scsiback_emulation_init(void)
+{
+ int i;
+
+ /* Initialize to default state */
+ for (i = 0; i < VSCSI_MAX_SCSI_OP_CODE; i++) {
+ bitmap[i] = (VSCSIIF_NEED_EMULATE_REQBUF |
+ VSCSIIF_NEED_EMULATE_RSPBUF);
+ pre_function[i] = resp_not_supported_cmd;
+ post_function[i] = NULL;
+ /* Default: emulate the request by responding "not supported";
+ the native driver is only called for op_codes registered
+ below. */
+ }
+
+ /*
+ Register appropriate functions below as you need.
+ (See scsi/scsi.h for definition of SCSI op_code.)
+ */
+
+ /*
+ Following commands do not require emulation.
+ */
+ NO_EMULATE(TEST_UNIT_READY); /*0x00*/
+ NO_EMULATE(REZERO_UNIT); /*0x01*/
+ NO_EMULATE(REQUEST_SENSE); /*0x03*/
+ NO_EMULATE(FORMAT_UNIT); /*0x04*/
+ NO_EMULATE(READ_BLOCK_LIMITS); /*0x05*/
+ /*NO_EMULATE(REASSIGN_BLOCKS); *//*0x07*/
+ /*NO_EMULATE(INITIALIZE_ELEMENT_STATUS); *//*0x07*/
+ NO_EMULATE(READ_6); /*0x08*/
+ NO_EMULATE(WRITE_6); /*0x0a*/
+ /*NO_EMULATE(SEEK_6); *//*0x0b*/
+ /*NO_EMULATE(READ_REVERSE); *//*0x0f*/
+ NO_EMULATE(WRITE_FILEMARKS); /*0x10*/
+ NO_EMULATE(SPACE); /*0x11*/
+ NO_EMULATE(INQUIRY); /*0x12*/
+ /*NO_EMULATE(RECOVER_BUFFERED_DATA); *//*0x14*/
+ /*NO_EMULATE(MODE_SELECT); *//*0x15*/
+ /*NO_EMULATE(RESERVE); *//*0x16*/
+ /*NO_EMULATE(RELEASE); *//*0x17*/
+ /*NO_EMULATE(COPY); *//*0x18*/
+ NO_EMULATE(ERASE); /*0x19*/
+ NO_EMULATE(MODE_SENSE); /*0x1a*/
+ /*NO_EMULATE(START_STOP); *//*0x1b*/
+ /*NO_EMULATE(RECEIVE_DIAGNOSTIC); *//*0x1c*/
+ NO_EMULATE(SEND_DIAGNOSTIC); /*0x1d*/
+ /*NO_EMULATE(ALLOW_MEDIUM_REMOVAL); *//*0x1e*/
+
+ /*NO_EMULATE(SET_WINDOW); *//*0x24*/
+ NO_EMULATE(READ_CAPACITY); /*0x25*/
+ NO_EMULATE(READ_10); /*0x28*/
+ NO_EMULATE(WRITE_10); /*0x2a*/
+ /*NO_EMULATE(SEEK_10); *//*0x2b*/
+ /*NO_EMULATE(POSITION_TO_ELEMENT); *//*0x2b*/
+ /*NO_EMULATE(WRITE_VERIFY); *//*0x2e*/
+ /*NO_EMULATE(VERIFY); *//*0x2f*/
+ /*NO_EMULATE(SEARCH_HIGH); *//*0x30*/
+ /*NO_EMULATE(SEARCH_EQUAL); *//*0x31*/
+ /*NO_EMULATE(SEARCH_LOW); *//*0x32*/
+ /*NO_EMULATE(SET_LIMITS); *//*0x33*/
+ /*NO_EMULATE(PRE_FETCH); *//*0x34*/
+ /*NO_EMULATE(READ_POSITION); *//*0x34*/
+ /*NO_EMULATE(SYNCHRONIZE_CACHE); *//*0x35*/
+ /*NO_EMULATE(LOCK_UNLOCK_CACHE); *//*0x36*/
+ /*NO_EMULATE(READ_DEFECT_DATA); *//*0x37*/
+ /*NO_EMULATE(MEDIUM_SCAN); *//*0x38*/
+ /*NO_EMULATE(COMPARE); *//*0x39*/
+ /*NO_EMULATE(COPY_VERIFY); *//*0x3a*/
+ /*NO_EMULATE(WRITE_BUFFER); *//*0x3b*/
+ /*NO_EMULATE(READ_BUFFER); *//*0x3c*/
+ /*NO_EMULATE(UPDATE_BLOCK); *//*0x3d*/
+ /*NO_EMULATE(READ_LONG); *//*0x3e*/
+ /*NO_EMULATE(WRITE_LONG); *//*0x3f*/
+ /*NO_EMULATE(CHANGE_DEFINITION); *//*0x40*/
+ /*NO_EMULATE(WRITE_SAME); *//*0x41*/
+ /*NO_EMULATE(READ_TOC); *//*0x43*/
+ /*NO_EMULATE(LOG_SELECT); *//*0x4c*/
+ /*NO_EMULATE(LOG_SENSE); *//*0x4d*/
+ /*NO_EMULATE(MODE_SELECT_10); *//*0x55*/
+ /*NO_EMULATE(RESERVE_10); *//*0x56*/
+ /*NO_EMULATE(RELEASE_10); *//*0x57*/
+ /*NO_EMULATE(MODE_SENSE_10); *//*0x5a*/
+ /*NO_EMULATE(PERSISTENT_RESERVE_IN); *//*0x5e*/
+ /*NO_EMULATE(PERSISTENT_RESERVE_OUT); *//*0x5f*/
+ /* REPORT_LUNS *//*0xa0*//*Full emulation*/
+ /*NO_EMULATE(MOVE_MEDIUM); *//*0xa5*/
+ /*NO_EMULATE(EXCHANGE_MEDIUM); *//*0xa6*/
+ /*NO_EMULATE(READ_12); *//*0xa8*/
+ /*NO_EMULATE(WRITE_12); *//*0xaa*/
+ /*NO_EMULATE(WRITE_VERIFY_12); *//*0xae*/
+ /*NO_EMULATE(SEARCH_HIGH_12); *//*0xb0*/
+ /*NO_EMULATE(SEARCH_EQUAL_12); *//*0xb1*/
+ /*NO_EMULATE(SEARCH_LOW_12); *//*0xb2*/
+ /*NO_EMULATE(READ_ELEMENT_STATUS); *//*0xb8*/
+ /*NO_EMULATE(SEND_VOLUME_TAG); *//*0xb6*/
+ /*NO_EMULATE(WRITE_LONG_2); *//*0xea*/
+ /*NO_EMULATE(READ_16); *//*0x88*/
+ /*NO_EMULATE(WRITE_16); *//*0x8a*/
+ /*NO_EMULATE(VERIFY_16); *//*0x8f*/
+ /*NO_EMULATE(SERVICE_ACTION_IN); *//*0x9e*/
+
+ /*
+ Following commands require emulation.
+ */
+ pre_function[REPORT_LUNS] = __report_luns;
+ bitmap[REPORT_LUNS] = (VSCSIIF_NEED_EMULATE_REQBUF |
+ VSCSIIF_NEED_EMULATE_RSPBUF);
+
+ return;
+}
diff --git a/drivers/scsi/xen-scsiback/interface.c b/drivers/scsi/xen-scsiback/interface.c
new file mode 100644
index 00000000000000..663568edf0ba67
--- /dev/null
+++ b/drivers/scsi/xen-scsiback/interface.c
@@ -0,0 +1,141 @@
+/*
+ * interface management.
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * Based on the blkback driver code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include "common.h"
+
+#include <xen/evtchn.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+
+
+static struct kmem_cache *scsiback_cachep;
+
+struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
+{
+ struct vscsibk_info *info;
+
+ info = kmem_cache_zalloc(scsiback_cachep, GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->domid = domid;
+ spin_lock_init(&info->ring_lock);
+ atomic_set(&info->nr_unreplied_reqs, 0);
+ init_waitqueue_head(&info->wq);
+ init_waitqueue_head(&info->waiting_to_free);
+
+ return info;
+}
+
+int scsiback_init_sring(struct vscsibk_info *info,
+ unsigned long ring_ref, unsigned int evtchn)
+{
+ struct vscsiif_sring *sring;
+ int err;
+
+ if (!info)
+ return -ENODEV;
+
+ if (info->irq) {
+ printk(KERN_ERR "scsiback: Already connected through?\n");
+ return -1;
+ }
+
+ err = xenbus_map_ring_valloc(info->dev, ring_ref, &info->ring_area);
+ if (err < 0)
+ return -ENOMEM;
+
+ sring = (struct vscsiif_sring *) info->ring_area;
+ BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ info->domid, evtchn,
+ scsiback_intr, 0, "vscsiif-backend", info);
+ if (err < 0)
+ goto unmap_page;
+
+ info->irq = err;
+
+ return 0;
+
+unmap_page:
+ xenbus_unmap_ring_vfree(info->dev, info->ring_area);
+
+ return err;
+}
+
+void scsiback_disconnect(struct vscsibk_info *info)
+{
+ if (info->kthread) {
+ kthread_stop(info->kthread);
+ info->kthread = NULL;
+ }
+
+ wait_event(info->waiting_to_free,
+ atomic_read(&info->nr_unreplied_reqs) == 0);
+
+ if (info->irq) {
+ unbind_from_irqhandler(info->irq, info);
+ info->irq = 0;
+ }
+
+ if (info->ring.sring || info->ring_area) {
+ xenbus_unmap_ring_vfree(info->dev, info->ring_area);
+ info->ring.sring = NULL;
+ info->ring_area = NULL;
+ }
+}
+
+void scsiback_free(struct vscsibk_info *info)
+{
+ kmem_cache_free(scsiback_cachep, info);
+}
+
+int __init scsiback_interface_init(void)
+{
+ scsiback_cachep = kmem_cache_create("vscsiif_cache",
+ sizeof(struct vscsibk_info), 0, 0, NULL);
+ if (!scsiback_cachep) {
+ printk(KERN_ERR "scsiback: can't init scsi cache\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void scsiback_interface_exit(void)
+{
+ kmem_cache_destroy(scsiback_cachep);
+}
diff --git a/drivers/scsi/xen-scsiback/scsiback.c b/drivers/scsi/xen-scsiback/scsiback.c
new file mode 100644
index 00000000000000..a209f87013da78
--- /dev/null
+++ b/drivers/scsi/xen-scsiback/scsiback.c
@@ -0,0 +1,757 @@
+/*
+ * Xen SCSI backend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * Based on the blkback driver code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <xen/balloon.h>
+#include <asm/hypervisor.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_eh.h>
+
+#include "common.h"
+
+
+struct list_head pending_free;
+DEFINE_SPINLOCK(pending_free_lock);
+DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+int vscsiif_reqs = VSCSIIF_BACK_MAX_PENDING_REQS;
+module_param_named(reqs, vscsiif_reqs, int, 0);
+MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
+
+static unsigned int log_print_stat;
+module_param(log_print_stat, int, 0644);
+
+#define SCSIBACK_INVALID_HANDLE (~0)
+
+static pending_req_t *pending_reqs;
+static struct page **pending_pages;
+static grant_handle_t *pending_grant_handles;
+
+static int vaddr_pagenr(pending_req_t *req, int seg)
+{
+ return (req - pending_reqs) * VSCSIIF_SG_TABLESIZE + seg;
+}
+
+static unsigned long vaddr(pending_req_t *req, int seg)
+{
+ unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
+ return (unsigned long)pfn_to_kaddr(pfn);
+}
+
+#define pending_handle(_req, _seg) \
+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
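+/*
+ Index math example: assuming VSCSIIF_SG_TABLESIZE is 26 (per the
+ vscsiif.h ring interface), segment 3 of request #2 maps to
+ pending_pages[2 * 26 + 3] == pending_pages[55]; its grant handle
+ sits at the same index in pending_grant_handles[].
+*/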
+
+
+void scsiback_fast_flush_area(pending_req_t *req)
+{
+ struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
+ unsigned int i, invcount = 0;
+ grant_handle_t handle;
+ int err;
+
+ if (req->nr_segments) {
+ for (i = 0; i < req->nr_segments; i++) {
+ handle = pending_handle(req, i);
+ if (handle == SCSIBACK_INVALID_HANDLE)
+ continue;
+ gnttab_set_unmap_op(&unmap[i], vaddr(req, i),
+ GNTMAP_host_map, handle);
+ pending_handle(req, i) = SCSIBACK_INVALID_HANDLE;
+ invcount++;
+ }
+
+ err = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, unmap, invcount);
+ BUG_ON(err);
+ for (i = 0; i < invcount; i++) {
+ err = m2p_remove_override(
+ virt_to_page(unmap[i].host_addr), false);
+ WARN_ON(err);
+ }
+ kfree(req->sgl);
+ }
+
+ return;
+}
+
+
+static pending_req_t * alloc_req(struct vscsibk_info *info)
+{
+ pending_req_t *req = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pending_free_lock, flags);
+ if (!list_empty(&pending_free)) {
+ req = list_entry(pending_free.next, pending_req_t, free_list);
+ list_del(&req->free_list);
+ }
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+ return req;
+}
+
+
+static void free_req(pending_req_t *req)
+{
+ unsigned long flags;
+ int was_empty;
+
+ spin_lock_irqsave(&pending_free_lock, flags);
+ was_empty = list_empty(&pending_free);
+ list_add(&req->free_list, &pending_free);
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+ if (was_empty)
+ wake_up(&pending_free_wq);
+}
+
+
+static void scsiback_notify_work(struct vscsibk_info *info)
+{
+ info->waiting_reqs = 1;
+ wake_up(&info->wq);
+}
+
+void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
+ uint32_t resid, pending_req_t *pending_req)
+{
+ vscsiif_response_t *ring_res;
+ struct vscsibk_info *info = pending_req->info;
+ int notify;
+ int more_to_do = 1;
+ struct scsi_sense_hdr sshdr;
+ unsigned long flags;
+
+ DPRINTK("%s\n",__FUNCTION__);
+
+ spin_lock_irqsave(&info->ring_lock, flags);
+
+ ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
+ info->ring.rsp_prod_pvt++;
+
+ ring_res->rslt = result;
+ ring_res->rqid = pending_req->rqid;
+
+ if (sense_buffer != NULL) {
+ if (scsi_normalize_sense(sense_buffer,
+ VSCSIIF_SENSE_BUFFERSIZE, &sshdr)) {
+
+ int len = 8 + sense_buffer[7];
+
+ if (len > VSCSIIF_SENSE_BUFFERSIZE)
+ len = VSCSIIF_SENSE_BUFFERSIZE;
+
+ memcpy(ring_res->sense_buffer, sense_buffer, len);
+ ring_res->sense_len = len;
+ }
+ } else {
+ ring_res->sense_len = 0;
+ }
+
+ ring_res->residual_len = resid;
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
+ if (info->ring.rsp_prod_pvt == info->ring.req_cons) {
+ RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&info->ring)) {
+ more_to_do = 1;
+ }
+
+ spin_unlock_irqrestore(&info->ring_lock, flags);
+
+ if (more_to_do)
+ scsiback_notify_work(info);
+
+ if (notify)
+ notify_remote_via_irq(info->irq);
+
+ free_req(pending_req);
+}
+
+static void scsiback_print_status(char *sense_buffer, int errors,
+ pending_req_t *pending_req)
+{
+ struct scsi_device *sdev = pending_req->sdev;
+
+ printk(KERN_ERR "scsiback: %d:%d:%d:%d ",sdev->host->host_no,
+ sdev->channel, sdev->id, sdev->lun);
+ printk(KERN_ERR "status = 0x%02x, message = 0x%02x, host = 0x%02x, driver = 0x%02x\n",
+ status_byte(errors), msg_byte(errors),
+ host_byte(errors), driver_byte(errors));
+
+ printk(KERN_ERR "scsiback: cmnd[0]=0x%02X\n",
+ pending_req->cmnd[0]);
+
+ if (CHECK_CONDITION & status_byte(errors))
+ __scsi_print_sense("scsiback", sense_buffer, VSCSIIF_SENSE_BUFFERSIZE);
+}
+
+
+static void scsiback_cmd_done(struct request *req, int uptodate)
+{
+ pending_req_t *pending_req = req->end_io_data;
+ unsigned char *sense_buffer;
+ unsigned int resid;
+ int errors;
+
+ sense_buffer = req->sense;
+ resid = blk_rq_bytes(req);
+ errors = req->errors;
+
+ if (errors != 0) {
+ if (log_print_stat)
+ scsiback_print_status(sense_buffer, errors, pending_req);
+ }
+
+ /* Host mode passes commands through without emulation. */
+ if (pending_req->info->feature != VSCSI_TYPE_HOST)
+ scsiback_rsp_emulation(pending_req);
+
+ scsiback_fast_flush_area(pending_req);
+ scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
+ scsiback_put(pending_req->info);
+
+ __blk_put_request(req->q, req);
+}
+
+
+static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
+ pending_req_t *pending_req)
+{
+ u32 flags;
+ int write;
+ int i, err = 0;
+ unsigned int data_len = 0;
+ struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
+ struct vscsibk_info *info = pending_req->info;
+
+ int data_dir = pending_req->sc_data_direction;
+ unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
+
+ write = (data_dir == DMA_TO_DEVICE);
+
+ if (nr_segments) {
+ struct scatterlist *sg;
+
+ /* sgl is freed in scsiback_fast_flush_area() */
+ pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments,
+ GFP_KERNEL);
+ if (!pending_req->sgl) {
+ printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+
+ sg_init_table(pending_req->sgl, nr_segments);
+
+ for (i = 0; i < nr_segments; i++) {
+ flags = GNTMAP_host_map;
+ if (write)
+ flags |= GNTMAP_readonly;
+
+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
+ ring_req->seg[i].gref,
+ info->domid);
+ }
+
+ err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments);
+ BUG_ON(err);
+
+ /* Retry maps with GNTST_eagain */
+ for (i = 0; i < nr_segments; i++) {
+ while (unlikely(map[i].status == GNTST_eagain)) {
+ msleep(10);
+ err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ &map[i],
+ 1);
+ BUG_ON(err);
+ }
+ }
+
+ for_each_sg (pending_req->sgl, sg, nr_segments, i) {
+ struct page *pg;
+
+ if (unlikely(map[i].status != 0)) {
+ printk(KERN_ERR "scsiback: invalid buffer -- could not remap it: " \
+ "%d/%d, err:%d\n", i, nr_segments, map[i].status);
+ map[i].handle = SCSIBACK_INVALID_HANDLE;
+ err |= 1;
+ }
+
+ pending_handle(pending_req, i) = map[i].handle;
+
+ if (err)
+ continue;
+
+ pg = pending_pages[vaddr_pagenr(pending_req, i)];
+
+ m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), pg, false);
+ sg_set_page(sg, pg, ring_req->seg[i].length,
+ ring_req->seg[i].offset);
+ data_len += sg->length;
+
+ barrier();
+ if (sg->offset >= PAGE_SIZE ||
+ sg->length > PAGE_SIZE ||
+ sg->offset + sg->length > PAGE_SIZE)
+ err |= 1;
+
+ }
+
+ if (err)
+ goto fail_flush;
+ }
+
+ pending_req->request_bufflen = data_len;
+
+ return 0;
+
+fail_flush:
+ scsiback_fast_flush_area(pending_req);
+ return -ENOMEM;
+}
+
+/* adapted from scsi_lib.c/scsi_bi_endio */
+static void scsiback_bi_endio(struct bio *bio, int error)
+{
+ bio_put(bio);
+}
+
+
+
+/* adapted from scsi_lib.c/scsi_req_map_sg */
+static struct bio *request_map_sg(pending_req_t *pending_req)
+{
+ struct request_queue *q = pending_req->sdev->request_queue;
+ unsigned int nsegs = (unsigned int)pending_req->nr_segments;
+ unsigned int i, len, bytes, off, nr_pages, nr_vecs = 0;
+ struct scatterlist *sg;
+ struct page *page;
+ struct bio *bio = NULL, *bio_first = NULL, *bio_last = NULL;
+ int err;
+
+ for_each_sg (pending_req->sgl, sg, nsegs, i) {
+ page = sg_page(sg);
+ off = sg->offset;
+ len = sg->length;
+
+ nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ while (len > 0) {
+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
+
+ if (!bio) {
+ nr_vecs = min_t(unsigned int, BIO_MAX_PAGES,
+ nr_pages);
+ nr_pages -= nr_vecs;
+ bio = bio_alloc(GFP_KERNEL, nr_vecs);
+ if (!bio) {
+ err = -ENOMEM;
+ goto free_bios;
+ }
+ bio->bi_end_io = scsiback_bi_endio;
+ if (bio_last)
+ bio_last->bi_next = bio;
+ else
+ bio_first = bio;
+ bio_last = bio;
+ }
+
+ if (bio_add_pc_page(q, bio, page, bytes, off) !=
+ bytes) {
+ bio_put(bio);
+ err = -EINVAL;
+ goto free_bios;
+ }
+
+ if (bio->bi_vcnt >= nr_vecs) {
+ bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+ if (pending_req->sc_data_direction == WRITE)
+ bio->bi_rw |= REQ_WRITE;
+ bio = NULL;
+ }
+
+ page++;
+ len -= bytes;
+ off = 0;
+ }
+ }
+
+ return bio_first;
+
+free_bios:
+ while ((bio = bio_first) != NULL) {
+ bio_first = bio->bi_next;
+ bio_put(bio);
+ }
+
+ return ERR_PTR(err);
+}
+
+
+void scsiback_cmd_exec(pending_req_t *pending_req)
+{
+ int cmd_len = (int)pending_req->cmd_len;
+ int data_dir = (int)pending_req->sc_data_direction;
+ unsigned int timeout;
+ struct request *rq;
+ int write;
+
+ DPRINTK("%s\n",__FUNCTION__);
+
+ /* Use the frontend's timeout so the backend does not time out first. */
+ if (pending_req->timeout_per_command)
+ timeout = pending_req->timeout_per_command * HZ;
+ else
+ timeout = VSCSIIF_TIMEOUT;
+
+ write = (data_dir == DMA_TO_DEVICE);
+ if (pending_req->nr_segments) {
+ struct bio *bio = request_map_sg(pending_req);
+
+ if (IS_ERR(bio)) {
+ printk(KERN_ERR "scsiback: SG Request Map Error\n");
+ return;
+ }
+
+ rq = blk_make_request(pending_req->sdev->request_queue, bio,
+ GFP_KERNEL);
+ if (IS_ERR(rq)) {
+ printk(KERN_ERR "scsiback: Make Request Error\n");
+ return;
+ }
+
+ rq->buffer = NULL;
+ } else {
+ rq = blk_get_request(pending_req->sdev->request_queue, write,
+ GFP_KERNEL);
+ if (unlikely(!rq)) {
+ printk(KERN_ERR "scsiback: Get Request Error\n");
+ return;
+ }
+ }
+
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
+ rq->cmd_len = cmd_len;
+ memcpy(rq->cmd, pending_req->cmnd, cmd_len);
+
+ memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
+ rq->sense = pending_req->sense_buffer;
+ rq->sense_len = 0;
+
+ /* not allowed to retry in backend. */
+ rq->retries = 0;
+ rq->timeout = timeout;
+ rq->end_io_data = pending_req;
+
+ scsiback_get(pending_req->info);
+ blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done);
+
+ return ;
+}
+
+
+static void scsiback_device_reset_exec(pending_req_t *pending_req)
+{
+ struct vscsibk_info *info = pending_req->info;
+ int err;
+ struct scsi_device *sdev = pending_req->sdev;
+
+ scsiback_get(info);
+ err = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
+
+ scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
+ scsiback_put(info);
+
+ return;
+}
+
+
+irqreturn_t scsiback_intr(int irq, void *dev_id)
+{
+ scsiback_notify_work((struct vscsibk_info *)dev_id);
+ return IRQ_HANDLED;
+}
+
+static int prepare_pending_reqs(struct vscsibk_info *info,
+ vscsiif_request_t *ring_req, pending_req_t *pending_req)
+{
+ struct scsi_device *sdev;
+ struct ids_tuple vir;
+ int err = -EINVAL;
+
+ DPRINTK("%s\n",__FUNCTION__);
+
+ pending_req->rqid = ring_req->rqid;
+ pending_req->act = ring_req->act;
+
+ pending_req->info = info;
+
+ pending_req->v_chn = vir.chn = ring_req->channel;
+ pending_req->v_tgt = vir.tgt = ring_req->id;
+ vir.lun = ring_req->lun;
+
+ rmb();
+ sdev = scsiback_do_translation(info, &vir);
+ if (!sdev) {
+ pending_req->sdev = NULL;
+ DPRINTK("scsiback: doesn't exist.\n");
+ err = -ENODEV;
+ goto invalid_value;
+ }
+ pending_req->sdev = sdev;
+
+ /* request range check from frontend */
+ pending_req->sc_data_direction = ring_req->sc_data_direction;
+ barrier();
+ if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
+ (pending_req->sc_data_direction != DMA_TO_DEVICE) &&
+ (pending_req->sc_data_direction != DMA_FROM_DEVICE) &&
+ (pending_req->sc_data_direction != DMA_NONE)) {
+ DPRINTK("scsiback: invalid parameter data_dir = %d\n",
+ pending_req->sc_data_direction);
+ err = -EINVAL;
+ goto invalid_value;
+ }
+
+ pending_req->nr_segments = ring_req->nr_segments;
+ barrier();
+ if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
+ DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
+ pending_req->nr_segments);
+ err = -EINVAL;
+ goto invalid_value;
+ }
+
+ pending_req->cmd_len = ring_req->cmd_len;
+ barrier();
+ if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
+ DPRINTK("scsiback: invalid parameter cmd_len = %d\n",
+ pending_req->cmd_len);
+ err = -EINVAL;
+ goto invalid_value;
+ }
+ memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);
+
+ pending_req->timeout_per_command = ring_req->timeout_per_command;
+
+ if(scsiback_gnttab_data_map(ring_req, pending_req)) {
+ DPRINTK("scsiback: invalid buffer\n");
+ err = -EINVAL;
+ goto invalid_value;
+ }
+
+ return 0;
+
+invalid_value:
+ return err;
+}
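+
+/*
+ * Illustrative sketch, not additional driver logic: each frontend-owned
+ * field above is copied from the shared ring into pending_req and only
+ * the private copy is validated, with barrier() in between, so the
+ * frontend cannot change a value between its check and its later use:
+ *
+ *	pending_req->cmd_len = ring_req->cmd_len;   snapshot the field
+ *	barrier();                                  forbid a re-read below
+ *	if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE)
+ *		return -EINVAL;                     check the snapshot only
+ */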
+
+
+static int scsiback_do_cmd_fn(struct vscsibk_info *info)
+{
+ struct vscsiif_back_ring *ring = &info->ring;
+ vscsiif_request_t *ring_req;
+
+ pending_req_t *pending_req;
+ RING_IDX rc, rp;
+ int err, more_to_do = 0;
+
+ DPRINTK("%s\n",__FUNCTION__);
+
+ rc = ring->req_cons;
+ rp = ring->sring->req_prod;
+ rmb();
+
+ while (rc != rp) {
+ if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
+ break;
+ pending_req = alloc_req(info);
+ if (NULL == pending_req) {
+ more_to_do = 1;
+ break;
+ }
+
+ ring_req = RING_GET_REQUEST(ring, rc);
+ ring->req_cons = ++rc;
+
+ err = prepare_pending_reqs(info, ring_req,
+ pending_req);
+ if (err == -EINVAL) {
+ scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
+ 0, pending_req);
+ continue;
+ } else if (err == -ENODEV) {
+ scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16),
+ 0, pending_req);
+ continue;
+ }
+
+ if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) {
+
+ /* In host mode, pass the command straight through; otherwise go via the emulation layer. */
+ if (info->feature == VSCSI_TYPE_HOST)
+ scsiback_cmd_exec(pending_req);
+ else
+ scsiback_req_emulation_or_cmdexec(pending_req);
+
+ } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) {
+ scsiback_device_reset_exec(pending_req);
+ } else {
+ printk(KERN_ERR "scsiback: invalid parameter for request\n");
+ scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
+ 0, pending_req);
+ continue;
+ }
+ }
+
+ if (RING_HAS_UNCONSUMED_REQUESTS(ring))
+ more_to_do = 1;
+
+ /* Yield point for this unbounded loop. */
+ cond_resched();
+
+ return more_to_do;
+}
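+
+/*
+ * A minimal sketch of the standard Xen shared-ring consumer pattern used
+ * by scsiback_do_cmd_fn() above (macros from xen/interface/io/ring.h):
+ *
+ *	rc = ring->req_cons;            private consumer index
+ *	rp = ring->sring->req_prod;     producer index in shared memory
+ *	rmb();                          see the requests behind rp
+ *	while (rc != rp) {
+ *		req = RING_GET_REQUEST(ring, rc);
+ *		ring->req_cons = ++rc;  mark consumed, then process
+ *		...
+ *	}
+ */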
+
+
+int scsiback_schedule(void *data)
+{
+ struct vscsibk_info *info = (struct vscsibk_info *)data;
+
+ DPRINTK("%s\n",__FUNCTION__);
+
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(
+ info->wq,
+ info->waiting_reqs || kthread_should_stop());
+ wait_event_interruptible(
+ pending_free_wq,
+ !list_empty(&pending_free) || kthread_should_stop());
+
+ info->waiting_reqs = 0;
+ smp_mb();
+
+ if (scsiback_do_cmd_fn(info))
+ info->waiting_reqs = 1;
+ }
+
+ return 0;
+}
+
+
+static int __init scsiback_init(void)
+{
+ int i, mmap_pages;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
+
+ pending_reqs = kcalloc(vscsiif_reqs, sizeof(pending_reqs[0]),
+ GFP_KERNEL);
+ pending_grant_handles = kcalloc(mmap_pages,
+ sizeof(pending_grant_handles[0]), GFP_KERNEL);
+ pending_pages = kcalloc(mmap_pages, sizeof(pending_pages[0]),
+ GFP_KERNEL);
+
+ if (!pending_reqs || !pending_grant_handles || !pending_pages)
+ goto out_of_memory;
+
+ for (i = 0; i < mmap_pages; i++) {
+ pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
+ pending_pages[i] = alloc_page(GFP_KERNEL);
+ if (pending_pages[i] == NULL)
+ goto out_of_memory;
+ }
+ if (scsiback_interface_init() < 0)
+ goto out_of_kmem;
+
+ INIT_LIST_HEAD(&pending_free);
+
+ for (i = 0; i < vscsiif_reqs; i++)
+ list_add_tail(&pending_reqs[i].free_list, &pending_free);
+
+ if (scsiback_xenbus_init())
+ goto out_of_xenbus;
+
+ scsiback_emulation_init();
+
+ return 0;
+
+out_of_xenbus:
+ scsiback_xenbus_unregister();
+out_of_kmem:
+ scsiback_interface_exit();
+out_of_memory:
+ if (pending_pages) {
+ for (i = 0; i < mmap_pages; i++) {
+ if (pending_pages[i])
+ __free_page(pending_pages[i]);
+ }
+ kfree(pending_pages);
+ }
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ printk(KERN_ERR "scsiback: %s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
+}
+
+
+static void __exit scsiback_exit(void)
+{
+ scsiback_xenbus_unregister();
+ scsiback_interface_exit();
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ if (pending_pages) {
+ unsigned int i;
+ unsigned int mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
+ for (i = 0; i < mmap_pages; i++) {
+ if (pending_pages[i])
+ __free_page(pending_pages[i]);
+ }
+ kfree(pending_pages);
+ }
+}
+
+module_init(scsiback_init);
+module_exit(scsiback_exit);
+
+MODULE_DESCRIPTION("Xen SCSI backend driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/scsi/xen-scsiback/translate.c b/drivers/scsi/xen-scsiback/translate.c
new file mode 100644
index 00000000000000..36873cc77fcb72
--- /dev/null
+++ b/drivers/scsi/xen-scsiback/translate.c
@@ -0,0 +1,168 @@
+/*
+ * Xen SCSI backend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/gfp.h>
+
+#include "common.h"
+
+/*
+ Initialize the translation entry list
+*/
+void scsiback_init_translation_table(struct vscsibk_info *info)
+{
+ INIT_LIST_HEAD(&info->v2p_entry_lists);
+ spin_lock_init(&info->v2p_lock);
+}
+
+
+/*
+ Add a new translation entry
+*/
+int scsiback_add_translation_entry(struct vscsibk_info *info,
+ struct scsi_device *sdev, struct ids_tuple *v)
+{
+ int err = 0;
+ struct v2p_entry *entry;
+ struct v2p_entry *new;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+
+ /* Check double assignment to identical virtual ID */
+ list_for_each_entry(entry, head, l) {
+ if ((entry->v.chn == v->chn) &&
+ (entry->v.tgt == v->tgt) &&
+ (entry->v.lun == v->lun)) {
+ printk(KERN_WARNING "scsiback: Virtual ID is already used. "
+ "Assignment was not performed.\n");
+ err = -EEXIST;
+ goto out;
+ }
+
+ }
+
+ /* Create a new translation entry and add to the list */
+ new = kmalloc(sizeof(struct v2p_entry), GFP_ATOMIC);
+ if (!new) {
+ printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
+ err = -ENOMEM;
+ goto out;
+ }
+ new->v = *v;
+ new->sdev = sdev;
+ list_add_tail(&new->l, head);
+
+out:
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return err;
+}
+
+
+/*
+ Delete the specified translation entry
+*/
+int scsiback_del_translation_entry(struct vscsibk_info *info,
+ struct ids_tuple *v)
+{
+ struct v2p_entry *entry;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ /* Find out the translation entry specified */
+ list_for_each_entry(entry, head, l) {
+ if ((entry->v.chn == v->chn) &&
+ (entry->v.tgt == v->tgt) &&
+ (entry->v.lun == v->lun)) {
+ goto found;
+ }
+ }
+
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return 1;
+
+found:
+ /* Delete the specified translation entry */
+ scsi_device_put(entry->sdev);
+ list_del(&entry->l);
+ kfree(entry);
+
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return 0;
+}
+
+
+/*
+ Perform virtual to physical translation
+*/
+struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
+ struct ids_tuple *v)
+{
+ struct v2p_entry *entry;
+ struct list_head *head = &(info->v2p_entry_lists);
+ struct scsi_device *sdev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ list_for_each_entry(entry, head, l) {
+ if ((entry->v.chn == v->chn) &&
+ (entry->v.tgt == v->tgt) &&
+ (entry->v.lun == v->lun)) {
+ sdev = entry->sdev;
+ goto out;
+ }
+ }
+out:
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return sdev;
+}
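+
+/*
+ * Usage sketch with hypothetical values: the frontend names a LUN by a
+ * virtual (channel, target, lun) tuple which the backend resolves to the
+ * struct scsi_device registered via scsiback_add_translation_entry():
+ *
+ *	struct ids_tuple v = { .chn = 0, .tgt = 1, .lun = 0 };
+ *	struct scsi_device *sdev = scsiback_do_translation(info, &v);
+ *	if (!sdev)
+ *		... respond with (DID_NO_CONNECT << 16);
+ */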
+
+
+/*
+ Release all translation entries
+*/
+void scsiback_release_translation_entry(struct vscsibk_info *info)
+{
+ struct v2p_entry *entry, *tmp;
+ struct list_head *head = &(info->v2p_entry_lists);
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ list_for_each_entry_safe(entry, tmp, head, l) {
+ scsi_device_put(entry->sdev);
+ list_del(&entry->l);
+ kfree(entry);
+ }
+
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ return;
+}
diff --git a/drivers/scsi/xen-scsiback/xenbus.c b/drivers/scsi/xen-scsiback/xenbus.c
new file mode 100644
index 00000000000000..0816c0e41b28e6
--- /dev/null
+++ b/drivers/scsi/xen-scsiback/xenbus.c
@@ -0,0 +1,374 @@
+/*
+ * Xen SCSI backend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * Based on the blkback driver code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+
+#include "common.h"
+
+struct backend_info
+{
+ struct xenbus_device *dev;
+ struct vscsibk_info *info;
+};
+
+
+static int __vscsiif_name(struct backend_info *be, char *buf)
+{
+ struct xenbus_device *dev = be->dev;
+ unsigned int domid, id;
+
+ sscanf(dev->nodename, "backend/vscsi/%u/%u", &domid, &id);
+ snprintf(buf, TASK_COMM_LEN, "vscsi.%u.%u", be->info->domid, id);
+
+ return 0;
+}
+
+static int scsiback_map(struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ unsigned long ring_ref = 0;
+ unsigned int evtchn = 0;
+ int err;
+ char name[TASK_COMM_LEN];
+
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "ring-ref", "%lu", &ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend);
+ return err;
+ }
+ err = scsiback_init_sring(be->info, ring_ref, evtchn);
+ if (err)
+ return err;
+
+ err = __vscsiif_name(be, name);
+ if (err) {
+ xenbus_dev_error(dev, err, "get scsiback dev name");
+ return err;
+ }
+
+ be->info->kthread = kthread_run(scsiback_schedule, be->info, name);
+ if (IS_ERR(be->info->kthread)) {
+ err = PTR_ERR(be->info->kthread);
+ be->info->kthread = NULL;
+ xenbus_dev_error(be->dev, err, "start vscsiif");
+ return err;
+ }
+
+ return 0;
+}
+
+
+struct scsi_device *scsiback_get_scsi_device(struct ids_tuple *phy)
+{
+ struct Scsi_Host *shost;
+ struct scsi_device *sdev = NULL;
+
+ shost = scsi_host_lookup(phy->hst);
+ if (IS_ERR(shost)) {
+ printk(KERN_ERR "scsiback: host%d doesn't exist.\n",
+ phy->hst);
+ return NULL;
+ }
+ sdev = scsi_device_lookup(shost, phy->chn, phy->tgt, phy->lun);
+ if (!sdev) {
+ printk(KERN_ERR "scsiback: %d:%d:%d:%d doesn't exist.\n",
+ phy->hst, phy->chn, phy->tgt, phy->lun);
+ scsi_host_put(shost);
+ return NULL;
+ }
+
+ scsi_host_put(shost);
+ return sdev;
+}
+
+#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1
+#define VSCSIBACK_OP_UPDATEDEV_STATE 2
+
+
+static void scsiback_do_lun_hotplug(struct backend_info *be, int op)
+{
+ int i, err = 0;
+ struct ids_tuple phy, vir;
+ int device_state;
+ char str[64], state_str[64];
+ char **dir;
+ unsigned int dir_n = 0;
+ struct xenbus_device *dev = be->dev;
+ struct scsi_device *sdev;
+
+ dir = xenbus_directory(XBT_NIL, dev->nodename, "vscsi-devs", &dir_n);
+ if (IS_ERR(dir))
+ return;
+
+ for (i = 0; i < dir_n; i++) {
+ /* read status */
+ snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
+ err = xenbus_scanf(XBT_NIL, dev->nodename, state_str, "%u",
+ &device_state);
+ if (XENBUS_EXIST_ERR(err))
+ continue;
+
+ /* physical SCSI device */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", dir[i]);
+ err = xenbus_scanf(XBT_NIL, dev->nodename, str,
+ "%u:%u:%u:%u", &phy.hst, &phy.chn, &phy.tgt, &phy.lun);
+ if (XENBUS_EXIST_ERR(err)) {
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateClosed);
+ continue;
+ }
+
+ /* virtual SCSI device */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
+ err = xenbus_scanf(XBT_NIL, dev->nodename, str,
+ "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
+ if (XENBUS_EXIST_ERR(err)) {
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateClosed);
+ continue;
+ }
+
+ switch (op) {
+ case VSCSIBACK_OP_ADD_OR_DEL_LUN:
+ if (device_state == XenbusStateInitialising) {
+ sdev = scsiback_get_scsi_device(&phy);
+ if (!sdev)
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateClosed);
+ else {
+ err = scsiback_add_translation_entry(be->info, sdev, &vir);
+ if (!err) {
+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateInitialised)) {
+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
+ scsiback_del_translation_entry(be->info, &vir);
+ }
+ } else {
+ scsi_device_put(sdev);
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateClosed);
+ }
+ }
+ }
+
+ if (device_state == XenbusStateClosing) {
+ if (!scsiback_del_translation_entry(be->info, &vir)) {
+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateClosed))
+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
+ }
+ }
+ break;
+
+ case VSCSIBACK_OP_UPDATEDEV_STATE:
+ if (device_state == XenbusStateInitialised) {
+ /* modify vscsi-devs/dev-x/state */
+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateConnected)) {
+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
+ scsiback_del_translation_entry(be->info, &vir);
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
+ "%d", XenbusStateClosed);
+ }
+ }
+ break;
+ /* Add handling for further operations here as needed. */
+ default:
+ break;
+ }
+ }
+
+ kfree(dir);
+ return;
+}
+
+
+static void scsiback_frontend_changed(struct xenbus_device *dev,
+ enum xenbus_state frontend_state)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+ int err;
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ break;
+ case XenbusStateInitialised:
+ err = scsiback_map(be);
+ if (err)
+ break;
+
+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN);
+ xenbus_switch_state(dev, XenbusStateConnected);
+
+ break;
+ case XenbusStateConnected:
+
+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_UPDATEDEV_STATE);
+
+ if (dev->state == XenbusStateConnected)
+ break;
+
+ xenbus_switch_state(dev, XenbusStateConnected);
+
+ break;
+
+ case XenbusStateClosing:
+ scsiback_disconnect(be->info);
+ xenbus_switch_state(dev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ device_unregister(&dev->dev);
+ break;
+
+ case XenbusStateReconfiguring:
+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN);
+
+ xenbus_switch_state(dev, XenbusStateReconfigured);
+
+ break;
+
+ default:
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+
+static int scsiback_remove(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ if (be->info) {
+ scsiback_disconnect(be->info);
+ scsiback_release_translation_entry(be->info);
+ scsiback_free(be->info);
+ be->info = NULL;
+ }
+
+ kfree(be);
+ dev_set_drvdata(&dev->dev, NULL);
+
+ return 0;
+}
+
+
+static int scsiback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+ unsigned val = 0;
+
+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
+ GFP_KERNEL);
+
+ if (!be) {
+ xenbus_dev_fatal(dev, -ENOMEM,
+ "allocating backend structure");
+ return -ENOMEM;
+ }
+ be->dev = dev;
+ dev_set_drvdata(&dev->dev, be);
+
+ be->info = vscsibk_info_alloc(dev->otherend_id);
+ if (IS_ERR(be->info)) {
+ err = PTR_ERR(be->info);
+ be->info = NULL;
+ xenbus_dev_fatal(dev, err, "creating scsihost interface");
+ goto fail;
+ }
+
+ be->info->dev = dev;
+ be->info->irq = 0;
+ be->info->feature = 0; /* default: not host mode */
+
+ scsiback_init_translation_table(be->info);
+
+ err = xenbus_scanf(XBT_NIL, dev->nodename,
+ "feature-host", "%d", &val);
+ if (XENBUS_EXIST_ERR(err))
+ val = 0;
+
+ if (val)
+ be->info->feature = VSCSI_TYPE_HOST;
+
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto fail;
+
+ return 0;
+
+
+fail:
+ printk(KERN_WARNING "scsiback: %s failed\n",__func__);
+ scsiback_remove(dev);
+
+ return err;
+}
+
+
+static struct xenbus_device_id scsiback_ids[] = {
+ { "vscsi" },
+ { "" }
+};
+
+static struct xenbus_driver scsiback = {
+ .name = "vscsi",
+ .owner = THIS_MODULE,
+ .ids = scsiback_ids,
+ .probe = scsiback_probe,
+ .remove = scsiback_remove,
+ .otherend_changed = scsiback_frontend_changed
+};
+
+int scsiback_xenbus_init(void)
+{
+ return xenbus_register_backend(&scsiback);
+}
+
+void scsiback_xenbus_unregister(void)
+{
+ xenbus_unregister_driver(&scsiback);
+}
diff --git a/drivers/scsi/xen-scsifront/Makefile b/drivers/scsi/xen-scsifront/Makefile
new file mode 100644
index 00000000000000..18a53291a0313c
--- /dev/null
+++ b/drivers/scsi/xen-scsifront/Makefile
@@ -0,0 +1,4 @@
+
+obj-$(CONFIG_XEN_SCSI_FRONTEND) := xen-scsifront.o
+xen-scsifront-objs := scsifront.o xenbus.o
+
diff --git a/drivers/scsi/xen-scsifront/common.h b/drivers/scsi/xen-scsifront/common.h
new file mode 100644
index 00000000000000..cfa1c32208e14e
--- /dev/null
+++ b/drivers/scsi/xen-scsifront/common.h
@@ -0,0 +1,137 @@
+/*
+ * Xen SCSI frontend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_DRIVERS_SCSIFRONT_H__
+#define __XEN_DRIVERS_SCSIFRONT_H__
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <asm/xen/page.h>
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/io/ring.h>
+#include <xen/interface/io/vscsiif.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/io/protocols.h>
+#include <asm/delay.h>
+#include <asm/hypervisor.h>
+/*#include <asm/maddr.h>*/
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+#define GRANT_INVALID_REF 0
+#define VSCSI_IN_ABORT 1
+#define VSCSI_IN_RESET 2
+
+/* tuning points */
+#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
+#define VSCSIIF_MAX_TARGET 64
+#define VSCSIIF_MAX_LUN 255
+
+#define VSCSIIF_RING_SIZE __CONST_RING_SIZE(vscsiif, PAGE_SIZE)
+#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE
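+
+/*
+ * Illustrative note: __CONST_RING_SIZE() rounds the number of entries
+ * that fit in one shared page down to a power of two, so with 4 KiB
+ * pages the ring carries on the order of a few dozen in-flight requests
+ * (the exact count depends on the size of the shared request/response
+ * union).
+ */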
+
+struct vscsifrnt_shadow {
+ uint16_t next_free;
+
+ /* command between backend and frontend
+ * VSCSIIF_ACT_SCSI_CDB or VSCSIIF_ACT_SCSI_RESET */
+ unsigned char act;
+
+ /* do reset function */
+ wait_queue_head_t wq_reset; /* reset work queue */
+ int wait_reset; /* reset work queue condition */
+ int32_t rslt_reset; /* reset response status */
+ /* (SUCCESS or FAILED) */
+
+ /* for DMA_TO_DEVICE(1), DMA_FROM_DEVICE(2), DMA_NONE(3)
+ requests */
+ unsigned int sc_data_direction;
+
+ /* Number of pieces of scatter-gather */
+ unsigned int nr_segments;
+
+ /* requested struct scsi_cmnd is stored from kernel */
+ unsigned long req_scsi_cmnd;
+ int gref[VSCSIIF_SG_TABLESIZE];
+};
+
+struct vscsifrnt_info {
+ struct xenbus_device *dev;
+
+ struct Scsi_Host *host;
+
+ spinlock_t io_lock;
+ spinlock_t shadow_lock;
+ unsigned int evtchn;
+ unsigned int irq;
+
+ grant_ref_t ring_ref;
+ struct vscsiif_front_ring ring;
+ struct vscsiif_response ring_res;
+
+ struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
+ uint32_t shadow_free;
+
+ struct task_struct *kthread;
+ wait_queue_head_t wq;
+ unsigned int waiting_resp;
+
+};
+
+#define DPRINTK(_f, _a...) \
+ pr_debug("(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+
+int scsifront_xenbus_init(void);
+void scsifront_xenbus_unregister(void);
+int scsifront_schedule(void *data);
+irqreturn_t scsifront_intr(int irq, void *dev_id);
+int scsifront_cmd_done(struct vscsifrnt_info *info);
+
+
+#endif /* __XEN_DRIVERS_SCSIFRONT_H__ */
diff --git a/drivers/scsi/xen-scsifront/scsifront.c b/drivers/scsi/xen-scsifront/scsifront.c
new file mode 100644
index 00000000000000..2d5e25f52f01a5
--- /dev/null
+++ b/drivers/scsi/xen-scsifront/scsifront.c
@@ -0,0 +1,469 @@
+/*
+ * Xen SCSI frontend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/version.h>
+#include "common.h"
+
+static int get_id_from_freelist(struct vscsifrnt_info *info)
+{
+ unsigned long flags;
+ uint32_t free;
+
+ spin_lock_irqsave(&info->shadow_lock, flags);
+
+ free = info->shadow_free;
+ BUG_ON(free >= VSCSIIF_MAX_REQS);
+ info->shadow_free = info->shadow[free].next_free;
+ info->shadow[free].next_free = 0x0fff;
+
+ info->shadow[free].wait_reset = 0;
+
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
+
+ return free;
+}
+
+static void add_id_to_freelist(struct vscsifrnt_info *info, uint32_t id)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->shadow_lock, flags);
+
+ info->shadow[id].next_free = info->shadow_free;
+ info->shadow[id].req_scsi_cmnd = 0;
+ info->shadow_free = id;
+
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
+}
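+
+/*
+ * The shadow slots above form an embedded free list: shadow_free holds
+ * the index of the first free slot, each slot's next_free points at the
+ * following one, and 0x0fff marks the end. A minimal sketch of the
+ * pop/push pair:
+ *
+ *	id = info->shadow_free;                         pop a slot
+ *	info->shadow_free = info->shadow[id].next_free;
+ *	...
+ *	info->shadow[id].next_free = info->shadow_free; push it back
+ *	info->shadow_free = id;
+ */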
+
+
+static struct vscsiif_request *scsifront_pre_request(struct vscsifrnt_info *info)
+{
+ struct vscsiif_front_ring *ring = &(info->ring);
+ vscsiif_request_t *ring_req;
+ uint32_t id;
+
+ ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
+
+ ring->req_prod_pvt++;
+
+ id = get_id_from_freelist(info); /* use id by response */
+ ring_req->rqid = (uint16_t)id;
+
+ return ring_req;
+}
+
+
+static void scsifront_notify_work(struct vscsifrnt_info *info)
+{
+ info->waiting_resp = 1;
+ wake_up(&info->wq);
+}
+
+
+static void scsifront_do_request(struct vscsifrnt_info *info)
+{
+ struct vscsiif_front_ring *ring = &(info->ring);
+ unsigned int irq = info->irq;
+ int notify;
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
+ if (notify)
+ notify_remote_via_irq(irq);
+}
+
+irqreturn_t scsifront_intr(int irq, void *dev_id)
+{
+ scsifront_notify_work((struct vscsifrnt_info *)dev_id);
+ return IRQ_HANDLED;
+}
+
+
+static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id)
+{
+ int i;
+
+ if (s->sc_data_direction == DMA_NONE)
+ return;
+
+ if (s->nr_segments) {
+ for (i = 0; i < s->nr_segments; i++) {
+ if (unlikely(gnttab_query_foreign_access(
+ s->gref[i]) != 0)) {
+ printk(KERN_ALERT "scsifront: "
+ "grant still in use by backend.\n");
+ BUG();
+ }
+ gnttab_end_foreign_access(s->gref[i], 0, 0UL);
+ }
+ }
+
+ return;
+}
+
+
+static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
+ vscsiif_response_t *ring_res)
+{
+ struct scsi_cmnd *sc;
+ uint32_t id;
+ uint8_t sense_len;
+
+ id = ring_res->rqid;
+ sc = (struct scsi_cmnd *)info->shadow[id].req_scsi_cmnd;
+
+ BUG_ON(sc == NULL);
+
+ scsifront_gnttab_done(&info->shadow[id], id);
+ add_id_to_freelist(info, id);
+
+ sc->result = ring_res->rslt;
+ scsi_set_resid(sc, ring_res->residual_len);
+
+ if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE)
+ sense_len = VSCSIIF_SENSE_BUFFERSIZE;
+ else
+ sense_len = ring_res->sense_len;
+
+ if (sense_len)
+ memcpy(sc->sense_buffer, ring_res->sense_buffer, sense_len);
+
+ sc->scsi_done(sc);
+
+ return;
+}
+
+
+static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
+ vscsiif_response_t *ring_res)
+{
+ uint16_t id = ring_res->rqid;
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->shadow_lock, flags);
+ info->shadow[id].wait_reset = 1;
+ info->shadow[id].rslt_reset = ring_res->rslt;
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
+
+ wake_up(&(info->shadow[id].wq_reset));
+}
+
+
+int scsifront_cmd_done(struct vscsifrnt_info *info)
+{
+ vscsiif_response_t *ring_res;
+
+ RING_IDX i, rp;
+ int more_to_do = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->io_lock, flags);
+
+ rp = info->ring.sring->rsp_prod;
+ rmb();
+ for (i = info->ring.rsp_cons; i != rp; i++) {
+
+ ring_res = RING_GET_RESPONSE(&info->ring, i);
+
+ if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
+ scsifront_cdb_cmd_done(info, ring_res);
+ else
+ scsifront_sync_cmd_done(info, ring_res);
+ }
+
+ info->ring.rsp_cons = i;
+
+ if (i != info->ring.req_prod_pvt) {
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ } else {
+ info->ring.sring->rsp_event = i + 1;
+ }
+
+ spin_unlock_irqrestore(&info->io_lock, flags);
+
+
+ /* Yield point for this unbounded loop. */
+ cond_resched();
+
+ return more_to_do;
+}
+
+
+int scsifront_schedule(void *data)
+{
+ struct vscsifrnt_info *info = (struct vscsifrnt_info *)data;
+
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(
+ info->wq,
+ info->waiting_resp || kthread_should_stop());
+
+ info->waiting_resp = 0;
+ smp_mb();
+
+ if (scsifront_cmd_done(info))
+ info->waiting_resp = 1;
+ }
+
+ return 0;
+}
+
+
+static int map_data_for_request(struct vscsifrnt_info *info,
+ struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
+{
+ grant_ref_t gref_head;
+ int err, ref, ref_cnt = 0;
+ int write = (sc->sc_data_direction == DMA_TO_DEVICE);
+ unsigned int i, nr_pages, off, len, bytes;
+ unsigned long buffer_mfn, buffer_pfn;
+
+ if (sc->sc_data_direction == DMA_NONE)
+ return 0;
+
+ err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
+ if (err) {
+ printk(KERN_ERR "scsifront: gnttab_alloc_grant_references() error\n");
+ return -ENOMEM;
+ }
+
+ if (scsi_bufflen(sc)) {
+ /* Modelled on scsi_req_map_sg() in scsi_lib.c. */
+ struct scatterlist *sg, *sgl = scsi_sglist(sc);
+ unsigned int data_len = scsi_bufflen(sc);
+
+ nr_pages = (data_len + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (nr_pages > VSCSIIF_SG_TABLESIZE) {
+ printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n");
+ ref_cnt = -E2BIG;
+ goto big_to_sg;
+ }
+
+ for_each_sg (sgl, sg, scsi_sg_count(sc), i) {
+ off = sg->offset;
+ len = sg->length;
+
+ buffer_pfn = page_to_pfn(sg_page(sg));
+ while (len > 0 && data_len > 0) {
+ /*
+ * sg sends a scatterlist that is larger than
+ * the data_len it wants transferred for certain
+ * IO sizes
+ */
+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
+ bytes = min(bytes, data_len);
+
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ buffer_mfn = pfn_to_mfn(buffer_pfn);
+ gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
+ buffer_mfn, write);
+
+ info->shadow[id].gref[ref_cnt] = ref;
+ ring_req->seg[ref_cnt].gref = ref;
+ ring_req->seg[ref_cnt].offset = (uint16_t)off;
+ ring_req->seg[ref_cnt].length = (uint16_t)bytes;
+
+ buffer_pfn++;
+ len -= bytes;
+ data_len -= bytes;
+ off = 0;
+ ref_cnt++;
+ }
+ }
+ }
+
+big_to_sg:
+
+ gnttab_free_grant_references(gref_head);
+
+ return ref_cnt;
+}
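+
+/*
+ * Worked example for the splitting above (illustrative, assuming 4 KiB
+ * pages): a 6144-byte sg element starting at offset 2048 within its
+ * first page becomes two grant segments:
+ *
+ *	bytes = min(len, PAGE_SIZE - off)
+ *	segment 0: offset 2048, length 2048   (rest of first page)
+ *	segment 1: offset 0,    length 4096   (whole second page)
+ *
+ * Each segment carries its own grant reference, so ref_cnt is bounded by
+ * VSCSIIF_SG_TABLESIZE pages rather than by the sg element count.
+ */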
+
+static int scsifront_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *sc)
+{
+ struct vscsifrnt_info *info =
+ (struct vscsifrnt_info *) sc->device->host->hostdata;
+ vscsiif_request_t *ring_req;
+ int ref_cnt;
+ uint16_t rqid;
+
+ if (RING_FULL(&info->ring)) {
+ goto out_host_busy;
+ }
+
+ sc->result = 0;
+
+ ring_req = scsifront_pre_request(info);
+ rqid = ring_req->rqid;
+ ring_req->act = VSCSIIF_ACT_SCSI_CDB;
+
+ ring_req->id = sc->device->id;
+ ring_req->lun = sc->device->lun;
+ ring_req->channel = sc->device->channel;
+ ring_req->cmd_len = sc->cmd_len;
+
+ BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
+
+ if (sc->cmd_len)
+ memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
+ else
+ memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
+
+ ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
+ ring_req->timeout_per_command = (sc->request->timeout / HZ);
+
+ info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc;
+ info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
+ info->shadow[rqid].act = ring_req->act;
+
+ ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
+ if (ref_cnt < 0) {
+ add_id_to_freelist(info, rqid);
+ if (ref_cnt == -ENOMEM)
+ goto out_host_busy;
+ sc->result = (DID_ERROR << 16);
+ goto out_fail_command;
+ }
+
+ ring_req->nr_segments = (uint8_t)ref_cnt;
+ info->shadow[rqid].nr_segments = ref_cnt;
+
+ scsifront_do_request(info);
+
+ return 0;
+
+out_host_busy:
+ return SCSI_MLQUEUE_HOST_BUSY;
+
+out_fail_command:
+ sc->scsi_done(sc);
+ return 0;
+}
+
+
+static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
+{
+ return (FAILED);
+}
+
+/* vscsi supports only device reset, since each LUN is exposed to the frontend as its own device */
+static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
+{
+ struct Scsi_Host *host = sc->device->host;
+ struct vscsifrnt_info *info =
+ (struct vscsifrnt_info *) sc->device->host->hostdata;
+
+ vscsiif_request_t *ring_req;
+ uint16_t rqid;
+ int err;
+
+ spin_lock_irq(host->host_lock);
+
+ ring_req = scsifront_pre_request(info);
+ ring_req->act = VSCSIIF_ACT_SCSI_RESET;
+
+ rqid = ring_req->rqid;
+ info->shadow[rqid].act = VSCSIIF_ACT_SCSI_RESET;
+
+ ring_req->channel = sc->device->channel;
+ ring_req->id = sc->device->id;
+ ring_req->lun = sc->device->lun;
+ ring_req->cmd_len = sc->cmd_len;
+
+ if (sc->cmd_len)
+ memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
+ else
+ memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
+
+ ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
+ ring_req->timeout_per_command = (sc->request->timeout / HZ);
+ ring_req->nr_segments = 0;
+
+ scsifront_do_request(info);
+
+ spin_unlock_irq(host->host_lock);
+ wait_event_interruptible(info->shadow[rqid].wq_reset,
+ info->shadow[rqid].wait_reset);
+ spin_lock_irq(host->host_lock);
+
+ err = info->shadow[rqid].rslt_reset;
+
+ add_id_to_freelist(info, rqid);
+
+ spin_unlock_irq(host->host_lock);
+ return err;
+}
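+
+/*
+ * Note the locking in the reset handler above: host_lock is a spinlock
+ * taken with interrupts disabled, so it must be dropped before sleeping
+ * in wait_event_interruptible(); the response kthread then runs
+ * scsifront_sync_cmd_done() to set wait_reset, and the lock is retaken
+ * only to collect rslt_reset and recycle the shadow slot.
+ */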
+
+
+struct scsi_host_template scsifront_sht = {
+ .module = THIS_MODULE,
+ .name = "Xen SCSI frontend driver",
+ .queuecommand = scsifront_queuecommand,
+ .eh_abort_handler = scsifront_eh_abort_handler,
+ .eh_device_reset_handler= scsifront_dev_reset_handler,
+ .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN,
+ .can_queue = VSCSIIF_MAX_REQS,
+ .this_id = -1,
+ .sg_tablesize = VSCSIIF_SG_TABLESIZE,
+ .use_clustering = DISABLE_CLUSTERING,
+ .proc_name = "scsifront",
+};
+
+
+static int __init scsifront_init(void)
+{
+ int err;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ err = scsifront_xenbus_init();
+
+ return err;
+}
+
+static void __exit scsifront_exit(void)
+{
+ scsifront_xenbus_unregister();
+}
+
+module_init(scsifront_init);
+module_exit(scsifront_exit);
+
+MODULE_DESCRIPTION("Xen SCSI frontend driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/xen-scsifront/xenbus.c b/drivers/scsi/xen-scsifront/xenbus.c
new file mode 100644
index 00000000000000..de70c533f2ae58
--- /dev/null
+++ b/drivers/scsi/xen-scsifront/xenbus.c
@@ -0,0 +1,413 @@
+/*
+ * Xen SCSI frontend driver
+ *
+ * Copyright (c) 2008, FUJITSU Limited
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/version.h>
+#include "common.h"
+
+extern struct scsi_host_template scsifront_sht;
+
+static void scsifront_free(struct vscsifrnt_info *info)
+{
+ struct Scsi_Host *host = info->host;
+
+ if (host->shost_state != SHOST_DEL)
+ scsi_remove_host(info->host);
+
+ if (info->ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref,
+ 0, (unsigned long)info->ring.sring);
+ info->ring_ref = GRANT_INVALID_REF;
+ info->ring.sring = NULL;
+ }
+
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info);
+ info->irq = 0;
+
+ scsi_host_put(info->host);
+}
+
+
+static int scsifront_alloc_ring(struct vscsifrnt_info *info)
+{
+ struct xenbus_device *dev = info->dev;
+ struct vscsiif_sring *sring;
+ int err = -ENOMEM;
+
+
+ info->ring_ref = GRANT_INVALID_REF;
+
+ /***** Frontend to Backend ring start *****/
+ sring = (struct vscsiif_sring *) __get_free_page(GFP_KERNEL);
+ if (!sring) {
+ xenbus_dev_fatal(dev, err, "fail to allocate shared ring (Front to Back)");
+ return err;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(sring));
+ if (err < 0) {
+ free_page((unsigned long) sring);
+ info->ring.sring = NULL;
+ xenbus_dev_fatal(dev, err, "fail to grant shared ring (Front to Back)");
+ goto free_sring;
+ }
+ info->ring_ref = err;
+
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err)
+ goto free_sring;
+
+ err = bind_evtchn_to_irqhandler(
+ info->evtchn, scsifront_intr,
+ IRQF_SAMPLE_RANDOM, "scsifront", info);
+
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err, "bind_evtchn_to_irqhandler");
+ goto free_sring;
+ }
+ info->irq = err;
+
+ return 0;
+
+/* free resource */
+free_sring:
+ scsifront_free(info);
+
+ return err;
+}
+
+
+static int scsifront_init_ring(struct vscsifrnt_info *info)
+{
+ struct xenbus_device *dev = info->dev;
+ struct xenbus_transaction xbt;
+ int err;
+
+ DPRINTK("%s\n",__FUNCTION__);
+
+ err = scsifront_alloc_ring(info);
+ if (err)
+ return err;
+ DPRINTK("%u %u\n", info->ring_ref, info->evtchn);
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto free_sring;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
+ info->ring_ref);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
+ goto fail;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+ info->evtchn);
+
+ if (err) {
+ xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
+ goto fail;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto free_sring;
+ }
+
+ return 0;
+
+fail:
+ xenbus_transaction_end(xbt, 1);
+free_sring:
+ /* free resource */
+ scsifront_free(info);
+
+ return err;
+}
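+
+/*
+ * The again:/-EAGAIN loop above is the usual xenbus transaction pattern:
+ * if a conflicting transaction committed first, xenbus_transaction_end()
+ * returns -EAGAIN and the ring-ref/event-channel writes are simply
+ * replayed under a fresh transaction.
+ */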
+
+
+static int scsifront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ struct vscsifrnt_info *info;
+ struct Scsi_Host *host;
+ int i, err = -ENOMEM;
+ char name[TASK_COMM_LEN];
+
+ host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
+ if (!host) {
+ xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
+ return err;
+ }
+ info = (struct vscsifrnt_info *) host->hostdata;
+ info->host = host;
+
+
+ dev_set_drvdata(&dev->dev, info);
+ info->dev = dev;
+
+ for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
+ info->shadow[i].next_free = i + 1;
+ init_waitqueue_head(&(info->shadow[i].wq_reset));
+ info->shadow[i].wait_reset = 0;
+ }
+ info->shadow[VSCSIIF_MAX_REQS - 1].next_free = 0x0fff;
+
+ err = scsifront_init_ring(info);
+ if (err) {
+ scsi_host_put(host);
+ return err;
+ }
+
+ init_waitqueue_head(&info->wq);
+ spin_lock_init(&info->io_lock);
+ spin_lock_init(&info->shadow_lock);
+
+ snprintf(name, TASK_COMM_LEN, "vscsiif.%d", info->host->host_no);
+
+ info->kthread = kthread_run(scsifront_schedule, info, name);
+ if (IS_ERR(info->kthread)) {
+ err = PTR_ERR(info->kthread);
+ info->kthread = NULL;
+ printk(KERN_ERR "scsifront: kthread start err %d\n", err);
+ goto free_sring;
+ }
+
+ host->max_id = VSCSIIF_MAX_TARGET;
+ host->max_channel = 0;
+ host->max_lun = VSCSIIF_MAX_LUN;
+ host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
+
+ err = scsi_add_host(host, &dev->dev);
+ if (err) {
+ printk(KERN_ERR "scsifront: fail to add scsi host %d\n", err);
+ goto free_sring;
+ }
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+
+free_sring:
+ /* free resource */
+ scsifront_free(info);
+ return err;
+}
+
+static int scsifront_remove(struct xenbus_device *dev)
+{
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+
+ DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename);
+
+ if (info->kthread) {
+ kthread_stop(info->kthread);
+ info->kthread = NULL;
+ }
+
+ scsifront_free(info);
+
+ return 0;
+}
+
+
+static int scsifront_disconnect(struct vscsifrnt_info *info)
+{
+ struct xenbus_device *dev = info->dev;
+ struct Scsi_Host *host = info->host;
+
+ DPRINTK("%s: %s disconnect\n",__FUNCTION__ ,dev->nodename);
+
+ /*
+ By the time this function runs, all of the frontend's devices
+ have been deleted, so there is no need to block I/O before
+ calling scsi_remove_host().
+ */
+
+ scsi_remove_host(host);
+ xenbus_frontend_closed(dev);
+
+ return 0;
+}
+
+#define VSCSIFRONT_OP_ADD_LUN 1
+#define VSCSIFRONT_OP_DEL_LUN 2
+
+static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
+{
+ struct xenbus_device *dev = info->dev;
+ int i, err = 0;
+ char str[64], state_str[64];
+ char **dir;
+ unsigned int dir_n = 0;
+ unsigned int device_state;
+ unsigned int hst, chn, tgt, lun;
+ struct scsi_device *sdev;
+
+ dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
+ if (IS_ERR(dir))
+ return;
+
+ for (i = 0; i < dir_n; i++) {
+ /* read status */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
+ &device_state);
+ if (XENBUS_EXIST_ERR(err))
+ continue;
+
+ /* virtual SCSI device */
+ snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, str,
+ "%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
+ if (XENBUS_EXIST_ERR(err))
+ continue;
+
+ /* front device state path */
+ snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
+
+ switch (op) {
+ case VSCSIFRONT_OP_ADD_LUN:
+ if (device_state == XenbusStateInitialised) {
+ sdev = scsi_device_lookup(info->host, chn, tgt, lun);
+ if (sdev) {
+ printk(KERN_ERR "scsifront: Device already in use.\n");
+ scsi_device_put(sdev);
+ xenbus_printf(XBT_NIL, dev->nodename,
+ state_str, "%d", XenbusStateClosed);
+ } else {
+ scsi_add_device(info->host, chn, tgt, lun);
+ xenbus_printf(XBT_NIL, dev->nodename,
+ state_str, "%d", XenbusStateConnected);
+ }
+ }
+ break;
+ case VSCSIFRONT_OP_DEL_LUN:
+ if (device_state == XenbusStateClosing) {
+ sdev = scsi_device_lookup(info->host, chn, tgt, lun);
+ if (sdev) {
+ scsi_remove_device(sdev);
+ scsi_device_put(sdev);
+ xenbus_printf(XBT_NIL, dev->nodename,
+ state_str, "%d", XenbusStateClosed);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ kfree(dir);
+ return;
+}
+
+
+static void scsifront_backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+
+ DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
+
+ switch (backend_state) {
+ case XenbusStateUnknown:
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateInitialised:
+ break;
+
+ case XenbusStateConnected:
+ if (xenbus_read_driver_state(dev->nodename) ==
+ XenbusStateInitialised) {
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
+ }
+
+ if (dev->state == XenbusStateConnected)
+ break;
+
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosing:
+ scsifront_disconnect(info);
+ break;
+
+ case XenbusStateReconfiguring:
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
+ xenbus_switch_state(dev, XenbusStateReconfiguring);
+ break;
+
+ case XenbusStateReconfigured:
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+ }
+}
+
+
+static struct xenbus_device_id scsifront_ids[] = {
+ { "vscsi" },
+ { "" }
+};
+MODULE_ALIAS("xen:vscsi");
+
+static struct xenbus_driver scsifront_driver = {
+ .name = "vscsi",
+ .owner = THIS_MODULE,
+ .ids = scsifront_ids,
+ .probe = scsifront_probe,
+ .remove = scsifront_remove,
+/* .resume = scsifront_resume, */
+ .otherend_changed = scsifront_backend_changed,
+};
+
+int scsifront_xenbus_init(void)
+{
+ return xenbus_register_frontend(&scsifront_driver);
+}
+
+void scsifront_xenbus_unregister(void)
+{
+ xenbus_unregister_driver(&scsifront_driver);
+}
+
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 4cb0d0a3e57b77..fc7bbba585cee2 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -66,14 +66,16 @@
static int debug;
module_param(debug, int, 0600);
-#define T1 (HZ/10)
-#define T2 (HZ/3)
-#define N2 3
+/* Defaults: these are from the specification */
+
+#define T1 10 /* 100ms */
+#define T2 34 /* 333ms */
+#define N2 3 /* Retry 3 times */
/* Use long timers for testing at low speed with debug on */
#ifdef DEBUG_TIMING
-#define T1 HZ
-#define T2 (2 * HZ)
+#define T1 100
+#define T2 200
#endif
/*
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index a767884a6c7a10..31ab82fda38a26 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -501,7 +501,7 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target);
* alloc_xenballooned_pages - get pages that have been ballooned out
* @nr_pages: Number of pages to get
* @pages: pages returned
- * @highmem: highmem or lowmem pages
+ * @highmem: allow highmem pages
* @return 0 on success, error otherwise
*/
int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
@@ -511,7 +511,7 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
mutex_lock(&balloon_mutex);
while (pgno < nr_pages) {
page = balloon_retrieve(highmem);
- if (page && PageHighMem(page) == highmem) {
+ if (page && (highmem || !PageHighMem(page))) {
pages[pgno++] = page;
} else {
enum bp_state st;
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index f6832f46aea4a4..e1c4c6e5b469c4 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -135,7 +135,7 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
/* Grant foreign access to the page. */
gref->gref_id = gnttab_grant_foreign_access(op->domid,
pfn_to_mfn(page_to_pfn(gref->page)), readonly);
- if (gref->gref_id < 0) {
+ if ((int)gref->gref_id < 0) {
rc = gref->gref_id;
goto undo;
}
@@ -280,7 +280,7 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
goto out;
}
- gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+ gref_ids = kcalloc(op.count, sizeof(gref_ids[0]), GFP_TEMPORARY);
if (!gref_ids) {
rc = -ENOMEM;
goto out;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 39871326afa2eb..afca14d9042e6c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -114,11 +114,11 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
if (NULL == add)
return NULL;
- add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL);
- add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL);
- add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
- add->kmap_ops = kzalloc(sizeof(add->kmap_ops[0]) * count, GFP_KERNEL);
- add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL);
+ add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
+ add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
+ add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
+ add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+ add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8e964b91c447b1..4864e5d72e72c7 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -153,7 +153,7 @@ void __init xen_swiotlb_init(int verbose)
char *m = NULL;
unsigned int repeat = 3;
- nr_tbl = swioltb_nr_tbl();
+ nr_tbl = swiotlb_nr_tbl();
if (nr_tbl)
xen_io_tlb_nslabs = nr_tbl;
else {
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 81c3ce6b8bbeed..1906125eab491b 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -35,6 +35,7 @@
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/events.h>
@@ -436,19 +437,20 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
{
struct gnttab_map_grant_ref op = {
- .flags = GNTMAP_host_map,
+ .flags = GNTMAP_host_map | GNTMAP_contains_pte,
.ref = gnt_ref,
.dom = dev->otherend_id,
};
struct vm_struct *area;
+ pte_t *pte;
*vaddr = NULL;
- area = alloc_vm_area(PAGE_SIZE);
+ area = alloc_vm_area(PAGE_SIZE, &pte);
if (!area)
return -ENOMEM;
- op.host_addr = (unsigned long)area->addr;
+ op.host_addr = arbitrary_virt_to_machine(pte).maddr;
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
@@ -527,6 +529,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
struct gnttab_unmap_grant_ref op = {
.host_addr = (unsigned long)vaddr,
};
+ unsigned int level;
/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
* method so that we don't have to muck with vmalloc internals here.
@@ -548,6 +551,8 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
}
op.handle = (grant_handle_t)area->phys_addr;
+ op.host_addr = arbitrary_virt_to_machine(
+ lookup_address((unsigned long)vaddr, &level)).maddr;
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 42e34698518643..8d95a42cf21ee9 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -122,17 +122,12 @@ struct ttm_mem_reg {
* be mmapped by user space. Each of these bos occupy a slot in the
* device address space, that can be used for normal vm operations.
*
- * @ttm_bo_type_user: These are user-space memory areas that are made
- * available to the GPU by mapping the buffer pages into the GPU aperture
- * space. These buffers cannot be mmaped from the device address space.
- *
* @ttm_bo_type_kernel: These buffers are like ttm_bo_type_device buffers,
* but they cannot be accessed from user-space. For kernel-only use.
*/
enum ttm_bo_type {
ttm_bo_type_device,
- ttm_bo_type_user,
ttm_bo_type_kernel
};
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 94eb1434316e53..beef9abaaefdf7 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -43,36 +43,9 @@ struct ttm_backend;
struct ttm_backend_func {
/**
- * struct ttm_backend_func member populate
- *
- * @backend: Pointer to a struct ttm_backend.
- * @num_pages: Number of pages to populate.
- * @pages: Array of pointers to ttm pages.
- * @dummy_read_page: Page to be used instead of NULL pages in the
- * array @pages.
- * @dma_addrs: Array of DMA (bus) address of the ttm pages.
- *
- * Populate the backend with ttm pages. Depending on the backend,
- * it may or may not copy the @pages array.
- */
- int (*populate) (struct ttm_backend *backend,
- unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page,
- dma_addr_t *dma_addrs);
- /**
- * struct ttm_backend_func member clear
- *
- * @backend: Pointer to a struct ttm_backend.
- *
- * This is an "unpopulate" function. Release all resources
- * allocated with populate.
- */
- void (*clear) (struct ttm_backend *backend);
-
- /**
* struct ttm_backend_func member bind
*
- * @backend: Pointer to a struct ttm_backend.
+ * @ttm: Pointer to a struct ttm_tt.
* @bo_mem: Pointer to a struct ttm_mem_reg describing the
* memory type and location for binding.
*
@@ -80,46 +53,29 @@ struct ttm_backend_func {
* indicated by @bo_mem. This function should be able to handle
* differences between aperture and system page sizes.
*/
- int (*bind) (struct ttm_backend *backend, struct ttm_mem_reg *bo_mem);
+ int (*bind) (struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
/**
* struct ttm_backend_func member unbind
*
- * @backend: Pointer to a struct ttm_backend.
+ * @ttm: Pointer to a struct ttm_tt.
*
* Unbind previously bound backend pages. This function should be
* able to handle differences between aperture and system page sizes.
*/
- int (*unbind) (struct ttm_backend *backend);
+ int (*unbind) (struct ttm_tt *ttm);
/**
* struct ttm_backend_func member destroy
*
- * @backend: Pointer to a struct ttm_backend.
+ * @ttm: Pointer to a struct ttm_tt.
*
- * Destroy the backend.
+ * Destroy the backend. This is called back from ttm_tt_destroy, so
+ * don't call ttm_tt_destroy from the callback or it will recurse forever.
*/
- void (*destroy) (struct ttm_backend *backend);
-};
-
-/**
- * struct ttm_backend
- *
- * @bdev: Pointer to a struct ttm_bo_device.
- * @flags: For driver use.
- * @func: Pointer to a struct ttm_backend_func that describes
- * the backend methods.
- *
- */
-
-struct ttm_backend {
- struct ttm_bo_device *bdev;
- uint32_t flags;
- struct ttm_backend_func *func;
+ void (*destroy) (struct ttm_tt *ttm);
};
-#define TTM_PAGE_FLAG_USER (1 << 1)
-#define TTM_PAGE_FLAG_USER_DIRTY (1 << 2)
#define TTM_PAGE_FLAG_WRITE (1 << 3)
#define TTM_PAGE_FLAG_SWAPPED (1 << 4)
#define TTM_PAGE_FLAG_PERSISTENT_SWAP (1 << 5)
@@ -135,23 +91,20 @@ enum ttm_caching_state {
/**
* struct ttm_tt
*
+ * @bdev: Pointer to a struct ttm_bo_device.
+ * @func: Pointer to a struct ttm_backend_func that describes
+ * the backend methods.
* @dummy_read_page: Page to map where the ttm_tt page array contains a NULL
* pointer.
* @pages: Array of pages backing the data.
- * @first_himem_page: Himem pages are put last in the page array, which
- * enables us to run caching attribute changes on only the first part
- * of the page array containing lomem pages. This is the index of the
- * first himem page.
- * @last_lomem_page: Index of the last lomem page in the page array.
* @num_pages: Number of pages in the page array.
* @bdev: Pointer to the current struct ttm_bo_device.
* @be: Pointer to the ttm backend.
- * @tsk: The task for user ttm.
- * @start: virtual address for user ttm.
* @swap_storage: Pointer to shmem struct file for swap storage.
* @caching_state: The current caching state of the pages.
* @state: The current binding state of the pages.
* @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
+ * @alloc_list: used by some page allocation backends
*
* This is a structure holding the pages, caching- and aperture binding
* status for a buffer object that isn't backed by fixed (VRAM / AGP)
@@ -159,16 +112,14 @@ enum ttm_caching_state {
*/
struct ttm_tt {
+ struct ttm_bo_device *bdev;
+ struct ttm_backend_func *func;
struct page *dummy_read_page;
struct page **pages;
- long first_himem_page;
- long last_lomem_page;
uint32_t page_flags;
unsigned long num_pages;
struct ttm_bo_global *glob;
struct ttm_backend *be;
- struct task_struct *tsk;
- unsigned long start;
struct file *swap_storage;
enum ttm_caching_state caching_state;
enum {
@@ -177,6 +128,7 @@ struct ttm_tt {
tt_unpopulated,
} state;
dma_addr_t *dma_address;
+ struct list_head alloc_list;
};
#define TTM_MEMTYPE_FLAG_FIXED (1 << 0) /* Fixed (on-card) PCI memory */
@@ -351,15 +303,42 @@ struct ttm_mem_type_manager {
struct ttm_bo_driver {
/**
- * struct ttm_bo_driver member create_ttm_backend_entry
+ * ttm_tt_create
+ *
+ * @bdev: Pointer to a struct ttm_bo_device.
+ * @size: Size of the data needed backing.
+ * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
+ * @dummy_read_page: See struct ttm_bo_device.
+ *
+ * Create a struct ttm_tt to back data with system memory pages.
+ * No pages are actually allocated.
+ * Returns:
+ * NULL: Out of memory.
+ */
+ struct ttm_tt *(*ttm_tt_create)(struct ttm_bo_device *bdev,
+ unsigned long size,
+ uint32_t page_flags,
+ struct page *dummy_read_page);
+
+ /**
+ * ttm_tt_populate
*
- * @bdev: The buffer object device.
+ * @ttm: The struct ttm_tt to contain the backing pages.
*
- * Create a driver specific struct ttm_backend.
+ * Allocate all backing pages
+ * Returns:
+ * -ENOMEM: Out of memory.
*/
+ int (*ttm_tt_populate)(struct ttm_tt *ttm);
- struct ttm_backend *(*create_ttm_backend_entry)
- (struct ttm_bo_device *bdev);
+ /**
+ * ttm_tt_unpopulate
+ *
+ * @ttm: The struct ttm_tt to contain the backing pages.
+ *
+ * Free all backing pages.
+ */
+ void (*ttm_tt_unpopulate)(struct ttm_tt *ttm);
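Taken together, a driver with no special page-allocation needs can route the populate/unpopulate hooks straight to the generic pool helpers declared in ttm_page_alloc.h further down. Only the wiring is sketched here, with foo_ttm_tt_create standing in for a driver constructor like the one sketched after ttm_tt_init below:

static struct ttm_bo_driver foo_bo_driver = {
	.ttm_tt_create     = foo_ttm_tt_create,
	.ttm_tt_populate   = ttm_page_alloc_ttm_tt_populate,
	.ttm_tt_unpopulate = ttm_page_alloc_ttm_tt_unpopulate,
	/* invalidate_caches, init_mem_type, etc. are unchanged */
};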
/**
* struct ttm_bo_driver member invalidate_caches
@@ -600,8 +579,9 @@ ttm_flag_masked(uint32_t *old, uint32_t new, uint32_t mask)
}
/**
- * ttm_tt_create
+ * ttm_tt_init
*
+ * @ttm: The struct ttm_tt.
* @bdev: pointer to a struct ttm_bo_device:
* @size: Size of the data needed backing.
* @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
@@ -612,28 +592,9 @@ ttm_flag_masked(uint32_t *old, uint32_t new, uint32_t mask)
* Returns:
* NULL: Out of memory.
*/
-extern struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev,
- unsigned long size,
- uint32_t page_flags,
- struct page *dummy_read_page);
-
-/**
- * ttm_tt_set_user:
- *
- * @ttm: The struct ttm_tt to populate.
- * @tsk: A struct task_struct for which @start is a valid user-space address.
- * @start: A valid user-space address.
- * @num_pages: Size in pages of the user memory area.
- *
- * Populate a struct ttm_tt with a user-space memory area after first pinning
- * the pages backing it.
- * Returns:
- * !0: Error.
- */
-
-extern int ttm_tt_set_user(struct ttm_tt *ttm,
- struct task_struct *tsk,
- unsigned long start, unsigned long num_pages);
+extern int ttm_tt_init(struct ttm_tt *ttm, struct ttm_bo_device *bdev,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page);
/**
* ttm_ttm_bind:
@@ -646,20 +607,11 @@ extern int ttm_tt_set_user(struct ttm_tt *ttm,
extern int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem);
/**
- * ttm_tt_populate:
- *
- * @ttm: The struct ttm_tt to contain the backing pages.
- *
- * Add backing pages to all of @ttm
- */
-extern int ttm_tt_populate(struct ttm_tt *ttm);
-
-/**
* ttm_ttm_destroy:
*
* @ttm: The struct ttm_tt.
*
- * Unbind, unpopulate and destroy a struct ttm_tt.
+ * Unbind, unpopulate and destroy the common struct ttm_tt.
*/
extern void ttm_tt_destroy(struct ttm_tt *ttm);
@@ -673,19 +625,13 @@ extern void ttm_tt_destroy(struct ttm_tt *ttm);
extern void ttm_tt_unbind(struct ttm_tt *ttm);
/**
- * ttm_ttm_destroy:
+ * ttm_tt_swapin:
*
* @ttm: The struct ttm_tt.
- * @index: Index of the desired page.
- *
- * Return a pointer to the struct page backing @ttm at page
- * index @index. If the page is unpopulated, one will be allocated to
- * populate that index.
*
- * Returns:
- * NULL on OOM.
+ * Swap in a previously swapped-out ttm_tt.
*/
-extern struct page *ttm_tt_get_page(struct ttm_tt *ttm, int index);
+extern int ttm_tt_swapin(struct ttm_tt *ttm);
/**
* ttm_tt_cache_flush:
@@ -1046,17 +992,23 @@ extern const struct ttm_mem_type_manager_func ttm_bo_manager_func;
#include <linux/agp_backend.h>
/**
- * ttm_agp_backend_init
+ * ttm_agp_tt_create
*
* @bdev: Pointer to a struct ttm_bo_device.
* @bridge: The agp bridge this device is sitting on.
+ * @size: Size of the data needed backing.
+ * @page_flags: Page flags as identified by TTM_PAGE_FLAG_XX flags.
+ * @dummy_read_page: See struct ttm_bo_device.
*
* Create a TTM backend that uses the indicated AGP bridge as an aperture
* for TT memory. This function uses the linux agpgart interface to
* bind and unbind memory backing a ttm_tt.
*/
-extern struct ttm_backend *ttm_agp_backend_init(struct ttm_bo_device *bdev,
- struct agp_bridge_data *bridge);
+extern struct ttm_tt *ttm_agp_tt_create(struct ttm_bo_device *bdev,
+ struct agp_bridge_data *bridge,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page);
#endif
#endif
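For AGP devices the core now provides the whole ttm_tt, so a driver's create hook can simply forward to ttm_agp_tt_create(). A sketch; foo_device_to_bridge() is a hypothetical stand-in for however the driver locates its AGP bridge:

static struct ttm_tt *foo_agp_ttm_tt_create(struct ttm_bo_device *bdev,
					    unsigned long size,
					    uint32_t page_flags,
					    struct page *dummy_read_page)
{
	struct agp_bridge_data *bridge = foo_device_to_bridge(bdev);

	return ttm_agp_tt_create(bdev, bridge, size, page_flags,
				 dummy_read_page);
}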
diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h
index 26c1f78d136f49..c0c732d42cbcdb 100644
--- a/include/drm/ttm/ttm_memory.h
+++ b/include/drm/ttm/ttm_memory.h
@@ -81,14 +81,13 @@ struct ttm_mem_global {
struct work_struct work;
wait_queue_head_t queue;
spinlock_t lock;
- struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES];
- unsigned int num_zones;
- struct ttm_mem_zone *zone_kernel;
-#ifdef CONFIG_HIGHMEM
- struct ttm_mem_zone *zone_highmem;
-#else
- struct ttm_mem_zone *zone_dma32;
-#endif
+ unsigned long mem;
+ unsigned long max_mem;
+ unsigned long emer_mem;
+ unsigned long used_mem;
+ unsigned long used_dma32_mem;
+ unsigned long swap_limit;
+ unsigned long swap_dma32_limit;
};
/**
@@ -146,14 +145,19 @@ static inline void ttm_mem_unregister_shrink(struct ttm_mem_global *glob,
extern int ttm_mem_global_init(struct ttm_mem_global *glob);
extern void ttm_mem_global_release(struct ttm_mem_global *glob);
-extern int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
- bool no_wait, bool interruptible);
+extern int ttm_mem_global_alloc(struct ttm_mem_global *glob,
+ uint64_t memory,
+ bool no_wait);
extern void ttm_mem_global_free(struct ttm_mem_global *glob,
uint64_t amount);
-extern int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
- struct page *page,
- bool no_wait, bool interruptible);
-extern void ttm_mem_global_free_page(struct ttm_mem_global *glob,
- struct page *page);
+extern int ttm_mem_global_alloc_pages(struct ttm_mem_global *glob,
+ unsigned npages,
+ bool no_wait);
+extern void ttm_mem_global_free_pages(struct ttm_mem_global *glob,
+ struct page **pages, unsigned npages);
+extern void ttm_mem_global_account_pages(struct ttm_mem_global *glob,
+ struct page **pages,
+ unsigned npages);
extern size_t ttm_round_pot(size_t size);
+
#endif
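The memory-accounting overhaul replaces per-zone bookkeeping with flat counters and batches the per-page hooks. A sketch of the calling convention, inferred from the prototypes alone; that no_wait == false allows the call to sleep until memory is available is carried over from the old interface and is an assumption here:

static int example_account(struct ttm_mem_global *glob,
			   struct page **pages, unsigned npages)
{
	int ret;

	/* reserve accounting for npages up front */
	ret = ttm_mem_global_alloc_pages(glob, npages, false);
	if (ret)
		return ret;

	/* ... allocate and use the pages ... */

	/* return the accounting when the pages go back */
	ttm_mem_global_free_pages(glob, pages, npages);
	return 0;
}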
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 129de12353f191..f127635fdb2626 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -32,29 +32,28 @@
/**
- * Get count number of pages from pool to pages list.
+ * Get a number of pages from the pool into a caller-provided array.
*
- * @pages: head of empty linked list where pages are filled.
+ * @pages: array of page pointers to fill.
+ * @npages: number of pages to allocate.
* @flags: ttm flags for page allocation.
* @cstate: ttm caching state for the page.
- * @count: number of pages to allocate.
* @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
*/
-int ttm_get_pages(struct list_head *pages,
+int ttm_get_pages(struct page **pages,
+ unsigned npages,
int flags,
enum ttm_caching_state cstate,
- unsigned count,
dma_addr_t *dma_address);
/**
- * Put linked list of pages to pool.
+ * Put an array of pages back to the pool.
*
- * @pages: list of pages to free.
- * @page_count: number of pages in the list. Zero can be passed for unknown
- * count.
+ * @pages: array of page pointers to free.
+ * @npages: number of pages to free.
* @flags: ttm flags for page allocation.
* @cstate: ttm caching state.
* @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
*/
-void ttm_put_pages(struct list_head *pages,
- unsigned page_count,
+void ttm_put_pages(struct page **pages,
+ unsigned npages,
int flags,
enum ttm_caching_state cstate,
dma_addr_t *dma_address);
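With the list-to-array conversion, the caller now owns a plain page-pointer array sized to the request. A minimal sketch of the new calling convention; flags 0 and tt_cached are arbitrary example values, and dma_address is only consulted when TTM_PAGE_FLAG_DMA32 is set:

static int example_fill(struct page **pages, dma_addr_t *dma_address,
			unsigned npages)
{
	int ret;

	/* pull npages cached pages from the pool into pages[] */
	ret = ttm_get_pages(pages, npages, 0, tt_cached, dma_address);
	if (ret)
		return ret;

	/* ... use the pages ... */

	/* hand them back with the same flags and caching state */
	ttm_put_pages(pages, npages, 0, tt_cached, dma_address);
	return 0;
}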
@@ -68,7 +67,61 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages);
void ttm_page_alloc_fini(void);
/**
+ * ttm_page_alloc_ttm_tt_populate:
+ *
+ * @ttm: The struct ttm_tt to contain the backing pages.
+ *
+ * Add backing pages to all of @ttm
+ */
+extern int ttm_page_alloc_ttm_tt_populate(struct ttm_tt *ttm);
+
+/**
+ * ttm_page_alloc_ttm_tt_unpopulate:
+ *
+ * @ttm: The struct ttm_tt whose backing pages will be freed.
+ *
+ * Free all pages of @ttm
+ */
+extern void ttm_page_alloc_ttm_tt_unpopulate(struct ttm_tt *ttm);
+
+/**
* Output the state of pools to debugfs file
*/
extern int ttm_page_alloc_debugfs(struct seq_file *m, void *data);
+
+
+#ifdef CONFIG_SWIOTLB
+/**
+ * Initialize pool allocator.
+ */
+int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages);
+
+/**
+ * Free pool allocator.
+ */
+void ttm_dma_page_alloc_fini(void);
+
+/**
+ * Output the state of pools to debugfs file
+ */
+extern int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data);
+
+int ttm_dma_populate(struct ttm_tt *ttm, struct device *dev);
+extern void ttm_dma_unpopulate(struct ttm_tt *ttm, struct device *dev);
+
+#else
+static inline int ttm_dma_page_alloc_init(struct ttm_mem_global *glob,
+ unsigned max_pages)
+{
+ return -ENODEV;
+}
+
+static inline void ttm_dma_page_alloc_fini(void) { return; }
+
+static inline int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data)
+{
+ return 0;
+}
+#endif
+
#endif
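This CONFIG_SWIOTLB split is what the radeon/nouveau patches in this series build on: when the software IOTLB is active they populate through the DMA pool, otherwise through the plain page pool. A condensed sketch of that shape; foo_needs_dma_pool() and foo_dev() are hypothetical stand-ins for the swiotlb_nr_tbl()-style check and the GPU's struct device:

static int foo_ttm_tt_populate(struct ttm_tt *ttm)
{
#ifdef CONFIG_SWIOTLB
	if (foo_needs_dma_pool())
		return ttm_dma_populate(ttm, foo_dev());
#endif
	return ttm_page_alloc_ttm_tt_populate(ttm);
}

static void foo_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
#ifdef CONFIG_SWIOTLB
	if (foo_needs_dma_pool()) {
		ttm_dma_unpopulate(ttm, foo_dev());
		return;
	}
#endif
	ttm_page_alloc_ttm_tt_unpopulate(ttm);
}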
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 7408af843b8ac8..2c2cdd2e354c6a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -128,6 +128,7 @@ struct cpuidle_driver {
};
#ifdef CONFIG_CPU_IDLE
+extern int cpuidle_disabled(void);
extern void disable_cpuidle(void);
extern int cpuidle_idle_call(void);
@@ -143,6 +144,7 @@ extern int cpuidle_enable_device(struct cpuidle_device *dev);
extern void cpuidle_disable_device(struct cpuidle_device *dev);
#else
+static inline int cpuidle_disabled(void) { return 1; }
static inline void disable_cpuidle(void) { }
static inline int cpuidle_idle_call(void) { return -ENODEV; }
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index afb94583960c9f..98ce8124b1cc5e 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -41,7 +41,7 @@ struct devfreq_dev_status {
unsigned long total_time;
unsigned long busy_time;
unsigned long current_frequency;
- void *private_date;
+ void *private_data;
};
/**
diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index 08a2fee4065966..aad6bd4b3efdf9 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -118,7 +118,6 @@ int __hwspin_trylock(struct hwspinlock *hwlock, int mode, unsigned long *flags)
static inline
void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags)
{
- return 0;
}
static inline int hwspin_lock_get_id(struct hwspinlock *hwlock)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 445702c60d0468..e872526fdc5fbb 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -24,7 +24,7 @@ extern int swiotlb_force;
extern void swiotlb_init(int verbose);
extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
-extern unsigned long swioltb_nr_tbl(void);
+extern unsigned long swiotlb_nr_tbl(void);
/*
* Enumeration for sync targets
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 687fb11e20107d..4bde182fcf93f2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -119,7 +119,7 @@ unmap_kernel_range(unsigned long addr, unsigned long size)
#endif
/* Allocate/destroy a 'vmalloc' VM area. */
-extern struct vm_struct *alloc_vm_area(size_t size);
+extern struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes);
extern void free_vm_area(struct vm_struct *area);
/* for /dev/kmem */
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index 39e571796e324f..ef2b377536ad6c 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -363,6 +363,8 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
#define GNTST_bad_page (-9) /* Specified page was invalid for op. */
#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */
+#define GNTST_address_too_big (-11) /* transfer page address too large. */
+#define GNTST_eagain (-12) /* Could not map at the moment. Retry. */
#define GNTTABOP_error_msgs { \
"okay", \
@@ -376,6 +378,8 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
"permission denied", \
"bad page", \
- "copy arguments cross page boundary" \
+ "copy arguments cross page boundary", \
+ "page address size too large", \
+ "could not map at the moment, retry" \
}
#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
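GNTST_eagain is the only transient status in this list, so a caller is expected to retry the same operation. A hedged sketch of such a retry loop; the back-off interval is illustrative, not mandated by the interface:

static int map_grant_with_retry(struct gnttab_map_grant_ref *op)
{
	do {
		if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1))
			return -EFAULT;		/* the hypercall itself failed */
		if (op->status == GNTST_eagain)
			msleep(10);		/* illustrative back-off */
	} while (op->status == GNTST_eagain);

	return op->status == GNTST_okay ? 0 : -EINVAL;
}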
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index 9324488f23f0b2..ee338bfde18b25 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -84,6 +84,21 @@ typedef uint64_t blkif_sector_t;
* e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
* http://www.seagate.com/staticfiles/support/disc/manuals/
* Interface%20manuals/100293068c.pdf
+ * The backend can optionally provide three extra XenBus attributes to
+ * further optimize the discard functionality:
+ * 'discard-alignment' - Devices that support discard functionality may
+ * internally allocate space in units that are bigger than the exported
+ * logical block size. The discard-alignment parameter indicates how many bytes
+ * the beginning of the partition is offset from the internal allocation unit's
+ * natural alignment.
+ * 'discard-granularity' - Devices that support discard functionality may
+ * internally allocate space using units that are bigger than the logical block
+ * size. The discard-granularity parameter indicates the size of the internal
+ * allocation unit in bytes if reported by the device. Otherwise the
+ * discard-granularity will be set to match the device's physical block size.
+ * 'discard-secure' - All copies of the discarded sectors (potentially created
+ * by garbage collection) must also be erased. To use this feature, the flag
+ * BLKIF_DISCARD_SECURE must be set in the blkif_request_discard.
*/
#define BLKIF_OP_DISCARD 5
@@ -95,6 +110,12 @@ typedef uint64_t blkif_sector_t;
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
struct blkif_request_rw {
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+#ifdef CONFIG_X86_64
+ uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */
+#endif
+ uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */
@@ -102,23 +123,28 @@ struct blkif_request_rw {
/* @last_sect: last sector in frame to transfer (inclusive). */
uint8_t first_sect, last_sect;
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+} __attribute__((__packed__));
struct blkif_request_discard {
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */
+#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
+ blkif_vdev_t _pad1; /* only for read/write requests */
+#ifdef CONFIG_X86_64
+ uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/
+#endif
+ uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;
- uint64_t nr_sectors;
-};
+ uint64_t nr_sectors;
+ uint8_t _pad3;
+} __attribute__((__packed__));
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
- uint8_t nr_segments; /* number of segments */
- blkif_vdev_t handle; /* only for read/write requests */
- uint64_t id; /* private guest value, echoed in resp */
union {
struct blkif_request_rw rw;
struct blkif_request_discard discard;
} u;
-};
+} __attribute__((__packed__));
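With nr_segments, handle and id pushed down into the per-operation structs, a frontend fills exactly the union member matching its opcode. A sketch for a secure discard; the caller supplies the request id and sector range, and this is illustrative rather than code from the patch:

static void fill_discard_request(struct blkif_request *ring_req, uint64_t id,
				 blkif_sector_t start, uint64_t nr_sectors,
				 bool secure)
{
	ring_req->operation = BLKIF_OP_DISCARD;
	ring_req->u.discard.flag = secure ? BLKIF_DISCARD_SECURE : 0;
	ring_req->u.discard.id = id;	/* echoed back in the response */
	ring_req->u.discard.sector_number = start;
	ring_req->u.discard.nr_sectors = nr_sectors;
}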
struct blkif_response {
uint64_t id; /* copied from request */
diff --git a/include/xen/interface/io/vscsiif.h b/include/xen/interface/io/vscsiif.h
new file mode 100644
index 00000000000000..7fbe68839dfde5
--- /dev/null
+++ b/include/xen/interface/io/vscsiif.h
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * vscsiif.h
+ *
+ * Based on the blkif.h code.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright(c) FUJITSU Limited 2008.
+ */
+
+#ifndef __XEN__PUBLIC_IO_SCSI_H__
+#define __XEN__PUBLIC_IO_SCSI_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/* command between backend and frontend */
+#define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */
+#define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/
+#define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/
+
+
+#define VSCSIIF_BACK_MAX_PENDING_REQS 128
+
+/*
+ * Maximum scatter/gather segments per request.
+ *
+ * Considering the balance between allocating at least 16 "vscsiif_request"
+ * structures on one page (4096 bytes) and the number of scatter/gather
+ * segments needed, we decided to use 26 as a magic number.
+ */
+#define VSCSIIF_SG_TABLESIZE 26
+
+/*
+ * based on Linux kernel 2.6.18
+ */
+#define VSCSIIF_MAX_COMMAND_SIZE 16
+#define VSCSIIF_SENSE_BUFFERSIZE 96
+
+
+struct vscsiif_request {
+ uint16_t rqid; /* private guest value, echoed in resp */
+ uint8_t act; /* command between backend and frontend */
+ uint8_t cmd_len;
+
+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
+ uint16_t timeout_per_command; /* the backend issues the command
+ with twice this timeout value */
+ uint16_t channel, id, lun;
+ uint16_t padding;
+ uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1)
+ DMA_FROM_DEVICE(2)
+ DMA_NONE(3) requests */
+ uint8_t nr_segments; /* Number of pieces of scatter-gather */
+
+ struct scsiif_request_segment {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t length;
+ } seg[VSCSIIF_SG_TABLESIZE];
+ uint32_t reserved[3];
+};
+typedef struct vscsiif_request vscsiif_request_t;
+
+struct vscsiif_response {
+ uint16_t rqid;
+ uint8_t padding;
+ uint8_t sense_len;
+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
+ int32_t rslt;
+ uint32_t residual_len; /* request bufflen -
+ return the value from physical device */
+ uint32_t reserved[36];
+};
+typedef struct vscsiif_response vscsiif_response_t;
+
+DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
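A frontend pushes one vscsiif_request per SCSI command onto this ring. A sketch of filling the fixed part for a data-less CDB; grant setup for seg[] is omitted and the values simply follow the comments in the struct above:

static void fill_cdb_request(struct vscsiif_request *ring_req, uint16_t rqid,
			     const uint8_t *cdb, uint8_t cdb_len)
{
	ring_req->rqid = rqid;			/* echoed in the response */
	ring_req->act = VSCSIIF_ACT_SCSI_CDB;
	ring_req->cmd_len = cdb_len;		/* <= VSCSIIF_MAX_COMMAND_SIZE */
	memcpy(ring_req->cmnd, cdb, cdb_len);
	ring_req->sc_data_direction = 3;	/* DMA_NONE: no data transfer */
	ring_req->nr_segments = 0;		/* no scatter-gather pieces */
}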
+
+
+#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h
index a785a3b0c8c7d8..438c256c274b3b 100644
--- a/include/xen/platform_pci.h
+++ b/include/xen/platform_pci.h
@@ -29,8 +29,7 @@
static inline int xen_must_unplug_nics(void) {
#if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \
defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \
- (defined(CONFIG_XEN_PLATFORM_PCI) || \
- defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
+ defined(CONFIG_XEN_PVHVM)
return 1;
#else
return 0;
@@ -40,8 +39,7 @@ static inline int xen_must_unplug_nics(void) {
static inline int xen_must_unplug_disks(void) {
#if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \
defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \
- (defined(CONFIG_XEN_PLATFORM_PCI) || \
- defined(CONFIG_XEN_PLATFORM_PCI_MODULE))
+ defined(CONFIG_XEN_PVHVM)
return 1;
#else
return 0;
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 56db7514718689..995e3bd3417b93 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -70,6 +70,7 @@ static struct pm_qos_constraints cpu_dma_constraints = {
};
static struct pm_qos_object cpu_dma_pm_qos = {
.constraints = &cpu_dma_constraints,
+ .name = "cpu_dma_latency",
};
static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 99093b39614595..058935ef39752e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -110,11 +110,11 @@ setup_io_tlb_npages(char *str)
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
-unsigned long swioltb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(void)
{
return io_tlb_nslabs;
}
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
/* Note that this doesn't work with highmem page */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)
@@ -321,6 +321,7 @@ void __init swiotlb_free(void)
free_bootmem_late(__pa(io_tlb_start),
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
}
+ io_tlb_nslabs = 0;
}
static int is_swiotlb_buffer(phys_addr_t paddr)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b669aa6f6caff3..3231bf3328781b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2141,23 +2141,30 @@ void __attribute__((weak)) vmalloc_sync_all(void)
static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
- /* apply_to_page_range() does all the hard work. */
+ pte_t ***p = data;
+
+ if (p) {
+ *(*p) = pte;
+ (*p)++;
+ }
return 0;
}
/**
* alloc_vm_area - allocate a range of kernel address space
* @size: size of the area
+ * @ptes: returns the PTEs for the address space
*
* Returns: NULL on failure, vm_struct on success
*
* This function reserves a range of kernel address space, and
* allocates pagetables to map that range. No actual mappings
- * are created. If the kernel address space is not shared
- * between processes, it syncs the pagetable across all
- * processes.
+ * are created.
+ *
+ * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
+ * allocated for the VM area are returned.
*/
-struct vm_struct *alloc_vm_area(size_t size)
+struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
{
struct vm_struct *area;
@@ -2171,19 +2178,11 @@ struct vm_struct *alloc_vm_area(size_t size)
* of kernel virtual address space and mapped into init_mm.
*/
if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
- area->size, f, NULL)) {
+ size, f, ptes ? &ptes : NULL)) {
free_vm_area(area);
return NULL;
}
- /*
- * If the allocated address space is passed to a hypercall
- * before being used then we cannot rely on a page fault to
- * trigger an update of the page tables. So sync all the page
- * tables here.
- */
- vmalloc_sync_all();
-
return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);
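The new @ptes out-parameter is what lets the Xen callers seed the mappings by hypercall instead of relying on the removed vmalloc_sync_all() call. A sketch of the calling convention, with the caller sizing the PTE-pointer array at one entry per page (names are illustrative):

static struct vm_struct *example_reserve(size_t nr_pages, pte_t ***ptes_out)
{
	struct vm_struct *area;
	pte_t **ptes;

	ptes = kcalloc(nr_pages, sizeof(*ptes), GFP_KERNEL);
	if (!ptes)
		return NULL;

	/* on success, ptes[i] points at the init_mm PTE for page i */
	area = alloc_vm_area(nr_pages << PAGE_SHIFT, ptes);
	if (!area) {
		kfree(ptes);
		return NULL;
	}

	*ptes_out = ptes;
	return area;
}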
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8d02ccb10c598b..30e2befd6f2a23 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -42,6 +42,7 @@ $default{"BISECT_MANUAL"} = 0;
$default{"BISECT_SKIP"} = 1;
$default{"SUCCESS_LINE"} = "login:";
$default{"DETECT_TRIPLE_FAULT"} = 1;
+$default{"NO_INSTALL"} = 0;
$default{"BOOTED_TIMEOUT"} = 1;
$default{"DIE_ON_FAILURE"} = 1;
$default{"SSH_EXEC"} = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND";
@@ -84,6 +85,7 @@ my $grub_number;
my $target;
my $make;
my $post_install;
+my $no_install;
my $noclean;
my $minconfig;
my $start_minconfig;
@@ -115,6 +117,7 @@ my $timeout;
my $booted_timeout;
my $detect_triplefault;
my $console;
+my $reboot_success_line;
my $success_line;
my $stop_after_success;
my $stop_after_failure;
@@ -130,6 +133,12 @@ my %config_help;
my %variable;
my %force_config;
+# do not force reboots on config problems
+my $no_reboot = 1;
+
+# default variables that can be used
+chomp ($variable{"PWD"} = `pwd`);
+
$config_help{"MACHINE"} = << "EOF"
The machine hostname that you will test.
EOF
@@ -241,6 +250,7 @@ sub read_yn {
sub get_ktest_config {
my ($config) = @_;
+ my $ans;
return if (defined($opt{$config}));
@@ -254,16 +264,17 @@ sub get_ktest_config {
if (defined($default{$config})) {
print "\[$default{$config}\] ";
}
- $entered_configs{$config} = <STDIN>;
- $entered_configs{$config} =~ s/^\s*(.*\S)\s*$/$1/;
- if ($entered_configs{$config} =~ /^\s*$/) {
+ $ans = <STDIN>;
+ $ans =~ s/^\s*(.*\S)\s*$/$1/;
+ if ($ans =~ /^\s*$/) {
if ($default{$config}) {
- $entered_configs{$config} = $default{$config};
+ $ans = $default{$config};
} else {
print "Your answer can not be blank\n";
next;
}
}
+ $entered_configs{$config} = process_variables($ans);
last;
}
}
@@ -298,7 +309,7 @@ sub get_ktest_configs {
}
sub process_variables {
- my ($value) = @_;
+ my ($value, $remove_undef) = @_;
my $retval = "";
# We want to check for '\', and it is just easier
@@ -316,6 +327,10 @@ sub process_variables {
$retval = "$retval$begin";
if (defined($variable{$var})) {
$retval = "$retval$variable{$var}";
+ } elsif (defined($remove_undef) && $remove_undef) {
+ # for if statements, any variable that is not defined
+ # is simply converted to 0
+ $retval = "${retval}0";
} else {
# put back the origin piece.
$retval = "$retval\$\{$var\}";
@@ -331,10 +346,17 @@ sub process_variables {
}
sub set_value {
- my ($lvalue, $rvalue) = @_;
+ my ($lvalue, $rvalue, $override, $overrides, $name) = @_;
if (defined($opt{$lvalue})) {
- die "Error: Option $lvalue defined more than once!\n";
+ if (!$override || defined(${$overrides}{$lvalue})) {
+ my $extra = "";
+ if ($override) {
+ $extra = "In the same override section!\n";
+ }
+ die "$name: $.: Option $lvalue defined more than once!\n$extra";
+ }
+ ${$overrides}{$lvalue} = $rvalue;
}
if ($rvalue =~ /^\s*$/) {
delete $opt{$lvalue};
@@ -355,86 +377,274 @@ sub set_variable {
}
}
-sub read_config {
- my ($config) = @_;
+sub process_compare {
+ my ($lval, $cmp, $rval) = @_;
+
+ # remove whitespace
+
+ $lval =~ s/^\s*//;
+ $lval =~ s/\s*$//;
+
+ $rval =~ s/^\s*//;
+ $rval =~ s/\s*$//;
+
+ if ($cmp eq "==") {
+ return $lval eq $rval;
+ } elsif ($cmp eq "!=") {
+ return $lval ne $rval;
+ }
+
+ my $statement = "$lval $cmp $rval";
+ my $ret = eval $statement;
+
+ # $@ stores error of eval
+ if ($@) {
+ return -1;
+ }
+
+ return $ret;
+}
+
+sub value_defined {
+ my ($val) = @_;
+
+ return defined($variable{$val}) ||
+ defined($opt{$val});
+}
+
+my $d = 0;
+sub process_expression {
+ my ($name, $val) = @_;
+
+ my $c = $d++;
+
+ while ($val =~ s/\(([^\(]*?)\)/\&\&\&\&VAL\&\&\&\&/) {
+ my $express = $1;
+
+ if (process_expression($name, $express)) {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 1 /;
+ } else {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 0 /;
+ }
+ }
+
+ $d--;
+ my $OR = "\\|\\|";
+ my $AND = "\\&\\&";
+
+ while ($val =~ s/^(.*?)($OR|$AND)//) {
+ my $express = $1;
+ my $op = $2;
+
+ if (process_expression($name, $express)) {
+ if ($op eq "||") {
+ return 1;
+ }
+ } else {
+ if ($op eq "&&") {
+ return 0;
+ }
+ }
+ }
+
+ if ($val =~ /(.*)(==|\!=|>=|<=|>|<)(.*)/) {
+ my $ret = process_compare($1, $2, $3);
+ if ($ret < 0) {
+ die "$name: $.: Unable to process comparison\n";
+ }
+ return $ret;
+ }
+
+ if ($val =~ /^\s*(NOT\s*)?DEFINED\s+(\S+)\s*$/) {
+ if (defined $1) {
+ return !value_defined($2);
+ } else {
+ return value_defined($2);
+ }
+ }
+
+ if ($val =~ /^\s*0\s*$/) {
+ return 0;
+ } elsif ($val =~ /^\s*\d+\s*$/) {
+ return 1;
+ }
+
+ die ("$name: $.: Undefined content $val in if statement\n");
+}
+
+sub process_if {
+ my ($name, $value) = @_;
+
+ # Convert variables and replace undefined ones with 0
+ my $val = process_variables($value, 1);
+ my $ret = process_expression $name, $val;
+
+ return $ret;
+}
- open(IN, $config) || die "can't read file $config";
+sub __read_config {
+ my ($config, $current_test_num) = @_;
+
+ my $in;
+ open($in, $config) || die "can't read file $config";
my $name = $config;
$name =~ s,.*/(.*),$1,;
- my $test_num = 0;
+ my $test_num = $$current_test_num;
my $default = 1;
my $repeat = 1;
my $num_tests_set = 0;
my $skip = 0;
my $rest;
+ my $line;
my $test_case = 0;
+ my $if = 0;
+ my $if_set = 0;
+ my $override = 0;
- while (<IN>) {
+ my %overrides;
+
+ while (<$in>) {
# ignore blank lines and comments
next if (/^\s*$/ || /\s*\#/);
- if (/^\s*TEST_START(.*)/) {
+ if (/^\s*(TEST_START|DEFAULTS)\b(.*)/) {
- $rest = $1;
+ my $type = $1;
+ $rest = $2;
+ $line = $2;
- if ($num_tests_set) {
- die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
- }
+ my $old_test_num;
+ my $old_repeat;
+ $override = 0;
+
+ if ($type eq "TEST_START") {
+
+ if ($num_tests_set) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
- my $old_test_num = $test_num;
- my $old_repeat = $repeat;
+ $old_test_num = $test_num;
+ $old_repeat = $repeat;
- $test_num += $repeat;
- $default = 0;
- $repeat = 1;
+ $test_num += $repeat;
+ $default = 0;
+ $repeat = 1;
+ } else {
+ $default = 1;
+ }
- if ($rest =~ /\s+SKIP(.*)/) {
- $rest = $1;
+ # If SKIP is anywhere in the line, the command will be skipped
+ if ($rest =~ s/\s+SKIP\b//) {
$skip = 1;
} else {
$test_case = 1;
$skip = 0;
}
- if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) {
- $repeat = $1;
- $rest = $2;
- $repeat_tests{"$test_num"} = $repeat;
+ if ($rest =~ s/\sELSE\b//) {
+ if (!$if) {
+ die "$name: $.: ELSE found without matching IF section\n$_";
+ }
+ $if = 0;
+
+ if ($if_set) {
+ $skip = 1;
+ } else {
+ $skip = 0;
+ }
}
- if ($rest =~ /\s+SKIP(.*)/) {
- $rest = $1;
- $skip = 1;
+ if ($rest =~ s/\sIF\s+(.*)//) {
+ if (process_if($name, $1)) {
+ $if_set = 1;
+ } else {
+ $skip = 1;
+ }
+ $if = 1;
+ } else {
+ $if = 0;
+ $if_set = 0;
}
- if ($rest !~ /^\s*$/) {
- die "$name: $.: Gargbage found after TEST_START\n$_";
+ if (!$skip) {
+ if ($type eq "TEST_START") {
+ if ($rest =~ s/\s+ITERATE\s+(\d+)//) {
+ $repeat = $1;
+ $repeat_tests{"$test_num"} = $repeat;
+ }
+ } elsif ($rest =~ s/\sOVERRIDE\b//) {
+ # DEFAULTS sections only
+ $override = 1;
+ # Clear previous overrides
+ %overrides = ();
+ }
+ }
+
+ if (!$skip && $rest !~ /^\s*$/) {
+ die "$name: $.: Garbage found after $type\n$_";
}
- if ($skip) {
+ if ($skip && $type eq "TEST_START") {
$test_num = $old_test_num;
$repeat = $old_repeat;
}
- } elsif (/^\s*DEFAULTS(.*)$/) {
- $default = 1;
-
+ } elsif (/^\s*ELSE\b(.*)$/) {
+ if (!$if) {
+ die "$name: $.: ELSE found without matching IF section\n$_";
+ }
$rest = $1;
-
- if ($rest =~ /\s+SKIP(.*)/) {
- $rest = $1;
+ if ($if_set) {
$skip = 1;
+ $rest = "";
} else {
$skip = 0;
+
+ if ($rest =~ /\sIF\s+(.*)/) {
+ # May be an ELSE IF section.
+ if (!process_if($name, $1)) {
+ $skip = 1;
+ }
+ $rest = "";
+ } else {
+ $if = 0;
+ }
}
if ($rest !~ /^\s*$/) {
- die "$name: $.: Gargbage found after DEFAULTS\n$_";
+ die "$name: $.: Garbage found after DEFAULTS\n$_";
}
+ } elsif (/^\s*INCLUDE\s+(\S+)/) {
+
+ next if ($skip);
+
+ if (!$default) {
+ die "$name: $.: INCLUDE can only be done in default sections\n$_";
+ }
+
+ my $file = process_variables($1);
+
+ if ($file !~ m,^/,) {
+ # check the path of the config file first
+ if ($config =~ m,(.*)/,) {
+ if (-f "$1/$file") {
+ $file = "$1/$file";
+ }
+ }
+ }
+
+ if ( ! -r $file ) {
+ die "$name: $.: Can't read file $file\n$_";
+ }
+
+ if (__read_config($file, \$test_num)) {
+ $test_case = 1;
+ }
+
} elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
next if ($skip);
@@ -460,10 +670,10 @@ sub read_config {
}
if ($default || $lvalue =~ /\[\d+\]$/) {
- set_value($lvalue, $rvalue);
+ set_value($lvalue, $rvalue, $override, \%overrides, $name);
} else {
my $val = "$lvalue\[$test_num\]";
- set_value($val, $rvalue);
+ set_value($val, $rvalue, $override, \%overrides, $name);
if ($repeat > 1) {
$repeats{$val} = $repeat;
@@ -490,13 +700,26 @@ sub read_config {
}
}
- close(IN);
-
if ($test_num) {
$test_num += $repeat - 1;
$opt{"NUM_TESTS"} = $test_num;
}
+ close($in);
+
+ $$current_test_num = $test_num;
+
+ return $test_case;
+}
+
+sub read_config {
+ my ($config) = @_;
+
+ my $test_case;
+ my $test_num = 0;
+
+ $test_case = __read_config $config, \$test_num;
+
# make sure we have all mandatory configs
get_ktest_configs;
@@ -603,8 +826,20 @@ sub doprint {
}
sub run_command;
+sub start_monitor;
+sub end_monitor;
+sub wait_for_monitor;
sub reboot {
+ my ($time) = @_;
+
+ if (defined($time)) {
+ start_monitor;
+ # flush out current monitor
+ # May contain the reboot success line
+ wait_for_monitor 1;
+ }
+
# try to reboot normally
if (run_command $reboot) {
if (defined($powercycle_after_reboot)) {
@@ -615,12 +850,17 @@ sub reboot {
# nope? power cycle it.
run_command "$power_cycle";
}
+
+ if (defined($time)) {
+ wait_for_monitor($time, $reboot_success_line);
+ end_monitor;
+ }
}
sub do_not_reboot {
my $i = $iteration;
- return $test_type eq "build" ||
+ return $test_type eq "build" || $no_reboot ||
($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
}
@@ -693,16 +933,29 @@ sub end_monitor {
}
sub wait_for_monitor {
- my ($time) = @_;
+ my ($time, $stop) = @_;
+ my $full_line = "";
my $line;
+ my $booted = 0;
doprint "** Wait for monitor to settle down **\n";
# read the monitor and wait for the system to calm down
- do {
+ while (!$booted) {
$line = wait_for_input($monitor_fp, $time);
- print "$line" if (defined($line));
- } while (defined($line));
+ last if (!defined($line));
+ print "$line";
+ $full_line .= $line;
+
+ if (defined($stop) && $full_line =~ /$stop/) {
+ doprint "wait for monitor detected $stop\n";
+ $booted = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ }
print "** Monitor flushed **\n";
}
@@ -719,10 +972,7 @@ sub fail {
# no need to reboot for just building.
if (!do_not_reboot) {
doprint "REBOOTING\n";
- reboot;
- start_monitor;
- wait_for_monitor $sleep_time;
- end_monitor;
+ reboot $sleep_time;
}
my $name = "";
@@ -854,9 +1104,12 @@ sub get_grub_index {
open(IN, "$ssh_grub |")
or die "unable to get menu.lst";
+ my $found = 0;
+
while (<IN>) {
if (/^\s*title\s+$grub_menu\s*$/) {
$grub_number++;
+ $found = 1;
last;
} elsif (/^\s*title\s/) {
$grub_number++;
@@ -865,7 +1118,7 @@ sub get_grub_index {
close(IN);
die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
- if ($grub_number < 0);
+ if (!$found);
doprint "$grub_number\n";
}
@@ -902,7 +1155,8 @@ sub wait_for_input
sub reboot_to {
if ($reboot_type eq "grub") {
- run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch && reboot)'";
+ run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
+ reboot;
return;
}
@@ -1083,6 +1337,8 @@ sub do_post_install {
sub install {
+ return if ($no_install);
+
run_scp "$outputdir/$build_target", "$target_image" or
dodie "failed to copy image";
@@ -1140,6 +1396,11 @@ sub get_version {
}
sub start_monitor_and_boot {
+ # Make sure the stable kernel has finished booting
+ start_monitor;
+ wait_for_monitor 5;
+ end_monitor;
+
get_grub_index;
get_version;
install;
@@ -1250,6 +1511,10 @@ sub build {
unlink $buildlog;
+ # Failed builds should not reboot the target
+ my $save_no_reboot = $no_reboot;
+ $no_reboot = 1;
+
if (defined($pre_build)) {
my $ret = run_command $pre_build;
if (!$ret && defined($pre_build_die) &&
@@ -1272,15 +1537,15 @@ sub build {
# allow for empty configs
run_command "touch $output_config";
- run_command "mv $output_config $outputdir/config_temp" or
- dodie "moving .config";
+ if (!$noclean) {
+ run_command "mv $output_config $outputdir/config_temp" or
+ dodie "moving .config";
- if (!$noclean && !run_command "$make mrproper") {
- dodie "make mrproper";
- }
+ run_command "$make mrproper" or dodie "make mrproper";
- run_command "mv $outputdir/config_temp $output_config" or
- dodie "moving config_temp";
+ run_command "mv $outputdir/config_temp $output_config" or
+ dodie "moving config_temp";
+ }
} elsif (!$noclean) {
unlink "$output_config";
@@ -1318,10 +1583,15 @@ sub build {
if (!$build_ret) {
# bisect may need this to pass
- return 0 if ($in_bisect);
+ if ($in_bisect) {
+ $no_reboot = $save_no_reboot;
+ return 0;
+ }
fail "failed build" and return 0;
}
+ $no_reboot = $save_no_reboot;
+
return 1;
}
@@ -1356,10 +1626,7 @@ sub success {
if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
doprint "Reboot and wait $sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $sleep_time;
- end_monitor;
+ reboot $sleep_time;
}
}
@@ -1500,10 +1767,7 @@ sub run_git_bisect {
sub bisect_reboot {
doprint "Reboot and sleep $bisect_sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $bisect_sleep_time;
- end_monitor;
+ reboot $bisect_sleep_time;
}
# returns 1 on success, 0 on failure, -1 on skip
@@ -2066,10 +2330,7 @@ sub config_bisect {
sub patchcheck_reboot {
doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $patchcheck_sleep_time;
- end_monitor;
+ reboot $patchcheck_sleep_time;
}
sub patchcheck {
@@ -2178,12 +2439,31 @@ sub patchcheck {
}
my %depends;
+my %depcount;
my $iflevel = 0;
my @ifdeps;
# prevent recursion
my %read_kconfigs;
+sub add_dep {
+ # $config depends on $dep
+ my ($config, $dep) = @_;
+
+ if (defined($depends{$config})) {
+ $depends{$config} .= " " . $dep;
+ } else {
+ $depends{$config} = $dep;
+ }
+
+ # record the number of configs depending on $dep
+ if (defined $depcount{$dep}) {
+ $depcount{$dep}++;
+ } else {
+ $depcount{$dep} = 1;
+ }
+}
+
# taken from streamline_config.pl
sub read_kconfig {
my ($kconfig) = @_;
@@ -2230,30 +2510,19 @@ sub read_kconfig {
$config = $2;
for (my $i = 0; $i < $iflevel; $i++) {
- if ($i) {
- $depends{$config} .= " " . $ifdeps[$i];
- } else {
- $depends{$config} = $ifdeps[$i];
- }
- $state = "DEP";
+ add_dep $config, $ifdeps[$i];
}
# collect the depends for the config
} elsif ($state eq "NEW" && /^\s*depends\s+on\s+(.*)$/) {
- if (defined($depends{$1})) {
- $depends{$config} .= " " . $1;
- } else {
- $depends{$config} = $1;
- }
+ add_dep $config, $1;
# Get the configs that select this config
- } elsif ($state ne "NONE" && /^\s*select\s+(\S+)/) {
- if (defined($depends{$1})) {
- $depends{$1} .= " " . $config;
- } else {
- $depends{$1} = $config;
- }
+ } elsif ($state eq "NEW" && /^\s*select\s+(\S+)/) {
+
+ # a selected config depends on the config selecting it
+ add_dep $1, $config;
# Check for if statements
} elsif (/^if\s+(.*\S)\s*$/) {
@@ -2365,11 +2634,18 @@ sub make_new_config {
close OUT;
}
+sub chomp_config {
+ my ($config) = @_;
+
+ $config =~ s/CONFIG_//;
+
+ return $config;
+}
+
sub get_depends {
my ($dep) = @_;
- my $kconfig = $dep;
- $kconfig =~ s/CONFIG_//;
+ my $kconfig = chomp_config $dep;
$dep = $depends{"$kconfig"};
@@ -2419,8 +2695,7 @@ sub test_this_config {
return undef;
}
- my $kconfig = $config;
- $kconfig =~ s/CONFIG_//;
+ my $kconfig = chomp_config $config;
# Test dependencies first
if (defined($depends{"$kconfig"})) {
@@ -2510,6 +2785,14 @@ sub make_min_config {
my @config_keys = keys %min_configs;
+ # All configs need a depcount
+ foreach my $config (@config_keys) {
+ my $kconfig = chomp_config $config;
+ if (!defined $depcount{$kconfig}) {
+ $depcount{$kconfig} = 0;
+ }
+ }
+
# Remove anything that was set by the make allnoconfig
# we shouldn't need them as they get set for us anyway.
foreach my $config (@config_keys) {
@@ -2548,8 +2831,13 @@ sub make_min_config {
# Now disable each config one by one and do a make oldconfig
# till we find a config that changes our list.
- # Put configs that did not modify the config at the end.
my @test_configs = keys %min_configs;
+
+ # Sort keys by how many configs depend on them (most depended on first)
+ @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} }
+ @test_configs ;
+
+ # Put configs that did not modify the config at the end.
my $reset = 1;
for (my $i = 0; $i < $#test_configs; $i++) {
if (!defined($nochange_config{$test_configs[0]})) {
@@ -2659,10 +2947,7 @@ sub make_min_config {
}
doprint "Reboot and wait $sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $sleep_time;
- end_monitor;
+ reboot $sleep_time;
}
success $i;
@@ -2783,6 +3068,9 @@ sub set_test_option {
# First we need to do is the builds
for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+ # Do not reboot on failing test options
+ $no_reboot = 1;
+
$iteration = $i;
my $makecmd = set_test_option("MAKE_CMD", $i);
@@ -2811,6 +3099,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$reboot_type = set_test_option("REBOOT_TYPE", $i);
$grub_menu = set_test_option("GRUB_MENU", $i);
$post_install = set_test_option("POST_INSTALL", $i);
+ $no_install = set_test_option("NO_INSTALL", $i);
$reboot_script = set_test_option("REBOOT_SCRIPT", $i);
$reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i);
$poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i);
@@ -2832,6 +3121,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$console = set_test_option("CONSOLE", $i);
$detect_triplefault = set_test_option("DETECT_TRIPLE_FAULT", $i);
$success_line = set_test_option("SUCCESS_LINE", $i);
+ $reboot_success_line = set_test_option("REBOOT_SUCCESS_LINE", $i);
$stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i);
$stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i);
$stop_test_after = set_test_option("STOP_TEST_AFTER", $i);
@@ -2850,9 +3140,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
chdir $builddir || die "can't change directory to $builddir";
- if (!-d $tmpdir) {
- mkpath($tmpdir) or
- die "can't create $tmpdir";
+ foreach my $dir ($tmpdir, $outputdir) {
+ if (!-d $dir) {
+ mkpath($dir) or
+ die "can't create $dir";
+ }
}
$ENV{"SSH_USER"} = $ssh_user;
@@ -2889,8 +3181,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$run_type = "ERROR";
}
+ my $installme = "";
+ $installme = " no_install" if ($no_install);
+
doprint "\n\n";
- doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n";
+ doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type$installme\n\n";
unlink $dmesg;
unlink $buildlog;
@@ -2911,6 +3206,9 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
die "failed to checkout $checkout";
}
+ $no_reboot = 0;
+
+
if ($test_type eq "bisect") {
bisect $i;
next;
@@ -2929,6 +3227,13 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
build $build_type or next;
}
+ if ($test_type eq "install") {
+ get_version;
+ install;
+ success $i;
+ next;
+ }
+
if ($test_type ne "build") {
my $failed = 0;
start_monitor_and_boot or $failed = 1;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index b8bcd14b5a4da7..dbedfa196727dd 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -72,6 +72,128 @@
# the same option name under the same test or as default
# ktest will fail to execute, and no tests will run.
#
+# DEFAULTS OVERRIDE
+#
+# Options defined in a DEFAULTS section can not be duplicated,
+# even if they are defined in two different DEFAULTS sections.
+# This is done to catch mistakes where an option is added but
+# the previous option was forgotten about and not commented.
+#
+# The OVERRIDE keyword can be added to a section to allow that
+# section to override values from other DEFAULTS sections that
+# were defined previously. It will only override options that
+# have been defined before its use. Options defined later
+# in a non-override section will still error. The same option
+# can not be defined twice in the same section even if that
+# section is marked OVERRIDE.
+#
+#
+#
+# Both TEST_START and DEFAULTS sections can also have the IF keyword
+# The value after the IF must evaluate to 0 or a non-zero positive
+# integer, and can use the config variables (explained below).
+#
+# DEFAULTS IF ${IS_X86_32}
+#
+# The above will process the DEFAULTS section if the config
+# variable IS_X86_32 evaluates to a non-zero positive integer,
+# otherwise if it evaluates to zero, it will act the same
+# as if the SKIP keyword was used.
+#
+# The ELSE keyword can be used directly after a section with
+# an IF statement.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE
+#
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-normal
+#
+#
+# The ELSE keyword can also contain an IF statement to allow multiple
+# if then else sections. But all the sections must be either
+# DEFAULT or TEST_START, they can not be a mixture.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE IF ${RUN_DISK_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-tests
+#
+# ELSE IF ${RUN_CPU_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-cpu
+#
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# The if statement may also contain comparisons. For the == and !=
+# operators, strings may be used for both sides.
+#
+# BOX_TYPE := x86_32
+#
+# DEFAULTS IF ${BOX_TYPE} == x86_32
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-32
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-64
+#
+# The DEFINED keyword can be used by the IF statements too.
+# It returns true if the given config variable or option has been defined
+# or false otherwise.
+#
+#
+# DEFAULTS IF DEFINED USE_CC
+# CC := ${USE_CC}
+# ELSE
+# CC := gcc
+#
+#
+# As well as NOT DEFINED.
+#
+# DEFAULTS IF NOT DEFINED MAKE_CMD
+# MAKE_CMD := make ARCH=x86
+#
+#
+# And/or ops (&&,||) may also be used to make complex conditionals.
+#
+# TEST_START IF (DEFINED ALL_TESTS || ${MYTEST} == boottest) && ${MACHINE} == gandalf
+#
+# Notice the use of parentheses. Without any parentheses the above would be
+# processed the same as:
+#
+# TEST_START IF DEFINED ALL_TESTS || (${MYTEST} == boottest && ${MACHINE} == gandalf)
+#
+#
+#
+# INCLUDE file
+#
+# The INCLUDE keyword may be used in DEFAULT sections. This will
+# read another config file and process that file as well. The included
+# file can include other files, add new test cases or default
+# statements. Config variables will be passed to these files and changes
+# to config variables will be seen by top level config files. Including
+# a file is processed just like the contents of the file was cut and pasted
+# into the top level file, except, that include files that end with
+# TEST_START sections will have that section ended at the end of
+# the include file. That is, an included file is included followed
+# by another DEFAULT keyword.
+#
+# Unlike other files referenced in this config, the file path does not need
+# to be absolute. If the file does not start with '/', then the directory
+# that the current config file was located in is used. If no config by the
+# given name is found there, then the current directory is searched.
+#
+# INCLUDE myfile
+# DEFAULTS
+#
+# is the same as:
+#
+# INCLUDE myfile
+#
+# Note, if the include file does not contain a full path, the file is
+# searched first by the location of the original include file, and then
+# by the location that ktest.pl was executed in.
+#
#### Config variables ####
#
@@ -253,9 +375,10 @@
# The default test type (default test)
# The test types may be:
-# build - only build the kernel, do nothing else
-# boot - build and boot the kernel
-# test - build, boot and if TEST is set, run the test script
+# build - only build the kernel, do nothing else
+# install - build and install, but do nothing else (does not reboot)
+# boot - build, install, and boot the kernel
+# test - build, boot and if TEST is set, run the test script
# (If TEST is not set, it defaults back to boot)
# bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
# patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
@@ -293,6 +416,13 @@
# or on some systems:
#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+# If for some reason you just want to boot the kernel and you do not
+# want the test to install anything new, for example to boot test the
+# same kernel over and over without going through the hassle of
+# installing anything, you can set this option to 1
+# (default 0)
+#NO_INSTALL = 1
+
# If there is a script that you require to run before the build is done
# you can specify it with PRE_BUILD.
#
@@ -415,6 +545,14 @@
# (default "login:")
#SUCCESS_LINE = login:
+# To speed up reboots, define a line that the default kernel
+# produces once it has successfully booted, which tells ktest.pl
+# that it can pass a new test kernel to the machine. Otherwise
+# ktest.pl will wait SLEEP_TIME before continuing.
+# (default undefined)
+#REBOOT_SUCCESS_LINE = login:
+
# In case the console constantly fills the screen, having
# a specified time to stop the test after success is recommended.
# (in seconds)
@@ -480,6 +618,8 @@
# another test. If a reboot to the reliable kernel happens,
# we wait SLEEP_TIME for the console to stop producing output
# before starting the next test.
+#
+# You can speed up reboot times even more by setting REBOOT_SUCCESS_LINE.
# (default 60)
#SLEEP_TIME = 60