bk://linux-voyager.bkbits.net/dma-declare-coherent-memory-2.6 jejb@raven.il.steeleye.com|ChangeSet|20040710140215|32090 jejb # This is a BitKeeper generated diff -Nru style patch. # # ChangeSet # 2004/07/10 13:17:00-07:00 akpm@bix.(none) # Merge bk://linux-voyager.bkbits.net/dma-declare-coherent-memory-2.6 # into bix.(none):/usr/src/bk-dma-declare-coherent-memory # # include/linux/dma-mapping.h # 2004/07/10 13:16:56-07:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2004/07/10 09:02:15-05:00 jejb@raven.il.steeleye.com # dma_alloc_coherent() still needs to support a NULL device # # Fix the part of the on-chip memory API that broke this. # # Signed-off-by: James Bottomley # # arch/i386/kernel/pci-dma.c # 2004/07/10 09:01:31-05:00 jejb@raven.il.steeleye.com +1 -1 # dma_alloc_coherent() still needs to support a NULL device # # ChangeSet # 2004/07/09 18:06:52-05:00 jejb@raven.il.steeleye.com # Fix region sizing problem in dma_mark_declared_memory_occupied() # # The current code reserves too few pages if addr isn't # page aligned and size just spans onto the last page. # Fix by increasing size by the addr misalignment amount. # # Signed-off-by: James Bottomley # # arch/i386/kernel/pci-dma.c # 2004/07/09 18:06:07-05:00 jejb@raven.il.steeleye.com +1 -1 # Fix region sizing problem in dma_mark_declared_memory_occupied() # # ChangeSet # 2004/07/09 17:16:25-05:00 akpm@osdl.org # [PATCH] Fix sparc compile error in dma-mapping.h # # William Lee Irwin III wrote: # > # > SPLIT include/linux/autoconf.h -> include/config/* # > CHK include/linux/compile.h # > UPD include/linux/compile.h # > In file included from include/asm/sbus.h:10, # > from arch/sparc64/kernel/auxio.c:15: # # It needs err.h. # # Signed-off-by: James Bottomley # # include/linux/dma-mapping.h # 2004/07/09 17:07:13-05:00 akpm@osdl.org +2 -0 # Fix sparc compile error # # ChangeSet # 2004/07/08 15:53:54-05:00 jejb@raven.il.steeleye.com # Fix bug in __get_vm_area() alignment code # # If we are still above the vma start address on the last vma, we can use addr even if it is # inside the vma (i.e. addr < vma->addr + vma->size). Fix this by aligning addr past the # end of the vma in this case # # mm/vmalloc.c # 2004/07/08 15:53:09-05:00 jejb@raven.il.steeleye.com +6 -3 # Fix bug in __get_vm_area() alignment code # # ChangeSet # 2004/07/08 03:30:21-07:00 akpm@bix.(none) # Merge bk://linux-voyager.bkbits.net/dma-declare-coherent-memory-2.6 # into bix.(none):/usr/src/bk-dma-declare-coherent-memory # # include/linux/dma-mapping.h # 2004/07/08 03:30:18-07:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2004/07/07 11:46:31-05:00 jejb@raven.il.steeleye.com # Fix incorrect prototype in the dma_declare_coherent_memory API # # dma_mark_declared_memory_occupied() wasn't declared static inline in the NULL (default) # implementation leading to compile failures. # # include/linux/dma-mapping.h # 2004/07/07 11:45:46-05:00 jejb@raven.il.steeleye.com +3 -2 # Fix incorrect prototype in the dma_declare_coherent_memory API # # ChangeSet # 2004/07/06 16:31:59-07:00 akpm@bix.(none) # Merge bk://linux-voyager.bkbits.net/dma-declare-coherent-memory-2.6 # into bix.(none):/usr/src/bk-dma-declare-coherent-memory # # include/linux/dma-mapping.h # 2004/07/06 16:31:56-07:00 akpm@bix.(none) +0 -0 # Auto merged # # ChangeSet # 2004/06/30 21:44:24-05:00 jejb@mulgrave.(none) # Convert NCR_Q720 to use dma_declare_coherent_memory # # This board makes an ideal example for using the API # since it consists of 4 SCSI I/O processors and a # 0.5-2MB block of memory on a single MCA card. # # Signed-off-by: James Bottomley # # drivers/scsi/NCR_Q720.c # 2004/06/30 21:44:03-05:00 jejb@mulgrave.(none) +18 -3 # Convert NCR_Q720 to use dma_declare_coherent_memory # # ChangeSet # 2004/06/30 21:38:34-05:00 jejb@mulgrave.(none) # Add x86 implementation of dma_declare_coherent_memory # # This actually implements the API (all except for # DMA_MEMORY_INCLUDES_CHILDREN). # # Signed-off-by: James Bottomley # # include/linux/device.h # 2004/06/30 21:37:55-05:00 jejb@mulgrave.(none) +3 -0 # Add x86 implementation of dma_declare_coherent_memory # # include/asm-i386/dma-mapping.h # 2004/06/30 21:37:55-05:00 jejb@mulgrave.(none) +12 -0 # Add x86 implementation of dma_declare_coherent_memory # # arch/i386/kernel/pci-dma.c # 2004/06/30 21:37:55-05:00 jejb@mulgrave.(none) +109 -2 # Add x86 implementation of dma_declare_coherent_memory # # ChangeSet # 2004/06/30 21:11:14-05:00 jejb@mulgrave.(none) # Add vmalloc alignment constraints # # vmalloc is used by ioremap() to get regions for # remapping I/O space. To feed these regions back # into a __get_free_pages() type memory allocator, # they are expected to have more alignment than # get_vm_area() proves. So add additional alignment # constraints for VM_IOREMAP. # # Signed-off-by: James Bottomley # # mm/vmalloc.c # 2004/06/30 21:10:54-05:00 jejb@mulgrave.(none) +18 -2 # Add vmalloc alignment constraints # # ChangeSet # 2004/06/30 21:08:15-05:00 jejb@mulgrave.(none) # Add memory region bitmap implementations # # These APIs deal with bitmaps representing contiguous # memory regions. The idea is to set, free and find # a contiguous area. # # For ease of implementation (as well as to conform # to the standard requirements), the bitmaps always # return n aligned n length regions. The implementation # is also limited to BITS_PER_LONG contiguous regions. # # Signed-off-by: James Bottomley # # lib/bitmap.c # 2004/06/30 21:07:26-05:00 jejb@mulgrave.(none) +76 -0 # Add memory region bitmap implementations # # include/linux/bitmap.h # 2004/06/30 21:07:26-05:00 jejb@mulgrave.(none) +3 -0 # Add memory region bitmap implementations # # ChangeSet # 2004/06/30 21:02:11-05:00 jejb@mulgrave.(none) # Add dma_declare_coherent_memory() API # # This adds the description and a null prototype. # # Signed-off-by: James Bottomley # # include/linux/dma-mapping.h # 2004/06/30 21:01:43-05:00 jejb@mulgrave.(none) +26 -0 # Add dma_declare_coherent_memory() API # # Documentation/DMA-API.txt # 2004/06/30 21:01:43-05:00 jejb@mulgrave.(none) +79 -0 # Add dma_declare_coherent_memory() API # diff -Nru a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt --- a/Documentation/DMA-API.txt 2004-07-13 12:36:09 -07:00 +++ b/Documentation/DMA-API.txt 2004-07-13 12:36:09 -07:00 @@ -444,4 +444,83 @@ continuing on for size. Again, you *must* observe the cache line boundaries when doing this. +int +dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int + flags) + + +Declare region of memory to be handed out by dma_alloc_coherent when +it's asked for coherent memory for this device. + +bus_addr is the physical address to which the memory is currently +assigned in the bus responding region (this will be used by the +platform to perform the mapping) + +device_addr is the physical address the device needs to be programmed +with actually to address this memory (this will be handed out as the +dma_addr_t in dma_alloc_coherent()) + +size is the size of the area (must be multiples of PAGE_SIZE). + +flags can be or'd together and are + +DMA_MEMORY_MAP - request that the memory returned from +dma_alloc_coherent() be directly writeable. + +DMA_MEMORY_IO - request that the memory returned from +dma_alloc_coherent() be addressable using read/write/memcpy_toio etc. + +One or both of these flags must be present + +DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by +dma_alloc_coherent of any child devices of this one (for memory residing +on a bridge). + +DMA_MEMORY_EXCLUSIVE - only allocate memory from the declared regions. +Do not allow dma_alloc_coherent() to fall back to system memory when +it's out of memory in the declared region. + +The return value will be either DMA_MEMORY_MAP or DMA_MEMORY_IO and +must correspond to a passed in flag (i.e. no returning DMA_MEMORY_IO +if only DMA_MEMORY_MAP were passed in) for success or zero for +failure. + +Note, for DMA_MEMORY_IO returns, all subsequent memory returned by +dma_alloc_coherent() may no longer be accessed directly, but instead +must be accessed using the correct bus functions. If your driver +isn't prepared to handle this contingency, it should not specify +DMA_MEMORY_IO in the input flags. + +As a simplification for the platforms, only *one* such region of +memory may be declared per device. + +For reasons of efficiency, most platforms choose to track the declared +region only at the granularity of a page. For smaller allocations, +you should use the dma_pool() API. + +void +dma_release_declared_memory(struct device *dev) + +Remove the memory region previously declared from the system. This +API performs *no* in-use checking for this region and will return +unconditionally having removed all the required structures. It is the +drivers job to ensure that no parts of this memory region are +currently in use. + +void * +dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) + +This is used to occupy specific regions of the declared space +(dma_alloc_coherent() will hand out the first free region it finds). + +device_addr is the *device* address of the region requested + +size is the size (and should be a page sized multiple). + +The return value will be either a pointer to the processor virtual +address of the memory, or an error (via PTR_ERR()) if any part of the +region is occupied. + diff -Nru a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c --- a/arch/i386/kernel/pci-dma.c 2004-07-13 12:36:09 -07:00 +++ b/arch/i386/kernel/pci-dma.c 2004-07-13 12:36:09 -07:00 @@ -13,17 +13,40 @@ #include #include +struct dma_coherent_mem { + void *virt_base; + u32 device_base; + int size; + int flags; + unsigned long *bitmap; +}; + void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, int gfp) { void *ret; + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + int order = get_order(size); /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + ret = mem->virt_base + (page << PAGE_SHIFT); + memset(ret, 0, size); + return ret; + } + if (mem->flags & DMA_MEMORY_EXCLUSIVE) + return NULL; + } + if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); + ret = (void *)__get_free_pages(gfp, order); if (ret != NULL) { memset(ret, 0, size); @@ -35,5 +58,89 @@ void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - free_pages((unsigned long)vaddr, get_order(size)); + struct dma_coherent_mem *mem = dev->dma_mem; + int order = get_order(size); + + if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + } else + free_pages((unsigned long)vaddr, order); +} + +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void *mem_base; + int pages = size >> PAGE_SHIFT; + int bitmap_size = (pages + 31)/32; + + if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) + goto out; + if (!size) + goto out; + if (dev->dma_mem) + goto out; + + /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ + + mem_base = ioremap(bus_addr, size); + if (!mem_base) + goto out; + + dev->dma_mem = kmalloc(GFP_KERNEL, sizeof(struct dma_coherent_mem)); + if (!dev->dma_mem) + goto out; + memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); + dev->dma_mem->bitmap = kmalloc(GFP_KERNEL, bitmap_size); + if (!dev->dma_mem->bitmap) + goto free1_out; + memset(dev->dma_mem->bitmap, 0, bitmap_size); + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; + dev->dma_mem->size = pages; + dev->dma_mem->flags = flags; + + if (flags & DMA_MEMORY_MAP) + return DMA_MEMORY_MAP; + + return DMA_MEMORY_IO; + + free1_out: + kfree(dev->dma_mem->bitmap); + out: + return 0; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if(!mem) + return; + dev->dma_mem = NULL; + kfree(mem->bitmap); + kfree(mem); +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; + int pos, err; + + if (!mem) + return ERR_PTR(-EINVAL); + + pos = (device_addr - mem->device_base) >> PAGE_SHIFT; + err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); } +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); diff -Nru a/drivers/scsi/NCR_Q720.c b/drivers/scsi/NCR_Q720.c --- a/drivers/scsi/NCR_Q720.c 2004-07-13 12:36:09 -07:00 +++ b/drivers/scsi/NCR_Q720.c 2004-07-13 12:36:09 -07:00 @@ -216,7 +216,21 @@ goto out_free; } - mem_base = (__u32)ioremap(base_addr, mem_size); + if (dma_declare_coherent_memory(dev, base_addr, base_addr, + mem_size, DMA_MEMORY_MAP) + != DMA_MEMORY_MAP) { + printk(KERN_ERR "NCR_Q720: DMA declare memory failed\n"); + goto out_release_region; + } + + /* The first 1k of the memory buffer is a memory map of the registers + */ + mem_base = (__u32)dma_mark_declared_memory_occupied(dev, base_addr, + 1024); + if (IS_ERR((void *)mem_base)) { + printk("NCR_Q720 failed to reserve memory mapped region\n"); + goto out_release; + } /* now also enable accesses in asr 2 */ asr2 = inb(io_base + 0x0a); @@ -296,7 +310,8 @@ return 0; out_release: - iounmap((void *)mem_base); + dma_release_declared_memory(dev); + out_release_region: release_mem_region(base_addr, mem_size); out_free: kfree(p); @@ -321,7 +336,7 @@ if(p->hosts[i]) NCR_Q720_remove_one(p->hosts[i]); - iounmap((void *)p->mem_base); + dma_release_declared_memory(dev); release_mem_region(p->phys_mem_base, p->mem_size); free_irq(p->irq, p); kfree(p); diff -Nru a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h --- a/include/asm-i386/dma-mapping.h 2004-07-13 12:36:09 -07:00 +++ b/include/asm-i386/dma-mapping.h 2004-07-13 12:36:09 -07:00 @@ -163,4 +163,16 @@ flush_write_buffers(); } +#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY +extern int +dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags); + +extern void +dma_release_declared_memory(struct device *dev); + +extern void * +dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size); + #endif diff -Nru a/include/linux/bitmap.h b/include/linux/bitmap.h --- a/include/linux/bitmap.h 2004-07-13 12:36:09 -07:00 +++ b/include/linux/bitmap.h 2004-07-13 12:36:09 -07:00 @@ -98,6 +98,9 @@ const unsigned long *src, int nbits); extern int bitmap_parse(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); +extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); +extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); +extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ diff -Nru a/include/linux/device.h b/include/linux/device.h --- a/include/linux/device.h 2004-07-13 12:36:09 -07:00 +++ b/include/linux/device.h 2004-07-13 12:36:09 -07:00 @@ -283,6 +283,9 @@ struct list_head dma_pools; /* dma pools (if dma'ble) */ + struct dma_coherent_mem *dma_mem; /* internal for coherent mem + override */ + void (*release)(struct device * dev); }; diff -Nru a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h --- a/include/linux/dma-mapping.h 2004-07-13 12:36:09 -07:00 +++ b/include/linux/dma-mapping.h 2004-07-13 12:36:09 -07:00 @@ -1,6 +1,8 @@ #ifndef _ASM_LINUX_DMA_MAPPING_H #define _ASM_LINUX_DMA_MAPPING_H +#include + /* These definitions mirror those in pci.h, so they can be used * interchangeably with their PCI_ counterparts */ enum dma_data_direction { @@ -20,6 +22,33 @@ #define dma_sync_sg dma_sync_sg_for_cpu extern u64 dma_get_required_mask(struct device *dev); + +/* flags for the coherent memory api */ +#define DMA_MEMORY_MAP 0x01 +#define DMA_MEMORY_IO 0x02 +#define DMA_MEMORY_INCLUDES_CHILDREN 0x04 +#define DMA_MEMORY_EXCLUSIVE 0x08 + +#ifndef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY +static inline int +dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + return 0; +} + +static inline void +dma_release_declared_memory(struct device *dev) +{ +} + +static inline void * +dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + return ERR_PTR(-EBUSY); +} +#endif #endif diff -Nru a/lib/bitmap.c b/lib/bitmap.c --- a/lib/bitmap.c 2004-07-13 12:36:09 -07:00 +++ b/lib/bitmap.c 2004-07-13 12:36:09 -07:00 @@ -408,3 +408,79 @@ return 0; } EXPORT_SYMBOL(bitmap_parse); + +/** + * bitmap_find_free_region - find a contiguous aligned mem region + * @bitmap: an array of unsigned longs corresponding to the bitmap + * @bits: number of bits in the bitmap + * @order: region size to find (size is actually 1< BITS_PER_LONG) + return -EINVAL; + + /* make a mask of the order */ + mask = (1ul << (pages - 1)); + mask += mask - 1; + + /* run up the bitmap pages bits at a time */ + for (i = 0; i < bits; i += pages) { + int index = BITS_TO_LONGS(i); + int offset = i - (index * BITS_PER_LONG); + if((bitmap[index] & (mask << offset)) == 0) { + /* set region in bimap */ + bitmap[index] |= (mask << offset); + return i; + } + } + return -ENOMEM; +} +EXPORT_SYMBOL(bitmap_find_free_region); + +/** + * bitmap_release_region - release allocated bitmap region + * @bitmap: a pointer to the bitmap + * @pos: the beginning of the region + * @order: the order of the bits to release (number is 1< IOREMAP_MAX_ORDER) + bit = IOREMAP_MAX_ORDER; + else if (bit < PAGE_SHIFT) + bit = PAGE_SHIFT; + + align = 1ul << bit; + } + addr = ALIGN(start, align); area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) @@ -200,13 +215,17 @@ write_lock(&vmlist_lock); for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) { - if ((unsigned long)tmp->addr < addr) + if ((unsigned long)tmp->addr < addr) { + if((unsigned long)tmp->addr + tmp->size >= addr) + addr = ALIGN(tmp->size + + (unsigned long)tmp->addr, align); continue; + } if ((size + addr) < addr) goto out; if (size + addr <= (unsigned long)tmp->addr) goto found; - addr = tmp->size + (unsigned long)tmp->addr; + addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align); if (addr > end - size) goto out; }