# This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet v2.5.74 -> 1.1421 # drivers/eisa/eisa.ids 1.2 -> 1.3 # drivers/net/irda/irtty-sir.c 1.7 -> 1.8 # arch/ppc/boot/simple/m8xx_tty.c 1.2 -> 1.3 # drivers/pci/search.c 1.10 -> 1.13 # arch/ppc/configs/menf1_defconfig 1.11 -> 1.12 # include/linux/proc_fs.h 1.22 -> 1.23 # arch/ppc/platforms/sandpoint_pci.c 1.3 -> (deleted) # arch/ppc/platforms/zx4500_setup.c 1.8 -> (deleted) # arch/ppc/boot/simple/Makefile 1.15 -> 1.19 # arch/ppc/platforms/menf1.h 1.3 -> (deleted) # drivers/base/firmware_class.c 1.3 -> 1.4 # include/linux/affs_fs.h 1.4 -> 1.6 # drivers/mtd/mtd_blkdevs.c 1.3 -> 1.4 # fs/libfs.c 1.21 -> 1.22 # drivers/parisc/eisa.c 1.6 -> 1.7 # kernel/ksyms.c 1.206 -> 1.208 # fs/autofs4/root.c 1.13 -> 1.14 # arch/ppc/configs/k2_defconfig 1.11 -> 1.13 # include/linux/elevator.h 1.21 -> 1.25 # include/linux/buffer_head.h 1.42 -> 1.43 # arch/x86_64/ia32/ia32_binfmt.c 1.14 -> 1.15 # include/linux/kobject.h 1.22 -> 1.23 # fs/cramfs/inode.c 1.29 -> 1.30 # arch/alpha/mm/numa.c 1.12 -> 1.13 # fs/sysfs/file.c 1.6 -> 1.7 # include/linux/mm.h 1.121 -> 1.123 # arch/i386/kernel/cpu/common.c 1.21 -> 1.22 # fs/open.c 1.42 -> 1.44 # include/asm-mips64/mmzone.h 1.7 -> 1.8 # fs/jfs/acl.c 1.1 -> 1.2 # arch/ppc/syslib/Makefile 1.6 -> 1.8 # mm/page_alloc.c 1.163 -> 1.166 # drivers/parisc/eisa_enumerator.c 1.1 -> 1.2 # arch/arm/mm/init.c 1.22 -> 1.23 # arch/ppc/configs/mvme5100_defconfig 1.9 -> 1.11 # kernel/fork.c 1.127 -> 1.130 # include/linux/sched.h 1.153 -> 1.155 # kernel/sysctl.c 1.46 -> 1.47 # drivers/block/ll_rw_blk.c 1.174 -> 1.190 # drivers/net/wan/hdlc_generic.c 1.9 -> 1.10 # include/linux/mman.h 1.3 -> 1.4 # fs/ext3/xattr_user.c 1.7 -> 1.8 # fs/coda/dir.c 1.20 -> 1.23 # drivers/ieee1394/sbp2.c 1.36 -> 1.37 # fs/attr.c 1.17 -> 1.18 # fs/proc/proc_misc.c 1.80 -> 1.81 # arch/ppc/configs/spruce_defconfig 1.6 -> 1.8 # drivers/pnp/interface.c 1.17 -> 1.18 # arch/i386/Kconfig 1.64 -> 1.65 # drivers/block/floppy.c 1.78 -> 1.79 # drivers/eisa/pci_eisa.c 1.1 -> 1.2 # fs/devfs/base.c 1.92 -> 1.93 # include/linux/coda_linux.h 1.9 -> 1.10 # fs/afs/mntpt.c 1.2 -> 1.3 # arch/ppc/platforms/sandpoint_serial.h 1.3 -> (deleted) # fs/cifs/cifsfs.h 1.3 -> 1.5 # arch/ppc/platforms/menf1_pci.c 1.3 -> (deleted) # include/linux/ioport.h 1.10 -> 1.11 # arch/ppc/configs/prpmc750_defconfig 1.6 -> 1.8 # arch/x86_64/mm/init.c 1.19 -> 1.20 # mm/swapfile.c 1.79 -> 1.80 # fs/umsdos/rdir.c 1.8 -> 1.9 # drivers/pnp/support.c 1.6 -> 1.7 # security/dummy.c 1.26 -> 1.27 # arch/i386/pci/direct.c 1.17 -> 1.18 # include/linux/blkdev.h 1.109 -> 1.116 # arch/ppc/configs/power3_defconfig 1.12 -> 1.14 # kernel/softirq.c 1.41 -> 1.43 # arch/ppc/configs/zx4500_defconfig 1.6 -> (deleted) # include/linux/fs.h 1.251 -> 1.254 # include/asm-i386/timer.h 1.7 -> 1.9 # fs/intermezzo/dir.c 1.13 -> 1.16 # fs/hpfs/dir.c 1.11 -> 1.12 # drivers/eisa/eisa-bus.c 1.7 -> 1.8 # arch/ppc/configs/common_defconfig 1.19 -> 1.21 # fs/jbd/transaction.c 1.68 -> 1.69 # mm/bootmem.c 1.17 -> 1.18 # fs/affs/namei.c 1.22 -> 1.24 # include/linux/umsdos_fs.p 1.3 -> 1.4 # fs/intermezzo/vfs.c 1.20 -> 1.22 # include/linux/namei.h 1.5 -> 1.7 # arch/i386/kernel/io_apic.c 1.74 -> 1.75 # fs/xfs/linux/xfs_iops.c 1.25 -> 1.28 # drivers/pci/hotplug/acpiphp_glue.c 1.14 -> 1.15 # arch/ppc/boot/simple/m8260_tty.c 1.2 -> 1.3 # fs/ext3/acl.c 1.9 -> 1.10 # fs/ramfs/inode.c 
1.34 -> 1.36 # fs/namei.c 1.76 -> 1.79 # fs/afs/dir.c 1.4 -> 1.5 # drivers/pci/pci.h 1.9 -> 1.10 # kernel/user.c 1.6 -> 1.7 # security/capability.c 1.18 -> 1.19 # fs/nfsd/nfsfh.c 1.41 -> 1.42 # include/linux/nfs_fs.h 1.45 -> 1.46 # ipc/sem.c 1.20 -> 1.21 # include/linux/sysfs.h 1.27 -> 1.29 # fs/jfs/xattr.c 1.12 -> 1.13 # fs/exec.c 1.85 -> 1.90 # fs/minix/namei.c 1.15 -> 1.17 # fs/nfsd/vfs.c 1.64 -> 1.66 # fs/eventpoll.c 1.21 -> 1.22 # mm/swap.c 1.51 -> 1.52 # include/linux/sem.h 1.7 -> 1.8 # arch/ppc/platforms/sandpoint.h 1.3 -> 1.4 # kernel/signal.c 1.86 -> 1.88 # fs/coda/file.c 1.10 -> 1.11 # mm/mprotect.c 1.22 -> 1.23 # include/linux/efs_fs.h 1.10 -> 1.11 # fs/hpfs/namei.c 1.18 -> 1.20 # fs/sysfs/dir.c 1.5 -> 1.6 # mm/shmem.c 1.127 -> 1.129 # arch/ia64/mm/init.c 1.44 -> 1.45 # arch/i386/mm/pageattr.c 1.4 -> 1.5 # fs/befs/linuxvfs.c 1.11 -> 1.12 # arch/ppc/configs/gemini_defconfig 1.12 -> 1.14 # include/linux/eventpoll.h 1.10 -> 1.11 # fs/intermezzo/intermezzo_fs.h 1.11 -> 1.12 # drivers/net/wan/comx.c 1.18 -> 1.19 # arch/ppc/configs/sandpoint_defconfig 1.12 -> 1.14 # fs/openpromfs/inode.c 1.22 -> 1.24 # arch/ppc/configs/ibmchrp_defconfig 1.12 -> 1.14 # drivers/net/e100/e100_main.c 1.78 -> 1.79 # include/linux/pci.h 1.97 -> 1.99 # Documentation/pci.txt 1.10 -> 1.11 # drivers/block/genhd.c 1.89 -> 1.90 # fs/umsdos/dir.c 1.10 -> 1.11 # fs/nfs/dir.c 1.56 -> 1.60 # fs/autofs/root.c 1.11 -> 1.12 # arch/i386/kernel/time.c 1.37 -> 1.38 # fs/sysfs/bin.c 1.7 -> 1.9 # include/asm-i386/mmzone.h 1.12 -> 1.13 # net/core/dev.c 1.88 -> 1.89 # drivers/scsi/scsi.c 1.119 -> 1.120 # fs/ext2/ialloc.c 1.34 -> 1.35 # arch/ppc/configs/pcore_defconfig 1.5 -> 1.7 # include/asm-i386/cacheflush.h 1.3 -> 1.4 # include/linux/interrupt.h 1.25 -> 1.26 # fs/cifs/cifsfs.c 1.18 -> 1.19 # mm/mremap.c 1.29 -> 1.31 # drivers/pci/pci-sysfs.c 1.4 -> 1.5 # include/linux/mmzone.h 1.39 -> 1.40 # fs/ncpfs/ioctl.c 1.6 -> 1.7 # net/bluetooth/hci_sock.c 1.23 -> 1.24 # fs/ncpfs/dir.c 1.24 -> 1.26 # arch/ppc/boot/common/Makefile 1.7 -> 1.8 # arch/ia64/kernel/sys_ia64.c 1.23 -> 1.24 # fs/buffer.c 1.203 -> 1.205 # drivers/pnp/resource.c 1.17 -> 1.18 # fs/reiserfs/namei.c 1.45 -> 1.47 # arch/ppc/syslib/open_pic.c 1.25 -> 1.26 # arch/ppc/boot/simple/direct.S 1.2 -> (deleted) # fs/ext3/namei.c 1.42 -> 1.44 # arch/ppc64/mm/init.c 1.46 -> 1.47 # fs/sysv/namei.c 1.15 -> 1.17 # fs/hfs/sysdep.c 1.6 -> 1.7 # include/linux/ext3_fs.h 1.28 -> 1.29 # fs/udf/file.c 1.15 -> 1.16 # fs/hpfs/hpfs_fn.h 1.14 -> 1.16 # arch/ppc/configs/mcpn765_defconfig 1.6 -> 1.8 # fs/jffs/inode-v23.c 1.47 -> 1.49 # arch/ppc/configs/pplus_defconfig 1.11 -> 1.13 # fs/ext2/xattr_user.c 1.6 -> 1.7 # include/linux/iso_fs.h 1.11 -> 1.12 # arch/ppc/platforms/zx4500.h 1.3 -> (deleted) # mm/mmap.c 1.87 -> 1.88 # fs/proc/base.c 1.49 -> 1.51 # drivers/net/irda/sir_dev.c 1.5 -> 1.6 # net/unix/af_unix.c 1.49 -> 1.50 # arch/ppc/configs/lopec_defconfig 1.7 -> 1.9 # drivers/pci/hotplug/cpci_hotplug_pci.c 1.10 -> 1.11 # Documentation/eisa.txt 1.2 -> 1.3 # fs/intermezzo/file.c 1.10 -> 1.11 # fs/cifs/dir.c 1.4 -> 1.6 # net/bluetooth/rfcomm/sock.c 1.21 -> 1.22 # fs/vfat/namei.c 1.32 -> 1.34 # fs/namespace.c 1.46 -> 1.47 # fs/nfs/file.c 1.28 -> 1.30 # fs/qnx4/namei.c 1.8 -> 1.10 # fs/adfs/dir.c 1.9 -> 1.10 # fs/efs/namei.c 1.4 -> 1.5 # fs/ext3/inode.c 1.76 -> 1.77 # fs/romfs/inode.c 1.30 -> 1.31 # fs/coda/pioctl.c 1.10 -> 1.11 # arch/i386/kernel/timers/timer_tsc.c 1.19 -> 1.22 # arch/ppc64/mm/numa.c 1.7 -> 1.8 # arch/i386/kernel/timers/timer_cyclone.c 1.8 -> 1.10 # drivers/pci/probe.c 1.46 
-> 1.48 # fs/ext2/acl.h 1.1 -> 1.2 # include/linux/security.h 1.24 -> 1.25 # drivers/eisa/virtual_root.c 1.2 -> 1.3 # arch/ppc/configs/adir_defconfig 1.6 -> 1.8 # include/linux/hfs_fs.h 1.8 -> 1.9 # include/asm-i386/hardirq.h 1.18 -> 1.20 # fs/umsdos/namei.c 1.10 -> 1.11 # fs/isofs/namei.c 1.9 -> 1.10 # fs/ntfs/namei.c 1.33 -> 1.34 # fs/jfs/namei.c 1.26 -> 1.28 # fs/proc/root.c 1.13 -> 1.15 # mm/slab.c 1.90 -> 1.93 # drivers/block/elevator.c 1.42 -> 1.46 # include/linux/eisa.h 1.4 -> 1.5 # arch/sh/kernel/cpu/sh4/pci-sh7751.c 1.1 -> 1.2 # drivers/message/i2o/i2o_scsi.c 1.19 -> 1.20 # fs/ext2/acl.c 1.5 -> 1.6 # drivers/scsi/scsi_scan.c 1.96 -> 1.97 # include/linux/msdos_fs.h 1.26 -> 1.28 # fs/jffs2/dir.c 1.27 -> 1.29 # arch/ppc/configs/apus_defconfig 1.18 -> 1.20 # drivers/net/irda/sir_kthread.c 1.6 -> 1.7 # init/Kconfig 1.16 -> 1.17 # drivers/block/Makefile 1.16 -> 1.18 # arch/ppc/defconfig 1.19 -> 1.21 # arch/x86_64/mm/numa.c 1.3 -> 1.4 # fs/hfs/dir_cap.c 1.5 -> 1.6 # arch/i386/pci/legacy.c 1.9 -> 1.11 # fs/hfs/dir.c 1.8 -> 1.9 # include/asm-x86_64/mmzone.h 1.3 -> 1.4 # arch/ppc/platforms/menf1_setup.c 1.9 -> (deleted) # fs/intermezzo/dcache.c 1.9 -> 1.10 # fs/jfs/jfs_acl.h 1.2 -> 1.3 # drivers/net/Kconfig 1.34 -> 1.35 # fs/hfs/dir_dbl.c 1.10 -> 1.12 # fs/udf/namei.c 1.24 -> 1.26 # mm/nommu.c 1.3 -> 1.4 # fs/ext2/namei.c 1.18 -> 1.20 # kernel/exit.c 1.104 -> 1.108 # arch/s390/kernel/compat_exec.c 1.2 -> 1.3 # fs/jbd/commit.c 1.36 -> 1.37 # Documentation/filesystems/Locking 1.42 -> 1.43 # fs/bfs/dir.c 1.19 -> 1.21 # fs/smbfs/file.c 1.21 -> 1.22 # fs/hugetlbfs/inode.c 1.28 -> 1.29 # arch/ppc/configs/ev64260_defconfig 1.6 -> 1.8 # arch/ppc/configs/prpmc800_defconfig 1.6 -> 1.8 # arch/ppc/kernel/ppc-stub.c 1.8 -> 1.9 # arch/i386/lib/delay.c 1.4 -> 1.5 # include/linux/device.h 1.100 -> 1.102 # include/asm-alpha/mmzone.h 1.8 -> 1.9 # lib/kobject.c 1.26 -> 1.27 # arch/ppc/configs/pmac_defconfig 1.11 -> 1.13 # arch/ppc/kernel/ppc_ksyms.c 1.41 -> 1.42 # fs/umsdos/emd.c 1.7 -> 1.8 # arch/mips/kernel/sysirix.c 1.11 -> 1.12 # arch/ia64/ia32/binfmt_elf32.c 1.13 -> 1.14 # arch/ppc/platforms/zx4500_pci.c 1.3 -> (deleted) # fs/adfs/adfs.h 1.5 -> 1.6 # include/linux/irq_cpustat.h 1.8 -> 1.10 # arch/arm26/mm/init.c 1.1 -> 1.2 # include/linux/netdevice.h 1.44 -> 1.45 # arch/ppc/kernel/setup.c 1.39 -> 1.40 # include/asm-ppc/serial.h 1.8 -> 1.10 # include/asm-ppc64/mmzone.h 1.11 -> 1.12 # fs/ext3/acl.h 1.1 -> 1.2 # arch/i386/kernel/timers/timer.c 1.8 -> 1.10 # drivers/pnp/manager.c 1.9 -> 1.12 # arch/ppc/platforms/zx4500_serial.h 1.3 -> (deleted) # fs/hfs/dir_nat.c 1.8 -> 1.9 # drivers/block/cciss.c 1.82 -> 1.83 # fs/smbfs/dir.c 1.24 -> 1.26 # arch/ppc/boot/common/ns16550.c 1.4 -> 1.5 # arch/ppc/Kconfig 1.28 -> 1.30 # drivers/base/class.c 1.36 -> 1.38 # include/linux/slab.h 1.25 -> 1.27 # include/linux/dcache.h 1.33 -> 1.34 # arch/i386/pci/irq.c 1.25 -> 1.27 # fs/proc/generic.c 1.21 -> 1.22 # arch/ppc/platforms/sandpoint_setup.c 1.13 -> 1.14 arch/ppc/platforms/sandpoint.c (moved) # fs/freevxfs/vxfs_lookup.c 1.7 -> 1.8 # arch/ppc/platforms/Makefile 1.18 -> 1.20 # include/linux/qnx4_fs.h 1.7 -> 1.9 # fs/ufs/namei.c 1.18 -> 1.20 # fs/fs-writeback.c 1.39 -> 1.40 # fs/msdos/namei.c 1.28 -> 1.30 # drivers/pci/hotplug/ibmphp_core.c 1.33 -> 1.34 # fs/block_dev.c 1.133 -> 1.134 # arch/i386/mm/pgtable.c 1.12 -> 1.13 # (new) -> 1.4 drivers/block/as-iosched.c # (new) -> 1.1 arch/ppc/syslib/gen550_kgdb.c # (new) -> 1.1 arch/ppc/syslib/gen550_dbg.c # (new) -> 1.1 arch/ppc/boot/common/serial_stub.c # # The following is the 
BitKeeper ChangeSet Log # -------------------------------------------- # 03/07/02 torvalds@home.osdl.org 1.1360.1.1 # Linux 2.5.74 # -------------------------------------------- # 03/07/02 ilmari@ilmari.org 1.1360.1.2 # [PATCH] Allow modular DM # # With the recent fixes, io_schedule needs to be exported for modular dm # to work. # -------------------------------------------- # 03/07/03 paulus@samba.org 1.1360.1.3 # Merge samba.org:/home/paulus/kernel/linux-2.5 # into samba.org:/home/paulus/kernel/for-linus-ppc # -------------------------------------------- # 03/07/02 greg@kroah.com 1.1362 # Merge kroah.com:/home/greg/linux/BK/bleed-2.5 # into kroah.com:/home/greg/linux/BK/pci-2.5 # -------------------------------------------- # 03/07/03 paulus@samba.org 1.1360.1.4 # Merge bk://stop.crashing.org/linux-2.5-obsolete # into samba.org:/home/paulus/kernel/for-linus-ppc # -------------------------------------------- # 03/07/03 paulus@samba.org 1.1360.1.5 # Merge bk://stop.crashing.org/linux-2.5-misc # into samba.org:/home/paulus/kernel/for-linus-ppc # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.1 # [PATCH] move_vma() make_pages_present() fix # # From: Hugh Dickins # # mremap's move_vma VM_LOCKED case was still wrong. # # If the do_munmap unmaps a part of new_vma, then its vm_start and vm_end # from before cannot both be the right addresses for the make_pages_present # range, and may BUG() there. # # We need [new_addr, new_addr+new_len) to be locked down; but # move_page_tables already transferred the locked pages [new_addr, # new_addr+old_len), and they're either held in a VM_LOCKED vma throughout, # or temporarily in no vma: in neither case can they be swapped out, so no need to # run over that range again. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.2 # [PATCH] page unmapping debug # # From: Manfred Spraul # # Manfred's latest page unmapping debug patch. # # The patch adds support for a special debug mode to both the page and the slab # allocator: Unused pages are removed from the kernel linear mapping. This # means that now any access to freed memory will cause an immediate exception. # Right now, read accesses remain totally unnoticed and write accesses may be # caught by the slab poisoning, but usually far too late for a meaningful bug # report. # # The implementation is based on a new arch-dependent function, # kernel_map_pages(), that removes the pages from the linear mapping. It's # right now only implemented for i386. # # Changelog: # # - Add kernel_map_pages() for i386, based on change_page_attr. If # DEBUG_PAGEALLOC is not set, then the function is an empty stub. The stub # is in , i.e. it exists for all archs. # # - Make change_page_attr irq safe. Note that it's not fully irq safe due to # the lack of the tlb flush ipi, but it's good enough for kernel_map_pages(). # Another problem is that kernel_map_pages is not permitted to fail, thus # PSE is disabled if DEBUG_PAGEALLOC is enabled # # - use kernel_map_pages for the page allocator. # # - use kernel_map_pages for the slab allocator. # # I couldn't resist and added additional debugging support into mm/slab.c: # # * at kfree time, the complete backtrace of the kfree caller is stored # in the freed object. # # * a ptrinfo() function that dumps all known data about a kernel virtual # address: the pte value, if it belongs to a slab cache the cache name and # additional info.
# # * merging of common code: new helper function obj_dbglen and obj_dbghdr # for the conversion between the user visible object pointers/len and the # actual, internal addresses and len values. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.3 # [PATCH] NUMA memory reporting fix # # From: Dave Hansen # # The current numa meminfo code exports (via sysfs) pgdat->node_size, as # totalram. This variable is consistently used elsewhere to mean "the number # of physical pages that this particular node spans". This is _not_ what we # want to see from meminfo, which is: "how much actual memory does this node # have?" # # The following patch removes pgdat->node_size, and replaces it with # ->node_spanned_pages. This is to avoid confusion with a new variable, # node_present_pages, which is the _actual_ value that we want to export in # meminfo. Most of the patch is a simple s/node_size/node_spanned_pages/. # The node_size() macro is also removed, and replaced with new ones for # node_{spanned,present}_pages() to avoid confusion. # # We were bitten by this problem in this bug: # http://bugme.osdl.org/show_bug.cgi?id=818 # # Compiled and tested on NUMA-Q. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.4 # [PATCH] ramfs: use generic_file_llseek # # Teach ramfs to use generic_file_llseek: default_llseek takes lock_kernel(). # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.5 # [PATCH] inode_change_ok(): remove lock_kernel() # # `attr' is on the stack, and the inode's contents can change as soon as we # return from inode_change_ok() anyway. I can't see anything which is actually # being locked in there. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.6 # [PATCH] nommu vmtruncate: remove lock_kernel() # # lock_kernel() need not be held across truncate. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.7 # [PATCH] procfs: remove some unneeded lock_kernel()s # # From: William Lee Irwin III # # Remove spurious BKL acquisitions in /proc/. The BKL is not required to # access nr_threads for reporting, and get_locks_status() takes it # internally, wrapping all operations with it. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.8 # [PATCH] remove lock_kernel() from file_ops.flush() # # Rework the file_ops.flush() API so that it is no longer called under # lock_kernel(). Push lock_kernel() down to all implementations except CIFS, # which doesn't want it. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.9 # [PATCH] block_llseek(): remove lock_kernel() # # Replace it with the blockdev inode's i_sem. And we only really need that for # atomic access to file->f_pos. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.10 # [PATCH] Make CONFIG_TC35815 depend on CONFIG_TOSHIBA_JMR3927 # # From: Adrian Bunk # # I got an error at the final linking with CONFIG_TC35815 enabled since # the variables tc_readl and tc_writel are not available. # # The only place where they are defined is arch/mips/pci/ops-jmr3927.c. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.11 # [PATCH] Report detached thread exit to the debugger # # From: Daniel Jacobowitz # # Right now, CLONE_DETACHED threads silently vanish from GDB's sight when # they exit.
This patch lets the thread report its exit to the debugger, and # then be auto-reaped as soon as it is collected, instead of being reaped as # soon as it exits and not reported at all. # # GDB works either way, but this is more correct and will be useful for some # later GDB patches. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.12 # [PATCH] timer renaming and cleanups # # From: john stultz # # This renames the bad "timer" variable to "cur_timer" and moves externs to # .h files. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.13 # [PATCH] fix lost_tick detector for speedstep # # From: john stultz # # The patch tries to resolve issues caused by running the TSC based lost # tick compensation code on CPUs that change frequency (speedstep, etc). # # Should the CPU be in slow mode when calibrate_tsc() executes, the kernel # will assume we have so many cycles per tick. Later when the cpu speeds up, # the kernel will start noting that too many cycles have passed since the last # interrupt. Since this can occasionally happen, the lost tick compensation # code then tries to fix this by incrementing jiffies. Thus every tick we # end up incrementing jiffies many times, causing timers to expire too # quickly and time to rush ahead. # # This patch detects when there have been 100 consecutive interrupts where we # had to compensate for lost ticks. If this occurs, we spit out a warning # and fall back to using the PIT as a time source. # # I've tested this on my speedstep enabled laptop with success, and other # laptop users seeing this problem have reported it works for them. Also to # ensure we don't fall back to the slower PIT too quickly, I tested the code # on a system I have that loses ~30 ticks about every second and it can # still manage to use the TSC as a good time source. # # This solves most of the "time doubling" problems seen on laptops. # Additionally this revision has been modified to use the cleanups made in # rename-timer_A1. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.14 # [PATCH] fix lost-tick compensation corner-case # # From: john stultz # # This patch catches a corner case in the lost-tick compensation code. # # There is a check to see if we overflowed between reads of the two time # sources, however should the high res time source be slightly slower than # what we calibrated, it's possible to trigger this code when no ticks have # been lost. # # This patch adds an extra check to ensure we have seen more than one tick # before we check for this overflow. This seems to resolve the remaining # "time doubling" issues that I've seen reported. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.15 # [PATCH] cleanup and generalise lowmem_page_address # # From: William Lee Irwin III # # This patch allows architectures to micro-optimize lowmem_page_address() at # their whims. Roman Zippel originally wrote and/or suggested this back when # dependencies on page->virtual existing were being shaken out. That's # long-settled, so it's fine to do this now. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.16 # [PATCH] Security hook for vm_enough_memory # # From: Stephen Smalley # # This patch against 2.5.73 replaces vm_enough_memory with a security hook # per Alan Cox's suggestion so that security modules can completely replace # the logic if desired.
# # Note that the patch changes the interface to follow the convention of the # other security hooks, i.e. return 0 if ok or -errno on failure (-ENOMEM in # this case) rather than returning a boolean. It also exports various # variables and functions required for the vm_enough_memory logic. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.17 # [PATCH] ext2: inode allocation race fix # # ext2's inode allocator will call find_group_orlov(), which will return a # suitable blockgroup in which the inode should be allocated. But by the time # we actually try to allocate an inode in the blockgroup, other CPUs could have # used them all up. # # ext2 will bogusly fail with "ext2_new_inode: Free inodes count corrupted in # group NN". # # # To fix this we just advance onto the next blockgroup if the rare race # happens. If we've scanned all blockgroups then return -ENOSPC. # # # (This is a bit inaccurate: after we've scanned all blockgroups, there may # still be available inodes due to inode freeing activity in other blockgroups. # This cannot be fixed without fs-wide locking. The effect is a slightly # early ENOSPC in a nearly-full filesystem). # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.18 # [PATCH] fix double mmdrop() on exec path # # If load_elf_binary() (and the other binary handlers) fail after # flush_old_exec() (for example, in setup_arg_pages()) then do_execve() will go # through and do mmdrop(bprm.mm). # # But bprm.mm is now current->mm. We've just freed the current process's mm. # The kernel dies in a most ghastly manner. # # Fix that up by nulling out bprm.mm in flush_old_exec(), at the point where we # consumed the mm. Handle the null pointer in the do_execve() error path. # # Also: don't open-code free_arg_pages() in do_execve(): call it instead. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.19 # [PATCH] ext3: fix journal_release_buffer() race # # CPU0 CPU1 # # journal_get_write_access(bh) # (Add buffer to t_reserved_list) # # journal_get_write_access(bh) # (It's already on t_reserved_list: # nothing to do) # # (We decide we don't want to # journal the buffer after all) # journal_release_buffer() # (It gets pulled off the transaction) # # # journal_dirty_metadata() # (The buffer isn't on the reserved # list! The kernel explodes) # # # Simple fix: just leave the buffer on t_reserved_list in # journal_release_buffer(). If nobody ends up claiming the buffer then it will # get thrown away at start of transaction commit. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.20 # [PATCH] Set limits on CONFIG_LOG_BUF_SHIFT # # From: bert hubert # # Attached patch adds a range check to LOG_BUF_SHIFT and clarifies the # configuration somewhat. I managed to build a non-booting kernel because I # thought 64 was a nice power of two, which lead to the kernel blocking when # it tried to actually use or allocate a 2^64 buffer. # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.21 # [PATCH] Fix cciss hang # # From: Jens Axboe # # It fixes a hang when performing large I/O's. Has been tested and acked by # the maintainer, "Wiran, Francis" . # -------------------------------------------- # 03/07/02 akpm@osdl.org 1.1360.2.22 # [PATCH] e100 use-after-free fix # # I though Scott had recently merged this but it seems not. We'll be # needing this patch if you merge Manfred's page unmapping debug patch. 
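# (Illustrative aside, not part of the original changesets: the LOG_BUF_SHIFT
# range check above matters because the value is used directly as a
# power-of-two buffer size. A tiny standalone C sketch of the arithmetic,
# with an example shift value:)
#
#     #include <stdio.h>
#
#     int main(void)
#     {
#             /* The log buffer is (1 << LOG_BUF_SHIFT) bytes.  17 is a
#              * plausible setting; 64 would shift a long by its full
#              * width, which is undefined in C and nonsensical as an
#              * allocation size, hence the added sanity range. */
#             unsigned int shift = 17;
#             unsigned long bytes = 1UL << shift;
#
#             printf("LOG_BUF_SHIFT=%u -> %lu bytes\n", shift, bytes);
#             return 0;
#     }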
# -------------------------------------------- # 03/07/03 rusty@rustcorp.com.au 1.1360.2.23 # [PATCH] Remove cpu arg from cpu_raise_irq # # The function cpu_raise_softirq() takes a softirq number, and a cpu number, # but cannot be used with cpu != smp_processor_id(), because there's no # locking around the pending softirq lists. Since noone does this, remove # that arg. # # As per Linus' suggestion, names changed: # raise_softirq(int nr) # cpu_raise_softirq(int cpu, int nr) -> raise_softirq_irqoff(int nr) # __cpu_raise_softirq(int cpu, int nr) -> __raise_softirq_irqoff(int nr) # -------------------------------------------- # 03/07/03 rusty@rustcorp.com.au 1.1360.2.24 # [PATCH] Per-cpu variable in mm/slab.c # # Rather trivial conversion. Tested on SMP. # -------------------------------------------- # 03/07/03 rusty@rustcorp.com.au 1.1360.2.25 # [PATCH] Remove unused __syscall_count # # Noone seems to use __syscall_count. Remove the field from i386 # irq_cpustat_t struct, and the generic accessor macros. # # Because some archs have hardcoded asm references to offsets in this # structure, I haven't touched non-x86, but doing so is usually # trivial. # -------------------------------------------- # 03/07/03 rusty@rustcorp.com.au 1.1360.2.26 # [PATCH] Make ksoftirqd a normal per-cpu variable. # # This moves the ksoftirqd pointers out of the irq_stat struct, and uses a # normal per-cpu variable. It's not that time critical, nor referenced in # assembler. This moves us closer to making irq_stat a per-cpu variable. # # Because some archs have hardcoded asm references to offsets in this # structure, I haven't touched non-x86. The __ksoftirqd_task field is # unused in other archs, too. # -------------------------------------------- # 03/07/03 torvalds@home.osdl.org 1.1360.2.27 # The sbp2 driver needs , but didn't include it. It apparently # used to work due to some random magic indirect include, but broke lately. # # Do the obvious fix. # -------------------------------------------- # 03/07/03 ambx1@neo.rr.com 1.1360.3.1 # [PNP] Handle Disabled Resources Properly # # Some devices will allow for individual resources to be disabled, # even when the device as a whole is active. The current PnP # resource manager is not handling this situation properly. This # patch corrects the issue by detecting disabled resources and then # flagging them. The pnp layer will now skip over any disabled # resources. Interface updates have also been included so that we # can properly display resource tables when a resource is disabled. # # Also note that a new flag "IORESOURCE_DISABLED" has been added to # linux/ioports.h. # # -------------------------------------------- # 03/07/03 ambx1@neo.rr.com 1.1360.3.2 # [PNP] Allow resource auto config to assign disabled resources # # This patch updates the resource manager so that it actually assigns # disabled resources when they are requested by the device. # -------------------------------------------- # 03/07/03 ambx1@neo.rr.com 1.1360.3.3 # [PNP] Fix manual resource setting API # # This patch corrects a trivial thinko in the manual resource api. # -------------------------------------------- # 03/07/03 willy@debian.org 1.1363 # [PATCH] PCI: Improve documentation # Fix some grammar problems # Add a note about Fast Back to Back support # Change the slot_name recommendation to pci_name(). 
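# (Illustrative aside, not from the original patch: a kernel-style fragment
# showing the renamed softirq API from changeset 1.1360.2.23 above.
# NET_TX_SOFTIRQ is just an example softirq number.)
#
#     #include <linux/interrupt.h>
#
#     /* renamed API, per the changeset above:
#      *   cpu_raise_softirq(cpu, nr)   -> raise_softirq_irqoff(nr)
#      *   __cpu_raise_softirq(cpu, nr) -> __raise_softirq_irqoff(nr)
#      * raise_softirq() keeps its name and disables interrupts itself. */
#     static void kick_tx(void)
#     {
#             raise_softirq(NET_TX_SOFTIRQ);
#     }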
# -------------------------------------------- # 03/07/03 willy@debian.org 1.1364 # [PATCH] PCI: arch/i386/pci/direct.c can use __init, not __devinit # pci_sanity_check() is only called from functions marked __init, so it # can be __init too. # -------------------------------------------- # 03/07/03 willy@debian.org 1.1365 # [PATCH] PCI: pci_find_bus needs a domain # Give pci_find_bus a domain argument and move its declaration to # -------------------------------------------- # 03/07/03 willy@debian.org 1.1366 # [PATCH] PCI: Remove pci_bus_exists # Convert all callers of pci_bus_exists() to call pci_find_bus() instead. # Since all callers of pci_find_bus() are __init or __devinit, mark it as # __devinit too. # -------------------------------------------- # 03/07/03 willy@debian.org 1.1367 # [PATCH] PCI: arch/i386/pci/irq.c should use pci_find_bus # Use pci_find_bus rather than relying on the return value of pci_scan_bus. # -------------------------------------------- # 03/07/03 willy@debian.org 1.1368 # [PATCH] PCI: arch/i386/pci/legacy.c: use raw_pci_ops # Make pcibios_fixup_peer_bridges() use raw_pci_ops directly instead of # faking pci_bus and pci_dev. # -------------------------------------------- # 03/07/03 willy@debian.org 1.1369 # [PATCH] PCI config space in sysfs # - Fix a couple of bugs in sysfs's handling of binary files (my fault). # - Implement pci config space reads and writes in sysfs # -------------------------------------------- # 03/07/03 willy@debian.org 1.1370 # [PATCH] Driver Core: fix firmware binary files # Fixes the sysfs binary file bug. # -------------------------------------------- # 03/07/03 greg@kroah.com 1.1371 # [PATCH] sysfs: change print() to pr_debug() to not annoy everyone. # -------------------------------------------- # 03/07/03 torvalds@home.osdl.org 1.1360.2.28 # Merge bk://linux-pnp.bkbits.net/pnp-2.5 # into home.osdl.org:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/07/03 greg@kroah.com 1.1372 # [PATCH] SYSFS: add module referencing to sysfs attribute files. # -------------------------------------------- # 03/07/03 johnstul@us.ibm.com 1.1373 # [PATCH] jiffies include fix # This patch fixes a bad declaration of jiffies in timer_tsc.c and # timer_cyclone.c, replacing it with the proper usage of jiffies.h. # Caught by gregkh. # -------------------------------------------- # 03/07/03 greg@kroah.com 1.1374 # [PATCH] sysfs: add sysfs_rename_dir() # Based on a patch written by Dan Aloni # -------------------------------------------- # 03/07/03 greg@kroah.com 1.1375 # [PATCH] kobject: add kobject_rename() # Based on a patch written by Dan Aloni # -------------------------------------------- # 03/07/03 greg@kroah.com 1.1376 # [PATCH] driver core: added class_device_rename() # Based on a patch written by Dan Aloni # -------------------------------------------- # 03/07/03 greg@kroah.com 1.1377 # driver core: add my copyright to class.c # -------------------------------------------- # 03/07/03 greg@kroah.com 1.1378 # Merge kroah.com:/home/linux/BK/bleed-2.5 # into kroah.com:/home/linux/BK/pci-2.5 # -------------------------------------------- # 03/07/03 torvalds@home.osdl.org 1.1360.2.29 # Add an asynchronous buffer read-ahead facility. Nobody # uses it for now, but I needed it for some tuning tests, # and it is potentially useful for others. 
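# (Illustrative aside, not from the original patch: with changesets 1.1365
# and 1.1366 above, pci_find_bus() takes a domain number and replaces
# pci_bus_exists(). A minimal kernel-style fragment, assuming domain 0;
# the helper name is made up for illustration.)
#
#     #include <linux/pci.h>
#
#     /* former pci_bus_exists(busnr) callers become roughly: */
#     static int bus_exists(int busnr)
#     {
#             return pci_find_bus(0 /* domain */, busnr) != NULL;
#     }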
# -------------------------------------------- # 03/07/03 torvalds@home.osdl.org 1.1360.2.30 # Re-organize "ext3_get_inode_loc()" and make it easier to # follow by splitting it into two functions: one that calculates # the position, and the other that actually reads the inode # block off the disk. # -------------------------------------------- # 03/07/03 torvalds@home.osdl.org 1.1379 # Merge bk://kernel.bkbits.net/gregkh/linux/pci-2.5 # into home.osdl.org:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/07/03 jgarzik@pobox.com 1.1380 # [PATCH] fix via irq routing # Via irq routing has a funky PIRQD location. I checked my datasheets # and, yep, this is correct all the way back to via686a. # This bug existed for _ages_. I wonder if I created it, even... # -------------------------------------------- # 03/07/04 trond.myklebust@fys.uio.no 1.1381 # [PATCH] Add open intent information to the 'struct nameidata' # # - Add open intent information to the 'struct nameidata'. # - Pass the struct nameidata as an optional parameter to the # lookup() inode operation. # - Pass the struct nameidata as an optional parameter to the # d_revalidate() dentry operation. # - Make link_path_walk() set the LOOKUP_CONTINUE flag in nd->flags instead # of passing it as an extra parameter to d_revalidate(). # - Make open_namei(), and sys_uselib() set the open()/create() intent # data. # -------------------------------------------- # 03/07/04 trond.myklebust@fys.uio.no 1.1382 # [PATCH] Pass 'nameidata' to ->create() # # - Make the VFS pass the struct nameidata as an optional argument # to the create inode operation. # - Patch vfs_create() to take a struct nameidata as an optional # argument. # -------------------------------------------- # 03/07/04 trond.myklebust@fys.uio.no 1.1383 # [PATCH] Pass 'nameidata' to ->permission() # # - Make the VFS pass the struct nameidata as an optional parameter # to the permission() inode operation. # # - Patch may_create()/may_open() so it passes the struct nameidata from # vfs_create()/open_namei() as an argument to permission(). # # - Add an intent flag for the sys_access() function. # -------------------------------------------- # 03/07/04 trond.myklebust@fys.uio.no 1.1384 # [PATCH] Use the intents in 'nameidata' to improve NFS close-to-open consistency # # - Make use of the open intents to improve close-to-open # cache consistency. Only force data cache revalidation when # we're doing an open(). # # - Add true exclusive create to NFSv3. # # - Optimize away the redundant ->lookup() to check for an # existing file when we know that we're doing NFSv3 exclusive # create. # # - Optimize away all ->permission() checks other than those for # path traversal, open(), and sys_access(). # -------------------------------------------- # 03/07/04 torvalds@home.osdl.org 1.1385 # Carl-Daniel Hailfinger suggest adding a paranoid incoming # trigger as per the "bk help triggers" suggestion, so that # we'll see any new triggers showing up in the tree. # # Make it so. # -------------------------------------------- # 03/07/04 torvalds@home.osdl.org 1.1386 # Merge bk://kernel.bkbits.net/jgarzik/irda-2.5 # into home.osdl.org:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/07/04 mzyngier@freesurf.fr 1.1387 # [PATCH] EISA: core changes # # - Now reserves I/O ranges according to EISA specs (four 256 bytes # regions instead of a single 4KB region). 
# # - By default, do not try to probe the bus if the mainboard does not # seem to support EISA (allow this behaviour to be changed through a # command-line option). # # - Use parent bridge device dma_mask as default for each discovered # device. # # - Allow devices to be enabled or disabled from the kernel command line # (useful for non-x86 platforms where the firmware simply disables # devices it doesn't know about...). # -------------------------------------------- # 03/07/04 mzyngier@freesurf.fr 1.1388 # [PATCH] EISA: Documentation update # -------------------------------------------- # 03/07/04 mzyngier@freesurf.fr 1.1389 # [PATCH] EISA: More EISA ids # -------------------------------------------- # 03/07/04 mzyngier@freesurf.fr 1.1390 # [PATCH] EISA: PA-RISC changes # # - Probe the right number of EISA slots on PA-RISC. No more, no less. # -------------------------------------------- # 03/07/04 mzyngier@freesurf.fr 1.1391 # [PATCH] EISA: PCI-EISA dma_mask # # - Use parent bridge device dma_mask as default for each discovered # device. # -------------------------------------------- # 03/07/04 mzyngier@freesurf.fr 1.1392 # [PATCH] EISA: avoid unnecessary probing # # - By default, do not try to probe the bus if the mainboard does not # seem to support EISA (allow this behaviour to be changed through a # command-line option). # -------------------------------------------- # 03/07/04 torvalds@home.osdl.org 1.1393 # Go back to defaulting to 6-byte commands for MODE SENSE, # since some drivers seem to be unhappy about the 10-byte # version. # # The subsystem configuration can override this (eg USB or # ide-scsi). # -------------------------------------------- # 03/07/04 torvalds@home.osdl.org 1.1394 # When forcing through a signal for some thread-synchronous # event (ie SIGSEGV, SIGFPE etc that happens as a result of a # trap as opposed to an external event), if the signal is # blocked we will not invoke a signal handler, we will just # kill the thread with the signal. # # This is equivalent to what we do in the SIG_IGN case: you # cannot ignore or block synchronous signals, and if you try, # we'll just have to kill you. # # We don't want to handle endless recursive faults, which the # old behaviour easily led to if the stack was bad, for example. # -------------------------------------------- # 03/07/04 drepper@redhat.com 1.1395 # [PATCH] wrong pid in siginfo_t # # If a signal is sent via kill() or tkill() the kernel fills in the wrong # PID value in the siginfo_t structure (obviously only if the handler has # SA_SIGINFO set). # # POSIX specifies that the si_pid field is filled with the process ID, and # in Linux parlance that's the "thread group" ID, not the thread ID. # -------------------------------------------- # 03/07/05 paulus@samba.org 1.1394.1.1 # Merge samba.org:/home/paulus/kernel/linux-2.5 # into samba.org:/home/paulus/kernel/for-linus-ppc # -------------------------------------------- # 03/07/05 torvalds@home.osdl.org 1.1396 # Merge bk://ppc.bkbits.net/for-linus-ppc # into home.osdl.org:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1397 # [PATCH] PCI domain scanning fix # # From: Matthew Wilcox # # ppc64 oopses on boot because pci_scan_bus_parented() is unexpectedly # returning NULL. Change pci_scan_bus_parented() to correctly handle # overlapping PCI bus numbers on different domains.
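# (Illustrative aside, not from the original patch: a small standalone
# program showing the si_pid behaviour discussed in changeset 1.1395 above.
# With SA_SIGINFO, si_pid should carry the sending process's ID, i.e. its
# thread group ID.)
#
#     #include <signal.h>
#     #include <stdio.h>
#     #include <string.h>
#     #include <unistd.h>
#
#     static void handler(int sig, siginfo_t *info, void *ctx)
#     {
#             /* printf is not async-signal-safe; fine for a demo only */
#             printf("signal %d sent by pid %d\n", sig, (int)info->si_pid);
#     }
#
#     int main(void)
#     {
#             struct sigaction sa;
#
#             memset(&sa, 0, sizeof(sa));
#             sa.sa_sigaction = handler;
#             sa.sa_flags = SA_SIGINFO;
#             sigaction(SIGUSR1, &sa, NULL);
#
#             kill(getpid(), SIGUSR1);   /* si_pid should be our own pid */
#             return 0;
#     }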
# -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1398 # [PATCH] ipc semaphore optimization # # From: "Chen, Kenneth W" # # This patch proposes a performance fix for the current IPC semaphore # implementation. # # There are two shortcomings in the current implementation: # try_atomic_semop() was called two times to wake up a blocked process, # once from the update_queue() (executed from the process that wakes up # the sleeping process) and once in the retry part of the blocked process # (executed from the blocked process that gets woken up). # # A second issue is that when several sleeping processes are eligible # for wake-up, they are woken up in daisy-chain formation, each one in turn # waking up the next process in line. However, every time a process # wakes up, it starts scanning the wait queue from the beginning, not from # where it was last scanned. This causes a large amount of unnecessary # scanning of the wait queue when the queue is deep. # Blocked processes come and go, but chances are there are still quite a # few blocked processes sitting at the beginning of that queue. # # What we are proposing here is to merge the portion of the code in the # bottom part of sys_semtimedop() (code that gets executed when a sleeping # process gets woken up) into the update_queue() function. The benefit is # twofold: (1) it reduces redundant calls to try_atomic_semop() and (2) it # increases the efficiency of finding eligible processes to wake up, giving # higher concurrency for multiple wake-ups. # # We have measured that this patch improves throughput for a large # application significantly on an industry-standard benchmark. # # This patch is relative to 2.5.72. Any feedback is very much # appreciated. # # Some kernel profile data attached: # # Kernel profile before optimization: # ----------------------------------------------- # 0.05 0.14 40805/529060 sys_semop [133] # 0.55 1.73 488255/529060 ia64_ret_from_syscall # [2] # [52] 2.5 0.59 1.88 529060 sys_semtimedop [52] # 0.05 0.83 477766/817966 schedule_timeout [62] # 0.34 0.46 529064/989340 update_queue [61] # 0.14 0.00 1006740/6473086 try_atomic_semop [75] # 0.06 0.00 529060/989336 ipcperms [149] # ----------------------------------------------- # # 0.30 0.40 460276/989340 semctl_main [68] # 0.34 0.46 529064/989340 sys_semtimedop [52] # [61] 1.5 0.64 0.87 989340 update_queue [61] # 0.75 0.00 5466346/6473086 try_atomic_semop [75] # 0.01 0.11 477676/576698 wake_up_process [146] # ----------------------------------------------- # 0.14 0.00 1006740/6473086 sys_semtimedop [52] # 0.75 0.00 5466346/6473086 update_queue [61] # [75] 0.9 0.89 0.00 6473086 try_atomic_semop [75] # ----------------------------------------------- # # Kernel profile with optimization: # # ----------------------------------------------- # 0.03 0.05 26139/503178 sys_semop [155] # 0.46 0.92 477039/503178 ia64_ret_from_syscall # [2] # [61] 1.2 0.48 0.97 503178 sys_semtimedop [61] # 0.04 0.79 470724/784394 schedule_timeout [62] # 0.05 0.00 503178/3301773 try_atomic_semop [109] # 0.05 0.00 503178/930934 ipcperms [149] # 0.00 0.03 32454/460210 update_queue [99] # ----------------------------------------------- # 0.00 0.03 32454/460210 sys_semtimedop [61] # 0.06 0.36 427756/460210 semctl_main [75] # [99] 0.4 0.06 0.39 460210 update_queue [99] # 0.30 0.00 2798595/3301773 try_atomic_semop [109] # 0.00 0.09 470630/614097 wake_up_process [146] # ----------------------------------------------- # 0.05 0.00 503178/3301773 sys_semtimedop [61] # 0.30 0.00
2798595/3301773 update_queue [99] # [109] 0.3 0.35 0.00 3301773 try_atomic_semop [109] # ----------------------------------------------- # # The number of function calls to both try_atomic_semop() and update_queue() # is reduced by 50% as a result of the merge. Execution time of # sys_semtimedop is reduced because of the reduction in the low level # functions. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1399 # [PATCH] bring back the batch_requests function # # From: Nick Piggin # # The batch_requests function got lost during the merge of the dynamic request # allocation patch. # # We need it for the anticipatory scheduler - when the number of threads # exceeds the number of requests, the anticipated-upon task will undesirably # sleep in get_request_wait(). # # And apparently some block devices which use small requests need it so they # string a decent number together. # # Jens has acked this patch. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1400 # [PATCH] Create `kblockd' workqueue # # keventd is inappropriate for running block request queues because keventd # itself can get blocked on disk I/O. Via call_usermodehelper()'s vfork and, # presumably, GFP_KERNEL allocations. # # So create a new gang of kernel threads whose mandate is for running low-level # disk operations. It must never block on disk IO, so any memory allocations # should be GFP_NOIO. # # We mainly use it for running unplug operations from interrupt context. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1401 # [PATCH] elv_may_queue() API function # # Introduces the elv_may_queue() predicate with which the IO scheduler may tell # the generic request layer that we may add another request to this queue. # # It is used by the CFQ elevator. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1402 # [PATCH] elevator completion API # # From: Nick Piggin # # Introduces an elevator_completed_req() callback with which the generic # queueing layer may tell an IO scheduler that a particular request has # finished. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1403 # [PATCH] anticipatory I/O scheduler # # From: Nick Piggin # # This is the core anticipatory IO scheduler. There are nearly 100 changesets # in this and five months' work. I really cannot describe it fully here. # # Major points: # # - It works by recognising that reads are dependent: we don't know where the # next read will occur, but it's probably close by the previous one. So once # a read has completed we leave the disk idle, anticipating that a request # for a nearby read will come in. # # - There is read batching and write batching logic. # # - when we're servicing a batch of writes we will refuse to seek away # for a read for some tens of milliseconds. Then the write stream is # preempted. # # - when we're servicing a batch of reads (via anticipation) we'll do # that for some tens of milliseconds, then preempt. # # - There are request deadlines, for latency and fairness. # The oldest outstanding request is examined at regular intervals. If # this request is older than a specific deadline, it will be the next # one dispatched. This gives a good fairness heuristic while being simple # because processes tend to have localised IO. # # # Just about all of the rest of the complexity involves an array of fixups # which prevent most of the obvious failure modes with anticipation: trying to # not leave the disk head pointlessly idle.
Some of these algorithms are: # # - Process tracking. If the process whose read we are anticipating submits # a write, abandon anticipation. # # - Process exit tracking. If the process whose read we are anticipating # exits, abandon anticipation. # # - Process IO history. We accumulate statistical info on the process's # recent IO patterns to aid in making decisions about how long to anticipate # new reads. # # Currently thinktime and seek distance are tracked. Thinktime is the # time between when a process's last request has completed and when it # submits another one. Seek distance is simply the number of sectors # between each read request. If either statistic becomes too high, # it isn't anticipated that the process will submit another read. # # The above all means that we need a per-process "io context". This is a fully # refcounted structure. In this patch it is AS-only. Later we generalise it a # little so other IO schedulers could use the same framework. # # - Requests are grouped as synchronous and asynchronous whereas the deadline # scheduler groups requests as reads and writes. This can provide better # sync write performance, and may give better responsiveness with journalling # filesystems (although we haven't done that yet). # # We currently detect synchronous writes by nastily setting PF_SYNCWRITE in # current->flags. The plan is to remove this later, and to propagate the # sync hint from writeback_control.sync_mode into bio->bi_flags thence into # request->flags. Once that is done, direct-io needs to set the BIO sync # hint as well. # # - Quite a bit of complexity has also gone into bashing TCQ into # submission. Timing for a read batch is not started until the first read # request actually completes. A read batch also does not start until all # outstanding writes have completed. # # AS is the default IO scheduler. deadline may be chosen by booting with # "elevator=deadline". # # There are a few reasons for retaining deadline: # # - AS is often slower than deadline in random IO loads with large TCQ # windows. The usual real world task here is OLTP database loads. # # - deadline is presumably more stable. # # - deadline is much simpler. # # # # The tunable per-queue entries under /sys/block/*/iosched/ are all in # milliseconds: # # * read_expire # # Controls how long until a request becomes "expired". # # It also controls the interval between which expired requests are served, # so if set to 50, a request might take anywhere < 100ms to be serviced _if_ it # is the next on the expired list. # # Obviously it can't make the disk go faster. The result is basically the # timeslice a reader gets in the presence of other IO. 100*((seek time / # read_expire) + 1) is very roughly the % streaming read efficiency your disk # should get in the presence of multiple readers. # # * read_batch_expire # # Controls how much time a batch of reads is given before pending writes # are served. A higher value is more efficient. Shouldn't really be below # read_expire. # # * write_ versions of the above # # * antic_expire # # Controls the maximum amount of time we can anticipate a good read before # giving up. Many other factors may cause anticipation to be stopped early, # or some processes will not be "anticipated" at all. Should be a bit higher # for big seek time devices though not a linear correspondence - most # processes have only a few ms thinktime.
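# (Illustrative aside, not from the original patch: the iosched tunables
# listed above live in sysfs and are written as plain text. A standalone C
# sketch follows; the device name "hda" and the value 10 are made-up
# examples.)
#
#     #include <stdio.h>
#
#     int main(void)
#     {
#             /* "hda" and 10 (milliseconds) are example values only */
#             FILE *f = fopen("/sys/block/hda/iosched/antic_expire", "w");
#
#             if (!f) {
#                     perror("antic_expire");
#                     return 1;
#             }
#             fprintf(f, "10\n");
#             fclose(f);
#             return 0;
#     }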
# -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1404 # [PATCH] Use kblockd for running request queues # # Using keventd for running request_fns is risky because keventd itself can # block on disk I/O. Use the new kblockd kernel threads for the generic # unplugging. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1405 # [PATCH] per queue nr_requests # # From: Nick Piggin # # This gets rid of the global queue_nr_requests and usage of BLKDEV_MAX_RQ # (the latter is now only used to set the queues' defaults). # # The queue depth becomes per-queue, controlled by a sysfs entry. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1406 # [PATCH] blk_congestion_wait threshold cleanup # # From: Nick Piggin # # Now that we are counting requests (not requests free), this patch changes # the congested & batch watermarks to be more logical. Also a minor fix to # the sysfs code. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1407 # [PATCH] allow the IO scheduler to pass an allocation hint to # # From: Nick Piggin # # # This patch implements a hint so that AS can tell the request allocator to # allocate a request even if there are none left (the accounting is quite # flexible and easily handles overallocations). # # elv_may_queue semantics have changed from "the elevator does _not_ want # another request allocated" to "the elevator _insists_ that another request is # allocated". I couldn't see any harm ;) # # Now in practice, AS will only allow _1_ request over the limit, because as # soon as the request is sent to AS, it stops anticipating. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1408 # [PATCH] handle OOM in get_request_wait() # # From: Nick Piggin # # If there are no requests in flight against the target device and # get_request() fails, nothing will wake us up. Fix. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1409 # [PATCH] block batching fairness # # From: Nick Piggin # # This patch fixes the request batching fairness/starvation issue. It's not # clear what is going on with 2.4, but it seems that it's a problem around this # area. # # Anyway, previously: # # * request queue fills up # * process 1 calls get_request, sleeps # * a couple of requests are freed # * process 2 calls get_request, proceeds # * a couple of requests are freed # * process 2 calls get_request... # # Now as unlikely as it seems, it could be a problem. It's a fairness problem # that process 2 can skip ahead of process 1 anyway. # # With the patch: # # * request queue fills up # * any process calling get_request will sleep # * once the queue gets below the batch watermark, processes # start being woken, and may allocate. # # # This patch includes Chris Mason's fix to only clear queue_full when all tasks # have been woken. Previously I think starvation and unfairness could still # occur. # # With this change to the blk-fair-batches patch, Chris is showing some much # improved numbers for 2.4 - 170 ms max wait vs 2700ms without blk-fair-batches # for a dbench 90 run. He didn't indicate how much difference his patch alone # made, but it is an important fix I think. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1410 # [PATCH] generic io contexts # # From: Nick Piggin # # Generalise the AS-specific per-process IO context so that other IO schedulers # could use it.
# -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1411 # [PATCH] block request batching # # From: Nick Piggin # # The following patch gets batching working how it should be. # # After a process is woken up, it is allowed to allocate up to 32 requests # for 20ms. It does not stop other processes submitting requests if it isn't # submitting though. This should allow less context switches, and allow # batches of requests from each process to be sent to the io scheduler # instead of 1 request from each process. # # tiobench sequential writes are more than tripled, random writes are nearly # doubled over mm1. In earlier tests I generally saw better CPU efficiency # but it doesn't show here. There is still debug to be taken out. Its also # only on UP. # # Avg Maximum Lat% Lat% CPU # Identifier Rate (CPU%) Latency Latency >2s >10s Eff # ------------------- ------ --------- ---------- ------- ------ ---- # -2.5.71-mm1 11.13 3.783% 46.10 24668.01 0.84 0.02 294 # +2.5.71-mm1 13.21 4.489% 37.37 5691.66 0.76 0.00 294 # # Random Reads # ------------------- ------ --------- ---------- ------- ------ ---- # -2.5.71-mm1 0.97 0.582% 519.86 6444.66 11.93 0.00 167 # +2.5.71-mm1 1.01 0.604% 484.59 6604.93 10.73 0.00 167 # # Sequential Writes # ------------------- ------ --------- ---------- ------- ------ ---- # -2.5.71-mm1 4.85 4.456% 77.80 99359.39 0.18 0.13 109 # +2.5.71-mm1 14.11 14.19% 10.07 22805.47 0.09 0.04 99 # # Random Writes # ------------------- ------ --------- ---------- ------- ------ ---- # -2.5.71-mm1 0.46 0.371% 14.48 6173.90 0.23 0.00 125 # +2.5.71-mm1 0.86 0.744% 24.08 8753.66 0.31 0.00 115 # # It decreases context switch rate on IBM's 8-way on ext2 tiobench 64 threads # from ~2500/s to ~140/s on their regression tests. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1412 # [PATCH] get_io_context fixes # # - pass gfp_flags to get_io_context(): not all callers are forced to use # GFP_ATOMIC(). # # - fix locking in get_io_context(): bump the refcount whilein the exclusive # region. # # - don't go oops in get_io_context() if the kmalloc failed. # # - in as_get_io_context(): fail the whole thing if we were unable to # allocate the AS-specific part. # # - as_remove_queued_request() cleanup # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1413 # [PATCH] block allocation comments # # From: Nick Piggin # # Add some comments to the request allocation code. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1414 # [PATCH] after exec_mmap(), exec cannot fail # # If de_thread() fails in flush_old_exec() then we try to fail the execve(). # # That is a bad move, because exec_mmap() has already switched the current # process over to the new mm. The new process is not yet sufficiently set up # to handle the error and the kernel doublefaults and dies. exec_mmap() is the # point of no return. # # Change flush_old_exec() to call de_thread() before running exec_mmap() so the # execing program sees the error. I added fault injection to both de_thread() # and exec_mmap() - everything now survives OK. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1415 # [PATCH] bootmem.c cleanups # # From: Davide Libenzi # # - Remove a couple of impossible debug checks (unsigneds cannot be # negative!) # # - If __alloc_bootmem_core() fails with a goal and unaligned node_boot_start # it'll loop fovever. 
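# (Illustrative aside, not from the original patch: the "impossible debug
# checks" removed by the bootmem cleanup above compare unsigned quantities
# against zero. A trivial standalone demonstration:)
#
#     #include <stdio.h>
#
#     int main(void)
#     {
#             unsigned long offset = 0;
#
#             /* Always false: an unsigned value cannot be negative, so such
#              * checks in bootmem.c were dead code (gcc will even warn that
#              * the comparison is always false). */
#             if (offset < 0)
#                     printf("never printed\n");
#             else
#                     printf("unsigned \"< 0\" checks are dead code\n");
#             return 0;
#     }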
# -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1416 # [PATCH] epoll: microoptimisations # # From: Davide Libenzi # # - Inline eventpoll_release() so that __fput() does not need to call in # epoll code if the file itself is not registered inside an epoll fd # # - Add inclusion due __u32 and __u64 usage # # - Fix debug printf that would otherwise panic if enabled with the new # epoll code # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1417 # [PATCH] fix current->user->__count leak # # From: Arvind Kandhare # # When switch_uid is called, the reference count of the new user is # incremented twice. I think the increment in the switch_uid is done because # of the reparent_to_init() function which does not increase the __count for # root user. # # But if switch_uid is called from any other function, the reference count is # already incremented by the caller by calling alloc_uid for the new user. # Hence the count is incremented twice. The user struct will not be deleted # even when there are no processes holding a reference count for it. This # does not cause any problem currently because nothing is dependent on timely # deletion of the user struct. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1418 # [PATCH] MTD build fix for old gcc's # # From: junkio@cox.net # # Sigh. Is there a gcc option to tell it to not accept this incompatible C99 # extension? # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1419 # [PATCH] fix rfcomm oops # # From: ilmari@ilmari.org (Dagfinn Ilmari Mannsaker) # # It turns out that net/bluetooth/rfcomm/sock.c (and # net/bluetooth/hci_sock.c) had been left out when net_proto_family gained an # owner field, here's a patch that fixes them both. # -------------------------------------------- # 03/07/05 akpm@osdl.org 1.1420 # [PATCH] i2o_scsi build fix # # i2o_scsi.c now needs pci.h. # -------------------------------------------- # 03/07/05 khc@pm.waw.pl 1.1421 # [PATCH] C99 initializers in hdlc_generic.c # -------------------------------------------- # diff -Nru a/Documentation/eisa.txt b/Documentation/eisa.txt --- a/Documentation/eisa.txt Sat Jul 5 12:40:32 2003 +++ b/Documentation/eisa.txt Sat Jul 5 12:40:32 2003 @@ -46,12 +46,14 @@ to this device, as well as some parameters for probing purposes. struct eisa_root_device { - struct list_head node; - struct device *dev; /* Pointer to bridge device */ - struct resource *res; - unsigned long bus_base_addr; - int slots; /* Max slot number */ - int bus_nr; /* Set by eisa_root_register */ + struct device *dev; /* Pointer to bridge device */ + struct resource *res; + unsigned long bus_base_addr; + int slots; /* Max slot number */ + int force_probe; /* Probe even when no slot 0 */ + u64 dma_mask; /* from bridge device */ + int bus_nr; /* Set by eisa_root_register */ + struct resource eisa_root_res; /* ditto */ }; node : used for eisa_root_register internal purpose @@ -59,6 +61,8 @@ res : root device I/O resource bus_base_addr : slot 0 address on this bus slots : max slot number to probe +force_probe : Probe even when slot 0 is empty (no EISA mainboard) +dma_mask : Default DMA mask. Usualy the bridge device dma_mask. bus_nr : unique bus id, set by eisa_root_register ** Driver : @@ -87,7 +91,7 @@ Documentation/driver-model/driver.txt. Only .name, .probe and .remove members are mandatory. 
-An example is the 3c509 driver : +An example is the 3c59x driver : static struct eisa_device_id vortex_eisa_ids[] = { { "TCM5920", EISA_3C592_OFFSET }, @@ -116,15 +120,20 @@ struct eisa_device { struct eisa_device_id id; int slot; - unsigned long base_addr; - struct resource res; + int state; + unsigned long base_addr; + struct resource res[EISA_MAX_RESOURCES]; + u64 dma_mask; struct device dev; /* generic device */ }; id : EISA id, as read from device. id.driver_data is set from the matching driver EISA id. slot : slot number which the device was detected on -res : I/O resource allocated to this device +state : set of flags indicating the state of the device. Current + flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED. +res : set of four 256 bytes I/O regions allocated to this device +dma_mask: DMA mask set from the parent device. dev : generic device (see Documentation/driver-model/device.txt) You can get the 'struct eisa_device' from 'struct device' using the @@ -140,6 +149,32 @@ Gets the pointer previously stored into the device's driver_data area. +int eisa_get_region_index (void *addr); + +Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given +address. + +** Kernel parameters : + +eisa_bus.enable_dev : + +A comma-separated list of slots to be enabled, even if the firmware +set the card as disabled. The driver must be able to properly +initialize the device in such conditions. + +eisa_bus.disable_dev : + +A comma-separated list of slots to be enabled, even if the firmware +set the card as enabled. The driver won't be called to handle this +device. + +virtual_root.force_probe : + +Force the probing code to probe EISA slots even when it cannot find an +EISA compliant mainboard (nothing appears on slot 0). Defaultd to 0 +(don't force), and set to 1 (force probing) when either +CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set. + ** Random notes : Converting an EISA driver to the new API mostly involves *deleting* @@ -156,10 +191,13 @@ expect to have explored the whole machine when they exit their probe routine. +For example, switching your favorite EISA SCSI card to the "hotplug" +model is "the right thing"(tm). + ** Thanks : I'd like to thank the following people for their help : - Xavier Benigni for lending me a wonderful Alpha Jensen, - James Bottomley, Jeff Garzik for getting this stuff into the kernel, - Andries Brouwer for contributing numerous EISA ids, -- Catrin Jones for coping with too many machines at home +- Catrin Jones for coping with far too many machines at home. diff -Nru a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking --- a/Documentation/filesystems/Locking Sat Jul 5 12:40:32 2003 +++ b/Documentation/filesystems/Locking Sat Jul 5 12:40:32 2003 @@ -318,7 +318,7 @@ ioctl: yes (see below) mmap: no open: maybe (see below) -flush: yes +flush: no release: no fsync: yes (see below) fasync: yes (see below) diff -Nru a/Documentation/pci.txt b/Documentation/pci.txt --- a/Documentation/pci.txt Sat Jul 5 12:40:31 2003 +++ b/Documentation/pci.txt Sat Jul 5 12:40:31 2003 @@ -7,14 +7,14 @@ Different PCI devices have different requirements and different bugs -- because of this, the PCI support layer in Linux kernel is not as trivial as one would wish. This short pamphlet tries to help all potential driver -authors to find their way through the deep forests of PCI handling. +authors find their way through the deep forests of PCI handling. 0. 
Structure of PCI drivers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ There exist two kinds of PCI drivers: new-style ones (which leave most of probing for devices to the PCI layer and support online insertion and removal -of devices [thus supporting PCI, hot-pluggable PCI and CardBus in single +of devices [thus supporting PCI, hot-pluggable PCI and CardBus in a single driver]) and old-style ones which just do all the probing themselves. Unless you have a very good reason to do so, please don't use the old way of probing in any new code. After the driver finds the devices it wishes to operate @@ -174,7 +174,7 @@ the latency timer value if it's set to something bogus by the BIOS. If you want to use the PCI Memory-Write-Invalidate transaction, -call pci_set_mwi(). This enables bit PCI_COMMAND bit for Mem-Wr-Inval +call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval and also ensures that the cache line size register is set correctly. Make sure to check the return value of pci_set_mwi(), not all architectures may support Memory-Write-Invalidate. @@ -236,7 +236,7 @@ 7. Miscellaneous hints ~~~~~~~~~~~~~~~~~~~~~~ When displaying PCI slot names to the user (for example when a driver wants -to tell the user what card has it found), please use pci_dev->slot_name +to tell the user what card has it found), please use pci_name(pci_dev) for this purpose. Always refer to the PCI devices by a pointer to the pci_dev structure. @@ -247,6 +247,10 @@ If you're going to use PCI bus mastering DMA, take a look at Documentation/DMA-mapping.txt. + +Don't try to turn on Fast Back to Back writes in your driver. All devices +on the bus need to be capable of doing it, so this is something which needs +to be handled by platform and generic code, not individual drivers. 8. Obsolete functions diff -Nru a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c --- a/arch/alpha/mm/numa.c Sat Jul 5 12:40:31 2003 +++ b/arch/alpha/mm/numa.c Sat Jul 5 12:40:31 2003 @@ -338,7 +338,7 @@ lmem_map = node_mem_map(nid); pfn = NODE_DATA(nid)->node_start_pfn; - for (i = 0; i < node_size(nid); i++, pfn++) + for (i = 0; i < node_spanned_pages(nid); i++, pfn++) if (page_is_ram(pfn) && PageReserved(lmem_map+i)) reservedpages++; } @@ -372,7 +372,7 @@ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for (nid = 0; nid < numnodes; nid++) { struct page * lmem_map = node_mem_map(nid); - i = node_size(nid); + i = node_spanned_pages(nid); while (i-- > 0) { total++; if (PageReserved(lmem_map+i)) diff -Nru a/arch/arm/mm/init.c b/arch/arm/mm/init.c --- a/arch/arm/mm/init.c Sat Jul 5 12:40:31 2003 +++ b/arch/arm/mm/init.c Sat Jul 5 12:40:31 2003 @@ -79,7 +79,7 @@ struct page *page, *end; page = NODE_MEM_MAP(node); - end = page + NODE_DATA(node)->node_size; + end = page + NODE_DATA(node)->node_spanned_pages; do { total++; @@ -576,7 +576,7 @@ for (node = 0; node < numnodes; node++) { pg_data_t *pgdat = NODE_DATA(node); - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); } diff -Nru a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c --- a/arch/arm26/mm/init.c Sat Jul 5 12:40:32 2003 +++ b/arch/arm26/mm/init.c Sat Jul 5 12:40:32 2003 @@ -68,7 +68,7 @@ page = NODE_MEM_MAP(0); - end = page + NODE_DATA(0)->node_size; + end = page + NODE_DATA(0)->node_spanned_pages; do { total++; @@ -353,7 +353,7 @@ max_mapnr = virt_to_page(high_memory) - mem_map; /* this will put all unused low memory onto the freelists */ - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += 
free_all_bootmem_node(pgdat); printk(KERN_INFO "Memory:"); diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig --- a/arch/i386/Kconfig Sat Jul 5 12:40:31 2003 +++ b/arch/i386/Kconfig Sat Jul 5 12:40:31 2003 @@ -1339,6 +1339,14 @@ best used in conjunction with the NMI watchdog so that spinlock deadlocks are also debuggable. +config DEBUG_PAGEALLOC + bool "Page alloc debugging" + depends on DEBUG_KERNEL + help + Unmap pages from the kernel linear mapping after free_pages(). + This results in a large slowdown, but helps to find certain types + of memory corruptions. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM diff -Nru a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c --- a/arch/i386/kernel/cpu/common.c Sat Jul 5 12:40:31 2003 +++ b/arch/i386/kernel/cpu/common.c Sat Jul 5 12:40:31 2003 @@ -430,6 +430,14 @@ rise_init_cpu(); nexgen_init_cpu(); umc_init_cpu(); + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* pse is not compatible with on-the-fly unmapping, + * disable it even if the cpus claim to support it. + */ + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; +#endif } /* * cpu_init() initializes state that is per-CPU. Some data is already diff -Nru a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c --- a/arch/i386/kernel/io_apic.c Sat Jul 5 12:40:31 2003 +++ b/arch/i386/kernel/io_apic.c Sat Jul 5 12:40:31 2003 @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -2052,7 +2053,6 @@ */ static inline void check_timer(void) { - extern int timer_ack; int pin1, pin2; int vector; diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c --- a/arch/i386/kernel/time.c Sat Jul 5 12:40:32 2003 +++ b/arch/i386/kernel/time.c Sat Jul 5 12:40:32 2003 @@ -80,8 +80,7 @@ spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL(i8253_lock); -extern struct timer_opts timer_none; -struct timer_opts* timer = &timer_none; +struct timer_opts *cur_timer = &timer_none; /* * This version of gettimeofday has microsecond resolution @@ -93,14 +92,14 @@ unsigned long usec, sec; do { + unsigned long lost; + seq = read_seqbegin(&xtime_lock); - usec = timer->get_offset(); - { - unsigned long lost = jiffies - wall_jiffies; - if (lost) - usec += lost * (1000000 / HZ); - } + usec = cur_timer->get_offset(); + lost = jiffies - wall_jiffies; + if (lost) + usec += lost * (1000000 / HZ); sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); } while (read_seqretry(&xtime_lock, seq)); @@ -126,7 +125,7 @@ * wall time. Discover what correction gettimeofday() would have * made, and then undo it! 
*/ - tv->tv_nsec -= timer->get_offset() * NSEC_PER_USEC; + tv->tv_nsec -= cur_timer->get_offset() * NSEC_PER_USEC; tv->tv_nsec -= (jiffies - wall_jiffies) * TICK_NSEC; while (tv->tv_nsec < 0) { @@ -180,7 +179,7 @@ */ unsigned long long monotonic_clock(void) { - return timer->monotonic_clock(); + return cur_timer->monotonic_clock(); } EXPORT_SYMBOL(monotonic_clock); @@ -189,7 +188,8 @@ * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { @@ -259,7 +259,7 @@ */ write_seqlock(&xtime_lock); - timer->mark_offset(); + cur_timer->mark_offset(); do_timer_interrupt(irq, NULL, regs); @@ -301,16 +301,13 @@ device_initcall(time_init_device); - void __init time_init(void) { - xtime.tv_sec = get_cmos_time(); wall_to_monotonic.tv_sec = -xtime.tv_sec; xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); wall_to_monotonic.tv_nsec = -xtime.tv_nsec; - - timer = select_timer(); + cur_timer = select_timer(); time_init_hook(); } diff -Nru a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c --- a/arch/i386/kernel/timers/timer.c Sat Jul 5 12:40:32 2003 +++ b/arch/i386/kernel/timers/timer.c Sat Jul 5 12:40:32 2003 @@ -3,12 +3,6 @@ #include #include -/* list of externed timers */ -extern struct timer_opts timer_pit; -extern struct timer_opts timer_tsc; -#ifdef CONFIG_X86_CYCLONE_TIMER -extern struct timer_opts timer_cyclone; -#endif /* list of timers, ordered by preference, NULL terminated */ static struct timer_opts* timers[] = { #ifdef CONFIG_X86_CYCLONE_TIMER @@ -28,6 +22,15 @@ return 1; } __setup("clock=", clock_setup); + + +/* The chosen timesource has been found to be bad. + * Fall back to a known good timesource (the PIT) + */ +void clock_fallback(void) +{ + cur_timer = &timer_pit; +} /* iterates through the list of timers, returning the first * one that initializes successfully. 
diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c --- a/arch/i386/kernel/timers/timer_cyclone.c Sat Jul 5 12:40:32 2003 +++ b/arch/i386/kernel/timers/timer_cyclone.c Sat Jul 5 12:40:32 2003 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -18,7 +19,6 @@ #include extern spinlock_t i8253_lock; -extern unsigned long jiffies; extern unsigned long calibrate_tsc(void); /* Number of usecs that the last interrupt was delayed */ @@ -88,7 +88,7 @@ * between cyclone and pit reads (as noted when * usec delta is > 90% # of usecs/tick) */ - if (abs(delay - delay_at_last_interrupt) > (900000/HZ)) + if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) jiffies++; } diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c --- a/arch/i386/kernel/timers/timer_tsc.c Sat Jul 5 12:40:32 2003 +++ b/arch/i386/kernel/timers/timer_tsc.c Sat Jul 5 12:40:32 2003 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,6 @@ int tsc_disable __initdata = 0; extern spinlock_t i8253_lock; -extern unsigned long jiffies; static int use_tsc; /* Number of usecs that the last interrupt was delayed */ @@ -124,6 +124,7 @@ int countmp; static int count1 = 0; unsigned long long this_offset, last_offset; + static int lost_count = 0; write_lock(&monotonic_lock); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; @@ -178,9 +179,19 @@ delta += delay_at_last_interrupt; lost = delta/(1000000/HZ); delay = delta%(1000000/HZ); - if (lost >= 2) + if (lost >= 2) { jiffies += lost-1; + /* sanity check to ensure we're not always loosing ticks */ + if (lost_count++ > 100) { + printk(KERN_WARNING "Loosing too many ticks!\n"); + printk(KERN_WARNING "TSC cannot be used as a timesource." 
+ " (Are you running with SpeedStep?)\n"); + printk(KERN_WARNING "Falling back to a sane timesource.\n"); + clock_fallback(); + } + } else + lost_count = 0; /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); @@ -194,7 +205,7 @@ * between tsc and pit reads (as noted when * usec delta is > 90% # of usecs/tick) */ - if (abs(delay - delay_at_last_interrupt) > (900000/HZ)) + if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) jiffies++; } diff -Nru a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c --- a/arch/i386/lib/delay.c Sat Jul 5 12:40:32 2003 +++ b/arch/i386/lib/delay.c Sat Jul 5 12:40:32 2003 @@ -25,7 +25,7 @@ void __delay(unsigned long loops) { - timer->delay(loops); + cur_timer->delay(loops); } inline void __const_udelay(unsigned long xloops) diff -Nru a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c --- a/arch/i386/mm/pageattr.c Sat Jul 5 12:40:31 2003 +++ b/arch/i386/mm/pageattr.c Sat Jul 5 12:40:31 2003 @@ -13,6 +13,10 @@ #include #include +static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED; +static struct list_head df_list = LIST_HEAD_INIT(df_list); + + static inline pte_t *lookup_address(unsigned long address) { pgd_t *pgd = pgd_offset_k(address); @@ -31,10 +35,15 @@ { int i; unsigned long addr; - struct page *base = alloc_pages(GFP_KERNEL, 0); + struct page *base; pte_t *pbase; + + spin_unlock_irq(&cpa_lock); + base = alloc_pages(GFP_KERNEL, 0); + spin_lock_irq(&cpa_lock); if (!base) return NULL; + address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); @@ -87,7 +96,7 @@ } static int -__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage) +__change_page_attr(struct page *page, pgprot_t prot) { pte_t *kpte; unsigned long address; @@ -123,7 +132,7 @@ } if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) { - *oldpage = kpte_page; + list_add(&kpte_page->list, &df_list); revert_page(kpte_page, address); } return 0; @@ -134,12 +143,6 @@ on_each_cpu(flush_kernel_map, NULL, 1, 1); } -struct deferred_page { - struct deferred_page *next; - struct page *fpage; -}; -static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ - /* * Change the page attributes of an page in the linear mapping. 
* @@ -156,47 +159,54 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) { int err = 0; - struct page *fpage; int i; + unsigned long flags; - down_write(&init_mm.mmap_sem); + spin_lock_irqsave(&cpa_lock, flags); for (i = 0; i < numpages; i++, page++) { - fpage = NULL; - err = __change_page_attr(page, prot, &fpage); + err = __change_page_attr(page, prot); if (err) break; - if (fpage) { - struct deferred_page *df; - df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); - if (!df) { - flush_map(); - __free_page(fpage); - } else { - df->next = df_list; - df->fpage = fpage; - df_list = df; - } - } } - up_write(&init_mm.mmap_sem); + spin_unlock_irqrestore(&cpa_lock, flags); return err; } void global_flush_tlb(void) { - struct deferred_page *df, *next_df; + LIST_HEAD(l); + struct list_head* n; - down_read(&init_mm.mmap_sem); - df = xchg(&df_list, NULL); - up_read(&init_mm.mmap_sem); + BUG_ON(irqs_disabled()); + + spin_lock_irq(&cpa_lock); + list_splice_init(&df_list, &l); + spin_unlock_irq(&cpa_lock); flush_map(); - for (; df; df = next_df) { - next_df = df->next; - if (df->fpage) - __free_page(df->fpage); - kfree(df); - } + n = l.next; + while (n != &l) { + struct page *pg = list_entry(n, struct page, list); + n = n->next; + __free_page(pg); + } } + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (PageHighMem(page)) + return; + /* the return value is ignored - the calls cannot fail, + * large pages are disabled at boot time. + */ + change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + /* we should perform an IPI and flush all tlbs, + * but that can deadlock->flush only current cpu. + */ + __flush_tlb_all(); +} +EXPORT_SYMBOL(kernel_map_pages); +#endif EXPORT_SYMBOL(change_page_attr); EXPORT_SYMBOL(global_flush_tlb); diff -Nru a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c --- a/arch/i386/mm/pgtable.c Sat Jul 5 12:40:34 2003 +++ b/arch/i386/mm/pgtable.c Sat Jul 5 12:40:34 2003 @@ -34,7 +34,7 @@ show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageHighMem(page)) diff -Nru a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c --- a/arch/i386/pci/direct.c Sat Jul 5 12:40:31 2003 +++ b/arch/i386/pci/direct.c Sat Jul 5 12:40:31 2003 @@ -177,7 +177,7 @@ * This should be close to trivial, but it isn't, because there are buggy * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. */ -static int __devinit pci_sanity_check(struct pci_raw_ops *o) +static int __init pci_sanity_check(struct pci_raw_ops *o) { u32 x = 0; int devfn; diff -Nru a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c --- a/arch/i386/pci/irq.c Sat Jul 5 12:40:34 2003 +++ b/arch/i386/pci/irq.c Sat Jul 5 12:40:34 2003 @@ -102,13 +102,12 @@ #endif busmap[e->bus] = 1; } - for(i=1; i<256; i++) - /* - * It might be a secondary bus, but in this case its parent is already - * known (ascending bus order) and therefore pci_scan_bus returns immediately. 
- */ - if (busmap[i] && pci_scan_bus(i, &pci_root_ops, NULL)) + for(i = 1; i < 256; i++) { + if (!busmap[i] || pci_find_bus(0, i)) + continue; + if (pci_scan_bus(i, &pci_root_ops, NULL)) printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); + } pcibios_last_bus = -1; } @@ -196,15 +195,16 @@ /* * The VIA pirq rules are nibble-based, like ALI, * but without the ugly irq number munging. + * However, PIRQD is in the upper instead of lower 4 bits. */ static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { - return read_config_nybble(router, 0x55, pirq); + return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); } static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - write_config_nybble(router, 0x55, pirq, irq); + write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); return 1; } diff -Nru a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c --- a/arch/i386/pci/legacy.c Sat Jul 5 12:40:32 2003 +++ b/arch/i386/pci/legacy.c Sat Jul 5 12:40:32 2003 @@ -11,40 +11,26 @@ */ static void __devinit pcibios_fixup_peer_bridges(void) { - int n; - struct pci_bus *bus; - struct pci_dev *dev; - u16 l; + int n, devfn; if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) return; DBG("PCI: Peer bridge fixup\n"); - bus = kmalloc(sizeof(*bus), GFP_ATOMIC); - dev = kmalloc(sizeof(*dev), GFP_ATOMIC); - if (!bus || !dev) { - printk(KERN_ERR "Out of memory in %s\n", __FUNCTION__); - goto exit; - } - for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) + u32 l; + if (pci_find_bus(0, n)) continue; - bus->number = n; - bus->ops = &pci_root_ops; - dev->bus = bus; - for (dev->devfn=0; dev->devfn<256; dev->devfn += 8) - if (!pci_read_config_word(dev, PCI_VENDOR_ID, &l) && + for (devfn = 0; devfn < 256; devfn += 8) { + if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, dev->devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); pci_scan_bus(n, &pci_root_ops, NULL); break; } + } } -exit: - kfree(dev); - kfree(bus); } static int __init pci_legacy_init(void) diff -Nru a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c --- a/arch/ia64/ia32/binfmt_elf32.c Sat Jul 5 12:40:32 2003 +++ b/arch/ia64/ia32/binfmt_elf32.c Sat Jul 5 12:40:32 2003 @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -177,7 +178,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff -Nru a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c --- a/arch/ia64/kernel/sys_ia64.c Sat Jul 5 12:40:32 2003 +++ b/arch/ia64/kernel/sys_ia64.c Sat Jul 5 12:40:32 2003 @@ -100,7 +100,6 @@ asmlinkage unsigned long ia64_brk (unsigned long brk) { - extern int vm_enough_memory (long pages); unsigned long rlim, retval, newbrk, oldbrk; struct mm_struct *mm = current->mm; diff -Nru a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c --- a/arch/ia64/mm/init.c Sat Jul 5 12:40:31 2003 +++ b/arch/ia64/mm/init.c Sat Jul 5 12:40:31 2003 @@ -232,7 +232,7 @@ printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { printk("Node ID: %d\n", pgdat->node_id); - for(i = 0; i < pgdat->node_size; i++) { + for(i = 0; i < pgdat->node_spanned_pages; i++) { if (PageReserved(pgdat->node_mem_map+i)) 
reserved++; else if (PageSwapCache(pgdat->node_mem_map+i)) @@ -240,7 +240,7 @@ else if (page_count(pgdat->node_mem_map + i)) shared += page_count(pgdat->node_mem_map + i) - 1; } - printk("\t%d pages of RAM\n", pgdat->node_size); + printk("\t%d pages of RAM\n", pgdat->node_spanned_pages); printk("\t%d reserved pages\n", reserved); printk("\t%d pages shared\n", shared); printk("\t%d pages swap cached\n", cached); diff -Nru a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c --- a/arch/mips/kernel/sysirix.c Sat Jul 5 12:40:32 2003 +++ b/arch/mips/kernel/sysirix.c Sat Jul 5 12:40:32 2003 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -527,8 +528,6 @@ return get_seconds(); } -int vm_enough_memory(long pages); - /* * IRIX is completely broken... it returns 0 on success, otherwise * ENOMEM. @@ -585,7 +584,7 @@ /* * Check if we have enough memory.. */ - if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { + if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { ret = -ENOMEM; goto out; } diff -Nru a/arch/ppc/Kconfig b/arch/ppc/Kconfig --- a/arch/ppc/Kconfig Sat Jul 5 12:40:33 2003 +++ b/arch/ppc/Kconfig Sat Jul 5 12:40:33 2003 @@ -450,9 +450,6 @@ config SPRUCE bool "IBM-Spruce" -config MENF1 - bool "MEN-F1" - config LOPEC bool "Motorola-LoPEC" @@ -473,6 +470,10 @@ config SANDPOINT bool "Motorola-Sandpoint" + help + Select SANDPOINT if configuring for a Motorola Sandpoint X3 + or X3(b). + X3 (any flavor). config ADIR bool "SBS-Adirondack" @@ -490,9 +491,6 @@ series Single Board Computer. More information is available at: . -config ZX4500 - bool "Zynx-ZX4500" - endchoice config PPC_CHRP @@ -500,6 +498,11 @@ depends on PPC_MULTIPLATFORM default y +config PPC_GEN550 + bool + depends on SANDPOINT + default y + config PPC_PMAC bool depends on PPC_MULTIPLATFORM @@ -515,10 +518,6 @@ depends on PPC_PMAC || PPC_CHRP default y -config SANDPOINT_X3 - bool "Sandpoint X3" - depends on SANDPOINT - config FORCE bool depends on 6xx && !8260 && (PCORE || POWERPMC250) @@ -526,7 +525,7 @@ config EPIC_SERIAL_MODE bool - depends on 6xx && !8260 && (LOPEC || SANDPOINT_X3) + depends on 6xx && !8260 && (LOPEC || SANDPOINT) default y config WILLOW @@ -536,7 +535,7 @@ config MPC10X_STORE_GATHERING bool "Enable MPC10x store gathering" - depends on FORCE || MENF1 || SANDPOINT || ZX4500 + depends on FORCE || SANDPOINT config GT64260 bool @@ -1528,7 +1527,7 @@ config SERIAL_TEXT_DEBUG bool "Support for early boot texts over serial port" - depends on 4xx || GT64260 || LOPEC || MCPN765 || PPLUS || PRPMC800 || SANDPOINT || ZX4500 + depends on 4xx || GT64260 || LOPEC || MCPN765 || PPLUS || PRPMC800 || SANDPOINT config OCP bool diff -Nru a/arch/ppc/boot/common/Makefile b/arch/ppc/boot/common/Makefile --- a/arch/ppc/boot/common/Makefile Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/boot/common/Makefile Sat Jul 5 12:40:32 2003 @@ -8,7 +8,8 @@ # Tom Rini January 2001 # -lib-y := string.o util.o misc-common.o +lib-y := string.o util.o misc-common.o \ + serial_stub.o lib-$(CONFIG_PPC_PREP) += mpc10x_memory.o lib-$(CONFIG_LOPEC) += mpc10x_memory.o lib-$(CONFIG_PAL4) += cpc700_memory.o diff -Nru a/arch/ppc/boot/common/ns16550.c b/arch/ppc/boot/common/ns16550.c --- a/arch/ppc/boot/common/ns16550.c Sat Jul 5 12:40:33 2003 +++ b/arch/ppc/boot/common/ns16550.c Sat Jul 5 12:40:33 2003 @@ -95,8 +95,3 @@ { return ((inb(com_port + (UART_LSR << shift)) & UART_LSR_DR) != 0); } - -void -serial_close(unsigned long com_port) -{ -} diff -Nru a/arch/ppc/boot/common/serial_stub.c b/arch/ppc/boot/common/serial_stub.c 
--- /dev/null Wed Dec 31 16:00:00 1969 +++ b/arch/ppc/boot/common/serial_stub.c Sat Jul 5 12:40:34 2003 @@ -0,0 +1,28 @@ +/* + * arch/ppc/boot/common/serial_stub.c + * + * This is a few stub routines to make the boot code cleaner looking when + * there is no serial port support doesn't need to be closed, for example. + * + * Author: Tom Rini + * + * 2003 (c) MontaVista, Software, Inc. This file is licensed under the terms + * of the GNU General Public License version 2. This program is licensed "as + * is" without any warranty of any kind, whether express or implied. + */ + +void __attribute__ ((weak)) +serial_fixups(void) +{ +} + +unsigned long __attribute__ ((weak)) +serial_init(int chan, void *ignored) +{ + return 0; +} + +void __attribute__ ((weak)) +serial_close(unsigned long com_port) +{ +} diff -Nru a/arch/ppc/boot/simple/Makefile b/arch/ppc/boot/simple/Makefile --- a/arch/ppc/boot/simple/Makefile Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/boot/simple/Makefile Sat Jul 5 12:40:31 2003 @@ -47,28 +47,20 @@ ifeq ($(CONFIG_EBONY),y) ZIMAGE := zImage-TREE ZIMAGEINITRD := zImage.initrd-TREE -EXTRA := direct.o END := ebony ENTRYPOINT := 0x01000000 TFTPIMAGE := /tftpboot/zImage.$(END) endif ifeq ($(CONFIG_EV64260),y) -EXTRA := direct.o misc-ev64260.o +EXTRA := misc-ev64260.o TFTPIMAGE := /tftpboot/zImage.ev64260 endif ifeq ($(CONFIG_GEMINI),y) ZIMAGE := zImage-STRIPELF ZIMAGEINITRD := zImage.initrd-STRIPELF -EXTRA := direct.o END := gemini TFTPIMAGE := /tftpboot/zImage.$(END) endif -ifeq ($(CONFIG_MENF1),y) -ZIMAGE := zImage-MENF1 -ZIMAGEINITRD := zImage.initrd-MENF1 -EXTRA := chrpmap.o -TFTPIMAGE := /tftpboot/zImage.menf1 -endif ifeq ($(CONFIG_K2),y) EXTRA := legacy.o TFTPIMAGE := /tftpboot/zImage.k2 @@ -78,7 +70,6 @@ ifeq ($(CONFIG_MCPN765)$(CONFIG_MVME5100)$(CONFIG_PRPMC750)$(CONFIG_PRPMC800)$(CONFIG_LOPEC)$(CONFIG_PPLUS),y) ZIMAGE := zImage-PPLUS ZIMAGEINITRD := zImage.initrd-PPLUS -EXTRA := direct.o TFTPIMAGE := /tftpboot/zImage.pplus ZNETBOOT := zImage.pplus ZNETBOOTRD := zImage.initrd.pplus @@ -86,9 +77,6 @@ ifeq ($(CONFIG_PPLUS),y) EXTRA := legacy.o endif -ifeq ($(CONFIG_PAL4),y) -EXTRA := direct.o -endif ifeq ($(CONFIG_PCORE)$(CONFIG_POWERPMC250),y) ZIMAGE := zImage-STRIPELF ZIMAGEINITRD := zImage.initrd-STRIPELF @@ -96,30 +84,17 @@ END := pcore TFTPIMAGE := /tftpboot/zImage.$(END) endif -# The PowerPMC 250 needs the dummy serial_fixups() -ifeq ($(CONFIG_POWERPMC250),y) -EXTRA := direct.o -endif ifeq ($(CONFIG_SANDPOINT),y) -EXTRA := direct.o TFTPIMAGE := /tftpboot/zImage.sandpoint endif ifeq ($(CONFIG_SPRUCE),y) ZIMAGE := zImage-TREE ZIMAGEINITRD := zImage.initrd-TREE -EXTRA := direct.o END := spruce ENTRYPOINT := 0x00800000 MISC := misc-spruce.o TFTPIMAGE := /tftpboot/zImage.$(END) endif -ifeq ($(CONFIG_ZX4500),y) -ZIMAGE := zImage-STRIPELF -ZIMAGEINITRD := zImage.initrd-STRIPELF -EXTRA := direct.o -END := zx4500 -TFTPIMAGE := /tftpboot/zImage.$(END) -endif ifeq ($(CONFIG_SMP),y) TFTPIMAGE += .smp endif @@ -220,12 +195,6 @@ $(images)/zImage.initrd-TREE: $(obj)/zvmlinux.initrd $(MKTREE) $(MKTREE) $(obj)/zvmlinux.initrd $(images)/zImage.initrd.$(END) \ $(ENTRYPOINT) - -$(images)/zImage-MENF1: $(obj)/zvmlinux $(MKPREP) - $(MKPREP) -pbp $(obj)/zvmlinux $(images)/zImage.menf1 - -$(images)/zImage.initrd-MENF1: $(obj)/zvmlinux.initrd $(MKPREP) - $(MKPREP) -pbp $(obj)/zvmlinux.initrd $(images)/zImage.initrd.menf1 $(images)/zImage-PPLUS: $(obj)/zvmlinux $(MKPREP) $(MKBUGBOOT) $(MKPREP) -pbp $(obj)/zvmlinux $(images)/zImage.pplus diff -Nru a/arch/ppc/boot/simple/direct.S 
b/arch/ppc/boot/simple/direct.S --- a/arch/ppc/boot/simple/direct.S Sat Jul 5 12:40:32 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,15 +0,0 @@ -/* - * arch/ppc/boot/simple/direct.S - * - * Author: Tom Rini - * - * This is an empty function for machines which use SERIAL_IO_MEM - * and don't need ISA_io set to anything but 0, or perform any other - * serial fixups. - */ - - .text - - .globl serial_fixups -serial_fixups: - blr diff -Nru a/arch/ppc/boot/simple/m8260_tty.c b/arch/ppc/boot/simple/m8260_tty.c --- a/arch/ppc/boot/simple/m8260_tty.c Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/boot/simple/m8260_tty.c Sat Jul 5 12:40:31 2003 @@ -311,8 +311,3 @@ return(!(rbdf->cbd_sc & BD_SC_EMPTY)); } - -void -serial_close(unsigned long com_port) -{ -} diff -Nru a/arch/ppc/boot/simple/m8xx_tty.c b/arch/ppc/boot/simple/m8xx_tty.c --- a/arch/ppc/boot/simple/m8xx_tty.c Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/boot/simple/m8xx_tty.c Sat Jul 5 12:40:31 2003 @@ -288,8 +288,3 @@ return(!(rbdf->cbd_sc & BD_SC_EMPTY)); } - -void -serial_close(unsigned long com_port) -{ -} diff -Nru a/arch/ppc/configs/adir_defconfig b/arch/ppc/configs/adir_defconfig --- a/arch/ppc/configs/adir_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/adir_defconfig Sat Jul 5 12:40:32 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff -Nru a/arch/ppc/configs/apus_defconfig b/arch/ppc/configs/apus_defconfig --- a/arch/ppc/configs/apus_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/apus_defconfig Sat Jul 5 12:40:32 2003 @@ -56,7 +56,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -68,7 +67,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff -Nru a/arch/ppc/configs/common_defconfig b/arch/ppc/configs/common_defconfig --- a/arch/ppc/configs/common_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/common_defconfig Sat Jul 5 12:40:31 2003 @@ -56,7 +56,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -68,7 +67,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff -Nru a/arch/ppc/configs/ev64260_defconfig b/arch/ppc/configs/ev64260_defconfig --- a/arch/ppc/configs/ev64260_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/ev64260_defconfig Sat Jul 5 12:40:32 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set CONFIG_EV64260=y # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_GT64260=y CONFIG_SERIAL_CONSOLE_BAUD=115200 # CONFIG_SMP is not set diff -Nru 
a/arch/ppc/configs/gemini_defconfig b/arch/ppc/configs/gemini_defconfig --- a/arch/ppc/configs/gemini_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/gemini_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set CONFIG_GEMINI=y -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set CONFIG_ALTIVEC=y diff -Nru a/arch/ppc/configs/ibmchrp_defconfig b/arch/ppc/configs/ibmchrp_defconfig --- a/arch/ppc/configs/ibmchrp_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/ibmchrp_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff -Nru a/arch/ppc/configs/k2_defconfig b/arch/ppc/configs/k2_defconfig --- a/arch/ppc/configs/k2_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/k2_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ CONFIG_K2=y # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_CPC710_DATA_GATHERING is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff -Nru a/arch/ppc/configs/lopec_defconfig b/arch/ppc/configs/lopec_defconfig --- a/arch/ppc/configs/lopec_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/lopec_defconfig Sat Jul 5 12:40:32 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set CONFIG_LOPEC=y # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_EPIC_SERIAL_MODE=y # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff -Nru a/arch/ppc/configs/mcpn765_defconfig b/arch/ppc/configs/mcpn765_defconfig --- a/arch/ppc/configs/mcpn765_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/mcpn765_defconfig Sat Jul 5 12:40:32 2003 @@ -50,7 +50,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set CONFIG_MCPN765=y # CONFIG_MVME5100 is not set @@ -62,7 +61,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set CONFIG_ALTIVEC=y diff -Nru a/arch/ppc/configs/menf1_defconfig b/arch/ppc/configs/menf1_defconfig --- a/arch/ppc/configs/menf1_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/menf1_defconfig Sat Jul 5 12:40:31 2003 @@ -67,7 +67,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_MPC10X_STORE_GATHERING=y # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff -Nru a/arch/ppc/configs/mvme5100_defconfig b/arch/ppc/configs/mvme5100_defconfig --- 
a/arch/ppc/configs/mvme5100_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/mvme5100_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set CONFIG_MVME5100=y @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_MVME5100_IPMC761_PRESENT is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set diff -Nru a/arch/ppc/configs/pcore_defconfig b/arch/ppc/configs/pcore_defconfig --- a/arch/ppc/configs/pcore_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/pcore_defconfig Sat Jul 5 12:40:32 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_FORCE=y # CONFIG_MPC10X_STORE_GATHERING is not set # CONFIG_SMP is not set diff -Nru a/arch/ppc/configs/pmac_defconfig b/arch/ppc/configs/pmac_defconfig --- a/arch/ppc/configs/pmac_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/pmac_defconfig Sat Jul 5 12:40:32 2003 @@ -57,7 +57,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -69,7 +68,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff -Nru a/arch/ppc/configs/power3_defconfig b/arch/ppc/configs/power3_defconfig --- a/arch/ppc/configs/power3_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/power3_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff -Nru a/arch/ppc/configs/pplus_defconfig b/arch/ppc/configs/pplus_defconfig --- a/arch/ppc/configs/pplus_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/pplus_defconfig Sat Jul 5 12:40:32 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff -Nru a/arch/ppc/configs/prpmc750_defconfig b/arch/ppc/configs/prpmc750_defconfig --- a/arch/ppc/configs/prpmc750_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/prpmc750_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP 
is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff -Nru a/arch/ppc/configs/prpmc800_defconfig b/arch/ppc/configs/prpmc800_defconfig --- a/arch/ppc/configs/prpmc800_defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/configs/prpmc800_defconfig Sat Jul 5 12:40:32 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set # CONFIG_ALTIVEC is not set diff -Nru a/arch/ppc/configs/sandpoint_defconfig b/arch/ppc/configs/sandpoint_defconfig --- a/arch/ppc/configs/sandpoint_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/sandpoint_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,8 +66,7 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set -CONFIG_SANDPOINT_X3=y +CONFIG_PPC_GEN550=y CONFIG_EPIC_SERIAL_MODE=y # CONFIG_MPC10X_STORE_GATHERING is not set # CONFIG_SMP is not set @@ -84,8 +82,8 @@ CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_KCORE_ELF=y -CONFIG_BINFMT_ELF=y CONFIG_KERNEL_ELF=y +CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m # CONFIG_PCI_LEGACY_PROC is not set # CONFIG_PCI_NAMES is not set @@ -118,6 +116,11 @@ CONFIG_BOOT_LOAD=0x00800000 # +# Generic Driver Options +# +# CONFIG_FW_LOADER is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -147,7 +150,7 @@ # CONFIG_MD is not set # -# ATA/IDE/MFM/RLL support +# ATA/ATAPI/MFM/RLL support # CONFIG_IDE=y @@ -166,6 +169,7 @@ CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_TASKFILE_IO=y # # IDE chipset support/bugfixes @@ -173,7 +177,7 @@ # CONFIG_BLK_DEV_IDEPCI is not set # -# SCSI support +# SCSI device support # # CONFIG_SCSI is not set @@ -292,6 +296,7 @@ # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set +# CONFIG_TC35815 is not set # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y @@ -413,11 +418,6 @@ # CONFIG_LOGO is not set # -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# # Input device support # CONFIG_INPUT=y @@ -443,6 +443,7 @@ CONFIG_SERIO_I8042=y # CONFIG_SERIO_SERPORT is not set # CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set # # Input Device Drivers @@ -731,6 +732,7 @@ # # USB Network adaptors # +# CONFIG_USB_AX8817X is not set # CONFIG_USB_CATC is not set # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set diff -Nru a/arch/ppc/configs/spruce_defconfig b/arch/ppc/configs/spruce_defconfig --- a/arch/ppc/configs/spruce_defconfig Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/configs/spruce_defconfig Sat Jul 5 12:40:31 2003 @@ -55,7 +55,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set CONFIG_SPRUCE=y -# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -67,7 +66,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set # CONFIG_SPRUCE_BAUD_33M is not set # CONFIG_SMP is not set # CONFIG_PREEMPT is not set 
diff -Nru a/arch/ppc/configs/zx4500_defconfig b/arch/ppc/configs/zx4500_defconfig --- a/arch/ppc/configs/zx4500_defconfig Sat Jul 5 12:40:31 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,560 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_MMU=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_HAVE_DEC_LOCK=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General setup -# -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EMBEDDED=y -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y -# CONFIG_MODVERSIONS is not set -CONFIG_KMOD=y - -# -# Platform support -# -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_6xx=y -# CONFIG_40x is not set -# CONFIG_POWER3 is not set -# CONFIG_8xx is not set - -# -# IBM 4xx options -# -# CONFIG_8260 is not set -CONFIG_GENERIC_ISA_DMA=y -CONFIG_PPC_STD_MMU=y -# CONFIG_PPC_MULTIPLATFORM is not set -# CONFIG_APUS is not set -# CONFIG_WILLOW_2 is not set -# CONFIG_PCORE is not set -# CONFIG_POWERPMC250 is not set -# CONFIG_EV64260 is not set -# CONFIG_SPRUCE is not set -# CONFIG_MENF1 is not set -# CONFIG_LOPEC is not set -# CONFIG_MCPN765 is not set -# CONFIG_MVME5100 is not set -# CONFIG_PPLUS is not set -# CONFIG_PRPMC750 is not set -# CONFIG_PRPMC800 is not set -# CONFIG_SANDPOINT is not set -# CONFIG_ADIR is not set -# CONFIG_K2 is not set -# CONFIG_PAL4 is not set -# CONFIG_GEMINI is not set -CONFIG_ZX4500=y -# CONFIG_MPC10X_STORE_GATHERING is not set -# CONFIG_SMP is not set -# CONFIG_PREEMPT is not set -# CONFIG_ALTIVEC is not set -# CONFIG_TAU is not set -# CONFIG_CPU_FREQ is not set - -# -# General setup -# -# CONFIG_HIGHMEM is not set -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_KCORE_ELF=y -CONFIG_BINFMT_ELF=y -CONFIG_KERNEL_ELF=y -CONFIG_BINFMT_MISC=y -# CONFIG_PCI_LEGACY_PROC is not set -CONFIG_PCI_NAMES=y -# CONFIG_HOTPLUG is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set -CONFIG_PPC601_SYNC_FIX=y -# CONFIG_CMDLINE_BOOL is not set - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_BOOT_LOAD=0x00800000 - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Plug and Play support -# -# CONFIG_PNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_BLK_DEV_INITRD=y - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# ATA/IDE/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI support -# -# CONFIG_SCSI is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -# CONFIG_NETLINK_DEV is not set -# CONFIG_NETFILTER is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# 
CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_IPV6 is not set -# CONFIG_XFRM_USER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -CONFIG_IPV6_SCTP__=y -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_LLC is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_MII is not set -# CONFIG_OAKNET is not set -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNGEM is not set -# CONFIG_NET_VENDOR_3COM is not set - -# -# Tulip family network device support -# -# CONFIG_NET_TULIP is not set -# CONFIG_HP100 is not set -CONFIG_NET_PCI=y -# CONFIG_PCNET32 is not set -# CONFIG_AMD8111_ETH is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_B44 is not set -# CONFIG_DGRS is not set -CONFIG_EEPRO100=y -# CONFIG_EEPRO100_PIO is not set -# CONFIG_E100 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -# CONFIG_NE2K_PCI is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_SIS900 is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_TLAN is not set -# CONFIG_VIA_RHINE is not set - -# -# Ethernet (1000 Mbit) -# -# CONFIG_ACENIC is not set -# CONFIG_DL2K is not set -# CONFIG_E1000 is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set -# CONFIG_SK98LIN is not set -# CONFIG_TIGON3 is not set - -# -# Ethernet (10000 Mbit) -# -# CONFIG_IXGB is not set -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices (depends on LLC=y) -# -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -# CONFIG_IRDA is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN_BOOL is not set - -# -# Graphics support -# -# CONFIG_FB is not set - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input device support -# -# CONFIG_INPUT is not set - -# -# Userland interfaces -# - -# -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -# CONFIG_SERIO is not set - -# -# Input Device Drivers -# - -# -# Macintosh device drivers -# - -# -# Character devices -# -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_SERIAL_8250_EXTENDED is not set - -# -# Non-8250 serial port support -# 
-CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# I2C Hardware Sensors Mainboard support -# - -# -# I2C Hardware Sensors Chip support -# -# CONFIG_I2C_SENSOR is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -# CONFIG_QIC02_TAPE is not set - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -CONFIG_GEN_RTC=y -# CONFIG_GEN_RTC_X is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_HANGCHECK_TIMER is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# File systems -# -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=y -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_FAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_DEVPTS_FS_XATTR is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -# CONFIG_NFS_V3 is not set -# CONFIG_NFS_V4 is not set -CONFIG_NFSD=y -# CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_EXPORTFS=y -CONFIG_SUNRPC=y -# CONFIG_SUNRPC_GSS is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB is not set -# CONFIG_USB_GADGET is not set - -# -# Bluetooth support -# -# CONFIG_BT is not set - -# -# Library routines -# -# CONFIG_CRC32 is not set - -# -# Kernel hacking -# -# CONFIG_DEBUG_KERNEL is not set -# CONFIG_KALLSYMS is not set -CONFIG_SERIAL_TEXT_DEBUG=y - -# -# Security options -# -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set diff -Nru a/arch/ppc/defconfig b/arch/ppc/defconfig --- a/arch/ppc/defconfig Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/defconfig Sat Jul 5 12:40:32 2003 @@ -56,7 +56,6 @@ # CONFIG_POWERPMC250 is not set # CONFIG_EV64260 is not set # CONFIG_SPRUCE is not set 
-# CONFIG_MENF1 is not set # CONFIG_LOPEC is not set # CONFIG_MCPN765 is not set # CONFIG_MVME5100 is not set @@ -68,7 +67,6 @@ # CONFIG_K2 is not set # CONFIG_PAL4 is not set # CONFIG_GEMINI is not set -# CONFIG_ZX4500 is not set CONFIG_PPC_CHRP=y CONFIG_PPC_PMAC=y CONFIG_PPC_PREP=y diff -Nru a/arch/ppc/kernel/ppc-stub.c b/arch/ppc/kernel/ppc-stub.c --- a/arch/ppc/kernel/ppc-stub.c Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/kernel/ppc-stub.c Sat Jul 5 12:40:32 2003 @@ -851,14 +851,4 @@ putpacket(buffer); return 1; - } - -#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) - -/* This is used on arches which don't have a serial driver that maps - * the ports for us */ -void -kgdb_map_scc(void) -{ } -#endif diff -Nru a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c --- a/arch/ppc/kernel/ppc_ksyms.c Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/kernel/ppc_ksyms.c Sat Jul 5 12:40:32 2003 @@ -272,8 +272,10 @@ #ifdef CONFIG_NVRAM EXPORT_SYMBOL(nvram_read_byte); EXPORT_SYMBOL(nvram_write_byte); +#ifdef CONFIG_PPC_PMAC EXPORT_SYMBOL(pmac_xpram_read); EXPORT_SYMBOL(pmac_xpram_write); +#endif #endif /* CONFIG_NVRAM */ EXPORT_SYMBOL(to_tm); diff -Nru a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c --- a/arch/ppc/kernel/setup.c Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/kernel/setup.c Sat Jul 5 12:40:32 2003 @@ -619,6 +619,8 @@ if (strstr(cmd_line, "nokgdb")) printk("kgdb default breakpoint deactivated on command line\n"); else { + if (ppc_md.progress) + ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); printk("kgdb default breakpoint activated\n"); breakpoint(); } diff -Nru a/arch/ppc/platforms/Makefile b/arch/ppc/platforms/Makefile --- a/arch/ppc/platforms/Makefile Sat Jul 5 12:40:34 2003 +++ b/arch/ppc/platforms/Makefile Sat Jul 5 12:40:34 2003 @@ -43,9 +43,8 @@ obj-$(CONFIG_PPLUS) += pplus_pci.o pplus_setup.o obj-$(CONFIG_PRPMC750) += prpmc750_setup.o prpmc750_pci.o obj-$(CONFIG_PRPMC800) += prpmc800_setup.o prpmc800_pci.o -obj-$(CONFIG_SANDPOINT) += sandpoint_setup.o sandpoint_pci.o +obj-$(CONFIG_SANDPOINT) += sandpoint.o obj-$(CONFIG_SPRUCE) += spruce_setup.o spruce_pci.o -obj-$(CONFIG_ZX4500) += zx4500_setup.o zx4500_pci.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_PPC_PMAC) += pmac_smp.o diff -Nru a/arch/ppc/platforms/menf1.h b/arch/ppc/platforms/menf1.h --- a/arch/ppc/platforms/menf1.h Sat Jul 5 12:40:31 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,24 +0,0 @@ -/* - * arch/ppc/platforms/menf1.h - * - * Definitions for MEN F1 board support - * - * Author: Matt Porter - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#ifndef __PPC_PLATFORMS_MENF1_H -#define __PPC_PLATFORMS_MENF1_H - -#define MENF1_NVRAM_AS0 0x70 -#define MENF1_NVRAM_AS1 0x72 -#define MENF1_NVRAM_DATA 0x71 - -#define MENF1_IDE0_BASE_ADDR 0x1f0 -#define MENF1_IDE1_BASE_ADDR 0x170 - -#endif /* __PPC_PLATFORMS_MENF1_H */ diff -Nru a/arch/ppc/platforms/menf1_pci.c b/arch/ppc/platforms/menf1_pci.c --- a/arch/ppc/platforms/menf1_pci.c Sat Jul 5 12:40:31 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,98 +0,0 @@ -/* - * arch/ppc/platforms/menf1_pci.c - * - * PCI support for MEN F1 - * - * Author: Matt Porter - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "menf1.h" - -#undef DEBUG -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif /* DEBUG */ - -static inline int __init -menf1_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) -{ - static char pci_irq_table[][4] = - /* - * PCI IDSEL/INTPIN->INTLINE - * A B C D - */ - { - {10, 11, 7, 9}, /* IDSEL 26 - PCMIP 0 */ - {0, 0, 0, 0}, /* IDSEL 27 - M5229 IDE */ - {0, 0, 0, 0}, /* IDSEL 28 - M7101 PMU */ - {9, 10, 11, 7}, /* IDSEL 29 - PCMIP 1 */ - {10, 11, 7, 9}, /* IDSEL 30 - P2P Bridge */ - }; - const long min_idsel = 26, max_idsel = 30, irqs_per_slot = 4; - return PCI_IRQ_TABLE_LOOKUP; -}; - -static int -menf1_exclude_device(u_char bus, u_char devfn) -{ - if ((bus == 0) && (devfn == 0xe0)) { - return PCIBIOS_DEVICE_NOT_FOUND; - } - else { - return PCIBIOS_SUCCESSFUL; - } -} - -void __init -menf1_find_bridges(void) -{ - struct pci_controller* hose; - - hose = pcibios_alloc_controller(); - if (!hose) - return; - - hose->first_busno = 0; - hose->last_busno = 0xff; - - ppc_md.pci_exclude_device = menf1_exclude_device; - - mpc10x_bridge_init(hose, - MPC10X_MEM_MAP_B, - MPC10X_MEM_MAP_B, - MPC10X_MAPB_EUMB_BASE); - - hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); - - { - /* Add ISA bus wait states */ - unsigned char isa_control; - - early_read_config_byte(hose, 0, 0x90, 0x43, &isa_control); - isa_control |= 0x33; - early_write_config_byte(hose, 0, 0x90, 0x43, isa_control); - } - - ppc_md.pci_swizzle = common_swizzle; - ppc_md.pci_map_irq = menf1_map_irq; -} diff -Nru a/arch/ppc/platforms/menf1_setup.c b/arch/ppc/platforms/menf1_setup.c --- a/arch/ppc/platforms/menf1_setup.c Sat Jul 5 12:40:32 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,283 +0,0 @@ -/* - * arch/ppc/platforms/menf1_setup.c - * - * Board setup routines for MEN F1 - * - * Author: Matt Porter - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "menf1.h" - -extern void menf1_find_bridges(void); -extern unsigned long loops_per_jiffy; - -/* Dummy variable to satisfy mpc10x_common.o */ -void *OpenPIC_Addr; - -static int -menf1_show_cpuinfo(struct seq_file *m) -{ - seq_printf(m, "machine\t\t: MEN F1\n"); - - return 0; -} - -static void __init -menf1_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000/HZ; - - /* Lookup PCI host bridges */ - menf1_find_bridges(); - -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - ROOT_DEV = Root_RAM0; - else -#endif -#ifdef CONFIG_ROOT_NFS - ROOT_DEV = Root_NFS; -#else - ROOT_DEV = Root_HDA2; -#endif - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - printk("MEN F1 port (C) 2001 MontaVista Software, Inc. (source@mvista.com)\n"); -} - -static void -menf1_restart(char *cmd) -{ - - int picr1; - struct pci_dev *pdev; - - local_irq_disable(); - - /* - * Firmware doesn't like re-entry using Map B (CHRP), so make sure the - * PCI bridge is using MAP A (PReP). 
- */ - - pdev = pci_find_slot(0, PCI_DEVFN(0,0)); - - while(pdev == NULL); /* paranoia */ - - pci_read_config_dword(pdev, MPC10X_CFG_PICR1_REG, &picr1); - - picr1 = (picr1 & ~MPC10X_CFG_PICR1_ADDR_MAP_MASK) | - MPC10X_CFG_PICR1_ADDR_MAP_A; - - pci_write_config_dword(pdev, MPC10X_CFG_PICR1_REG, picr1); - - asm volatile("sync"); - - /* SRR0 has system reset vector, SRR1 has default MSR value */ - /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ - __asm__ __volatile__ - ("\n\ - lis 3,0xfff0 - ori 3,3,0x0100 - mtspr 26,3 - li 3,0 - mtspr 27,3 - rfi - "); - while(1); -} - -static void -menf1_halt(void) -{ - local_irq_disable(); - while (1); -} - -static void -menf1_power_off(void) -{ - menf1_halt(); -} - -static void __init -menf1_init_IRQ(void) -{ - int i; - - for ( i = 0 ; i < NUM_8259_INTERRUPTS ; i++ ) - irq_desc[i].handler = &i8259_pic; - i8259_init(NULL); -} - -/* - * Set BAT 3 to map 0xF0000000. - */ -static __inline__ void -menf1_set_bat(void) -{ - static int mapping_set = 0; - - if (!mapping_set) - { - - /* wait for all outstanding memory accesses to complete */ - mb(); - - /* setup DBATs */ - mtspr(DBAT3U, 0xf0001ffe); - mtspr(DBAT3L, 0xf000002a); - - /* wait for updates */ - mb(); - - mapping_set = 1; - } - return; -} - -static unsigned long __init -menf1_find_end_of_memory(void) -{ - /* Cover the I/O with a BAT */ - menf1_set_bat(); - - /* Read the memory size from the MPC107 SMC */ - return mpc10x_get_mem_size(MPC10X_MEM_MAP_B); -} - -static void __init -menf1_map_io(void) -{ - io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); -} - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) -/* IDE functions */ - -static void __init -menf1_ide_init_hwif_ports (hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i = 8; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - else - hw->io_ports[IDE_CONTROL_OFFSET] = - hw->io_ports[IDE_DATA_OFFSET] + 0x206; - - if (irq != NULL) - *irq = 0; -} - -static int -menf1_ide_default_irq(unsigned long base) -{ - if (base == MENF1_IDE0_BASE_ADDR) - return 14; - else if (base == MENF1_IDE1_BASE_ADDR) - return 15; - else - return 0; -} - -static unsigned long -menf1_ide_default_io_base(int index) -{ - if (index == 0) - return MENF1_IDE0_BASE_ADDR; - else if (index == 1) - return MENF1_IDE1_BASE_ADDR; - else - return 0; -} -#endif - -TODC_ALLOC(); - -void __init -platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - parse_bootinfo(find_bootinfo()); - - isa_io_base = MPC10X_MAPB_ISA_IO_BASE; - isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; - pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; - - ppc_md.setup_arch = menf1_setup_arch; - ppc_md.show_cpuinfo = menf1_show_cpuinfo; - ppc_md.init_IRQ = menf1_init_IRQ; - ppc_md.get_irq = i8259_irq; - - ppc_md.find_end_of_memory = menf1_find_end_of_memory; - ppc_md.setup_io_mappings = menf1_map_io; - - ppc_md.restart = menf1_restart; - ppc_md.power_off = menf1_power_off; - ppc_md.halt = menf1_halt; - - TODC_INIT(TODC_TYPE_MK48T59, - MENF1_NVRAM_AS0, - MENF1_NVRAM_AS1, - MENF1_NVRAM_DATA, - 7); - - ppc_md.time_init = todc_time_init; - ppc_md.get_rtc_time = todc_get_rtc_time; - ppc_md.set_rtc_time = todc_set_rtc_time; - ppc_md.calibrate_decr = todc_calibrate_decr; - - ppc_md.nvram_read_val = todc_m48txx_read_val; - ppc_md.nvram_write_val = 
todc_m48txx_write_val; - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) - ppc_ide_md.default_io_base = menf1_ide_default_io_base; - ppc_ide_md.default_irq = menf1_ide_default_irq; - ppc_ide_md.ide_init_hwif = menf1_ide_init_hwif_ports; -#endif -} diff -Nru a/arch/ppc/platforms/sandpoint.c b/arch/ppc/platforms/sandpoint.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/arch/ppc/platforms/sandpoint.c Sat Jul 5 12:40:34 2003 @@ -0,0 +1,719 @@ +/* + * arch/ppc/platforms/sandpoint_setup.c + * + * Board setup routines for the Motorola SPS Sandpoint Test Platform. + * + * Author: Mark A. Greer + * mgreer@mvista.com + * + * 2000-2003 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +/* + * This file adds support for the Motorola SPS Sandpoint Test Platform. + * These boards have a PPMC slot for the processor so any combination + * of cpu and host bridge can be attached. This port is for an 8240 PPMC + * module from Motorola SPS and other closely related cpu/host bridge + * combinations (e.g., 750/755/7400 with MPC107 host bridge). + * The sandpoint itself has a Winbond 83c553 (PCI-ISA bridge, 2 DMA ctlrs, 2 + * cascaded 8259 interrupt ctlrs, 8254 Timer/Counter, and an IDE ctlr), a + * National 87308 (RTC, 2 UARTs, Keyboard & mouse ctlrs, and a floppy ctlr), + * and 4 PCI slots (only 2 of which are usable; the other 2 are keyed for 3.3V + * but are really 5V). + * + * The firmware on the sandpoint is called DINK (not my acronym :). This port + * depends on DINK to do some basic initialization (e.g., initialize the memory + * ctlr) and to ensure that the processor is using MAP B (CHRP map). + * + * The switch settings for the Sandpoint board MUST be as follows: + * S3: down + * S4: up + * S5: up + * S6: down + * + * 'down' is in the direction from the PCI slots towards the PPMC slot; + * 'up' is in the direction from the PPMC slot towards the PCI slots. + * Be careful, the way the sandpoint board is installed in XT chassis will + * make the directions reversed. + * + * Since Motorola listened to our suggestions for improvement, we now have + * the Sandpoint X3 board. All of the PCI slots are available, it uses + * the serial interrupt interface (just a hardware thing we need to + * configure properly). + * + * Use the default X3 switch settings. The interrupts are then: + * EPIC Source + * 0 SIOINT (8259, active low) + * 1 PCI #1 + * 2 PCI #2 + * 3 PCI #3 + * 4 PCI #4 + * 7 Winbond INTC (IDE interrupt) + * 8 Winbond INTD (IDE interrupt) + * + * + * Motorola has finally released a version of DINK32 that + * (seemingly) initializes the memory controller correctly, regardless + * of the amount of memory in the system. Once a method of determining + * what version of DINK initializes the system for us, if applicable, is + * found, we can hopefully stop hardcoding 32MB of RAM. + * + * It is important to note that this code only supports the Sandpoint X3 + * (all flavors) platform, and it does not support the X2 anymore.
Code + * that at one time worked on the X2 can be found at: + * ftp://source.mvista.com/pub/linuxppc/obsolete/sandpoint/ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for linux/serial_core.h */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sandpoint.h" + +extern void gen550_progress(char *, unsigned short); +extern void gen550_init(int, struct uart_port *); + +unsigned char __res[sizeof(bd_t)]; + +static void sandpoint_halt(void); + +/* + * Define all of the IRQ senses and polarities. Taken from the + * Sandpoint X3 User's manual. + */ +static u_char sandpoint_openpic_initsenses[] __initdata = { + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 0: SIOINT */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 2: PCI Slot 1 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 3: PCI Slot 2 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 4: PCI Slot 3 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 5: PCI Slot 4 */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE), /* 8: IDE (INT C) */ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE) /* 9: IDE (INT D) */ +}; + +/* + * Motorola SPS Sandpoint interrupt routing. + */ +static inline int +sandpoint_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) +{ + static char pci_irq_table[][4] = + /* + * PCI IDSEL/INTPIN->INTLINE + * A B C D + */ + { + { 16, 0, 0, 0 }, /* IDSEL 11 - i8259 on Winbond */ + { 0, 0, 0, 0 }, /* IDSEL 12 - unused */ + { 18, 21, 20, 19 }, /* IDSEL 13 - PCI slot 1 */ + { 19, 18, 21, 20 }, /* IDSEL 14 - PCI slot 2 */ + { 20, 19, 18, 21 }, /* IDSEL 15 - PCI slot 3 */ + { 21, 20, 19, 18 }, /* IDSEL 16 - PCI slot 4 */ + }; + + const long min_idsel = 11, max_idsel = 16, irqs_per_slot = 4; + return PCI_IRQ_TABLE_LOOKUP; +} + +static void __init +sandpoint_setup_winbond_83553(struct pci_controller *hose) +{ + int devfn; + + /* + * Route IDE interrupts directly to the 8259's IRQ 14 & 15. + * We can't route the IDE interrupt to PCI INTC# or INTD# because those + * woule interfere with the PMC's INTC# and INTD# lines. + */ + /* + * Winbond Fcn 0 + */ + devfn = PCI_DEVFN(11,0); + + early_write_config_byte(hose, + 0, + devfn, + 0x43, /* IDE Interrupt Routing Control */ + 0xef); + early_write_config_word(hose, + 0, + devfn, + 0x44, /* PCI Interrupt Routing Control */ + 0x0000); + + /* Want ISA memory cycles to be forwarded to PCI bus */ + early_write_config_byte(hose, + 0, + devfn, + 0x48, /* ISA-to-PCI Addr Decoder Control */ + 0xf0); + + /* Enable RTC and Keyboard address locations. */ + early_write_config_byte(hose, + 0, + devfn, + 0x4d, /* Chip Select Control Register */ + 0x00); + + /* Enable Port 92. */ + early_write_config_byte(hose, + 0, + devfn, + 0x4e, /* AT System Control Register */ + 0x06); + /* + * Winbond Fcn 1 + */ + devfn = PCI_DEVFN(11,1); + + /* Put IDE controller into native mode. 
*/ + early_write_config_byte(hose, + 0, + devfn, + 0x09, /* Programming interface Register */ + 0x8f); + + /* Init IRQ routing, enable both ports, disable fast 16 */ + early_write_config_dword(hose, + 0, + devfn, + 0x40, /* IDE Control/Status Register */ + 0x00ff0011); + return; +} + +static void __init +sandpoint_find_bridges(void) +{ + struct pci_controller *hose; + + hose = pcibios_alloc_controller(); + + if (!hose) + return; + + hose->first_busno = 0; + hose->last_busno = 0xff; + + if (mpc10x_bridge_init(hose, + MPC10X_MEM_MAP_B, + MPC10X_MEM_MAP_B, + MPC10X_MAPB_EUMB_BASE) == 0) { + + /* Do early winbond init, then scan PCI bus */ + sandpoint_setup_winbond_83553(hose); + hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); + + ppc_md.pcibios_fixup = NULL; + ppc_md.pcibios_fixup_bus = NULL; + ppc_md.pci_swizzle = common_swizzle; + ppc_md.pci_map_irq = sandpoint_map_irq; + } + else { + if (ppc_md.progress) + ppc_md.progress("Bridge init failed", 0x100); + printk("Host bridge init failed\n"); + } + + return; +} + +#if defined(CONFIG_SERIAL_8250) && \ + (defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)) +static void __init +sandpoint_early_serial_map(void) +{ + struct uart_port serial_req; + + /* Setup serial port access */ + memset(&serial_req, 0, sizeof(serial_req)); + serial_req.uartclk = UART_CLK; + serial_req.irq = 4; + serial_req.flags = STD_COM_FLAGS; + serial_req.iotype = SERIAL_IO_MEM; + serial_req.membase = (u_char *)SANDPOINT_SERIAL_0; + + gen550_init(0, &serial_req); + + if (early_serial_setup(&serial_req) != 0) + printk(KERN_ERR "Early serial init of port 0 failed\n"); + + /* Assume early_serial_setup() doesn't modify serial_req */ + serial_req.line = 1; + serial_req.irq = 3; /* XXXX */ + serial_req.membase = (u_char *)SANDPOINT_SERIAL_1; + + gen550_init(1, &serial_req); + + if (early_serial_setup(&serial_req) != 0) + printk(KERN_ERR "Early serial init of port 1 failed\n"); +} +#endif + +static void __init +sandpoint_setup_arch(void) +{ + loops_per_jiffy = 100000000 / HZ; + +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + ROOT_DEV = Root_RAM0; + else +#endif +#ifdef CONFIG_ROOT_NFS + ROOT_DEV = Root_NFS; +#else + ROOT_DEV = Root_HDA1; +#endif + + /* Lookup PCI host bridges */ + sandpoint_find_bridges(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + printk(KERN_INFO "Motorola SPS Sandpoint Test Platform\n"); + printk(KERN_INFO "Port by MontaVista Software, Inc. (source@mvista.com)\n"); + + /* DINK32 12.3 and below do not correctly enable any caches. + * We will do this now with good known values. Future versions + * of DINK32 are supposed to get this correct. + */ + if (cur_cpu_spec[0]->cpu_features & CPU_FTR_SPEC7450) + /* 745x is different. We only want to pass along enable. */ + _set_L2CR(L2CR_L2E); + else if (cur_cpu_spec[0]->cpu_features & CPU_FTR_L2CR) + /* All modules have 1MB of L2. We also assume that an + * L2 divisor of 3 will work. + */ + _set_L2CR(L2CR_L2E | L2CR_L2SIZ_1MB | L2CR_L2CLK_DIV3 + | L2CR_L2RAM_PIPE | L2CR_L2OH_1_0 | L2CR_L2DF); +#if 0 + /* Untested right now. */ + if (cur_cpu_spec[0]->cpu_features & CPU_FTR_L3CR) { + /* Magic value. 
*/ + _set_L3CR(0x8f032000); + } +#endif +} + +#define SANDPOINT_87308_CFG_ADDR 0x15c +#define SANDPOINT_87308_CFG_DATA 0x15d + +#define SANDPOINT_87308_CFG_INB(addr, byte) { \ + outb((addr), SANDPOINT_87308_CFG_ADDR); \ + (byte) = inb(SANDPOINT_87308_CFG_DATA); \ +} + +#define SANDPOINT_87308_CFG_OUTB(addr, byte) { \ + outb((addr), SANDPOINT_87308_CFG_ADDR); \ + outb((byte), SANDPOINT_87308_CFG_DATA); \ +} + +#define SANDPOINT_87308_SELECT_DEV(dev_num) { \ + SANDPOINT_87308_CFG_OUTB(0x07, (dev_num)); \ +} + +#define SANDPOINT_87308_DEV_ENABLE(dev_num) { \ + SANDPOINT_87308_SELECT_DEV(dev_num); \ + SANDPOINT_87308_CFG_OUTB(0x30, 0x01); \ +} + +/* + * Initialize the ISA devices on the Nat'l PC87308VUL SuperIO chip. + */ +static int __init +sandpoint_setup_natl_87308(void) +{ + u_char reg; + + /* + * Enable all the devices on the Super I/O chip. + */ + SANDPOINT_87308_SELECT_DEV(0x00); /* Select kbd logical device */ + SANDPOINT_87308_CFG_OUTB(0xf0, 0x00); /* Set KBC clock to 8 Mhz */ + SANDPOINT_87308_DEV_ENABLE(0x00); /* Enable keyboard */ + SANDPOINT_87308_DEV_ENABLE(0x01); /* Enable mouse */ + SANDPOINT_87308_DEV_ENABLE(0x02); /* Enable rtc */ + SANDPOINT_87308_DEV_ENABLE(0x03); /* Enable fdc (floppy) */ + SANDPOINT_87308_DEV_ENABLE(0x04); /* Enable parallel */ + SANDPOINT_87308_DEV_ENABLE(0x05); /* Enable UART 2 */ + SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ + SANDPOINT_87308_DEV_ENABLE(0x06); /* Enable UART 1 */ + SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ + + /* Set up floppy in PS/2 mode */ + outb(0x09, SIO_CONFIG_RA); + reg = inb(SIO_CONFIG_RD); + reg = (reg & 0x3F) | 0x40; + outb(reg, SIO_CONFIG_RD); + outb(reg, SIO_CONFIG_RD); /* Have to write twice to change! */ + + return 0; +} + +arch_initcall(sandpoint_setup_natl_87308); + +/* + * Fix IDE interrupts. + */ +static int __init +sandpoint_fix_winbond_83553(void) +{ + /* Make all 8259 interrupt level sensitive */ + outb(0xf8, 0x4d0); + outb(0xde, 0x4d1); + + return 0; +} + +arch_initcall(sandpoint_fix_winbond_83553); + +static int __init +sandpoint_request_io(void) +{ + request_region(0x00,0x20,"dma1"); + request_region(0x20,0x20,"pic1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xa0,0x20,"pic2"); + request_region(0xc0,0x20,"dma2"); + + return 0; +} + +arch_initcall(sandpoint_request_io); + +/* + * Interrupt setup and service. Interrupts on the Sandpoint come + * from the four PCI slots plus the 8259 in the Winbond Super I/O (SIO). + * The 8259 is cascaded from EPIC IRQ0, IRQ1-4 map to PCI slots 1-4, + * IDE is on EPIC 7 and 8. + */ +static void __init +sandpoint_init_IRQ(void) +{ + int i; + + OpenPIC_InitSenses = sandpoint_openpic_initsenses; + OpenPIC_NumInitSenses = sizeof(sandpoint_openpic_initsenses); + + /* + * We need to tell openpic_set_sources where things actually are. + * mpc10x_common will setup OpenPIC_Addr at ioremap(EUMB phys base + + * EPIC offset (0x40000)); The EPIC IRQ Register Address Map - + * Interrupt Source Configuration Registers gives these numbers + * as offsets starting at 0x50200, we need to adjust accordingly. + */ + /* Map serial interrupts 0-15 */ + openpic_set_sources(0, 16, OpenPIC_Addr + 0x10200); + + openpic_init(NUM_8259_INTERRUPTS); + + /* + * openpic_init() has set up irq_desc[16-31] to be openpic + * interrupts. We need to set irq_desc[0-15] to be i8259 + * interrupts.
+ */ + for(i=0; i < NUM_8259_INTERRUPTS; i++) + irq_desc[i].handler = &i8259_pic; + + /* + * The EPIC allows for a read in the range of 0xFEF00000 -> + * 0xFEFFFFFF to generate a PCI interrupt-acknowledge transaction. + */ + i8259_init(0xfef00000); +} + +static u32 +sandpoint_irq_canonicalize(u32 irq) +{ + if (irq == 2) + return 9; + else + return irq; +} + +static unsigned long __init +sandpoint_find_end_of_memory(void) +{ + bd_t *bp = (bd_t *)__res; + + if (bp->bi_memsize) + return bp->bi_memsize; + + /* DINK32 13.0 correctly initalizes things, so iff you use + * this you _should_ be able to change this instead of a + * hardcoded value. */ +#if 0 + return mpc10x_get_mem_size(MPC10X_MEM_MAP_B); +#else + return 32*1024*1024; +#endif +} + +static void __init +sandpoint_map_io(void) +{ + io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); +} + +static void +sandpoint_restart(char *cmd) +{ + local_irq_disable(); + + /* Set exception prefix high - to the firmware */ + _nmask_and_or_msr(0, MSR_IP); + + /* Reset system via Port 92 */ + outb(0x00, 0x92); + outb(0x01, 0x92); + for(;;); /* Spin until reset happens */ +} + +static void +sandpoint_power_off(void) +{ + local_irq_disable(); + for(;;); /* No way to shut power off with software */ + /* NOTREACHED */ +} + +static void +sandpoint_halt(void) +{ + sandpoint_power_off(); + /* NOTREACHED */ +} + +static int +sandpoint_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: Motorola SPS\n"); + seq_printf(m, "machine\t\t: Sandpoint\n"); + + return 0; +} + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +/* + * IDE support. + */ +static int sandpoint_ide_ports_known = 0; +static unsigned long sandpoint_ide_regbase[MAX_HWIFS]; +static unsigned long sandpoint_ide_ctl_regbase[MAX_HWIFS]; +static unsigned long sandpoint_idedma_regbase; + +static void +sandpoint_ide_probe(void) +{ + struct pci_dev *pdev = pci_find_device(PCI_VENDOR_ID_WINBOND, + PCI_DEVICE_ID_WINBOND_82C105, NULL); + + if (pdev) { + sandpoint_ide_regbase[0]=pdev->resource[0].start; + sandpoint_ide_regbase[1]=pdev->resource[2].start; + sandpoint_ide_ctl_regbase[0]=pdev->resource[1].start; + sandpoint_ide_ctl_regbase[1]=pdev->resource[3].start; + sandpoint_idedma_regbase=pdev->resource[4].start; + } + + sandpoint_ide_ports_known = 1; +} + +static int +sandpoint_ide_default_irq(unsigned long base) +{ + if (sandpoint_ide_ports_known == 0) + sandpoint_ide_probe(); + + if (base == sandpoint_ide_regbase[0]) + return SANDPOINT_IDE_INT0; + else if (base == sandpoint_ide_regbase[1]) + return SANDPOINT_IDE_INT1; + else + return 0; +} + +static unsigned long +sandpoint_ide_default_io_base(int index) +{ + if (sandpoint_ide_ports_known == 0) + sandpoint_ide_probe(); + + return sandpoint_ide_regbase[index]; +} + +static void __init +sandpoint_ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + uint alt_status_base; + int i; + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg++; + } + + if (data_port == sandpoint_ide_regbase[0]) { + alt_status_base = sandpoint_ide_ctl_regbase[0] + 2; + hw->irq = 14; + } + else if (data_port == sandpoint_ide_regbase[1]) { + alt_status_base = sandpoint_ide_ctl_regbase[1] + 2; + hw->irq = 15; + } + else { + alt_status_base = 0; + hw->irq = 0; + } + + if (ctrl_port) { + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + } else { + hw->io_ports[IDE_CONTROL_OFFSET] = alt_status_base; + } + + if (irq != NULL) { + *irq = 
hw->irq; + } +} +#endif + +/* + * Set BAT 3 to map 0xf8000000 to end of physical memory space 1-to-1. + */ +static __inline__ void +sandpoint_set_bat(void) +{ + unsigned long bat3u, bat3l; + + __asm__ __volatile__( + " lis %0,0xf800\n \ + ori %1,%0,0x002a\n \ + ori %0,%0,0x0ffe\n \ + mtspr 0x21e,%0\n \ + mtspr 0x21f,%1\n \ + isync\n \ + sync " + : "=r" (bat3u), "=r" (bat3l)); +} + +TODC_ALLOC(); + +void __init +platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + parse_bootinfo(find_bootinfo()); + + /* ASSUMPTION: If both r3 (bd_t pointer) and r6 (cmdline pointer) + * are non-zero, then we should use the board info from the bd_t + * structure and the cmdline pointed to by r6 instead of the + * information from birecs, if any. Otherwise, use the information + * from birecs as discovered by the preceeding call to + * parse_bootinfo(). This rule should work with both PPCBoot, which + * uses a bd_t board info structure, and the kernel boot wrapper, + * which uses birecs. + */ + if (r3 && r6) { + /* copy board info structure */ + memcpy( (void *)__res,(void *)(r3+KERNELBASE), sizeof(bd_t) ); + /* copy command line */ + *(char *)(r7+KERNELBASE) = 0; + strcpy(cmd_line, (char *)(r6+KERNELBASE)); + } + +#ifdef CONFIG_BLK_DEV_INITRD + /* take care of initrd if we have one */ + if (r4) { + initrd_start = r4 + KERNELBASE; + initrd_end = r5 + KERNELBASE; + } +#endif /* CONFIG_BLK_DEV_INITRD */ + + /* Map in board regs, etc. */ + sandpoint_set_bat(); + + isa_io_base = MPC10X_MAPB_ISA_IO_BASE; + isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; + pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; + ISA_DMA_THRESHOLD = 0x00ffffff; + DMA_MODE_READ = 0x44; + DMA_MODE_WRITE = 0x48; + + ppc_md.setup_arch = sandpoint_setup_arch; + ppc_md.show_cpuinfo = sandpoint_show_cpuinfo; + ppc_md.irq_canonicalize = sandpoint_irq_canonicalize; + ppc_md.init_IRQ = sandpoint_init_IRQ; + ppc_md.get_irq = openpic_get_irq; + + ppc_md.restart = sandpoint_restart; + ppc_md.power_off = sandpoint_power_off; + ppc_md.halt = sandpoint_halt; + + ppc_md.find_end_of_memory = sandpoint_find_end_of_memory; + ppc_md.setup_io_mappings = sandpoint_map_io; + + TODC_INIT(TODC_TYPE_PC97307, 0x70, 0x00, 0x71, 8); + ppc_md.time_init = todc_time_init; + ppc_md.set_rtc_time = todc_set_rtc_time; + ppc_md.get_rtc_time = todc_get_rtc_time; + ppc_md.calibrate_decr = todc_calibrate_decr; + + ppc_md.nvram_read_val = todc_mc146818_read_val; + ppc_md.nvram_write_val = todc_mc146818_write_val; + +#if defined(CONFIG_SERIAL_8250) && \ + (defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)) + sandpoint_early_serial_map(); +#ifdef CONFIG_SERIAL_TEXT_DEBUG + ppc_md.progress = gen550_progress; +#endif +#endif + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) + ppc_ide_md.default_irq = sandpoint_ide_default_irq; + ppc_ide_md.default_io_base = sandpoint_ide_default_io_base; + ppc_ide_md.ide_init_hwif = sandpoint_ide_init_hwif_ports; +#endif +} diff -Nru a/arch/ppc/platforms/sandpoint.h b/arch/ppc/platforms/sandpoint.h --- a/arch/ppc/platforms/sandpoint.h Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/platforms/sandpoint.h Sat Jul 5 12:40:31 2003 @@ -6,7 +6,7 @@ * Author: Mark A. Greer * mgreer@mvista.com * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under + * 2000-2003 (c) MontaVista, Software, Inc. This file is licensed under * the terms of the GNU General Public License version 2. This program * is licensed "as is" without any warranty of any kind, whether express * or implied. 
@@ -19,8 +19,8 @@ #ifndef __PPC_PLATFORMS_SANDPOINT_H #define __PPC_PLATFORMS_SANDPOINT_H -#ifdef CONFIG_SANDPOINT_X3 -#define SANDPOINT_SIO_SLOT 0 /* Cascaded from EPIC IRQ 0 */ +#include + #if 0 /* The Sandpoint X3 allows the IDE interrupt to be directly connected * from the Windbond (PCI INTC or INTD) to the serial EPIC. Someday @@ -28,27 +28,13 @@ * initialization than change it to route the different interrupts :-). * -- Dan */ -#define SANDPOINT_IDE_INT0 23 /* EPIC 7 */ -#define SANDPOINT_IDE_INT1 24 /* EPIC 8 */ -#else -#define SANDPOINT_IDE_INT0 14 /* 8259 Test */ -#define SANDPOINT_IDE_INT1 15 /* 8259 Test */ -#endif +#define SANDPOINT_IDE_INT0 23 /* EPIC 7 */ +#define SANDPOINT_IDE_INT1 24 /* EPIC 8 */ #else - /* - * Define the PCI slot that the 8259 is sharing interrupts with. - * Valid values are 1 (PCI slot 2) and 2 (PCI slot 3). - */ -#define SANDPOINT_SIO_SLOT 1 - -/* ...and for the IDE from the 8259.... -*/ -#define SANDPOINT_IDE_INT0 14 -#define SANDPOINT_IDE_INT1 15 +#define SANDPOINT_IDE_INT0 14 /* 8259 Test */ +#define SANDPOINT_IDE_INT1 15 /* 8259 Test */ #endif -#define SANDPOINT_SIO_IRQ (SANDPOINT_SIO_SLOT + NUM_8259_INTERRUPTS) - /* * The sandpoint boards have processor modules that either have an 8240 or * an MPC107 host bridge on them. These bridges have an IDSEL line that allows @@ -62,7 +48,33 @@ */ #define SANDPOINT_HOST_BRIDGE_IDSEL 12 +/* + * Serial defines. + */ +#define SANDPOINT_SERIAL_0 0xfe0003f8 +#define SANDPOINT_SERIAL_1 0xfe0002f8 + +#define RS_TABLE_SIZE 2 + +/* Rate for the 1.8432 Mhz clock for the onboard serial chip */ +#define BASE_BAUD ( 1843200 / 16 ) +#define UART_CLK 1843200 + +#ifdef CONFIG_SERIAL_DETECT_IRQ +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST|ASYNC_AUTO_IRQ) +#else +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST) +#endif + +#define STD_SERIAL_PORT_DFNS \ + { 0, BASE_BAUD, SANDPOINT_SERIAL_0, 4, STD_COM_FLAGS, /* ttyS0 */ \ + iomem_base: (u8 *)SANDPOINT_SERIAL_0, \ + io_type: SERIAL_IO_MEM }, \ + { 0, BASE_BAUD, SANDPOINT_SERIAL_1, 3, STD_COM_FLAGS, /* ttyS1 */ \ + iomem_base: (u8 *)SANDPOINT_SERIAL_1, \ + io_type: SERIAL_IO_MEM }, -void sandpoint_find_bridges(void); +#define SERIAL_PORT_DFNS \ + STD_SERIAL_PORT_DFNS #endif /* __PPC_PLATFORMS_SANDPOINT_H */ diff -Nru a/arch/ppc/platforms/sandpoint_pci.c b/arch/ppc/platforms/sandpoint_pci.c --- a/arch/ppc/platforms/sandpoint_pci.c Sat Jul 5 12:40:31 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,181 +0,0 @@ -/* - * arch/ppc/platforms/sandpoint_pci.c - * - * PCI setup routines for the Motorola SPS Sandpoint Test Platform - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "sandpoint.h" - -/* - * Motorola SPS Sandpoint interrupt routing. 
- */ -static inline int -sandpoint_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) -{ - static char pci_irq_table[][4] = - /* - * PCI IDSEL/INTPIN->INTLINE - * A B C D - */ - { - { SANDPOINT_SIO_IRQ, - 0, 0, 0 }, /* IDSEL 11 - i8259 on Winbond */ - { 0, 0, 0, 0 }, /* IDSEL 12 - unused */ -#ifdef CONFIG_SANDPOINT_X3 -#if 0 /* This is what it _should_ look like -- Dan */ - { 17, 20, 19, 18 }, /* IDSEL 13 - PCI slot 1 */ - { 18, 17, 20, 19 }, /* IDSEL 14 - PCI slot 2 */ - { 19, 18, 17, 20 }, /* IDSEL 15 - PCI slot 3 */ - { 20, 19, 18, 17 }, /* IDSEL 16 - PCI slot 4 */ -#else - { 18, 21, 20, 19 }, /* IDSEL 13 - PCI slot 1 */ - { 19, 18, 21, 20 }, /* IDSEL 14 - PCI slot 2 */ - { 20, 19, 18, 21 }, /* IDSEL 15 - PCI slot 3 */ - { 21, 20, 19, 18 }, /* IDSEL 16 - PCI slot 4 */ -#endif -#else - { 16, 19, 18, 17 }, /* IDSEL 13 - PCI slot 1 */ - { 17, 16, 19, 18 }, /* IDSEL 14 - PCI slot 2 */ - { 18, 17, 16, 19 }, /* IDSEL 15 - PCI slot 3 */ - { 19, 18, 17, 16 }, /* IDSEL 16 - PCI slot 4 */ -#endif - }; - - const long min_idsel = 11, max_idsel = 16, irqs_per_slot = 4; - return PCI_IRQ_TABLE_LOOKUP; -} - -static void __init -sandpoint_setup_winbond_83553(struct pci_controller *hose) -{ - int devfn; - - /* - * Route IDE interrupts directly to the 8259's IRQ 14 & 15. - * We can't route the IDE interrupt to PCI INTC# or INTD# because those - * woule interfere with the PMC's INTC# and INTD# lines. - */ - /* - * Winbond Fcn 0 - */ - devfn = PCI_DEVFN(11,0); - - early_write_config_byte(hose, - 0, - devfn, - 0x43, /* IDE Interrupt Routing Control */ - 0xef); - early_write_config_word(hose, - 0, - devfn, - 0x44, /* PCI Interrupt Routing Control */ - 0x0000); - - /* Want ISA memory cycles to be forwarded to PCI bus */ - early_write_config_byte(hose, - 0, - devfn, - 0x48, /* ISA-to-PCI Addr Decoder Control */ - 0xf0); - - /* Enable RTC and Keyboard address locations. */ - early_write_config_byte(hose, - 0, - devfn, - 0x4d, /* Chip Select Control Register */ - 0x00); - - /* Enable Port 92. */ - early_write_config_byte(hose, - 0, - devfn, - 0x4e, /* AT System Control Register */ - 0x06); - /* - * Winbond Fcn 1 - */ - devfn = PCI_DEVFN(11,1); - - /* Put IDE controller into native mode. 
*/ - early_write_config_byte(hose, - 0, - devfn, - 0x09, /* Programming interface Register */ - 0x8f); - - /* Init IRQ routing, enable both ports, disable fast 16 */ - early_write_config_dword(hose, - 0, - devfn, - 0x40, /* IDE Control/Status Register */ - 0x00ff0011); - return; -} - -static int -sandpoint_exclude_device(u_char bus, u_char devfn) -{ - if ((bus == 0) && (PCI_SLOT(devfn) == SANDPOINT_HOST_BRIDGE_IDSEL)) { - return PCIBIOS_DEVICE_NOT_FOUND; - } - else { - return PCIBIOS_SUCCESSFUL; - } -} - -void __init -sandpoint_find_bridges(void) -{ - struct pci_controller *hose; - - hose = pcibios_alloc_controller(); - - if (!hose) - return; - - hose->first_busno = 0; - hose->last_busno = 0xff; - - if (mpc10x_bridge_init(hose, - MPC10X_MEM_MAP_B, - MPC10X_MEM_MAP_B, - MPC10X_MAPB_EUMB_BASE) == 0) { - - /* Do early winbond init, then scan PCI bus */ - sandpoint_setup_winbond_83553(hose); - ppc_md.pci_exclude_device = sandpoint_exclude_device; - hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); - - ppc_md.pcibios_fixup = NULL; - ppc_md.pcibios_fixup_bus = NULL; - ppc_md.pci_swizzle = common_swizzle; - ppc_md.pci_map_irq = sandpoint_map_irq; - } - else { - if (ppc_md.progress) - ppc_md.progress("Bridge init failed", 0x100); - printk("Host bridge init failed\n"); - } - - return; -} diff -Nru a/arch/ppc/platforms/sandpoint_serial.h b/arch/ppc/platforms/sandpoint_serial.h --- a/arch/ppc/platforms/sandpoint_serial.h Sat Jul 5 12:40:31 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,49 +0,0 @@ -/* - * include/asm-ppc/sandpoint_serial.h - * - * Definitions for Motorola SPS Sandpoint Test Platform - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#ifndef __ASMPPC_SANDPOINT_SERIAL_H -#define __ASMPPC_SANDPOINT_SERIAL_H - -#include - -#define SANDPOINT_SERIAL_0 0xfe0003f8 -#define SANDPOINT_SERIAL_1 0xfe0002f8 - -#ifdef CONFIG_SERIAL_MANY_PORTS -#define RS_TABLE_SIZE 64 -#else -#define RS_TABLE_SIZE 2 -#endif - -/* Rate for the 1.8432 Mhz clock for the onboard serial chip */ -#define BASE_BAUD ( 1843200 / 16 ) - -#ifdef CONFIG_SERIAL_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST|ASYNC_AUTO_IRQ) -#else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST) -#endif - -#define STD_SERIAL_PORT_DFNS \ - { 0, BASE_BAUD, SANDPOINT_SERIAL_0, 4, STD_COM_FLAGS, /* ttyS0 */ \ - iomem_base: (u8 *)SANDPOINT_SERIAL_0, \ - io_type: SERIAL_IO_MEM }, \ - { 0, BASE_BAUD, SANDPOINT_SERIAL_1, 3, STD_COM_FLAGS, /* ttyS1 */ \ - iomem_base: (u8 *)SANDPOINT_SERIAL_1, \ - io_type: SERIAL_IO_MEM }, - -#define SERIAL_PORT_DFNS \ - STD_SERIAL_PORT_DFNS - -#endif /* __ASMPPC_SANDPOINT_SERIAL_H */ diff -Nru a/arch/ppc/platforms/sandpoint_setup.c b/arch/ppc/platforms/sandpoint_setup.c --- a/arch/ppc/platforms/sandpoint_setup.c Sat Jul 5 12:40:34 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,643 +0,0 @@ -/* - * arch/ppc/platforms/sandpoint_setup.c - * - * Board setup routines for the Motorola SPS Sandpoint Test Platform. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2002 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ - -/* - * This file adds support for the Motorola SPS Sandpoint Test Platform. - * These boards have a PPMC slot for the processor so any combination - * of cpu and host bridge can be attached. This port is for an 8240 PPMC - * module from Motorola SPS and other closely related cpu/host bridge - * combinations (e.g., 750/755/7400 with MPC107 host bridge). - * The sandpoint itself has a Windbond 83c553 (PCI-ISA bridge, 2 DMA ctlrs, 2 - * cascaded 8259 interrupt ctlrs, 8254 Timer/Counter, and an IDE ctlr), a - * National 87308 (RTC, 2 UARTs, Keyboard & mouse ctlrs, and a floppy ctlr), - * and 4 PCI slots (only 2 of which are usable; the other 2 are keyed for 3.3V - * but are really 5V). - * - * The firmware on the sandpoint is called DINK (not my acronym :). This port - * depends on DINK to do some basic initialization (e.g., initialize the memory - * ctlr) and to ensure that the processor is using MAP B (CHRP map). - * - * The switch settings for the Sandpoint board MUST be as follows: - * S3: down - * S4: up - * S5: up - * S6: down - * - * 'down' is in the direction from the PCI slots towards the PPMC slot; - * 'up' is in the direction from the PPMC slot towards the PCI slots. - * Be careful, the way the sandpoint board is installed in XT chasses will - * make the directions reversed. - * - * Since Motorola listened to our suggestions for improvement, we now have - * the Sandpoint X3 board. All of the PCI slots are available, it uses - * the serial interrupt interface (just a hardware thing we need to - * configure properly). - * - * Use the default X3 switch settings. The interrupts are then: - * EPIC Source - * 0 SIOINT (8259, active low) - * 1 PCI #1 - * 2 PCI #2 - * 3 PCI #3 - * 4 PCI #4 - * 7 Winbond INTC (IDE interrupt) - * 8 Winbond INTD (IDE interrupt) - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sandpoint.h" - -extern u_int openpic_irq(void); -extern void openpic_eoi(void); - -static void sandpoint_halt(void); - - -/* - * *** IMPORTANT *** - * - * The first 16 entries of 'sandpoint_openpic_initsenses[]' are there and - * initialized to 0 on purpose. DO NOT REMOVE THEM as the 'offset' parameter - * of 'openpic_init()' does not work for the sandpoint because the 8259 - * interrupt is NOT routed to the EPIC's IRQ 0 AND the EPIC's IRQ 0's offset is - * the same as a normal openpic's IRQ 16 offset. - */ -static u_char sandpoint_openpic_initsenses[] __initdata = { - 0, /* 0-15 not used by EPCI but by 8259 (std PC-type IRQs) */ - 0, /* 1 */ - 0, /* 2 */ - 0, /* 3 */ - 0, /* 4 */ - 0, /* 5 */ - 0, /* 6 */ - 0, /* 7 */ - 0, /* 8 */ - 0, /* 9 */ - 0, /* 10 */ - 0, /* 11 */ - 0, /* 12 */ - 0, /* 13 */ - 0, /* 14 */ - 0, /* 15 */ -#ifdef CONFIG_SANDPOINT_X3 - 1, /* 16: EPIC IRQ 0: Active Low -- SIOINT (8259) */ - 0, /* AACK! 
Shouldn't need this.....see sandpoint_pci.c for more info */ - 1, /* 17: EPIC IRQ 1: Active Low -- PCI Slot 1 */ - 1, /* 18: EPIC IRQ 2: Active Low -- PCI Slot 2 */ - 1, /* 19: EPIC IRQ 3: Active Low -- PCI Slot 3 */ - 1, /* 20: EPIC IRQ 4: Active Low -- PCI Slot 4 */ - 0, /* 21 -- Unused */ - 0, /* 22 -- Unused */ - 1, /* 23 -- IDE (Winbond INT C) */ - 1, /* 24 -- IDE (Winbond INT D) */ - /* 35 - 31 (EPIC 9 - 15) Unused */ -#else - 1, /* 16: EPIC IRQ 0: Active Low -- PCI intrs */ - 1, /* 17: EPIC IRQ 1: Active Low -- PCI (possibly 8259) intrs */ - 1, /* 18: EPIC IRQ 2: Active Low -- PCI (possibly 8259) intrs */ - 1 /* 19: EPIC IRQ 3: Active Low -- PCI intrs */ - /* 20: EPIC IRQ 4: Not used */ -#endif -}; - -static void __init -sandpoint_setup_arch(void) -{ - loops_per_jiffy = 100000000 / HZ; - -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - ROOT_DEV = Root_RAM0; - else -#endif -#ifdef CONFIG_ROOT_NFS - ROOT_DEV = Root_NFS; -#else - ROOT_DEV = Root_HDA1; -#endif - - /* Lookup PCI host bridges */ - sandpoint_find_bridges(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - printk("Motorola SPS Sandpoint Test Platform\n"); - printk("Sandpoint port (MontaVista Software, Inc. (source@mvista.com))\n"); - - /* The Sandpoint rom doesn't enable any caches. Do that now. - * The 7450 portion will also set up the L3s once I get enough - * information do do so. If the processor running doesn't have - * and L2, the _set_L2CR is a no-op. - */ - if (cur_cpu_spec[0]->cpu_features & CPU_FTR_SPEC7450) { - /* Just enable L2, the bits are different from others. - */ - _set_L2CR(L2CR_L2E); - } - else { - /* The magic number for Sandpoint/74xx PrPMCs. - */ - _set_L2CR(0xbd014000); - } -} - -#define SANDPOINT_87308_CFG_ADDR 0x15c -#define SANDPOINT_87308_CFG_DATA 0x15d - -#define SANDPOINT_87308_CFG_INB(addr, byte) { \ - outb((addr), SANDPOINT_87308_CFG_ADDR); \ - (byte) = inb(SANDPOINT_87308_CFG_DATA); \ -} - -#define SANDPOINT_87308_CFG_OUTB(addr, byte) { \ - outb((addr), SANDPOINT_87308_CFG_ADDR); \ - outb((byte), SANDPOINT_87308_CFG_DATA); \ -} - -#define SANDPOINT_87308_SELECT_DEV(dev_num) { \ - SANDPOINT_87308_CFG_OUTB(0x07, (dev_num)); \ -} - -#define SANDPOINT_87308_DEV_ENABLE(dev_num) { \ - SANDPOINT_87308_SELECT_DEV(dev_num); \ - SANDPOINT_87308_CFG_OUTB(0x30, 0x01); \ -} - -/* - * Initialize the ISA devices on the Nat'l PC87308VUL SuperIO chip. - */ -static void __init -sandpoint_setup_natl_87308(void) -{ - u_char reg; - - /* - * Enable all the devices on the Super I/O chip. - */ - SANDPOINT_87308_SELECT_DEV(0x00); /* Select kbd logical device */ - SANDPOINT_87308_CFG_OUTB(0xf0, 0x00); /* Set KBC clock to 8 Mhz */ - SANDPOINT_87308_DEV_ENABLE(0x00); /* Enable keyboard */ - SANDPOINT_87308_DEV_ENABLE(0x01); /* Enable mouse */ - SANDPOINT_87308_DEV_ENABLE(0x02); /* Enable rtc */ - SANDPOINT_87308_DEV_ENABLE(0x03); /* Enable fdc (floppy) */ - SANDPOINT_87308_DEV_ENABLE(0x04); /* Enable parallel */ - SANDPOINT_87308_DEV_ENABLE(0x05); /* Enable UART 2 */ - SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ - SANDPOINT_87308_DEV_ENABLE(0x06); /* Enable UART 1 */ - SANDPOINT_87308_CFG_OUTB(0xf0, 0x82); /* Enable bank select regs */ - - /* Set up floppy in PS/2 mode */ - outb(0x09, SIO_CONFIG_RA); - reg = inb(SIO_CONFIG_RD); - reg = (reg & 0x3F) | 0x40; - outb(reg, SIO_CONFIG_RD); - outb(reg, SIO_CONFIG_RD); /* Have to write twice to change! */ - - return; -} - -/* - * Fix IDE interrupts. 
- */ -static void __init -sandpoint_fix_winbond_83553(void) -{ - /* Make all 8259 interrupt level sensitive */ - outb(0xf8, 0x4d0); - outb(0xde, 0x4d1); - - return; -} - -static void __init -sandpoint_init2(void) -{ - /* Do Sandpoint board specific initialization. */ - sandpoint_fix_winbond_83553(); - sandpoint_setup_natl_87308(); - - request_region(0x00,0x20,"dma1"); - request_region(0x20,0x20,"pic1"); - request_region(0x40,0x20,"timer"); - request_region(0x80,0x10,"dma page reg"); - request_region(0xa0,0x20,"pic2"); - request_region(0xc0,0x20,"dma2"); - - return; -} - -/* - * Interrupt setup and service. Interrrupts on the Sandpoint come - * from the four PCI slots plus the 8259 in the Winbond Super I/O (SIO). - * These interrupts are sent to one of four IRQs on the EPIC. - * The SIO shares its interrupt with either slot 2 or slot 3 (INTA#). - * Slot numbering is confusing. Sometimes in the documentation they - * use 0,1,2,3 and others 1,2,3,4. We will use slots 1,2,3,4 and - * map this to IRQ 16, 17, 18, 19. - * For Sandpoint X3, this has been better designed. The 8259 is - * cascaded from EPIC IRQ0, IRQ1-4 map to PCI slots 1-4, IDE is on - * EPIC 7 and 8. - */ -static void __init -sandpoint_init_IRQ(void) -{ - int i; - - /* - * 3 things cause us to jump through some hoops: - * 1) the EPIC on the 8240 & 107 are not full-blown openpic pic's - * 2) the 8259 is NOT cascaded on the openpic IRQ 0 - * 3) the 8259 shares its interrupt line with some PCI interrupts. - * - * What we'll do is set up the 8259 to be level sensitive, active low - * just like a PCI device. Then, when an interrupt on the IRQ that is - * shared with the 8259 comes in, we'll take a peek at the 8259 to see - * it its generating an interrupt. If it is, we'll handle the 8259 - * interrupt. Otherwise, we'll handle it just like a normal PCI - * interrupt. This does give the 8259 interrupts a higher priority - * than the EPIC ones--hopefully, not a problem. - */ - OpenPIC_InitSenses = sandpoint_openpic_initsenses; - OpenPIC_NumInitSenses = sizeof(sandpoint_openpic_initsenses); - - openpic_init(1, 0, NULL, -1); - - /* - * openpic_init() has set up irq_desc[0-23] to be openpic - * interrupts. We need to set irq_desc[0-15] to be 8259 interrupts. - * We then need to request and enable the 8259 irq. - */ - for(i=0; i < NUM_8259_INTERRUPTS; i++) - irq_desc[i].handler = &i8259_pic; - - if (request_irq(SANDPOINT_SIO_IRQ, no_action, SA_INTERRUPT, - "8259 cascade to EPIC", NULL)) { - - printk("Unable to get OpenPIC IRQ %d for cascade\n", - SANDPOINT_SIO_IRQ); - } - - i8259_init(NULL); -} - -static int -sandpoint_get_irq(struct pt_regs *regs) -{ - int irq, cascade_irq; - - irq = openpic_irq(); - - if (irq == SANDPOINT_SIO_IRQ) { - cascade_irq = i8259_irq(regs); - - if (cascade_irq != -1) { - irq = cascade_irq; - openpic_eoi(); - } - } - else if (irq == OPENPIC_VEC_SPURIOUS) { - irq = -1; - } - - return irq; -} - -static u32 -sandpoint_irq_canonicalize(u32 irq) -{ - if (irq == 2) - { - return 9; - } - else - { - return irq; - } -} - -static ulong __init -sandpoint_find_end_of_memory(void) -{ - ulong size = 0; - -#if 0 /* Leave out until DINK sets mem ctlr correctly */ - size = mpc10x_get_mem_size(MPC10X_MEM_MAP_B); -#else - size = 32*1024*1024; -#endif - - return size; -} - -static void __init -sandpoint_map_io(void) -{ - io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); -} - -/* - * Due to Sandpoint X2 errata, the Port 92 will not work. 
- */ -static void -sandpoint_restart(char *cmd) -{ - local_irq_disable(); - - /* Set exception prefix high - to the firmware */ - _nmask_and_or_msr(0, MSR_IP); - - /* Reset system via Port 92 */ - outb(0x00, 0x92); - outb(0x01, 0x92); - for(;;); /* Spin until reset happens */ -} - -static void -sandpoint_power_off(void) -{ - local_irq_disable(); - for(;;); /* No way to shut power off with software */ - /* NOTREACHED */ -} - -static void -sandpoint_halt(void) -{ - sandpoint_power_off(); - /* NOTREACHED */ -} - -static int -sandpoint_show_cpuinfo(struct seq_file *m) -{ - uint pvid; - - pvid = mfspr(PVR); - - seq_printf(m, "vendor\t\t: Motorola SPS\n"); - seq_printf(m, "machine\t\t: Sandpoint\n"); - seq_printf(m, "processor\t: PVID: 0x%x, vendor: %s\n", - pvid, (pvid & (1<<15) ? "IBM" : "Motorola")); - - return 0; -} - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) -/* - * IDE support. - */ -static int sandpoint_ide_ports_known = 0; -static unsigned long sandpoint_ide_regbase[MAX_HWIFS]; -static unsigned long sandpoint_ide_ctl_regbase[MAX_HWIFS]; -static unsigned long sandpoint_idedma_regbase; - -static void -sandpoint_ide_probe(void) -{ - struct pci_dev *pdev = pci_find_device(PCI_VENDOR_ID_WINBOND, - PCI_DEVICE_ID_WINBOND_82C105, - NULL); - - if(pdev) { - sandpoint_ide_regbase[0]=pdev->resource[0].start; - sandpoint_ide_regbase[1]=pdev->resource[2].start; - sandpoint_ide_ctl_regbase[0]=pdev->resource[1].start; - sandpoint_ide_ctl_regbase[1]=pdev->resource[3].start; - sandpoint_idedma_regbase=pdev->resource[4].start; - } - - sandpoint_ide_ports_known = 1; - return; -} - -static int -sandpoint_ide_default_irq(unsigned long base) -{ - if (sandpoint_ide_ports_known == 0) - sandpoint_ide_probe(); - - if (base == sandpoint_ide_regbase[0]) - return SANDPOINT_IDE_INT0; - else if (base == sandpoint_ide_regbase[1]) - return SANDPOINT_IDE_INT1; - else - return 0; -} - -static unsigned long -sandpoint_ide_default_io_base(int index) -{ - if (sandpoint_ide_ports_known == 0) - sandpoint_ide_probe(); - - return sandpoint_ide_regbase[index]; -} - -static void __init -sandpoint_ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - uint alt_status_base; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg++; - } - - if (data_port == sandpoint_ide_regbase[0]) { - alt_status_base = sandpoint_ide_ctl_regbase[0] + 2; - hw->irq = 14; - } - else if (data_port == sandpoint_ide_regbase[1]) { - alt_status_base = sandpoint_ide_ctl_regbase[1] + 2; - hw->irq = 15; - } - else { - alt_status_base = 0; - hw->irq = 0; - } - - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = alt_status_base; - } - - if (irq != NULL) { - *irq = hw->irq; - } - - return; -} -#endif - -/* - * Set BAT 3 to map 0xf8000000 to end of physical memory space 1-to-1. 
- */ -static __inline__ void -sandpoint_set_bat(void) -{ - unsigned long bat3u, bat3l; - static int mapping_set = 0; - - if (!mapping_set) { - - __asm__ __volatile__( - " lis %0,0xf800\n \ - ori %1,%0,0x002a\n \ - ori %0,%0,0x0ffe\n \ - mtspr 0x21e,%0\n \ - mtspr 0x21f,%1\n \ - isync\n \ - sync " - : "=r" (bat3u), "=r" (bat3l)); - - mapping_set = 1; - } - - return; -} - -#ifdef CONFIG_SERIAL_TEXT_DEBUG -#include -#include -#include - -static struct serial_state rs_table[RS_TABLE_SIZE] = { - SERIAL_PORT_DFNS /* Defined in */ -}; - -static void -sandpoint_progress(char *s, unsigned short hex) -{ - volatile char c; - volatile unsigned long com_port; - u16 shift; - - com_port = rs_table[0].port; - shift = rs_table[0].iomem_reg_shift; - - while ((c = *s++) != 0) { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) == 0) - ; - *(volatile unsigned char *)com_port = c; - - if (c == '\n') { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) == 0) - ; - *(volatile unsigned char *)com_port = '\r'; - } - } -} -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - -__init void sandpoint_setup_pci_ptrs(void); - -TODC_ALLOC(); - -void __init -platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - parse_bootinfo(find_bootinfo()); - - /* Map in board regs, etc. */ - sandpoint_set_bat(); - - isa_io_base = MPC10X_MAPB_ISA_IO_BASE; - isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; - pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; - ISA_DMA_THRESHOLD = 0x00ffffff; - DMA_MODE_READ = 0x44; - DMA_MODE_WRITE = 0x48; - - ppc_md.setup_arch = sandpoint_setup_arch; - ppc_md.show_cpuinfo = sandpoint_show_cpuinfo; - ppc_md.irq_canonicalize = sandpoint_irq_canonicalize; - ppc_md.init_IRQ = sandpoint_init_IRQ; - ppc_md.get_irq = sandpoint_get_irq; - ppc_md.init = sandpoint_init2; - - ppc_md.restart = sandpoint_restart; - ppc_md.power_off = sandpoint_power_off; - ppc_md.halt = sandpoint_halt; - - ppc_md.find_end_of_memory = sandpoint_find_end_of_memory; - ppc_md.setup_io_mappings = sandpoint_map_io; - - TODC_INIT(TODC_TYPE_PC97307, 0x70, 0x00, 0x71, 8); - ppc_md.time_init = todc_time_init; - ppc_md.set_rtc_time = todc_set_rtc_time; - ppc_md.get_rtc_time = todc_get_rtc_time; - ppc_md.calibrate_decr = todc_calibrate_decr; - - ppc_md.nvram_read_val = todc_mc146818_read_val; - ppc_md.nvram_write_val = todc_mc146818_write_val; - - ppc_md.heartbeat = NULL; - ppc_md.heartbeat_reset = 0; - ppc_md.heartbeat_count = 0; - -#ifdef CONFIG_SERIAL_TEXT_DEBUG - ppc_md.progress = sandpoint_progress; -#else /* !CONFIG_SERIAL_TEXT_DEBUG */ - ppc_md.progress = NULL; -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) - ppc_ide_md.default_irq = sandpoint_ide_default_irq; - ppc_ide_md.default_io_base = sandpoint_ide_default_io_base; - ppc_ide_md.ide_init_hwif = sandpoint_ide_init_hwif_ports; -#endif - - return; -} diff -Nru a/arch/ppc/platforms/zx4500.h b/arch/ppc/platforms/zx4500.h --- a/arch/ppc/platforms/zx4500.h Sat Jul 5 12:40:32 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,68 +0,0 @@ -/* * arch/ppc/platforms/zx4500.h - * - * Board setup routines for Znyx ZX4500 cPCI board. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ -#ifndef __PPC_PLATFORMS_ZX4500_H_ -#define __PPC_PLATFORMS_ZX4500_H_ - -/* - * Define the addresses of CPLD registers in CLPD area. - */ -#define ZX4500_CPLD_BOARD_ID 0xff800001 -#define ZX4500_CPLD_REV 0xff800002 -#define ZX4500_CPLD_RESET 0xff800011 -#define ZX4500_CPLD_PHY1 0xff800014 -#define ZX4500_CPLD_PHY2 0xff800015 -#define ZX4500_CPLD_PHY3 0xff800016 -#define ZX4500_CPLD_SYSCTL 0xff800017 -#define ZX4500_CPLD_EXT_FLASH 0xff800018 -#define ZX4500_CPLD_DUAL1 0xff800019 -#define ZX4500_CPLD_DUAL2 0xff80001A -#define ZX4500_CPLD_STATUS 0xff800030 -#define ZX4500_CPLD_STREAM 0xff800032 -#define ZX4500_CPLD_PHY1_LED 0xff800034 -#define ZX4500_CPLD_PHY2_LED 0xff800035 -#define ZX4500_CPLD_PHY3_LED 0xff800036 -#define ZX4500_CPLD_PHY1_LNK 0xff80003C -#define ZX4500_CPLD_PHY2_LNK 0xff80003D -#define ZX4500_CPLD_PHY3_LNK 0xff80003E - -#define ZX4500_CPLD_RESET_SOFT 0x01 /* Soft Reset */ -#define ZX4500_CPLD_RESET_XBUS 0x40 /* Reset entire board */ - -#define ZX4500_CPLD_SYSCTL_PMC 0x01 /* Enable INTA/B/C/D from PMC */ -#define ZX4500_CPLD_SYSCTL_BCM 0x04 /* Enable INTA from BCM */ -#define ZX4500_CPLD_SYSCTL_SINTA 0x08 /* Enable SINTA from 21554 */ -#define ZX4500_CPLD_SYSCTL_WD 0x20 /* Enable Watchdog Timer */ -#define ZX4500_CPLD_SYSCTL_PMC_TRI 0x80 /* Tri-state PMC EREADY */ - -#define ZX4500_CPLD_DUAL2_LED_PULL 0x01 /* Pull LED */ -#define ZX4500_CPLD_DUAL2_LED_EXT_FAULT 0x02 /* External Fault LED */ -#define ZX4500_CPLD_DUAL2_LED_INT_FAULT 0x04 /* Internal Fault LED */ -#define ZX4500_CPLD_DUAL2_LED_OK 0x08 /* OK LED */ -#define ZX4500_CPLD_DUAL2_LED_CLK 0x10 /* CLK LED */ - -/* - * Defines related to boot string stored in flash. - */ -#define ZX4500_BOOT_STRING_ADDR 0xfff7f000 -#define ZX4500_BOOT_STRING_LEN 80 - -/* - * Define the IDSEL that the PCI bus side of the 8240 is connected to. - * This IDSEL must not be selected from the 8240 processor side. - */ -#define ZX4500_HOST_BRIDGE_IDSEL 20 - - -void zx4500_find_bridges(void); - -#endif /* __PPC_PLATFORMS_ZX4500_H_ */ diff -Nru a/arch/ppc/platforms/zx4500_pci.c b/arch/ppc/platforms/zx4500_pci.c --- a/arch/ppc/platforms/zx4500_pci.c Sat Jul 5 12:40:32 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,138 +0,0 @@ -/* - * arch/ppc/platforms/zx4500_pci.c - * - * PCI setup routines for Znyx ZX4500 cPCI boards. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "zx4500.h" - -/* - * Znyx ZX4500 interrupt routes. - */ -static inline int -zx4500_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin) -{ - static char pci_irq_table[][4] = - /* - * PCI IDSEL/INTPIN->INTLINE - * A B C D - */ - { - { 19, 0, 0, 0 }, /* IDSEL 21 - 21554 PCI-cPCI bridge */ - { 18, 0, 0, 0 }, /* IDSEL 22 - BCM5600 INTA */ - { 16, 20, 16, 20 }, /* IDSEL 23 - PPMC Slot */ - }; - - const long min_idsel = 21, max_idsel = 23, irqs_per_slot = 4; - return PCI_IRQ_TABLE_LOOKUP; -} - -void __init -zx4500_board_init(struct pci_controller *hose) -{ - uint val; - u_char sysctl; - - /* - * CPLD Registers are mapped in by BAT 3 in zx4500_setup_arch(). - * - * Turn off all interrupts routed through the CPLD. - * Also, turn off watchdog timer and drive PMC EREADY low. 
- */ - sysctl = in_8((volatile u_char *)ZX4500_CPLD_SYSCTL); - sysctl &= ~(ZX4500_CPLD_SYSCTL_PMC | - ZX4500_CPLD_SYSCTL_BCM | - ZX4500_CPLD_SYSCTL_SINTA | - ZX4500_CPLD_SYSCTL_WD | - ZX4500_CPLD_SYSCTL_PMC_TRI); - out_8((volatile u_char *)ZX4500_CPLD_SYSCTL, sysctl); - - /* - * Kludge the size that BAR2 of the 21554 asks for - * (i.e., set Upstream I/O or Memory 0 Setup Register). - * Old versions of SROM wants 1 GB which is too large, make it ask - * for 256 MB. - */ - early_read_config_dword(hose, 0, PCI_DEVFN(21,0), 0xc4, &val); - - if (val != 0) { - early_write_config_dword(hose, - 0, - PCI_DEVFN(21,0), - 0xc4, - val | 0xf0000000); - } - - return; -} - -static int -zx4500_exclude_device(u_char bus, u_char devfn) -{ - if ((bus == 0) && (PCI_SLOT(devfn) == ZX4500_HOST_BRIDGE_IDSEL)) { - return PCIBIOS_DEVICE_NOT_FOUND; - } - else { - return PCIBIOS_SUCCESSFUL; - } -} - -void __init -zx4500_find_bridges(void) -{ - struct pci_controller *hose; - - hose = pcibios_alloc_controller(); - - if (!hose) - return; - - hose->first_busno = 0; - hose->last_busno = 0xff; - - if (mpc10x_bridge_init(hose, - MPC10X_MEM_MAP_B, - MPC10X_MEM_MAP_B, - MPC10X_MAPB_EUMB_BASE) == 0) { - - hose->mem_resources[0].end = 0xffffffff; - - /* Initialize the board */ - zx4500_board_init(hose); - - /* scan PCI bus */ - ppc_md.pci_exclude_device = zx4500_exclude_device; - hose->last_busno = pciauto_bus_scan(hose, hose->first_busno); - - ppc_md.pcibios_fixup = NULL; - ppc_md.pcibios_fixup_bus = NULL; - ppc_md.pci_swizzle = common_swizzle; - ppc_md.pci_map_irq = zx4500_map_irq; - } - else { - if (ppc_md.progress) - ppc_md.progress("Bridge init failed", 0x100); - printk("Host bridge init failed\n"); - } - - return; -} diff -Nru a/arch/ppc/platforms/zx4500_serial.h b/arch/ppc/platforms/zx4500_serial.h --- a/arch/ppc/platforms/zx4500_serial.h Sat Jul 5 12:40:32 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,46 +0,0 @@ -/* - * arch/ppc/platforms/zx4500_serial.h - * - * Definitions for Znyx ZX4500 board support - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#ifndef __ASMPPC_ZX4500_SERIAL_H -#define __ASMPPC_ZX4500_SERIAL_H - -#include - -/* Define the UART base address (only 1 UART) */ -#define ZX4500_SERIAL_1 0xff880000 - -#ifdef CONFIG_SERIAL_MANY_PORTS -#define RS_TABLE_SIZE 64 -#else -#define RS_TABLE_SIZE 1 -#endif - -/* Rate for the 1.8432 Mhz clock for the onboard serial chip */ -#define BASE_BAUD ( 1843200 / 16 ) - -#ifdef CONFIG_SERIAL_DETECT_IRQ -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST|ASYNC_AUTO_IRQ) -#else -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF|ASYNC_SKIP_TEST) -#endif - -#define STD_SERIAL_PORT_DFNS \ - { 0, BASE_BAUD, ZX4500_SERIAL_1, 17, STD_COM_FLAGS, /* ttyS0 */ \ - iomem_base: (u8 *)ZX4500_SERIAL_1, \ - io_type: SERIAL_IO_MEM }, - -#define SERIAL_PORT_DFNS \ - STD_SERIAL_PORT_DFNS - -#endif /* __ASMPPC_ZX4500_SERIAL_H */ diff -Nru a/arch/ppc/platforms/zx4500_setup.c b/arch/ppc/platforms/zx4500_setup.c --- a/arch/ppc/platforms/zx4500_setup.c Sat Jul 5 12:40:31 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,359 +0,0 @@ -/* - * arch/ppc/platforms/zx4500_setup.c - * - * Board setup routines for Znyx ZX4500 family of cPCI boards. - * - * Author: Mark A. Greer - * mgreer@mvista.com - * - * 2000-2001 (c) MontaVista, Software, Inc. 
This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -/* - * This file adds support for the Znyx ZX4500 series of cPCI boards. - * These boards have an 8240, UART on the processor bus, a PPMC slot (for now - * the card in this slot can _not_ be a monarch), Broadcom BCM5600, and an - * Intel 21554 bridge. - * - * Currently, this port assumes that the 8240 is the master and performs PCI - * arbitration, etc. It is also assumed that the 8240 is wired to come up - * using memory MAP B (CHRP map). - * - * Note: This board port will not work properly as it is. You must apply the - * patch that is at ftp://ftp.mvista.com/pub/Area51/zx4500/zx_patch_2_5 - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zx4500.h" - -static u_char zx4500_openpic_initsenses[] __initdata = { - 0, /* 0-15 are not used on an 8240 EPIC */ - 0, /* 1 */ - 0, /* 2 */ - 0, /* 3 */ - 0, /* 4 */ - 0, /* 5 */ - 0, /* 6 */ - 0, /* 7 */ - 0, /* 8 */ - 0, /* 9 */ - 0, /* 10 */ - 0, /* 11 */ - 0, /* 12 */ - 0, /* 13 */ - 0, /* 14 */ - 0, /* 15 */ - 1, /* 16: EPIC IRQ 0: Active Low -- PMC #INTA & #INTC */ - 1, /* 17: EPIC IRQ 1: Active Low -- UART */ - 1, /* 18: EPIC IRQ 2: Active Low -- BCM5600 #INTA */ - 1, /* 19: EPIC IRQ 3: Active Low -- 21554 #SINTA */ - 1, /* 20: EPIC IRQ 4: Active Low -- PMC #INTB & #INTD */ -}; - - -static void __init -zx4500_setup_arch(void) -{ - char boot_string[ZX4500_BOOT_STRING_LEN + 1]; - char *boot_arg; - extern char cmd_line[]; - - - loops_per_jiffy = 50000000 / HZ; - -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - ROOT_DEV = Root_RAM0; - else -#endif -#if defined(CONFIG_ROOT_NFS) - ROOT_DEV = Root_NFS; -#else - ROOT_DEV = Root_SDA1; -#endif - - /* Get boot string from flash */ - strlcpy(boot_string, - (char *)ZX4500_BOOT_STRING_ADDR, - sizeof(boot_string)); - boot_string[ZX4500_BOOT_STRING_LEN] = '\0'; - - /* Can be delimited by 0xff */ - boot_arg = strchr(boot_string, 0xff); - - if (boot_arg != NULL) { - *boot_arg = '\0'; - } - - /* First 3 chars must be 'dev'. If not, ignore. */ - if (!strncmp(boot_string, "dev", 3)) { - /* skip 'dev?' and any blanks after it */ - boot_arg = strchr(boot_string, ' '); - - if (boot_arg != NULL) { - while (*boot_arg == ' ') boot_arg++; - strcat(cmd_line, " "); - strcat(cmd_line, boot_arg); - } - } - - /* nothing but serial consoles... */ - printk("Znyx ZX4500 Series High Performance Switch\n"); - printk("ZX4500 port (C) 2000, 2001 MontaVista Software, Inc. (source@mvista.com)\n"); - - /* Lookup PCI host bridge */ - zx4500_find_bridges(); - - printk("ZX4500 Board ID: 0x%x, Revision #: 0x%x\n", - in_8((volatile u_char *)ZX4500_CPLD_BOARD_ID), - in_8((volatile u_char *)ZX4500_CPLD_REV)); - - return; -} - -static ulong __init -zx4500_find_end_of_memory(void) -{ - return mpc10x_get_mem_size(MPC10X_MEM_MAP_B); -} - -static void __init -zx4500_map_io(void) -{ - io_block_mapping(0xfe000000, 0xfe000000, 0x02000000, _PAGE_IO); -} - -/* - * Enable interrupts routed thru CPLD to reach the 8240's EPIC. - * Need to enable all 4 PMC intrs, BCM INTA, and 21554 SINTA to 8240. - * UART intrs routed directly to 8240 (not thru CPLD). 
- */ -static void __init -zx4500_enable_cpld_intrs(void) -{ - u_char sysctl; - - sysctl = in_8((volatile u_char *)ZX4500_CPLD_SYSCTL); - sysctl |= (ZX4500_CPLD_SYSCTL_PMC | - ZX4500_CPLD_SYSCTL_BCM | - ZX4500_CPLD_SYSCTL_SINTA); - out_8((volatile u_char *)ZX4500_CPLD_SYSCTL, sysctl); - - return; -} - -static void __init -zx4500_init_IRQ(void) -{ - OpenPIC_InitSenses = zx4500_openpic_initsenses; - OpenPIC_NumInitSenses = sizeof(zx4500_openpic_initsenses); - - openpic_init(1, 0, NULL, -1); - - zx4500_enable_cpld_intrs(); /* Allow CPLD to route intrs to 8240 */ - - return; -} - -static void -zx4500_restart(char *cmd) -{ - local_irq_disable(); - - out_8((volatile u_char *)ZX4500_CPLD_RESET, ZX4500_CPLD_RESET_XBUS); - for (;;); - - panic("Restart failed.\n"); - /* NOTREACHED */ -} - -static void -zx4500_power_off(void) -{ - local_irq_disable(); - for(;;); /* No way to shut power off with software */ - /* NOTREACHED */ -} - -static void -zx4500_halt(void) -{ - zx4500_power_off(); - /* NOTREACHED */ -} - -static int -zx4500_get_bus_speed(void) -{ - int bus_speed; - - bus_speed = 100000000; - - return bus_speed; -} - -static int -zx4500_show_cpuinfo(struct seq_file *m) -{ - uint pvid; - - seq_printf(m, "vendor\t\t: Znyx\n"); - seq_printf(m, "machine\t\t: ZX4500\n"); - seq_printf(m, "processor\t: PVID: 0x%x, vendor: %s\n", - pvid, (pvid & (1<<15) ? "IBM" : "Motorola")); - seq_printf(m, "bus speed\t: %dMhz\n", - zx4500_get_bus_speed()/1000000); - - return 0; -} - -static void __init -zx4500_calibrate_decr(void) -{ - ulong freq; - - freq = zx4500_get_bus_speed() / 4; - - printk("time_init: decrementer frequency = %lu.%.6lu MHz\n", - freq/1000000, freq%1000000); - - tb_ticks_per_jiffy = freq / HZ; - tb_to_us = mulhwu_scale_factor(freq, 1000000); - - return; -} - -/* - * Set BAT 3 to map 0xf0000000 to end of physical memory space 1-1. - */ -static __inline__ void -zx4500_set_bat(void) -{ - unsigned long bat3u, bat3l; - static int mapping_set = 0; - - if (!mapping_set) { - - __asm__ __volatile__( - " lis %0,0xf800\n \ - ori %1,%0,0x002a\n \ - ori %0,%0,0x0ffe\n \ - mtspr 0x21e,%0\n \ - mtspr 0x21f,%1\n \ - isync\n \ - sync " - : "=r" (bat3u), "=r" (bat3l)); - - mapping_set = 1; - } - - return; -} - -#ifdef CONFIG_SERIAL_TEXT_DEBUG -#include -#include -#include - -static struct serial_state rs_table[RS_TABLE_SIZE] = { - SERIAL_PORT_DFNS /* Defined in */ -}; - -void -zx4500_progress(char *s, unsigned short hex) -{ - volatile char c; - volatile unsigned long com_port; - u16 shift; - - com_port = rs_table[0].port; - shift = rs_table[0].iomem_reg_shift; - - while ((c = *s++) != 0) { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) == 0) - ; - *(volatile unsigned char *)com_port = c; - - if (c == '\n') { - while ((*((volatile unsigned char *)com_port + - (UART_LSR << shift)) & UART_LSR_THRE) == 0) - ; - *(volatile unsigned char *)com_port = '\r'; - } - } -} -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - -void __init -platform_init(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7) -{ - parse_bootinfo(find_bootinfo()); - - /* Map in board registers, etc. 
*/ - zx4500_set_bat(); - - isa_io_base = MPC10X_MAPB_ISA_IO_BASE; - isa_mem_base = MPC10X_MAPB_ISA_MEM_BASE; - pci_dram_offset = MPC10X_MAPB_DRAM_OFFSET; - - ppc_md.setup_arch = zx4500_setup_arch; - ppc_md.show_cpuinfo = zx4500_show_cpuinfo; - ppc_md.irq_canonicalize = NULL; - ppc_md.init_IRQ = zx4500_init_IRQ; - ppc_md.get_irq = openpic_get_irq; - ppc_md.init = NULL; - - ppc_md.restart = zx4500_restart; - ppc_md.power_off = zx4500_power_off; - ppc_md.halt = zx4500_halt; - - ppc_md.find_end_of_memory = zx4500_find_end_of_memory; - ppc_md.setup_io_mappings = zx4500_map_io; - - ppc_md.calibrate_decr = zx4500_calibrate_decr; - - ppc_md.heartbeat = NULL; - ppc_md.heartbeat_reset = 0; - ppc_md.heartbeat_count = 0; - -#ifdef CONFIG_SERIAL_TEXT_DEBUG - ppc_md.progress = zx4500_progress; -#else /* !CONFIG_SERIAL_TEXT_DEBUG */ - ppc_md.progress = NULL; -#endif /* CONFIG_SERIAL_TEXT_DEBUG */ - - return; -} diff -Nru a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile --- a/arch/ppc/syslib/Makefile Sat Jul 5 12:40:31 2003 +++ b/arch/ppc/syslib/Makefile Sat Jul 5 12:40:31 2003 @@ -61,8 +61,9 @@ pci_auto.o indirect_pci.o todc_time.o obj-$(CONFIG_SPRUCE) += cpc700_pic.o indirect_pci.o pci_auto.o \ todc_time.o -obj-$(CONFIG_ZX4500) += indirect_pci.o pci_auto.o mpc10x_common.o \ - i8259.o open_pic.o obj-$(CONFIG_8260) += m8260_setup.o ppc8260_pic.o +ifeq ($(CONFIG_SERIAL_8250)$(CONFIG_PPC_GEN550),yy) +obj-$(CONFIG_KGDB) += gen550_kgdb.o gen550_dbg.o +obj-$(CONFIG_SERIAL_TEXT_DEBUG) += gen550_dbg.o +endif obj-$(CONFIG_BOOTX_TEXT) += btext.o - diff -Nru a/arch/ppc/syslib/gen550_dbg.c b/arch/ppc/syslib/gen550_dbg.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/arch/ppc/syslib/gen550_dbg.c Sat Jul 5 12:40:34 2003 @@ -0,0 +1,174 @@ +/* + * arch/ppc/syslib/gen550_dbg.c + * + * A library of polled 16550 serial routines. These are intended to + * be used to support progress messages, xmon, kgdb, etc. on a + * variety of platforms. + * + * Adapted from lots of code ripped from the arch/ppc/boot/ polled + * 16550 support. + * + * Author: Matt Porter + * + * 2002-2003 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +#include +#include /* For linux/serial_core.h */ +#include +#include +#include +#include +#include +#include + +#define SERIAL_BAUD 9600 + +static struct serial_state rs_table[RS_TABLE_SIZE] = { + SERIAL_PORT_DFNS /* defined in */ +}; + +static void (*serial_outb)(unsigned long, unsigned char); +static unsigned long (*serial_inb)(unsigned long); + +static int shift; + +unsigned long direct_inb(unsigned long addr) +{ + return readb(addr); +} + +void direct_outb(unsigned long addr, unsigned char val) +{ + writeb(val, addr); +} + +unsigned long io_inb(unsigned long port) +{ + return inb(port); +} + +void io_outb(unsigned long port, unsigned char val) +{ + outb(val, port); +} + +unsigned long serial_init(int chan, void *ignored) +{ + unsigned long com_port; + unsigned char lcr, dlm; + + /* We need to find out which type io we're expecting. If it's + * 'SERIAL_IO_PORT', we get an offset from the isa_io_base. + * If it's 'SERIAL_IO_MEM', we can the exact location. 
-- Tom */ + switch (rs_table[chan].io_type) { + case SERIAL_IO_PORT: + com_port = rs_table[chan].port; + serial_outb = io_outb; + serial_inb = io_inb; + break; + case SERIAL_IO_MEM: + com_port = (unsigned long)rs_table[chan].iomem_base; + serial_outb = direct_outb; + serial_inb = direct_inb; + break; + default: + /* We can't deal with it. */ + return -1; + } + + /* How far apart the registers are. */ + shift = rs_table[chan].iomem_reg_shift; + + /* save the LCR */ + lcr = serial_inb(com_port + (UART_LCR << shift)); + + /* Access baud rate */ + serial_outb(com_port + (UART_LCR << shift), UART_LCR_DLAB); + dlm = serial_inb(com_port + (UART_DLM << shift)); + + /* + * Test if serial port is unconfigured + * We assume that no-one uses less than 110 baud or + * less than 7 bits per character these days. + * -- paulus. + */ + if ((dlm <= 4) && (lcr & 2)) { + /* port is configured, put the old LCR back */ + serial_outb(com_port + (UART_LCR << shift), lcr); + } + else { + /* Input clock. */ + serial_outb(com_port + (UART_DLL << shift), + (rs_table[chan].baud_base / SERIAL_BAUD) & 0xFF); + serial_outb(com_port + (UART_DLM << shift), + (rs_table[chan].baud_base / SERIAL_BAUD) >> 8); + /* 8 data, 1 stop, no parity */ + serial_outb(com_port + (UART_LCR << shift), 0x03); + /* RTS/DTR */ + serial_outb(com_port + (UART_MCR << shift), 0x03); + + /* Clear & enable FIFOs */ + serial_outb(com_port + (UART_FCR << shift), 0x07); + } + + return (com_port); +} + +void +serial_putc(unsigned long com_port, unsigned char c) +{ + while ((serial_inb(com_port + (UART_LSR << shift)) & UART_LSR_THRE) == 0) + ; + serial_outb(com_port, c); +} + +unsigned char +serial_getc(unsigned long com_port) +{ + while ((serial_inb(com_port + (UART_LSR << shift)) & UART_LSR_DR) == 0) + ; + return serial_inb(com_port); +} + +int +serial_tstc(unsigned long com_port) +{ + return ((serial_inb(com_port + (UART_LSR << shift)) & UART_LSR_DR) != 0); +} + +void +serial_close(unsigned long com_port) +{ +} + +void +gen550_init(int i, struct uart_port *serial_req) +{ + rs_table[i].io_type = serial_req->iotype; + rs_table[i].port = serial_req->line; + rs_table[i].iomem_base = serial_req->membase; + rs_table[i].iomem_reg_shift = serial_req->regshift; +} + +#ifdef CONFIG_SERIAL_TEXT_DEBUG +void +gen550_progress(char *s, unsigned short hex) +{ + volatile unsigned int progress_debugport; + volatile char c; + + progress_debugport = serial_init(0, NULL); + + serial_putc(progress_debugport, '\r'); + + while ((c = *s++) != 0) + serial_putc(progress_debugport, c); + + serial_putc(progress_debugport, '\n'); + serial_putc(progress_debugport, '\r'); +} +#endif /* CONFIG_SERIAL_TEXT_DEBUG */ diff -Nru a/arch/ppc/syslib/gen550_kgdb.c b/arch/ppc/syslib/gen550_kgdb.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/arch/ppc/syslib/gen550_kgdb.c Sat Jul 5 12:40:34 2003 @@ -0,0 +1,84 @@ +/* + * arch/ppc/syslib/gen550_kgdb.c + * + * Generic 16550 kgdb support intended to be useful on a variety + * of platforms. To enable this support, it is necessary to set + * the CONFIG_GEN550 option. Any virtual mapping of the serial + * port(s) to be used can be accomplished by setting + * ppc_md.early_serial_map to a platform-specific mapping function. + * + * Adapted from ppc4xx_kgdb.c. + * + * Author: Matt Porter + * + * 2002-2003 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. 
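For context on the polled routines above: where the board spaces the 16550 registers out on the bus, every register offset is scaled by iomem_reg_shift before being added to the port base, and character output simply spins on the LSR THRE bit before writing the data register. The following standalone sketch mimics that addressing and polling against a fake in-memory register file; the array, the shift value, and the helper names are invented for illustration and are not part of the patch.

/* Illustrative sketch only: 16550-style register spacing and THRE polling
 * against a fake in-memory "UART", mirroring the gen550 putc logic above. */
#include <stdio.h>

#define UART_TX		0		/* transmit holding register offset */
#define UART_LSR	5		/* line status register offset */
#define UART_LSR_THRE	0x20		/* transmitter holding register empty */

static unsigned char fake_uart[8 << 2];	/* registers spaced by 1 << shift */
static int shift = 2;			/* assumed iomem_reg_shift */

static unsigned char reg_read(unsigned long base, int reg)
{
	return fake_uart[base + (reg << shift)];
}

static void reg_write(unsigned long base, int reg, unsigned char val)
{
	fake_uart[base + (reg << shift)] = val;
}

static void fake_putc(unsigned long base, unsigned char c)
{
	/* Same idea as serial_putc(): wait for THRE, then write the data. */
	while ((reg_read(base, UART_LSR) & UART_LSR_THRE) == 0)
		;
	reg_write(base, UART_TX, c);
}

int main(void)
{
	fake_uart[UART_LSR << shift] = UART_LSR_THRE;	/* pretend TX is idle */
	fake_putc(0, 'A');
	printf("wrote 0x%02x to the TX register\n", fake_uart[UART_TX << shift]);
	return 0;
}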
+ */ + +#include +#include +#include + +#include + +extern unsigned long serial_init(int, void *); +extern unsigned long serial_getc(unsigned long); +extern unsigned long serial_putc(unsigned long, unsigned char); + +#if defined(CONFIG_KGDB_TTYS0) +#define KGDB_PORT 0 +#elif defined(CONFIG_KGDB_TTYS1) +#define KGDB_PORT 1 +#elif defined(CONFIG_KGDB_TTYS2) +#define KGDB_PORT 2 +#elif defined(CONFIG_KGDB_TTYS3) +#define KGDB_PORT 3 +#else +#error "invalid kgdb_tty port" +#endif + +static volatile unsigned int kgdb_debugport; + +void putDebugChar(unsigned char c) +{ + if (kgdb_debugport == 0) + kgdb_debugport = serial_init(KGDB_PORT, NULL); + + serial_putc(kgdb_debugport, c); +} + +int getDebugChar(void) +{ + if (kgdb_debugport == 0) + kgdb_debugport = serial_init(KGDB_PORT, NULL); + + return(serial_getc(kgdb_debugport)); +} + +void kgdb_interruptible(int enable) +{ + return; +} + +void putDebugString(char* str) +{ + while (*str != '\0') { + putDebugChar(*str); + str++; + } + putDebugChar('\r'); + return; +} + +/* + * Note: gen550_init() must be called already on the port we are going + * to use. + */ +void +kgdb_map_scc(void) +{ + printk(KERN_DEBUG "kgdb init\n"); + kgdb_debugport = serial_init(KGDB_PORT, NULL); +} diff -Nru a/arch/ppc/syslib/open_pic.c b/arch/ppc/syslib/open_pic.c --- a/arch/ppc/syslib/open_pic.c Sat Jul 5 12:40:32 2003 +++ b/arch/ppc/syslib/open_pic.c Sat Jul 5 12:40:32 2003 @@ -33,6 +33,7 @@ void* OpenPIC_Addr; static volatile struct OpenPIC *OpenPIC = NULL; + /* * We define OpenPIC_InitSenses table thusly: * bit 0x1: sense, 0 for edge and 1 for level. @@ -261,32 +262,32 @@ } #endif /* CONFIG_SMP */ -#if defined(CONFIG_EPIC_SERIAL_MODE) || defined(CONFIG_PMAC_PBOOK) -static void openpic_reset(void) +#ifdef CONFIG_EPIC_SERIAL_MODE +static void __init openpic_eicr_set_clk(u_int clkval) { - openpic_setfield(&OpenPIC->Global.Global_Configuration0, - OPENPIC_CONFIG_RESET); - while (openpic_readfield(&OpenPIC->Global.Global_Configuration0, - OPENPIC_CONFIG_RESET)) - mb(); + openpic_writefield(&OpenPIC->Global.Global_Configuration1, + OPENPIC_EICR_S_CLK_MASK, (clkval << 28)); } -#endif -#ifdef CONFIG_EPIC_SERIAL_MODE -static void openpic_enable_sie(void) +static void __init openpic_enable_sie(void) { openpic_setfield(&OpenPIC->Global.Global_Configuration1, - OPENPIC_EICR_SIE); + OPENPIC_EICR_SIE); } +#endif -static void openpic_eicr_set_clk(u_int clkval) +#if defined(CONFIG_EPIC_SERIAL_MODE) || defined(CONFIG_PMAC_PBOOK) +static void openpic_reset(void) { - openpic_writefield(&OpenPIC->Global.Global_Configuration1, - OPENPIC_EICR_S_CLK_MASK, (clkval << 28)); + openpic_setfield(&OpenPIC->Global.Global_Configuration0, + OPENPIC_CONFIG_RESET); + while (openpic_readfield(&OpenPIC->Global.Global_Configuration0, + OPENPIC_CONFIG_RESET)) + mb(); } #endif -void openpic_set_sources(int first_irq, int num_irqs, void *first_ISR) +void __init openpic_set_sources(int first_irq, int num_irqs, void *first_ISR) { volatile OpenPIC_Source *src = first_ISR; int i, last_irq; @@ -300,7 +301,14 @@ ISR[i] = src; } -void __init openpic_init(int linux_irq_offset) +/* + * The `offset' parameter defines where the interrupts handled by the + * OpenPIC start in the space of interrupt numbers that the kernel knows + * about. In other words, the OpenPIC's IRQ0 is numbered `offset' in the + * kernel's interrupt numbering scheme. + * We assume there is only one OpenPIC. 
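A side note on the numbering convention the new comment describes: translating between OpenPIC source numbers and the kernel's IRQ numbers is a plain linear offset, which is all open_pic_irq_offset provides. A minimal sketch of that arithmetic follows; the offset value and the two helper names are made up for illustration.

/* Illustrative sketch only: the linear IRQ remapping described above. */
#include <stdio.h>

static int open_pic_irq_offset = 16;	/* assumed value, e.g. behind an 8259 cascade */

static int src_to_irq(int src) { return src + open_pic_irq_offset; }
static int irq_to_src(int irq) { return irq - open_pic_irq_offset; }

int main(void)
{
	/* The OpenPIC's source 0 is seen by the kernel as IRQ `offset'. */
	printf("OpenPIC source 0 -> kernel IRQ %d\n", src_to_irq(0));
	printf("kernel IRQ 20    -> OpenPIC source %d\n", irq_to_src(20));
	return 0;
}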
+ */ +void __init openpic_init(int offset) { u_int t, i; u_int timerfreq; @@ -349,13 +357,13 @@ printk("OpenPIC timer frequency is %d.%06d MHz\n", timerfreq / 1000000, timerfreq % 1000000); - open_pic_irq_offset = linux_irq_offset; + open_pic_irq_offset = offset; /* Initialize timer interrupts */ if ( ppc_md.progress ) ppc_md.progress("openpic: timer",0x3ba); for (i = 0; i < OPENPIC_NUM_TIMERS; i++) { /* Disabled, Priority 0 */ - openpic_inittimer(i, 0, OPENPIC_VEC_TIMER+i+linux_irq_offset); + openpic_inittimer(i, 0, OPENPIC_VEC_TIMER+i+offset); /* No processor */ openpic_maptimer(i, 0); } @@ -365,12 +373,10 @@ if ( ppc_md.progress ) ppc_md.progress("openpic: ipi",0x3bb); for (i = 0; i < OPENPIC_NUM_IPI; i++) { /* Disabled, Priority 10..13 */ - openpic_initipi(i, 10+i, OPENPIC_VEC_IPI+i+linux_irq_offset); + openpic_initipi(i, 10+i, OPENPIC_VEC_IPI+i+offset); /* IPIs are per-CPU */ - irq_desc[OPENPIC_VEC_IPI+i+linux_irq_offset].status |= - IRQ_PER_CPU; - irq_desc[OPENPIC_VEC_IPI+i+linux_irq_offset].handler = - &open_pic_ipi; + irq_desc[OPENPIC_VEC_IPI+i+offset].status |= IRQ_PER_CPU; + irq_desc[OPENPIC_VEC_IPI+i+offset].handler = &open_pic_ipi; } #endif @@ -387,40 +393,36 @@ continue; /* the bootloader may have left it enabled (bad !) */ - openpic_disable_irq(i+linux_irq_offset); + openpic_disable_irq(i+offset); - /* - * We find the value from either the InitSenses table - * or assume a negative polarity level interrupt. - */ - sense = (i < OpenPIC_NumInitSenses)? OpenPIC_InitSenses[i]: 1; + sense = (i < OpenPIC_NumInitSenses)? OpenPIC_InitSenses[i]: \ + (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE); - if ((sense & IRQ_SENSE_MASK) == 1) - irq_desc[i+linux_irq_offset].status = IRQ_LEVEL; + if (sense & IRQ_SENSE_MASK) + irq_desc[i+offset].status = IRQ_LEVEL; /* Enabled, Priority 8 */ - openpic_initirq(i, 8, i + linux_irq_offset, - (sense & IRQ_POLARITY_MASK), + openpic_initirq(i, 8, i+offset, (sense & IRQ_POLARITY_MASK), (sense & IRQ_SENSE_MASK)); /* Processor 0 */ openpic_mapirq(i, 1<<0, 0); } /* Init descriptors */ - for (i = linux_irq_offset; i < NumSources + linux_irq_offset; i++) + for (i = offset; i < NumSources + offset; i++) irq_desc[i].handler = &open_pic; /* Initialize the spurious interrupt */ if (ppc_md.progress) ppc_md.progress("openpic: spurious",0x3bd); - openpic_set_spurious(OPENPIC_VEC_SPURIOUS+linux_irq_offset); + openpic_set_spurious(OPENPIC_VEC_SPURIOUS+offset); /* Initialize the cascade */ - if (linux_irq_offset) { - if (request_irq(linux_irq_offset, no_action, SA_INTERRUPT, + if (offset) { + if (request_irq(offset, no_action, SA_INTERRUPT, "82c59 cascade", NULL)) printk("Unable to get OpenPIC IRQ 0 for cascade\n"); } - openpic_disable_8259_pass_through(); + openpic_disable_8259_pass_through(); #ifdef CONFIG_EPIC_SERIAL_MODE openpic_eicr_set_clk(7); /* Slowest value until we know better */ openpic_enable_sie(); @@ -479,7 +481,7 @@ } #endif /* notused */ -static void openpic_set_priority(u_int pri) +static void __init openpic_set_priority(u_int pri) { DECL_THIS_CPU; @@ -656,29 +658,18 @@ } /* - * Initalize the interrupt source which will generate an NMI (and disable it). + * Initalize the interrupt source which will generate an NMI. + * This raises the interrupt's priority from 8 to 9. * * irq: The logical IRQ which generates an NMI. */ void __init openpic_init_nmi_irq(u_int irq) { - int sense; - - /* If this wasn't given, assume a level, negative polarity interrupt. */ - sense = (irq < OpenPIC_NumInitSenses) ? 
OpenPIC_InitSenses[irq] : - (IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE); - - openpic_safe_writefield(&ISR[irq]->Vector_Priority, - OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK | - OPENPIC_SENSE_MASK | OPENPIC_POLARITY_MASK, - (9 << OPENPIC_PRIORITY_SHIFT) | - (irq + open_pic_irq_offset) | - ((sense & IRQ_POLARITY_MASK) ? - OPENPIC_POLARITY_POSITIVE : - OPENPIC_POLARITY_NEGATIVE) | - ((sense & IRQ_SENSE_MASK) ? OPENPIC_SENSE_LEVEL - : OPENPIC_SENSE_EDGE)); + check_arg_irq(irq); + openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority, + OPENPIC_PRIORITY_MASK, + 9 << OPENPIC_PRIORITY_SHIFT); } /* @@ -752,7 +743,8 @@ * pol: polarity (1 for positive, 0 for negative) * sense: 1 for level, 0 for edge */ -static void openpic_initirq(u_int irq, u_int pri, u_int vec, int pol, int sense) +static void __init +openpic_initirq(u_int irq, u_int pri, u_int vec, int pol, int sense) { openpic_safe_writefield(&ISR[irq]->Vector_Priority, OPENPIC_PRIORITY_MASK | OPENPIC_VECTOR_MASK | diff -Nru a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c --- a/arch/ppc64/mm/init.c Sat Jul 5 12:40:32 2003 +++ b/arch/ppc64/mm/init.c Sat Jul 5 12:40:32 2003 @@ -109,7 +109,7 @@ show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; i++) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) @@ -564,7 +564,7 @@ int nid; for (nid = 0; nid < numnodes; nid++) { - if (node_data[nid].node_size != 0) { + if (node_data[nid].node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); diff -Nru a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c --- a/arch/ppc64/mm/numa.c Sat Jul 5 12:40:32 2003 +++ b/arch/ppc64/mm/numa.c Sat Jul 5 12:40:32 2003 @@ -160,21 +160,21 @@ * this simple case and complain if there is a gap in * memory */ - if (node_data[numa_domain].node_size) { + if (node_data[numa_domain].node_spanned_pages) { unsigned long shouldstart = node_data[numa_domain].node_start_pfn + - node_data[numa_domain].node_size; + node_data[numa_domain].node_spanned_pages; if (shouldstart != (start / PAGE_SIZE)) { printk(KERN_ERR "Hole in node, disabling " "region start %lx length %lx\n", start, size); continue; } - node_data[numa_domain].node_size += size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages += size / PAGE_SIZE; } else { node_data[numa_domain].node_start_pfn = start / PAGE_SIZE; - node_data[numa_domain].node_size = size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages = size / PAGE_SIZE; } for (i = start ; i < (start+size); i += MEMORY_INCREMENT) @@ -202,7 +202,7 @@ map_cpu_to_node(i, 0); node_data[0].node_start_pfn = 0; - node_data[0].node_size = lmb_end_of_DRAM() / PAGE_SIZE; + node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE; for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; @@ -224,12 +224,12 @@ unsigned long bootmem_paddr; unsigned long bootmap_pages; - if (node_data[nid].node_size == 0) + if (node_data[nid].node_spanned_pages == 0) continue; start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE; end_paddr = start_paddr + - (node_data[nid].node_size * PAGE_SIZE); + (node_data[nid].node_spanned_pages * PAGE_SIZE); dbg("node %d\n", nid); dbg("start_paddr = %lx\n", start_paddr); @@ -311,7 +311,7 @@ unsigned long start_pfn; unsigned long end_pfn; - if (node_data[nid].node_size == 0) + if 
(node_data[nid].node_spanned_pages == 0) continue; start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT; diff -Nru a/arch/s390/kernel/compat_exec.c b/arch/s390/kernel/compat_exec.c --- a/arch/s390/kernel/compat_exec.c Sat Jul 5 12:40:32 2003 +++ b/arch/s390/kernel/compat_exec.c Sat Jul 5 12:40:32 2003 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff -Nru a/arch/sh/kernel/cpu/sh4/pci-sh7751.c b/arch/sh/kernel/cpu/sh4/pci-sh7751.c --- a/arch/sh/kernel/cpu/sh4/pci-sh7751.c Sat Jul 5 12:40:32 2003 +++ b/arch/sh/kernel/cpu/sh4/pci-sh7751.c Sat Jul 5 12:40:32 2003 @@ -200,7 +200,7 @@ return; PCIDBG(2,"PCI: Peer bridge fixup\n"); for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) + if (pci_find_bus(0, n)) continue; bus.number = n; bus.ops = pci_root_ops; diff -Nru a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c --- a/arch/x86_64/ia32/ia32_binfmt.c Sat Jul 5 12:40:31 2003 +++ b/arch/x86_64/ia32/ia32_binfmt.c Sat Jul 5 12:40:31 2003 @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include @@ -339,7 +341,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff -Nru a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c --- a/arch/x86_64/mm/init.c Sat Jul 5 12:40:31 2003 +++ b/arch/x86_64/mm/init.c Sat Jul 5 12:40:31 2003 @@ -64,7 +64,7 @@ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) diff -Nru a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c --- a/arch/x86_64/mm/numa.c Sat Jul 5 12:40:32 2003 +++ b/arch/x86_64/mm/numa.c Sat Jul 5 12:40:32 2003 @@ -86,7 +86,7 @@ memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_size = end_pfn - start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; /* Find a place for the bootmem map */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); diff -Nru a/drivers/base/class.c b/drivers/base/class.c --- a/drivers/base/class.c Sat Jul 5 12:40:33 2003 +++ b/drivers/base/class.c Sat Jul 5 12:40:33 2003 @@ -3,6 +3,8 @@ * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs + * Copyright (c) 2003 Greg Kroah-Hartman + * Copyright (c) 2003 IBM Corp. 
* * This file is released under the GPLv2 * @@ -337,6 +339,24 @@ class_dev->class_id); class_device_del(class_dev); class_device_put(class_dev); +} + +int class_device_rename(struct class_device *class_dev, char *new_name) +{ + class_dev = class_device_get(class_dev); + if (!class_dev) + return -EINVAL; + + pr_debug("CLASS: renaming '%s' to '%s'\n", class_dev->class_id, + new_name); + + strlcpy(class_dev->class_id, new_name, KOBJ_NAME_LEN); + + kobject_rename(&class_dev->kobj, new_name); + + class_device_put(class_dev); + + return 0; } struct class_device * class_device_get(struct class_device *class_dev) diff -Nru a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c --- a/drivers/base/firmware_class.c Sat Jul 5 12:40:31 2003 +++ b/drivers/base/firmware_class.c Sat Jul 5 12:40:31 2003 @@ -149,7 +149,7 @@ if (offset + count > fw->size) count = fw->size - offset; - memcpy(buffer, fw->data + offset, count); + memcpy(buffer + offset, fw->data + offset, count); return count; } static int @@ -198,7 +198,7 @@ if (retval) return retval; - memcpy(fw->data + offset, buffer, count); + memcpy(fw->data + offset, buffer + offset, count); fw->size = max_t(size_t, offset + count, fw->size); diff -Nru a/drivers/block/Makefile b/drivers/block/Makefile --- a/drivers/block/Makefile Sat Jul 5 12:40:32 2003 +++ b/drivers/block/Makefile Sat Jul 5 12:40:32 2003 @@ -8,7 +8,13 @@ # In the future, some of these should be built conditionally. # -obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o deadline-iosched.o +# +# NOTE that ll_rw_blk.c must come early in linkage order - it starts the +# kblockd threads +# + +obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o \ + deadline-iosched.o as-iosched.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o diff -Nru a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/drivers/block/as-iosched.c Sat Jul 5 12:40:34 2003 @@ -0,0 +1,1837 @@ +/* + * linux/drivers/block/as-iosched.c + * + * Anticipatory & deadline i/o scheduler. + * + * Copyright (C) 2002 Jens Axboe + * Nick Piggin + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define REQ_SYNC 1 +#define REQ_ASYNC 0 + +/* + * See Documentation/as-iosched.txt + */ + +/* + * max time before a read is submitted. + */ +#define default_read_expire (HZ / 20) + +/* + * ditto for writes, these limits are not hard, even + * if the disk is capable of satisfying them. + */ +#define default_write_expire (HZ / 5) + +/* + * read_batch_expire describes how long we will allow a stream of reads to + * persist before looking to see whether it is time to switch over to writes. + */ +#define default_read_batch_expire (HZ / 5) + +/* + * write_batch_expire describes how long we want a stream of writes to run for. + * This is not a hard limit, but a target we set for the auto-tuning thingy. + * See, the problem is: we can send a lot of writes to disk cache / TCQ in + * a short amount of time... + */ +#define default_write_batch_expire (HZ / 20) + +/* + * max time we may wait to anticipate a read (default around 6ms) + */ +#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1) + +/* + * Keep track of up to 20ms thinktimes. We can go as big as we like here, + * however huge values tend to interfere and not decay fast enough. A program + * might be in a non-io phase of operation. 
Waiting on user input for example, + * or doing a lengthy computation. A small penalty can be justified there, and + * will still catch out those processes that constantly have large thinktimes. + */ +#define MAX_THINKTIME (HZ/50UL) + +/* Bits in as_io_context.state */ +enum as_io_states { + AS_TASK_RUNNING=0, /* Process has not exitted */ + AS_TASK_IORUNNING, /* Process has completed some IO */ +}; + +enum anticipation_status { + ANTIC_OFF=0, /* Not anticipating (normal operation) */ + ANTIC_WAIT_REQ, /* The last read has not yet completed */ + ANTIC_WAIT_NEXT, /* Currently anticipating a request vs + last read (which has completed) */ + ANTIC_FINISHED, /* Anticipating but have found a candidate + * or timed out */ +}; + +struct as_data { + /* + * run time data + */ + + struct request_queue *q; /* the "owner" queue */ + + /* + * requests (as_rq s) are present on both sort_list and fifo_list + */ + struct rb_root sort_list[2]; + struct list_head fifo_list[2]; + + struct as_rq *next_arq[2]; /* next in sort order */ + sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ + struct list_head *dispatch; /* driver dispatch queue */ + struct list_head *hash; /* request hash */ + unsigned long hash_valid_count; /* barrier hash count */ + unsigned long current_batch_expires; + unsigned long last_check_fifo[2]; + int changed_batch; + int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */ + int write_batch_count; /* max # of reqs in a write batch */ + int current_write_count; /* how many requests left this batch */ + int write_batch_idled; /* has the write batch gone idle? */ + mempool_t *arq_pool; + + enum anticipation_status antic_status; + unsigned long antic_start; /* jiffies: when it started */ + struct timer_list antic_timer; /* anticipatory scheduling timer */ + struct work_struct antic_work; /* Deferred unplugging */ + struct io_context *io_context; /* Identify the expected process */ + int ioc_finished; /* IO associated with io_context is finished */ + int nr_dispatched; + + /* + * settings that change how the i/o scheduler behaves + */ + unsigned long fifo_expire[2]; + unsigned long batch_expire[2]; + unsigned long antic_expire; +}; + +#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo) + +/* + * per-request data. + */ +enum arq_state { + AS_RQ_NEW=0, /* New - not referenced and not on any lists */ + AS_RQ_QUEUED, /* In the request queue. It belongs to the + scheduler */ + AS_RQ_DISPATCHED, /* On the dispatch list. 
It belongs to the + driver now */ +}; + +struct as_rq { + /* + * rbtree index, key is the starting offset + */ + struct rb_node rb_node; + sector_t rb_key; + + struct request *request; + + struct io_context *io_context; /* The submitting task */ + + /* + * request hash, key is the ending offset (for back merge lookup) + */ + struct list_head hash; + unsigned long hash_valid_count; + + /* + * expire fifo + */ + struct list_head fifo; + unsigned long expires; + + int is_sync; + enum arq_state state; /* debug only */ +}; + +#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private) + +static kmem_cache_t *arq_pool; + +/* + * IO Context helper functions + */ +/* Debug */ +static atomic_t nr_as_io_requests = ATOMIC_INIT(0); + +/* Called to deallocate the as_io_context */ +static void free_as_io_context(struct as_io_context *aic) +{ + atomic_dec(&nr_as_io_requests); + kfree(aic); +} + +/* Called when the task exits */ +static void exit_as_io_context(struct as_io_context *aic) +{ + clear_bit(AS_TASK_RUNNING, &aic->state); +} + +static struct as_io_context *alloc_as_io_context(void) +{ + struct as_io_context *ret; + + ret = kmalloc(sizeof(*ret), GFP_ATOMIC); + if (ret) { + atomic_inc(&nr_as_io_requests); + ret->dtor = free_as_io_context; + ret->exit = exit_as_io_context; + ret->state = 1 << AS_TASK_RUNNING; + atomic_set(&ret->nr_queued, 0); + atomic_set(&ret->nr_dispatched, 0); + spin_lock_init(&ret->lock); + ret->ttime_total = 0; + ret->ttime_samples = 0; + ret->ttime_mean = 0; + ret->seek_total = 0; + ret->seek_samples = 0; + ret->seek_mean = 0; + } + + return ret; +} + +/* + * If the current task has no AS IO context then create one and initialise it. + * Then take a ref on the task's io context and return it. + */ +static struct io_context *as_get_io_context(void) +{ + struct io_context *ioc = get_io_context(GFP_ATOMIC); + if (ioc && !ioc->aic) { + ioc->aic = alloc_as_io_context(); + if (!ioc->aic) { + put_io_context(ioc); + ioc = NULL; + } + } + return ioc; +} + +/* + * the back merge hash support functions + */ +static const int as_hash_shift = 6; +#define AS_HASH_BLOCK(sec) ((sec) >> 3) +#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift)) +#define AS_HASH_ENTRIES (1 << as_hash_shift) +#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) +#define list_entry_hash(ptr) list_entry((ptr), struct as_rq, hash) +#define ON_HASH(arq) (arq)->hash_valid_count + +#define AS_INVALIDATE_HASH(ad) \ + do { \ + if (!++(ad)->hash_valid_count) \ + (ad)->hash_valid_count = 1; \ + } while (0) + +static inline void __as_del_arq_hash(struct as_rq *arq) +{ + arq->hash_valid_count = 0; + list_del_init(&arq->hash); +} + +static inline void as_del_arq_hash(struct as_rq *arq) +{ + if (ON_HASH(arq)) + __as_del_arq_hash(arq); +} + +static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq) +{ + as_del_arq_hash(arq); + + if (q->last_merge == &arq->request->queuelist) + q->last_merge = NULL; +} + +static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) +{ + struct request *rq = arq->request; + + BUG_ON(ON_HASH(arq)); + + arq->hash_valid_count = ad->hash_valid_count; + list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]); +} + +/* + * move hot entry to front of chain + */ +static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq) +{ + struct request *rq = arq->request; + struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))]; + + if (!ON_HASH(arq)) { + WARN_ON(1); + return; + } + + if (arq->hash.prev != head) { + list_del(&arq->hash); + 
list_add(&arq->hash, head); + } +} + +static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) +{ + struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)]; + struct list_head *entry, *next = hash_list->next; + + while ((entry = next) != hash_list) { + struct as_rq *arq = list_entry_hash(entry); + struct request *__rq = arq->request; + + next = entry->next; + + BUG_ON(!ON_HASH(arq)); + + if (!rq_mergeable(__rq) + || arq->hash_valid_count != ad->hash_valid_count) { + __as_del_arq_hash(arq); + continue; + } + + if (rq_hash_key(__rq) == offset) + return __rq; + } + + return NULL; +} + +/* + * rb tree support functions + */ +#define RB_NONE (2) +#define RB_EMPTY(root) ((root)->rb_node == NULL) +#define ON_RB(node) ((node)->rb_color != RB_NONE) +#define RB_CLEAR(node) ((node)->rb_color = RB_NONE) +#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) +#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync]) +#define rq_rb_key(rq) (rq)->sector + +/* + * as_find_first_arq finds the first (lowest sector numbered) request + * for the specified data_dir. Used to sweep back to the start of the disk + * (1-way elevator) after we process the last (highest sector) request. + */ +static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) +{ + struct rb_node *n = ad->sort_list[data_dir].rb_node; + + if (n == NULL) + return NULL; + + for (;;) { + if (n->rb_left == NULL) + return rb_entry_arq(n); + + n = n->rb_left; + } +} + +static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; + struct rb_node *parent = NULL; + struct as_rq *__arq; + + while (*p) { + parent = *p; + __arq = rb_entry_arq(parent); + + if (arq->rb_key < __arq->rb_key) + p = &(*p)->rb_left; + else if (arq->rb_key > __arq->rb_key) + p = &(*p)->rb_right; + else + return __arq; + } + + rb_link_node(&arq->rb_node, parent, p); + return 0; +} + +static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); +/* + * Add the request to the rb tree if it is unique. If there is an alias (an + * existing request against the same sector), which can happen when using + * direct IO, then move the alias to the dispatch list and then add the + * request. + */ +static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + struct as_rq *alias; + struct request *rq = arq->request; + + arq->rb_key = rq_rb_key(rq); + + /* This can be caused by direct IO */ + while ((alias = __as_add_arq_rb(ad, arq))) + as_move_to_dispatch(ad, alias); + + rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); +} + +static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + if (!ON_RB(&arq->rb_node)) { + WARN_ON(1); + return; + } + + rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); + RB_CLEAR(&arq->rb_node); +} + +static struct request * +as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) +{ + struct rb_node *n = ad->sort_list[data_dir].rb_node; + struct as_rq *arq; + + while (n) { + arq = rb_entry_arq(n); + + if (sector < arq->rb_key) + n = n->rb_left; + else if (sector > arq->rb_key) + n = n->rb_right; + else + return arq->request; + } + + return NULL; +} + +/* + * IO Scheduler proper + */ + +#define MAXBACK (1024 * 1024) /* + * Maximum distance the disk will go backward + * for a request. + */ + +/* + * as_choose_req selects the preferred one of two requests of the same data_dir + * ignoring time - eg. 
timeouts, which is the job of as_dispatch_request + */ +static struct as_rq * +as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) +{ + int data_dir; + sector_t last, s1, s2, d1, d2; + int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ + const sector_t maxback = MAXBACK; + + if (arq1 == NULL || arq1 == arq2) + return arq2; + if (arq2 == NULL) + return arq1; + + data_dir = arq1->is_sync; + + last = ad->last_sector[data_dir]; + s1 = arq1->request->sector; + s2 = arq2->request->sector; + + BUG_ON(data_dir != arq2->is_sync); + + /* + * Strict one way elevator _except_ in the case where we allow + * short backward seeks which are biased as twice the cost of a + * similar forward seek. + */ + if (s1 >= last) + d1 = s1 - last; + else if (s1+maxback >= last) + d1 = (last - s1)*2; + else { + r1_wrap = 1; + d1 = 0; /* shut up, gcc */ + } + + if (s2 >= last) + d2 = s2 - last; + else if (s2+maxback >= last) + d2 = (last - s2)*2; + else { + r2_wrap = 1; + d2 = 0; + } + + /* Found required data */ + if (!r1_wrap && r2_wrap) + return arq1; + else if (!r2_wrap && r1_wrap) + return arq2; + else if (r1_wrap && r2_wrap) { + /* both behind the head */ + if (s1 <= s2) + return arq1; + else + return arq2; + } + + /* Both requests in front of the head */ + if (d1 < d2) + return arq1; + else if (d2 < d1) + return arq2; + else { + if (s1 >= s2) + return arq1; + else + return arq2; + } +} + +/* + * as_find_next_arq finds the next request after @prev in elevator order. + * this with as_choose_req form the basis for how the scheduler chooses + * what request to process next. Anticipation works on top of this. + */ +static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last) +{ + const int data_dir = last->is_sync; + struct as_rq *ret; + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct as_rq *arq_next, *arq_prev; + + BUG_ON(!ON_RB(&last->rb_node)); + + if (rbprev) + arq_prev = rb_entry_arq(rbprev); + else + arq_prev = NULL; + + if (rbnext) + arq_next = rb_entry_arq(rbnext); + else { + arq_next = as_find_first_arq(ad, data_dir); + if (arq_next == last) + arq_next = NULL; + } + + ret = as_choose_req(ad, arq_next, arq_prev); + + return ret; +} + +/* + * anticipatory scheduling functions follow + */ + +/* + * as_antic_expired tells us when we have anticipated too long. + * The funny "absolute difference" math on the elapsed time is to handle + * jiffy wraps, and disks which have been idle for 0x80000000 jiffies. + */ +static int as_antic_expired(struct as_data *ad) +{ + long delta_jif; + + delta_jif = jiffies - ad->antic_start; + if (unlikely(delta_jif < 0)) + delta_jif = -delta_jif; + if (delta_jif < ad->antic_expire) + return 0; + + return 1; +} + +/* + * as_antic_waitnext starts anticipating that a nice request will soon be + * submitted. See also as_antic_waitreq + */ +static void as_antic_waitnext(struct as_data *ad) +{ + unsigned long timeout; + + BUG_ON(ad->antic_status != ANTIC_OFF + && ad->antic_status != ANTIC_WAIT_REQ); + + timeout = ad->antic_start + ad->antic_expire; + + mod_timer(&ad->antic_timer, timeout); + + ad->antic_status = ANTIC_WAIT_NEXT; +} + +/* + * as_antic_waitreq starts anticipating. We don't start timing the anticipation + * until the request that we're anticipating on has finished. This means we + * are timing from when the candidate process wakes up hopefully. 
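To make the seek-cost rule in as_choose_req() concrete: a forward seek costs its distance, a short backward seek (within MAXBACK) costs twice its distance, and anything further behind the head is treated as wrapped and only picked when nothing better exists. A small standalone sketch of that comparison, with invented sector numbers:

/* Illustrative sketch only: the biased seek-distance comparison used by
 * as_choose_req() above.  All sector values here are made up. */
#include <stdio.h>

#define MAXBACK (1024 * 1024)

/* Effective "cost" of moving from `last' to `s', or -1 if the request is
 * considered behind the head (wrapped). */
static long long seek_cost(long long last, long long s)
{
	if (s >= last)
		return s - last;		/* forward seek: plain distance */
	if (s + MAXBACK >= last)
		return (last - s) * 2;		/* short backward seek: doubled */
	return -1;				/* too far back: wrapped */
}

int main(void)
{
	long long last = 5000000;
	long long c1 = 5000800;			/* 800 sectors ahead  -> cost 800 */
	long long c2 = 4999700;			/* 300 sectors behind -> cost 600 */

	printf("candidate 1 cost: %lld\n", seek_cost(last, c1));
	printf("candidate 2 cost: %lld\n", seek_cost(last, c2));
	/* The backward request wins here despite the 2x penalty. */
	return 0;
}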
+ */ +static void as_antic_waitreq(struct as_data *ad) +{ + BUG_ON(ad->antic_status == ANTIC_FINISHED); + if (ad->antic_status == ANTIC_OFF) { + if (!ad->io_context || ad->ioc_finished) + as_antic_waitnext(ad); + else + ad->antic_status = ANTIC_WAIT_REQ; + } +} + +/* + * This is called directly by the functions in this file to stop anticipation. + * We kill the timer and schedule a call to the request_fn asap. + */ +static void as_antic_stop(struct as_data *ad) +{ + int status = ad->antic_status; + + if (status == ANTIC_WAIT_REQ || status == ANTIC_WAIT_NEXT) { + if (status == ANTIC_WAIT_NEXT) + del_timer(&ad->antic_timer); + ad->antic_status = ANTIC_FINISHED; + /* see as_work_handler */ + kblockd_schedule_work(&ad->antic_work); + } +} + +/* + * as_antic_timeout is the timer function set by as_antic_waitnext. + */ +static void as_antic_timeout(unsigned long data) +{ + struct request_queue *q = (struct request_queue *)data; + struct as_data *ad = q->elevator.elevator_data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT) { + ad->antic_status = ANTIC_FINISHED; + kblockd_schedule_work(&ad->antic_work); + } + spin_unlock_irqrestore(q->queue_lock, flags); +} + +/* + * as_close_req decides if one request is considered "close" to the + * previous one issued. + */ +static int as_close_req(struct as_data *ad, struct as_rq *arq) +{ + unsigned long delay; /* milliseconds */ + sector_t last = ad->last_sector[ad->batch_data_dir]; + sector_t next = arq->request->sector; + sector_t delta; /* acceptable close offset (in sectors) */ + + if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished) + delay = 0; + else + delay = ((jiffies - ad->antic_start) * 1000) / HZ; + + if (delay <= 1) + delta = 64; + else if (delay <= 20 && delay <= ad->antic_expire) + delta = 64 << (delay-1); + else + return 1; + + return (last - (delta>>1) <= next) && (next <= last + delta); +} + +/* + * as_can_break_anticipation returns true if we have been anticipating this + * request. + * + * It also returns true if the process against which we are anticipating + * submits a write - that's presumably an fsync, O_SYNC write, etc. We want to + * dispatch it ASAP, because we know that application will not be submitting + * any new reads. + * + * If the task which has submitted the request has exitted, break anticipation. + * + * If this task has queued some other IO, do not enter enticipation. + */ +static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) +{ + struct io_context *ioc; + struct as_io_context *aic; + + if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) { + /* close request */ + return 1; + } + + if (ad->ioc_finished && as_antic_expired(ad)) { + /* + * In this situation status should really be FINISHED, + * however the timer hasn't had the chance to run yet. 
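For reference, the "close request" test in as_close_req() above widens its acceptance window the longer anticipation has already run: roughly 64 sectors immediately, doubling per elapsed millisecond, until the wait is old enough that any request counts as close. A toy calculation of that window is below; it is a simplified mirror of the code (the expiry argument is taken in milliseconds here purely for illustration).

/* Illustrative sketch only: the widening "close enough" window computed by
 * as_close_req() above.  All numbers are example values. */
#include <stdio.h>

/* Window of acceptable sectors after `delay_ms' of anticipation; returns 0
 * to mean "treat every request as close" (anticipation nearly expired). */
static unsigned long long close_window(unsigned long delay_ms,
				       unsigned long antic_expire_ms)
{
	if (delay_ms <= 1)
		return 64;
	if (delay_ms <= 20 && delay_ms <= antic_expire_ms)
		return 64ULL << (delay_ms - 1);
	return 0;
}

int main(void)
{
	unsigned long d;

	for (d = 0; d <= 6; d++)
		printf("delay %lums -> window %llu sectors\n",
		       d, close_window(d, 6));
	return 0;
}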
+ */ + return 1; + } + + ioc = ad->io_context; + BUG_ON(!ioc); + + if (arq && ioc == arq->io_context) { + /* request from same process */ + return 1; + } + + aic = ioc->aic; + if (!aic) + return 0; + + if (!test_bit(AS_TASK_RUNNING, &aic->state)) { + /* process anticipated on has exitted */ + return 1; + } + + if (atomic_read(&aic->nr_queued) > 0) { + /* process has more requests queued */ + return 1; + } + + if (atomic_read(&aic->nr_dispatched) > 0) { + /* process has more requests dispatched */ + return 1; + } + + if (aic->ttime_mean > ad->antic_expire) { + /* the process thinks too much between requests */ + return 1; + } + + if (arq && aic->seek_samples) { + sector_t s; + if (ad->last_sector[REQ_SYNC] < arq->request->sector) + s = arq->request->sector - ad->last_sector[REQ_SYNC]; + else + s = ad->last_sector[REQ_SYNC] - arq->request->sector; + + if (aic->seek_mean > (s>>1)) { + /* this request is better than what we're expecting */ + return 1; + } + } + + return 0; +} + +/* + * as_can_anticipate indicates weather we should either run arq + * or keep anticipating a better request. + */ +static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) +{ + if (!ad->io_context) + /* + * Last request submitted was a write + */ + return 0; + + if (ad->antic_status == ANTIC_FINISHED) + /* + * Don't restart if we have just finished. Run the next request + */ + return 0; + + if (as_can_break_anticipation(ad, arq)) + /* + * This request is a good candidate. Don't keep anticipating, + * run it. + */ + return 0; + + /* + * OK from here, we haven't finished, and don't have a decent request! + * Status is either ANTIC_OFF so start waiting, + * ANTIC_WAIT_REQ so continue waiting for request to finish + * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request. + * + */ + + return 1; +} + +/* + * as_update_iohist keeps a decaying histogram of IO thinktimes, and + * updates @aic->ttime_mean based on that. It is called when a new + * request is queued. 
+ */ +static void as_update_iohist(struct as_io_context *aic, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + int data_dir = arq->is_sync; + unsigned long thinktime; + sector_t seek_dist; + + if (aic == NULL) + return; + + if (data_dir == REQ_SYNC) { + spin_lock(&aic->lock); + + if (test_bit(AS_TASK_IORUNNING, &aic->state) + && !atomic_read(&aic->nr_queued) + && !atomic_read(&aic->nr_dispatched)) { + /* Calculate read -> read thinktime */ + thinktime = jiffies - aic->last_end_request; + thinktime = min(thinktime, MAX_THINKTIME-1); + /* fixed point: 1.0 == 1<<8 */ + aic->ttime_samples += 256; + aic->ttime_total += 256*thinktime; + if (aic->ttime_samples) + /* fixed point factor is cancelled here */ + aic->ttime_mean = (aic->ttime_total + 128) + / aic->ttime_samples; + aic->ttime_samples = (aic->ttime_samples>>1) + + (aic->ttime_samples>>2); + aic->ttime_total = (aic->ttime_total>>1) + + (aic->ttime_total>>2); + } + + /* Calculate read -> read seek distance */ + if (!aic->seek_samples) + seek_dist = 0; + else if (aic->last_request_pos < rq->sector) + seek_dist = rq->sector - aic->last_request_pos; + else + seek_dist = aic->last_request_pos - rq->sector; + + aic->last_request_pos = rq->sector + rq->nr_sectors; + + /* + * Don't allow the seek distance to get too large from the + * odd fragment, pagein, etc + */ + if (aic->seek_samples < 400) /* second&third seek */ + seek_dist = min(seek_dist, (aic->seek_mean * 4) + + 2*1024*1024); + else + seek_dist = min(seek_dist, (aic->seek_mean * 4) + + 2*1024*64); + + aic->seek_samples += 256; + aic->seek_total += 256*seek_dist; + if (aic->seek_samples) { + aic->seek_mean = aic->seek_total + 128; + do_div(aic->seek_mean, aic->seek_samples); + } + aic->seek_samples = (aic->seek_samples>>1) + + (aic->seek_samples>>2); + aic->seek_total = (aic->seek_total>>1) + + (aic->seek_total>>2); + + spin_unlock(&aic->lock); + } +} + +/* + * as_update_arq must be called whenever a request (arq) is added to + * the sort_list. This function keeps caches up to date, and checks if the + * request might be one we are "anticipating" + */ +static void as_update_arq(struct as_data *ad, struct as_rq *arq) +{ + const int data_dir = arq->is_sync; + + /* keep the next_arq cache up to date */ + ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); + + /* + * have we been anticipating this request? + * or does it come from the same process as the one we are anticipating + * for? + */ + if (ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT) { + if (as_can_break_anticipation(ad, arq)) + as_antic_stop(ad); + } +} + +/* + * Gathers timings and resizes the write batch automatically + */ +void update_write_batch(struct as_data *ad) +{ + unsigned long batch = ad->batch_expire[REQ_ASYNC]; + long write_time; + + write_time = (jiffies - ad->current_batch_expires) + batch; + if (write_time < 0) + write_time = 0; + + if (write_time > batch && !ad->write_batch_idled) { + if (write_time > batch * 3) + ad->write_batch_count /= 2; + else + ad->write_batch_count--; + } else if (write_time < batch && ad->current_write_count == 0) { + if (batch > write_time * 3) + ad->write_batch_count *= 2; + else + ad->write_batch_count++; + } + + if (ad->write_batch_count < 1) + ad->write_batch_count = 1; +} + +/* + * as_completed_request is to be called when a request has completed and + * returned something to the requesting process, be it an error or data. 
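The think-time bookkeeping in as_update_iohist() is an exponentially decaying mean kept in 8-bit fixed point (1.0 == 256): each new sample adds weight 256, the mean is total/samples with rounding, and both accumulators are then scaled back to roughly 3/4 so that old history fades. The same arithmetic is reused for the seek-distance statistics. A standalone sketch of that update, with made-up sample values:

/* Illustrative sketch only: the fixed-point decaying average used for the
 * think-time (and seek-distance) statistics above. */
#include <stdio.h>

struct decaying_mean {
	unsigned long samples;	/* fixed point: 1.0 == 256 */
	unsigned long total;
	unsigned long mean;
};

static void add_sample(struct decaying_mean *m, unsigned long value)
{
	m->samples += 256;
	m->total += 256 * value;
	if (m->samples)
		m->mean = (m->total + 128) / m->samples;	/* rounded */

	/* Decay both accumulators to ~3/4 so recent samples dominate. */
	m->samples = (m->samples >> 1) + (m->samples >> 2);
	m->total = (m->total >> 1) + (m->total >> 2);
}

int main(void)
{
	struct decaying_mean m = { 0, 0, 0 };
	unsigned long ticks[] = { 2, 2, 2, 10, 2, 2 };	/* invented think times */
	unsigned int i;

	for (i = 0; i < sizeof(ticks) / sizeof(ticks[0]); i++) {
		add_sample(&m, ticks[i]);
		printf("sample %lu -> mean %lu\n", ticks[i], m.mean);
	}
	return 0;
}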
+ */ +static void as_completed_request(request_queue_t *q, struct request *rq) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(rq); + struct as_io_context *aic; + + if (unlikely(!blk_fs_request(rq))) + return; + + WARN_ON(blk_fs_request(rq) && arq->state == AS_RQ_NEW); + + if (arq->state != AS_RQ_DISPATCHED) + return; + + if (ad->changed_batch && ad->nr_dispatched == 1) { + kblockd_schedule_work(&ad->antic_work); + ad->changed_batch = 2; + } + ad->nr_dispatched--; + + /* + * Start counting the batch from when a request of that direction is + * actually serviced. This should help devices with big TCQ windows + * and writeback caches + */ + if (ad->batch_data_dir == REQ_SYNC && ad->changed_batch + && ad->batch_data_dir == arq->is_sync) { + update_write_batch(ad); + ad->current_batch_expires = jiffies + + ad->batch_expire[REQ_SYNC]; + ad->changed_batch = 0; + } + + if (!arq->io_context) + return; + + if (ad->io_context == arq->io_context) { + ad->antic_start = jiffies; + ad->ioc_finished = 1; + if (ad->antic_status == ANTIC_WAIT_REQ) { + /* + * We were waiting on this request, now anticipate + * the next one + */ + as_antic_waitnext(ad); + } + } + + aic = arq->io_context->aic; + if (!aic) + return; + + spin_lock(&aic->lock); + if (arq->is_sync == REQ_SYNC) { + set_bit(AS_TASK_IORUNNING, &aic->state); + aic->last_end_request = jiffies; + } + spin_unlock(&aic->lock); + + put_io_context(arq->io_context); +} + +/* + * as_remove_queued_request removes a request from the pre dispatch queue + * without updating refcounts. It is expected the caller will drop the + * reference unless it replaces the request at somepart of the elevator + * (ie. the dispatch queue) + */ +static void as_remove_queued_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + const int data_dir = arq->is_sync; + struct as_data *ad = q->elevator.elevator_data; + + WARN_ON(arq->state != AS_RQ_QUEUED); + + if (arq->io_context && arq->io_context->aic) { + BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); + atomic_dec(&arq->io_context->aic->nr_queued); + } + + /* + * Update the "next_arq" cache if we are about to remove its + * entry + */ + if (ad->next_arq[data_dir] == arq) + ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + + list_del_init(&arq->fifo); + as_remove_merge_hints(q, arq); + as_del_arq_rb(ad, arq); +} + +/* + * as_remove_dispatched_request is called to remove a request which has gone + * to the dispatch list. + */ +static void as_remove_dispatched_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + struct as_io_context *aic; + + if (!arq) { + WARN_ON(1); + return; + } + + WARN_ON(arq->state != AS_RQ_DISPATCHED); + WARN_ON(ON_RB(&arq->rb_node)); + if (arq->io_context && arq->io_context->aic) { + aic = arq->io_context->aic; + if (aic) { + WARN_ON(!atomic_read(&aic->nr_dispatched)); + atomic_dec(&aic->nr_dispatched); + } + } +} +/* + * as_remove_request is called when a driver has finished with a request. + * This should be only called for dispatched requests, but for some reason + * a POWER4 box running hwscan it does not. 
+ */ +static void as_remove_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + + if (unlikely(!blk_fs_request(rq))) + return; + + if (!arq) { + WARN_ON(1); + return; + } + + if (ON_RB(&arq->rb_node)) + as_remove_queued_request(q, rq); + else + as_remove_dispatched_request(q, rq); +} + +/* + * as_fifo_expired returns 0 if there are no expired reads on the fifo, + * 1 otherwise. It is ratelimited so that we only perform the check once per + * `fifo_expire' interval. Otherwise a large number of expired requests + * would create a hopeless seekstorm. + * + * See as_antic_expired comment. + */ +static int as_fifo_expired(struct as_data *ad, int adir) +{ + struct as_rq *arq; + long delta_jif; + + delta_jif = jiffies - ad->last_check_fifo[adir]; + if (unlikely(delta_jif < 0)) + delta_jif = -delta_jif; + if (delta_jif < ad->fifo_expire[adir]) + return 0; + + ad->last_check_fifo[adir] = jiffies; + + if (list_empty(&ad->fifo_list[adir])) + return 0; + + arq = list_entry_fifo(ad->fifo_list[adir].next); + + return time_after(jiffies, arq->expires); +} + +/* + * as_batch_expired returns true if the current batch has expired. A batch + * is a set of reads or a set of writes. + */ +static inline int as_batch_expired(struct as_data *ad) +{ + if (ad->changed_batch) + return 0; + + if (ad->batch_data_dir == REQ_SYNC) + /* TODO! add a check so a complete fifo gets written? */ + return time_after(jiffies, ad->current_batch_expires); + + return time_after(jiffies, ad->current_batch_expires) + || ad->current_write_count == 0; +} + +/* + * move an entry to dispatch queue + */ +static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) +{ + const int data_dir = arq->is_sync; + + BUG_ON(!ON_RB(&arq->rb_node)); + + as_antic_stop(ad); + ad->antic_status = ANTIC_OFF; + + /* + * This has to be set in order to be correctly updated by + * as_find_next_arq + */ + ad->last_sector[data_dir] = arq->request->sector + + arq->request->nr_sectors; + + ad->nr_dispatched++; + + if (data_dir == REQ_SYNC) { + /* In case we have to anticipate after this */ + copy_io_context(&ad->io_context, &arq->io_context); + } else { + if (ad->io_context) { + put_io_context(ad->io_context); + ad->io_context = NULL; + } + + if (ad->current_write_count != 0) + ad->current_write_count--; + } + ad->ioc_finished = 0; + + ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + + /* + * take it off the sort and fifo list, add to dispatch queue + */ + as_remove_queued_request(ad->q, arq->request); + list_add_tail(&arq->request->queuelist, ad->dispatch); + if (arq->io_context && arq->io_context->aic) + atomic_inc(&arq->io_context->aic->nr_dispatched); + + WARN_ON(arq->state != AS_RQ_QUEUED); + arq->state = AS_RQ_DISPATCHED; +} + +/* + * as_dispatch_request selects the best request according to + * read/write expire, batch expire, etc, and moves it to the dispatch + * queue. Returns 1 if a request was found, 0 otherwise. 
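Both as_antic_expired() and as_fifo_expired() rely on the same signed "absolute difference" trick so that a wrapping jiffies counter, or a timestamp that is extremely stale, cannot make the interval check misfire. A small sketch of that rate-limit test, with invented tick values:

/* Illustrative sketch only: the wrap-tolerant "has at least `interval'
 * elapsed since `last'?" test used by the expiry checks above. */
#include <stdio.h>
#include <limits.h>

static int interval_elapsed(unsigned long now, unsigned long last,
			    unsigned long interval)
{
	long delta = now - last;	/* may come out "negative" after a wrap */

	if (delta < 0)
		delta = -delta;
	return delta >= interval;
}

int main(void)
{
	/* Normal case: 7 ticks elapsed against a 5-tick interval. */
	printf("%d\n", interval_elapsed(107, 100, 5));
	/* Not yet: only 2 ticks elapsed. */
	printf("%d\n", interval_elapsed(102, 100, 5));
	/* Wrapped case: `now' is numerically smaller than `last'. */
	printf("%d\n", interval_elapsed(3, ULONG_MAX - 1, 5));
	return 0;
}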
+ */ +static int as_dispatch_request(struct as_data *ad) +{ + struct as_rq *arq; + const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); + const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); + + /* Signal that the write batch was uncontended, so we can't time it */ + if (ad->batch_data_dir == REQ_ASYNC && !reads) { + if (ad->current_write_count == 0 || !writes) + ad->write_batch_idled = 1; + } + + if (!(reads || writes) + || ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT + || ad->changed_batch == 1) + return 0; + + if (!(reads && writes && as_batch_expired(ad)) ) { + /* + * batch is still running or no reads or no writes + */ + arq = ad->next_arq[ad->batch_data_dir]; + + if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { + if (as_fifo_expired(ad, REQ_SYNC)) + goto fifo_expired; + + if (as_can_anticipate(ad, arq)) { + as_antic_waitreq(ad); + return 0; + } + } + + if (arq) { + /* we have a "next request" */ + if (reads && !writes) + ad->current_batch_expires = + jiffies + ad->batch_expire[REQ_SYNC]; + goto dispatch_request; + } + } + + /* + * at this point we are not running a batch. select the appropriate + * data direction (read / write) + */ + + if (reads) { + BUG_ON(RB_EMPTY(&ad->sort_list[REQ_SYNC])); + + if (writes && ad->batch_data_dir == REQ_SYNC) + /* + * Last batch was a read, switch to writes + */ + goto dispatch_writes; + + if (ad->batch_data_dir == REQ_ASYNC) + ad->changed_batch = 1; + ad->batch_data_dir = REQ_SYNC; + arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); + ad->last_check_fifo[ad->batch_data_dir] = jiffies; + goto dispatch_request; + } + + /* + * the last batch was a read + */ + + if (writes) { +dispatch_writes: + BUG_ON(RB_EMPTY(&ad->sort_list[REQ_ASYNC])); + + if (ad->batch_data_dir == REQ_SYNC) + ad->changed_batch = 1; + ad->batch_data_dir = REQ_ASYNC; + ad->current_write_count = ad->write_batch_count; + ad->write_batch_idled = 0; + arq = ad->next_arq[ad->batch_data_dir]; + goto dispatch_request; + } + + BUG(); + return 0; + +dispatch_request: + /* + * If a request has expired, service it. + */ + + if (as_fifo_expired(ad, ad->batch_data_dir)) { +fifo_expired: + arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); + BUG_ON(arq == NULL); + } + + if (ad->changed_batch) { + if (ad->changed_batch == 1 && ad->nr_dispatched) + return 0; + if (ad->batch_data_dir == REQ_ASYNC) { + ad->current_batch_expires = jiffies + + ad->batch_expire[REQ_ASYNC]; + ad->changed_batch = 0; + } else + ad->changed_batch = 2; + arq->request->flags |= REQ_HARDBARRIER; + } + + /* + * arq is the selected appropriate request. 
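as_batch_expired() above reduces to a small predicate: no batch expires while a direction change is still in flight, a read (sync) batch expires purely on time, and a write batch expires on time or when its current_write_count budget is exhausted. A standalone model of that decision, with the current jiffies value passed in explicitly so it can be exercised in isolation (illustrative; the kernel uses the wraparound-safe time_after() rather than a plain comparison):

/* Standalone model of as_batch_expired(); illustrative, not the kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct batch {
        bool changed_batch;        /* direction switch in progress */
        bool sync;                 /* true: read batch, false: write batch */
        unsigned long expires;     /* current_batch_expires */
        unsigned int write_count;  /* current_write_count (writes only) */
};

static bool batch_expired(const struct batch *b, unsigned long now)
{
        if (b->changed_batch)
                return false;
        if (b->sync)
                return now > b->expires;
        return now > b->expires || b->write_count == 0;
}

int main(void)
{
        struct batch w = { .sync = false, .expires = 200, .write_count = 0 };

        /* a write batch with an exhausted budget expires before its timer */
        printf("%d\n", batch_expired(&w, 100));  /* prints 1 */
        return 0;
}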
+ */ + as_move_to_dispatch(ad, arq); + + return 1; +} + +static struct request *as_next_request(request_queue_t *q) +{ + struct as_data *ad = q->elevator.elevator_data; + struct request *rq = NULL; + + /* + * if there are still requests on the dispatch queue, grab the first + */ + if (!list_empty(ad->dispatch) || as_dispatch_request(ad)) + rq = list_entry_rq(ad->dispatch->next); + + return rq; +} + +/* + * add arq to rbtree and fifo + */ +static void as_add_request(struct as_data *ad, struct as_rq *arq) +{ + int data_dir; + + if (rq_data_dir(arq->request) == READ + || current->flags&PF_SYNCWRITE) + arq->is_sync = 1; + else + arq->is_sync = 0; + data_dir = arq->is_sync; + + arq->io_context = as_get_io_context(); + + if (arq->io_context) { + atomic_inc(&arq->io_context->aic->nr_queued); + as_update_iohist(arq->io_context->aic, arq->request); + } + + as_add_arq_rb(ad, arq); + + /* + * set expire time (only used for reads) and add to fifo list + */ + arq->expires = jiffies + ad->fifo_expire[data_dir]; + list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + arq->state = AS_RQ_QUEUED; + as_update_arq(ad, arq); /* keep state machine up to date */ +} + +static void +as_insert_request(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(rq); + + if (unlikely(rq->flags & REQ_HARDBARRIER)) { + AS_INVALIDATE_HASH(ad); + q->last_merge = NULL; + + while (ad->next_arq[REQ_SYNC]) + as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); + + while (ad->next_arq[REQ_ASYNC]) + as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); + } + + if (unlikely(!blk_fs_request(rq))) { + if (!insert_here) + insert_here = ad->dispatch->prev; + + list_add(&rq->queuelist, insert_here); + + /* Stop anticipating - let this request get through */ + if (!list_empty(ad->dispatch) + && (ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT)) + as_antic_stop(ad); + + return; + } + + if (rq_mergeable(rq)) { + as_add_arq_hash(ad, arq); + + if (!q->last_merge) + q->last_merge = &rq->queuelist; + } + + as_add_request(ad, arq); +} + +/* + * as_queue_empty tells us if there are requests left in the device. 
It may + * not be the case that a driver can get the next request even if the queue + * is not empty - it is used in the block layer to check for plugging and + * merging opportunities + */ +static int as_queue_empty(request_queue_t *q) +{ + struct as_data *ad = q->elevator.elevator_data; + + if (!list_empty(&ad->fifo_list[REQ_ASYNC]) + || !list_empty(&ad->fifo_list[REQ_SYNC]) + || !list_empty(ad->dispatch)) + return 0; + + return 1; +} + +static struct request * +as_former_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + struct rb_node *rbprev = rb_prev(&arq->rb_node); + struct request *ret = NULL; + + if (rbprev) + ret = rb_entry_arq(rbprev)->request; + + return ret; +} + +static struct request * +as_latter_request(request_queue_t *q, struct request *rq) +{ + struct as_rq *arq = RQ_DATA(rq); + struct rb_node *rbnext = rb_next(&arq->rb_node); + struct request *ret = NULL; + + if (rbnext) + ret = rb_entry_arq(rbnext)->request; + + return ret; +} + +static int +as_merge(request_queue_t *q, struct list_head **insert, struct bio *bio) +{ + struct as_data *ad = q->elevator.elevator_data; + sector_t rb_key = bio->bi_sector + bio_sectors(bio); + struct request *__rq; + int ret; + + /* + * try last_merge to avoid going to hash + */ + ret = elv_try_last_merge(q, bio); + if (ret != ELEVATOR_NO_MERGE) { + __rq = list_entry_rq(q->last_merge); + goto out_insert; + } + + /* + * see if the merge hash can satisfy a back merge + */ + __rq = as_find_arq_hash(ad, bio->bi_sector); + if (__rq) { + BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); + + if (elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_BACK_MERGE; + goto out; + } + } + + /* + * check for front merge + */ + __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); + if (__rq) { + BUG_ON(rb_key != rq_rb_key(__rq)); + + if (elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_FRONT_MERGE; + goto out; + } + } + + return ELEVATOR_NO_MERGE; +out: + q->last_merge = &__rq->queuelist; +out_insert: + if (ret) + as_hot_arq_hash(ad, RQ_DATA(__rq)); + *insert = &__rq->queuelist; + return ret; +} + +static void as_merged_request(request_queue_t *q, struct request *req) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(req); + + /* + * hash always needs to be repositioned, key is end sector + */ + as_del_arq_hash(arq); + as_add_arq_hash(ad, arq); + + /* + * if the merge was a front merge, we need to reposition request + */ + if (rq_rb_key(req) != arq->rb_key) { + as_del_arq_rb(ad, arq); + as_add_arq_rb(ad, arq); + /* + * Note! At this stage of this and the next function, our next + * request may not be optimal - eg the request may have "grown" + * behind the disk head. We currently don't bother adjusting. 
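The two lookups in as_merge() above express the usual merge geometry: a back merge appends the bio to a request whose last sector ends exactly where the bio starts (the hash is keyed on request end sector), while a front merge prepends the bio to a request starting exactly where the bio ends (the rbtree key, bi_sector + bio_sectors). A tiny standalone check of those conditions, mirroring the BUG_ON()s (illustrative; sector_t shortened to unsigned long):

/* Illustrative merge-geometry check, matching the BUG_ON()s in as_merge(). */
#include <stdbool.h>
#include <stdio.h>

struct req   { unsigned long sector, nr_sectors; };
struct bio_s { unsigned long sector, nr_sectors; };

static bool back_mergeable(const struct req *rq, const struct bio_s *bio)
{
        return rq->sector + rq->nr_sectors == bio->sector;  /* rq end == bio start */
}

static bool front_mergeable(const struct req *rq, const struct bio_s *bio)
{
        return bio->sector + bio->nr_sectors == rq->sector; /* bio end == rq start */
}

int main(void)
{
        struct req rq      = { .sector = 100, .nr_sectors = 8 };
        struct bio_s back  = { .sector = 108, .nr_sectors = 8 };
        struct bio_s front = { .sector = 92,  .nr_sectors = 8 };

        printf("back=%d front=%d\n",
               back_mergeable(&rq, &back), front_mergeable(&rq, &front));
        return 0;
}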
+ */ + } + + q->last_merge = &req->queuelist; +} + +static void +as_merged_requests(request_queue_t *q, struct request *req, + struct request *next) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(req); + struct as_rq *anext = RQ_DATA(next); + + BUG_ON(!arq); + BUG_ON(!anext); + + /* + * reposition arq (this is the merged request) in hash, and in rbtree + * in case of a front merge + */ + as_del_arq_hash(arq); + as_add_arq_hash(ad, arq); + + if (rq_rb_key(req) != arq->rb_key) { + as_del_arq_rb(ad, arq); + as_add_arq_rb(ad, arq); + } + + /* + * if anext expires before arq, assign its expire time to arq + * and move into anext position (anext will be deleted) in fifo + */ + if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) { + if (time_before(anext->expires, arq->expires)) { + list_move(&arq->fifo, &anext->fifo); + arq->expires = anext->expires; + /* + * Don't copy here but swap, because when anext is + * removed below, it must contain the unused context + */ + swap_io_context(&arq->io_context, &anext->io_context); + } + } + + /* + * kill knowledge of next, this one is a goner + */ + as_remove_queued_request(q, next); + put_io_context(anext->io_context); +} + +/* + * This is executed in a "deferred" process context, by kblockd. It calls the + * driver's request_fn so the driver can submit that request. + * + * IMPORTANT! This guy will reenter the elevator, so set up all queue global + * state before calling, and don't rely on any state over calls. + * + * FIXME! dispatch queue is not a queue at all! + */ +static void as_work_handler(void *data) +{ + struct request_queue *q = data; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (as_next_request(q)) + q->request_fn(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +static void as_put_request(request_queue_t *q, struct request *rq) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = RQ_DATA(rq); + + if (!arq) { + WARN_ON(1); + return; + } + + mempool_free(arq, ad->arq_pool); + rq->elevator_private = NULL; +} + +static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask) +{ + struct as_data *ad = q->elevator.elevator_data; + struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); + + if (arq) { + RB_CLEAR(&arq->rb_node); + arq->request = rq; + arq->state = AS_RQ_NEW; + arq->io_context = NULL; + INIT_LIST_HEAD(&arq->hash); + arq->hash_valid_count = 0; + INIT_LIST_HEAD(&arq->fifo); + rq->elevator_private = arq; + return 0; + } + + return 1; +} + +static int as_may_queue(request_queue_t *q, int rw) +{ + int ret = 0; + struct as_data *ad = q->elevator.elevator_data; + struct io_context *ioc; + if (ad->antic_status == ANTIC_WAIT_REQ || + ad->antic_status == ANTIC_WAIT_NEXT) { + ioc = as_get_io_context(); + if (ad->io_context == ioc) + ret = 1; + put_io_context(ioc); + } + + return ret; +} + +static void as_exit(request_queue_t *q, elevator_t *e) +{ + struct as_data *ad = e->elevator_data; + + del_timer_sync(&ad->antic_timer); + kblockd_flush(); + + BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); + BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); + + mempool_destroy(ad->arq_pool); + put_io_context(ad->io_context); + kfree(ad->hash); + kfree(ad); +} + +/* + * initialize elevator private data (as_data), and alloc a arq for + * each request on the free lists + */ +static int as_init(request_queue_t *q, elevator_t *e) +{ + struct as_data *ad; + int i; + + if (!arq_pool) + return -ENOMEM; + + ad = kmalloc(sizeof(*ad), GFP_KERNEL); + if 
(!ad) + return -ENOMEM; + memset(ad, 0, sizeof(*ad)); + + ad->q = q; /* Identify what queue the data belongs to */ + + ad->hash = kmalloc(sizeof(struct list_head)*AS_HASH_ENTRIES,GFP_KERNEL); + if (!ad->hash) { + kfree(ad); + return -ENOMEM; + } + + ad->arq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, arq_pool); + if (!ad->arq_pool) { + kfree(ad->hash); + kfree(ad); + return -ENOMEM; + } + + /* anticipatory scheduling helpers */ + ad->antic_timer.function = as_antic_timeout; + ad->antic_timer.data = (unsigned long)q; + init_timer(&ad->antic_timer); + INIT_WORK(&ad->antic_work, as_work_handler, q); + + for (i = 0; i < AS_HASH_ENTRIES; i++) + INIT_LIST_HEAD(&ad->hash[i]); + + INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); + INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); + ad->sort_list[REQ_SYNC] = RB_ROOT; + ad->sort_list[REQ_ASYNC] = RB_ROOT; + ad->dispatch = &q->queue_head; + ad->fifo_expire[REQ_SYNC] = default_read_expire; + ad->fifo_expire[REQ_ASYNC] = default_write_expire; + ad->hash_valid_count = 1; + ad->antic_expire = default_antic_expire; + ad->batch_expire[REQ_SYNC] = default_read_batch_expire; + ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; + e->elevator_data = ad; + + ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; + ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; + if (ad->write_batch_count < 2) + ad->write_batch_count = 2; + return 0; +} + +/* + * sysfs parts below + */ +struct as_fs_entry { + struct attribute attr; + ssize_t (*show)(struct as_data *, char *); + ssize_t (*store)(struct as_data *, const char *, size_t); +}; + +static ssize_t +as_var_show(unsigned int var, char *page) +{ + var = (var * 1000) / HZ; + return sprintf(page, "%d\n", var); +} + +static ssize_t +as_var_store(unsigned long *var, const char *page, size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (tmp != 0) { + tmp = (tmp * HZ) / 1000; + if (tmp == 0) + tmp = 1; + } + *var = tmp; + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR) \ +static ssize_t __FUNC(struct as_data *ad, char *page) \ +{ \ + return as_var_show(__VAR, (page)); \ +} +SHOW_FUNCTION(as_readexpire_show, ad->fifo_expire[REQ_SYNC]); +SHOW_FUNCTION(as_writeexpire_show, ad->fifo_expire[REQ_ASYNC]); +SHOW_FUNCTION(as_anticexpire_show, ad->antic_expire); +SHOW_FUNCTION(as_read_batchexpire_show, ad->batch_expire[REQ_SYNC]); +SHOW_FUNCTION(as_write_batchexpire_show, ad->batch_expire[REQ_ASYNC]); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ +static ssize_t __FUNC(struct as_data *ad, const char *page, size_t count) \ +{ \ + int ret = as_var_store(__PTR, (page), count); \ + if (*(__PTR) < (MIN)) \ + *(__PTR) = (MIN); \ + else if (*(__PTR) > (MAX)) \ + *(__PTR) = (MAX); \ + return ret; \ +} +STORE_FUNCTION(as_readexpire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); +STORE_FUNCTION(as_writeexpire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); +STORE_FUNCTION(as_anticexpire_store, &ad->antic_expire, 0, INT_MAX); +STORE_FUNCTION(as_read_batchexpire_store, + &ad->batch_expire[REQ_SYNC], 0, INT_MAX); +STORE_FUNCTION(as_write_batchexpire_store, + &ad->batch_expire[REQ_ASYNC], 0, INT_MAX); +#undef STORE_FUNCTION + +static struct as_fs_entry as_readexpire_entry = { + .attr = {.name = "read_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_readexpire_show, + .store = as_readexpire_store, +}; +static struct as_fs_entry as_writeexpire_entry = { + .attr = {.name = "write_expire", .mode = S_IRUGO | S_IWUSR 
}, + .show = as_writeexpire_show, + .store = as_writeexpire_store, +}; +static struct as_fs_entry as_anticexpire_entry = { + .attr = {.name = "antic_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_anticexpire_show, + .store = as_anticexpire_store, +}; +static struct as_fs_entry as_read_batchexpire_entry = { + .attr = {.name = "read_batch_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_read_batchexpire_show, + .store = as_read_batchexpire_store, +}; +static struct as_fs_entry as_write_batchexpire_entry = { + .attr = {.name = "write_batch_expire", .mode = S_IRUGO | S_IWUSR }, + .show = as_write_batchexpire_show, + .store = as_write_batchexpire_store, +}; + +static struct attribute *default_attrs[] = { + &as_readexpire_entry.attr, + &as_writeexpire_entry.attr, + &as_anticexpire_entry.attr, + &as_read_batchexpire_entry.attr, + &as_write_batchexpire_entry.attr, + NULL, +}; + +#define to_as(atr) container_of((atr), struct as_fs_entry, attr) + +static ssize_t +as_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct as_fs_entry *entry = to_as(attr); + + if (!entry->show) + return 0; + + return entry->show(e->elevator_data, page); +} + +static ssize_t +as_attr_store(struct kobject *kobj, struct attribute *attr, + const char *page, size_t length) +{ + elevator_t *e = container_of(kobj, elevator_t, kobj); + struct as_fs_entry *entry = to_as(attr); + + if (!entry->store) + return -EINVAL; + + return entry->store(e->elevator_data, page, length); +} + +static struct sysfs_ops as_sysfs_ops = { + .show = as_attr_show, + .store = as_attr_store, +}; + +struct kobj_type as_ktype = { + .sysfs_ops = &as_sysfs_ops, + .default_attrs = default_attrs, +}; + +static int __init as_slab_setup(void) +{ + arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), + 0, 0, NULL, NULL); + + if (!arq_pool) + panic("as: can't init slab pool\n"); + + return 0; +} + +subsys_initcall(as_slab_setup); + +elevator_t iosched_as = { + .elevator_merge_fn = as_merge, + .elevator_merged_fn = as_merged_request, + .elevator_merge_req_fn = as_merged_requests, + .elevator_next_req_fn = as_next_request, + .elevator_add_req_fn = as_insert_request, + .elevator_remove_req_fn = as_remove_request, + .elevator_queue_empty_fn = as_queue_empty, + .elevator_completed_req_fn = as_completed_request, + .elevator_former_req_fn = as_former_request, + .elevator_latter_req_fn = as_latter_request, + .elevator_set_req_fn = as_set_request, + .elevator_put_req_fn = as_put_request, + .elevator_may_queue_fn = as_may_queue, + .elevator_init_fn = as_init, + .elevator_exit_fn = as_exit, + + .elevator_ktype = &as_ktype, +}; + +EXPORT_SYMBOL(iosched_as); diff -Nru a/drivers/block/cciss.c b/drivers/block/cciss.c --- a/drivers/block/cciss.c Sat Jul 5 12:40:32 2003 +++ b/drivers/block/cciss.c Sat Jul 5 12:40:32 2003 @@ -1887,7 +1887,7 @@ BUG(); if (( c = cmd_alloc(h, 1)) == NULL) - goto startio; + goto full; blkdev_dequeue_request(creq); @@ -1960,8 +1960,9 @@ h->maxQsinceinit = h->Qdepth; goto queue; -startio: +full: blk_stop_queue(q); +startio: start_io(h); } diff -Nru a/drivers/block/elevator.c b/drivers/block/elevator.c --- a/drivers/block/elevator.c Sat Jul 5 12:40:32 2003 +++ b/drivers/block/elevator.c Sat Jul 5 12:40:32 2003 @@ -361,17 +361,31 @@ e->elevator_put_req_fn(q, rq); } -int elv_register_queue(struct gendisk *disk) +int elv_may_queue(request_queue_t *q, int rw) { - request_queue_t *q = disk->queue; - elevator_t *e; + elevator_t *e = &q->elevator; + + if 
(e->elevator_may_queue_fn) + return e->elevator_may_queue_fn(q, rw); + + return 0; +} - if (!q) - return -ENXIO; +void elv_completed_request(request_queue_t *q, struct request *rq) +{ + elevator_t *e = &q->elevator; + + if (e->elevator_completed_req_fn) + e->elevator_completed_req_fn(q, rq); +} + +int elv_register_queue(struct request_queue *q) +{ + elevator_t *e; e = &q->elevator; - e->kobj.parent = kobject_get(&disk->kobj); + e->kobj.parent = kobject_get(&q->kobj); if (!e->kobj.parent) return -EBUSY; @@ -381,14 +395,12 @@ return kobject_register(&e->kobj); } -void elv_unregister_queue(struct gendisk *disk) +void elv_unregister_queue(struct request_queue *q) { - request_queue_t *q = disk->queue; - if (q) { elevator_t * e = &q->elevator; kobject_unregister(&e->kobj); - kobject_put(&disk->kobj); + kobject_put(&q->kobj); } } @@ -408,5 +420,6 @@ EXPORT_SYMBOL(elv_next_request); EXPORT_SYMBOL(elv_remove_request); EXPORT_SYMBOL(elv_queue_empty); +EXPORT_SYMBOL(elv_completed_request); EXPORT_SYMBOL(elevator_exit); EXPORT_SYMBOL(elevator_init); diff -Nru a/drivers/block/floppy.c b/drivers/block/floppy.c --- a/drivers/block/floppy.c Sat Jul 5 12:40:31 2003 +++ b/drivers/block/floppy.c Sat Jul 5 12:40:31 2003 @@ -3767,7 +3767,7 @@ * Needed so that programs such as fdrawcmd still can work on write * protected disks */ if ((filp->f_mode & 2) || - (inode->i_sb && (permission(inode,2) == 0))) + (inode->i_sb && (permission(inode,2, NULL) == 0))) filp->private_data = (void*) 8; if (UFDCS->rawcmd == 1) diff -Nru a/drivers/block/genhd.c b/drivers/block/genhd.c --- a/drivers/block/genhd.c Sat Jul 5 12:40:31 2003 +++ b/drivers/block/genhd.c Sat Jul 5 12:40:31 2003 @@ -191,7 +191,7 @@ blk_register_region(MKDEV(disk->major, disk->first_minor), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); - elv_register_queue(disk); + blk_register_queue(disk); } EXPORT_SYMBOL(add_disk); @@ -199,7 +199,7 @@ void unlink_gendisk(struct gendisk *disk) { - elv_unregister_queue(disk); + blk_unregister_queue(disk); blk_unregister_region(MKDEV(disk->major, disk->first_minor), disk->minors); } diff -Nru a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c --- a/drivers/block/ll_rw_blk.c Sat Jul 5 12:40:31 2003 +++ b/drivers/block/ll_rw_blk.c Sat Jul 5 12:40:31 2003 @@ -42,40 +42,50 @@ static LIST_HEAD(blk_plug_list); static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +static wait_queue_head_t congestion_wqh[2]; + /* - * Number of requests per queue. This many for reads and for writes (twice - * this number, total). + * Controlling structure to kblockd */ -static int queue_nr_requests; +static struct workqueue_struct *kblockd_workqueue; unsigned long blk_max_low_pfn, blk_max_pfn; -static wait_queue_head_t congestion_wqh[2]; + +/* Amount of time in which a process may batch requests */ +#define BLK_BATCH_TIME (HZ/50UL) + +/* Number of requests a "batching" process may submit */ +#define BLK_BATCH_REQ 32 /* - * Return the threshold (number of free requests) at which the queue is + * Return the threshold (number of used requests) at which the queue is * considered to be congested. It include a little hysteresis to keep the * context switch rate down. 
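The per-queue thresholds that replace the old global queue_nr_requests arithmetic are plain functions of q->nr_requests; the two-request gap between the on and off values provides the hysteresis the comment refers to. A standalone copy of the new calculation (illustrative; e.g. for nr_requests = 128 the queue is marked congested at 113 used requests and uncongested again at 111):

/* Standalone copy of the new per-queue congestion thresholds. */
#include <stdio.h>

static int on_threshold(int nr_requests)
{
        int ret = nr_requests - nr_requests / 8 + 1;
        return ret > nr_requests ? nr_requests : ret;
}

static int off_threshold(int nr_requests)
{
        int ret = nr_requests - nr_requests / 8 - 1;
        return ret < 1 ? 1 : ret;
}

int main(void)
{
        printf("on=%d off=%d\n", on_threshold(128), off_threshold(128));
        return 0;
}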
*/ -static inline int queue_congestion_on_threshold(void) +static inline int queue_congestion_on_threshold(struct request_queue *q) { int ret; - ret = queue_nr_requests / 8 - 1; - if (ret < 0) - ret = 1; + ret = q->nr_requests - (q->nr_requests / 8) + 1; + + if (ret > q->nr_requests) + ret = q->nr_requests; + return ret; } /* * The threshold at which a queue is considered to be uncongested */ -static inline int queue_congestion_off_threshold(void) +static inline int queue_congestion_off_threshold(struct request_queue *q) { int ret; - ret = queue_nr_requests / 8 + 1; - if (ret > queue_nr_requests) - ret = queue_nr_requests; + ret = q->nr_requests - (q->nr_requests / 8) - 1; + + if (ret < 1) + ret = 1; + return ret; } @@ -188,6 +198,7 @@ /* * set defaults */ + q->nr_requests = BLKDEV_MAX_RQ; q->max_phys_segments = MAX_PHYS_SEGMENTS; q->max_hw_segments = MAX_HW_SEGMENTS; q->make_request_fn = mfn; @@ -441,13 +452,15 @@ q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); } -static int init_tag_map(struct blk_queue_tag *tags, int depth) +static int +init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) { int bits, i; - if (depth > (queue_nr_requests*2)) { - depth = (queue_nr_requests*2); - printk(KERN_ERR "%s: adjusted depth to %d\n", __FUNCTION__, depth); + if (depth > q->nr_requests * 2) { + depth = q->nr_requests * 2; + printk(KERN_ERR "%s: adjusted depth to %d\n", + __FUNCTION__, depth); } tags->tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC); @@ -476,7 +489,6 @@ return -ENOMEM; } - /** * blk_queue_init_tags - initialize the queue tag info * @q: the request queue for the device @@ -490,7 +502,7 @@ if (!tags) goto fail; - if (init_tag_map(tags, depth)) + if (init_tag_map(q, tags, depth)) goto fail; INIT_LIST_HEAD(&tags->busy_list); @@ -540,7 +552,7 @@ tag_map = bqt->tag_map; max_depth = bqt->real_max_depth; - if (init_tag_map(bqt, new_depth)) + if (init_tag_map(q, bqt, new_depth)) return -ENOMEM; memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); @@ -1022,7 +1034,7 @@ /* * was plugged, fire request_fn if queue has stuff to do */ - if (!elv_queue_empty(q)) + if (elv_next_request(q)) q->request_fn(q); } @@ -1057,7 +1069,7 @@ { request_queue_t *q = (request_queue_t *)data; - schedule_work(&q->unplug_work); + kblockd_schedule_work(&q->unplug_work); } /** @@ -1072,8 +1084,8 @@ **/ void blk_start_queue(request_queue_t *q) { - if (test_and_clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) - schedule_work(&q->unplug_work); + clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + schedule_work(&q->unplug_work); } /** @@ -1165,7 +1177,7 @@ elevator_exit(q); del_timer_sync(&q->unplug_timer); - flush_scheduled_work(); + kblockd_flush(); mempool_destroy(rl->rq_pool); @@ -1180,6 +1192,8 @@ struct request_list *rl = &q->rq; rl->count[READ] = rl->count[WRITE] = 0; + init_waitqueue_head(&rl->wait[READ]); + init_waitqueue_head(&rl->wait[WRITE]); rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep); @@ -1191,6 +1205,18 @@ static int __make_request(request_queue_t *, struct bio *); +static elevator_t *chosen_elevator = &iosched_as; + +static int __init elevator_setup(char *str) +{ + if (!strcmp(str, "deadline")) + chosen_elevator = &iosched_deadline; + if (!strcmp(str, "as")) + chosen_elevator = &iosched_as; + return 1; +} +__setup("elevator=", elevator_setup); + /** * blk_init_queue - prepare a request queue for use with a block device * @q: The &request_queue_t to be initialised @@ -1222,11 +1248,20 @@ int 
blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock) { int ret; + static int printed; if (blk_init_free_list(q)) return -ENOMEM; - if ((ret = elevator_init(q, &iosched_deadline))) { + if (!printed) { + printed = 1; + if (chosen_elevator == &iosched_deadline) + printk("deadline elevator\n"); + else if (chosen_elevator == &iosched_as) + printk("anticipatory scheduling elevator\n"); + } + + if ((ret = elevator_init(q, chosen_elevator))) { blk_cleanup_queue(q); return ret; } @@ -1271,6 +1306,60 @@ return NULL; } +/* + * ioc_batching returns true if the ioc is a valid batching request and + * should be given priority access to a request. + */ +static inline int ioc_batching(struct io_context *ioc) +{ + if (!ioc) + return 0; + + /* + * Make sure the process is able to allocate at least 1 request + * even if the batch times out, otherwise we could theoretically + * lose wakeups. + */ + return ioc->nr_batch_requests == BLK_BATCH_REQ || + (ioc->nr_batch_requests > 0 + && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME)); +} + +/* + * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This + * will cause the process to be a "batcher" on all queues in the system. This + * is the behaviour we want though - once it gets a wakeup it should be given + * a nice run. + */ +void ioc_set_batching(struct io_context *ioc) +{ + if (!ioc || ioc_batching(ioc)) + return; + + ioc->nr_batch_requests = BLK_BATCH_REQ; + ioc->last_waited = jiffies; +} + +/* + * A request has just been released. Account for it, update the full and + * congestion status, wake up any waiters. Called under q->queue_lock. + */ +static void freed_request(request_queue_t *q, int rw) +{ + struct request_list *rl = &q->rq; + + rl->count[rw]--; + if (rl->count[rw] < queue_congestion_off_threshold(q)) + clear_queue_congested(q, rw); + if (rl->count[rw]+1 <= q->nr_requests) { + smp_mb(); + if (waitqueue_active(&rl->wait[rw])) + wake_up(&rl->wait[rw]); + if (!waitqueue_active(&rl->wait[rw])) + blk_clear_queue_full(q, rw); + } +} + #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* * Get a free request, queue_lock must not be held @@ -1279,26 +1368,54 @@ { struct request *rq = NULL; struct request_list *rl = &q->rq; + struct io_context *ioc = get_io_context(gfp_mask); spin_lock_irq(q->queue_lock); - if (rl->count[rw] == BLKDEV_MAX_RQ) { + if (rl->count[rw]+1 >= q->nr_requests) { + /* + * The queue will fill after this allocation, so set it as + * full, and mark this process as "batching". This process + * will be allowed to complete a batch of requests, others + * will be blocked. + */ + if (!blk_queue_full(q, rw)) { + ioc_set_batching(ioc); + blk_set_queue_full(q, rw); + } + } + + if (blk_queue_full(q, rw) + && !ioc_batching(ioc) && !elv_may_queue(q, rw)) { + /* + * The queue is full and the allocating process is not a + * "batcher", and not exempted by the IO scheduler + */ spin_unlock_irq(q->queue_lock); goto out; } + rl->count[rw]++; - if ((BLKDEV_MAX_RQ - rl->count[rw]) < queue_congestion_on_threshold()) + if (rl->count[rw] >= queue_congestion_on_threshold(q)) set_queue_congested(q, rw); spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, gfp_mask); if (!rq) { + /* + * Allocation failed presumably due to memory. Undo anything + * we might have messed up. + * + * Allocating task should really be put onto the front of the + * wait queue, but this is pretty rare. 
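The batching rules introduced here can be read as: a process that has just been made a batcher may always take at least one request, and may keep allocating for up to BLK_BATCH_TIME (HZ/50, i.e. 20ms at HZ=1000) after it last waited, or until its BLK_BATCH_REQ budget of 32 requests runs out. A userspace model with jiffies passed explicitly (illustrative; time_before() in the kernel is wraparound-safe, the plain comparison here is not):

/* Userspace model of ioc_batching()/ioc_set_batching(); illustrative only. */
#include <stdbool.h>
#include <stdio.h>

#define HZ              1000UL
#define BLK_BATCH_TIME  (HZ / 50UL)     /* 20ms worth of jiffies */
#define BLK_BATCH_REQ   32

struct ioc { int nr_batch_requests; unsigned long last_waited; };

static bool ioc_batching(const struct ioc *ioc, unsigned long now)
{
        /* a fresh batcher may always take its first request */
        return ioc->nr_batch_requests == BLK_BATCH_REQ ||
               (ioc->nr_batch_requests > 0 &&
                now < ioc->last_waited + BLK_BATCH_TIME);
}

static void ioc_set_batching(struct ioc *ioc, unsigned long now)
{
        if (ioc_batching(ioc, now))
                return;
        ioc->nr_batch_requests = BLK_BATCH_REQ;
        ioc->last_waited = now;
}

int main(void)
{
        struct ioc ioc = { 0, 0 };
        unsigned long now = 1000;

        ioc_set_batching(&ioc, now);
        ioc.nr_batch_requests--;                        /* one request taken */
        printf("%d\n", ioc_batching(&ioc, now + 10));   /* 1: inside window  */
        printf("%d\n", ioc_batching(&ioc, now + 100));  /* 0: window expired */
        return 0;
}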
+ */ spin_lock_irq(q->queue_lock); - rl->count[rw]--; - if ((BLKDEV_MAX_RQ - rl->count[rw]) >= queue_congestion_off_threshold()) - clear_queue_congested(q, rw); + freed_request(q, rw); spin_unlock_irq(q->queue_lock); goto out; } + + if (ioc_batching(ioc)) + ioc->nr_batch_requests--; INIT_LIST_HEAD(&rq->queuelist); @@ -1321,22 +1438,44 @@ rq->sense = NULL; out: + put_io_context(ioc); return rq; } /* - * No available requests for this queue, unplug the device. + * No available requests for this queue, unplug the device and wait for some + * requests to become available. */ static struct request *get_request_wait(request_queue_t *q, int rw) { + DEFINE_WAIT(wait); struct request *rq; generic_unplug_device(q); do { + struct request_list *rl = &q->rq; + + prepare_to_wait_exclusive(&rl->wait[rw], &wait, + TASK_UNINTERRUPTIBLE); + rq = get_request(q, rw, GFP_NOIO); - if (!rq) - blk_congestion_wait(rw, HZ / 50); + if (!rq) { + struct io_context *ioc; + + io_schedule(); + + /* + * After sleeping, we become a "batching" process and + * will be able to allocate at least one request, and + * up to a big batch of them for a small period time. + * See ioc_batching, ioc_set_batching + */ + ioc = get_io_context(GFP_NOIO); + ioc_set_batching(ioc); + put_io_context(ioc); + } + finish_wait(&rl->wait[rw], &wait); } while (!rq); return rq; @@ -1348,10 +1487,10 @@ BUG_ON(rw != READ && rw != WRITE); - rq = get_request(q, rw, gfp_mask); - - if (!rq && (gfp_mask & __GFP_WAIT)) + if (gfp_mask & __GFP_WAIT) rq = get_request_wait(q, rw); + else + rq = get_request(q, rw, gfp_mask); return rq; } @@ -1482,6 +1621,8 @@ if (unlikely(--req->ref_count)) return; + elv_completed_request(req->q, req); + req->rq_status = RQ_INACTIVE; req->q = NULL; req->rl = NULL; @@ -1496,10 +1637,7 @@ BUG_ON(!list_empty(&req->queuelist)); blk_free_request(q, req); - - rl->count[rw]--; - if ((BLKDEV_MAX_RQ - rl->count[rw]) >= queue_congestion_off_threshold()) - clear_queue_congested(q, rw); + freed_request(q, rw); } } @@ -1786,13 +1924,12 @@ __blk_put_request(q, freereq); if (blk_queue_plugged(q)) { - int nr_queued = q->rq.count[0] + q->rq.count[1]; + int nr_queued = q->rq.count[READ] + q->rq.count[WRITE]; if (nr_queued == q->unplug_thresh) __generic_unplug_device(q); } spin_unlock_irq(q->queue_lock); - return 0; end_io: @@ -1800,7 +1937,6 @@ return 0; } - /* * If bio->bi_dev is a partition, remap the location */ @@ -1850,8 +1986,7 @@ * bio happens to be merged with someone else, and may change bi_dev and * bi_sector for remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. 
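Condensed, the new allocation wait loop in get_request_wait() has this shape (a sketch only; the io_context batching calls and accounting are omitted):

/* Sketch of the get_request_wait() loop introduced above. */
DEFINE_WAIT(wait);
struct request *rq;

do {
        prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE);
        rq = get_request(q, rw, GFP_NOIO);
        if (!rq)
                io_schedule();          /* account the sleep as I/O wait */
        finish_wait(&rl->wait[rw], &wait);
} while (!rq);

Queueing as an exclusive waiter means the wake_up() in freed_request() wakes at most one sleeper per freed request, and retrying get_request() only after the task is on the wait queue closes the window where a wakeup could arrive between the failed allocation and the sleep.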
- * - * */ + */ void generic_make_request(struct bio *bio) { request_queue_t *q; @@ -2282,30 +2417,286 @@ rq->current_nr_sectors = rq->hard_cur_sectors; } +int kblockd_schedule_work(struct work_struct *work) +{ + return queue_work(kblockd_workqueue, work); +} + +void kblockd_flush(void) +{ + flush_workqueue(kblockd_workqueue); +} + int __init blk_dev_init(void) { int i; + kblockd_workqueue = create_workqueue("kblockd"); + if (!kblockd_workqueue) + panic("Failed to create kblockd\n"); + request_cachep = kmem_cache_create("blkdev_requests", sizeof(struct request), 0, 0, NULL, NULL); if (!request_cachep) panic("Can't create request pool slab cache\n"); - queue_nr_requests = BLKDEV_MAX_RQ; - - printk("block request queues:\n"); - printk(" %d/%d requests per read queue\n", BLKDEV_MIN_RQ, queue_nr_requests); - printk(" %d/%d requests per write queue\n", BLKDEV_MIN_RQ, queue_nr_requests); - printk(" enter congestion at %d\n", queue_congestion_on_threshold()); - printk(" exit congestion at %d\n", queue_congestion_off_threshold()); - blk_max_low_pfn = max_low_pfn; blk_max_pfn = max_pfn; for (i = 0; i < ARRAY_SIZE(congestion_wqh); i++) init_waitqueue_head(&congestion_wqh[i]); return 0; +} + +static atomic_t nr_io_contexts = ATOMIC_INIT(0); + +/* + * IO Context helper functions + */ +void put_io_context(struct io_context *ioc) +{ + if (ioc == NULL) + return; + + BUG_ON(atomic_read(&ioc->refcount) == 0); + + if (atomic_dec_and_test(&ioc->refcount)) { + if (ioc->aic && ioc->aic->dtor) + ioc->aic->dtor(ioc->aic); + kfree(ioc); + atomic_dec(&nr_io_contexts); + } +} + +/* Called by the exitting task */ +void exit_io_context(void) +{ + unsigned long flags; + struct io_context *ioc; + + local_irq_save(flags); + ioc = current->io_context; + if (ioc) { + if (ioc->aic && ioc->aic->exit) + ioc->aic->exit(ioc->aic); + put_io_context(ioc); + current->io_context = NULL; + } else + WARN_ON(1); + local_irq_restore(flags); +} + +/* + * If the current task has no IO context then create one and initialise it. + * If it does have a context, take a ref on it. + * + * This is always called in the context of the task which submitted the I/O. + * But weird things happen, so we disable local interrupts to ensure exclusive + * access to *current. + */ +struct io_context *get_io_context(int gfp_flags) +{ + struct task_struct *tsk = current; + unsigned long flags; + struct io_context *ret; + + local_irq_save(flags); + ret = tsk->io_context; + if (ret == NULL) { + ret = kmalloc(sizeof(*ret), GFP_ATOMIC); + if (ret) { + atomic_inc(&nr_io_contexts); + atomic_set(&ret->refcount, 1); + ret->pid = tsk->pid; + ret->last_waited = jiffies; /* doesn't matter... 
*/ + ret->nr_batch_requests = 0; /* because this is 0 */ + ret->aic = NULL; + tsk->io_context = ret; + } + } + if (ret) + atomic_inc(&ret->refcount); + local_irq_restore(flags); + return ret; +} + +void copy_io_context(struct io_context **pdst, struct io_context **psrc) +{ + struct io_context *src = *psrc; + struct io_context *dst = *pdst; + + if (src) { + BUG_ON(atomic_read(&src->refcount) == 0); + atomic_inc(&src->refcount); + put_io_context(dst); + *pdst = src; + } +} + +void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) +{ + struct io_context *temp; + temp = *ioc1; + *ioc1 = *ioc2; + *ioc2 = temp; +} + + +/* + * sysfs parts below + */ +struct queue_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct request_queue *, char *); + ssize_t (*store)(struct request_queue *, const char *, size_t); +}; + +static ssize_t +queue_var_show(unsigned int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t +queue_var_store(unsigned long *var, const char *page, size_t count) +{ + char *p = (char *) page; + + *var = simple_strtoul(p, &p, 10); + return count; +} + +static ssize_t queue_requests_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->nr_requests, (page)); +} + +static ssize_t +queue_requests_store(struct request_queue *q, const char *page, size_t count) +{ + struct request_list *rl = &q->rq; + + int ret = queue_var_store(&q->nr_requests, page, count); + if (q->nr_requests < BLKDEV_MIN_RQ) + q->nr_requests = BLKDEV_MIN_RQ; + + if (rl->count[READ] >= queue_congestion_on_threshold(q)) + set_queue_congested(q, READ); + else if (rl->count[READ] < queue_congestion_off_threshold(q)) + clear_queue_congested(q, READ); + + if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) + set_queue_congested(q, WRITE); + else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) + clear_queue_congested(q, WRITE); + + if (rl->count[READ] >= q->nr_requests) { + blk_set_queue_full(q, READ); + } else if (rl->count[READ]+1 <= q->nr_requests) { + blk_clear_queue_full(q, READ); + wake_up(&rl->wait[READ]); + } + + if (rl->count[WRITE] >= q->nr_requests) { + blk_set_queue_full(q, WRITE); + } else if (rl->count[WRITE]+1 <= q->nr_requests) { + blk_clear_queue_full(q, WRITE); + wake_up(&rl->wait[WRITE]); + } + return ret; +} + +static struct queue_sysfs_entry queue_requests_entry = { + .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, + .show = queue_requests_show, + .store = queue_requests_store, }; + +static struct attribute *default_attrs[] = { + &queue_requests_entry.attr, + NULL, +}; + +#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) + +static ssize_t +queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct queue_sysfs_entry *entry = to_queue(attr); + struct request_queue *q; + + q = container_of(kobj, struct request_queue, kobj); + if (!entry->show) + return 0; + + return entry->show(q, page); +} + +static ssize_t +queue_attr_store(struct kobject *kobj, struct attribute *attr, + const char *page, size_t length) +{ + struct queue_sysfs_entry *entry = to_queue(attr); + struct request_queue *q; + + q = container_of(kobj, struct request_queue, kobj); + if (!entry->store) + return -EINVAL; + + return entry->store(q, page, length); +} + +static struct sysfs_ops queue_sysfs_ops = { + .show = queue_attr_show, + .store = queue_attr_store, +}; + +struct kobj_type queue_ktype = { + .sysfs_ops = &queue_sysfs_ops, + .default_attrs = default_attrs, +}; + +int 
blk_register_queue(struct gendisk *disk) +{ + int ret; + + request_queue_t *q = disk->queue; + + if (!q) + return -ENXIO; + + q->kobj.parent = kobject_get(&disk->kobj); + if (!q->kobj.parent) + return -EBUSY; + + snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue"); + q->kobj.ktype = &queue_ktype; + + ret = kobject_register(&q->kobj); + if (ret < 0) + return ret; + + ret = elv_register_queue(q); + if (ret) { + kobject_unregister(&q->kobj); + return ret; + } + + return 0; +} + +void blk_unregister_queue(struct gendisk *disk) +{ + request_queue_t *q = disk->queue; + + if (q) { + elv_unregister_queue(q); + + kobject_unregister(&q->kobj); + kobject_put(&disk->kobj); + } +} + EXPORT_SYMBOL(process_that_request_first); EXPORT_SYMBOL(end_that_request_first); diff -Nru a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c --- a/drivers/eisa/eisa-bus.c Sat Jul 5 12:40:31 2003 +++ b/drivers/eisa/eisa-bus.c Sat Jul 5 12:40:31 2003 @@ -1,7 +1,7 @@ /* * EISA bus support functions for sysfs. * - * (C) 2002 Marc Zyngier + * (C) 2002, 2003 Marc Zyngier * * This code is released under the GPL version 2. */ @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,7 @@ char name[DEVICE_NAME_SIZE]; }; -struct eisa_device_info __initdata eisa_table[] = { +static struct eisa_device_info __initdata eisa_table[] = { #ifdef CONFIG_EISA_NAMES #include "devlist.h" #endif @@ -32,6 +33,30 @@ #define EISA_INFOS (sizeof (eisa_table) / (sizeof (struct eisa_device_info))) +#define EISA_MAX_FORCED_DEV 16 +#define EISA_FORCED_OFFSET 2 + +static int enable_dev[EISA_MAX_FORCED_DEV + EISA_FORCED_OFFSET] = { 1, EISA_MAX_FORCED_DEV, }; +static int disable_dev[EISA_MAX_FORCED_DEV + EISA_FORCED_OFFSET] = { 1, EISA_MAX_FORCED_DEV, }; + +static int is_forced_dev (int *forced_tab, + struct eisa_root_device *root, + struct eisa_device *edev) +{ + int i, x; + + for (i = 0; i < EISA_MAX_FORCED_DEV; i++) { + if (!forced_tab[EISA_FORCED_OFFSET + i]) + return 0; + + x = (root->bus_nr << 8) | edev->slot; + if (forced_tab[EISA_FORCED_OFFSET + i] == x) + return 1; + } + + return 0; +} + static void __init eisa_name_device (struct eisa_device *edev) { int i; @@ -92,7 +117,8 @@ return 0; while (strlen (eids->sig)) { - if (!strcmp (eids->sig, edev->id.sig)) { + if (!strcmp (eids->sig, edev->id.sig) && + edev->state & EISA_CONFIG_ENABLED) { edev->id.driver_data = eids->driver_data; return 1; } @@ -132,41 +158,160 @@ static DEVICE_ATTR(signature, S_IRUGO, eisa_show_sig, NULL); -static int __init eisa_register_device (struct eisa_root_device *root, - struct eisa_device *edev, - char *sig, int slot) +static ssize_t eisa_show_state (struct device *dev, char *buf) +{ + struct eisa_device *edev = to_eisa_device (dev); + return sprintf (buf,"%d\n", edev->state & EISA_CONFIG_ENABLED); +} + +static DEVICE_ATTR(enabled, S_IRUGO, eisa_show_state, NULL); + +static int __init eisa_init_device (struct eisa_root_device *root, + struct eisa_device *edev, + int slot) { + char *sig; + unsigned long sig_addr; + int i; + + sig_addr = SLOT_ADDRESS (root, slot) + EISA_VENDOR_ID_OFFSET; + + if (!(sig = decode_eisa_sig (sig_addr))) + return -1; /* No EISA device here */ + memcpy (edev->id.sig, sig, EISA_SIG_LEN); edev->slot = slot; + edev->state = inb (SLOT_ADDRESS (root, slot) + EISA_CONFIG_OFFSET) & EISA_CONFIG_ENABLED; edev->base_addr = SLOT_ADDRESS (root, slot); - edev->dma_mask = 0xffffffff; /* Default DMA mask */ + edev->dma_mask = root->dma_mask; /* Default DMA mask */ eisa_name_device (edev); edev->dev.parent = root->dev; 
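The enable_dev/disable_dev tables consulted by is_forced_dev() hold one integer per forced device, encoded as (bus_nr << 8) | slot, so slot 5 on bus 0 is 0x005 and slot 3 on bus 1 is 0x103. A standalone example of the encoding (illustrative only; the first two array slots are the parameter bookkeeping visible above and are not device ids):

/* Encoding used by is_forced_dev(); illustrative. */
#include <stdio.h>

static int eisa_dev_id(int bus_nr, int slot)
{
        return (bus_nr << 8) | slot;
}

int main(void)
{
        printf("0x%03x 0x%03x\n", eisa_dev_id(0, 5), eisa_dev_id(1, 3));
        return 0;
}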
edev->dev.bus = &eisa_bus_type; edev->dev.dma_mask = &edev->dma_mask; sprintf (edev->dev.bus_id, "%02X:%02X", root->bus_nr, slot); - edev->res.name = edev->dev.name; + for (i = 0; i < EISA_MAX_RESOURCES; i++) + edev->res[i].name = edev->dev.name; + + if (is_forced_dev (enable_dev, root, edev)) + edev->state = EISA_CONFIG_ENABLED | EISA_CONFIG_FORCED; + + if (is_forced_dev (disable_dev, root, edev)) + edev->state = EISA_CONFIG_FORCED; + + return 0; +} +static int __init eisa_register_device (struct eisa_device *edev) +{ if (device_register (&edev->dev)) return -1; device_create_file (&edev->dev, &dev_attr_signature); + device_create_file (&edev->dev, &dev_attr_enabled); + + return 0; +} + +static int __init eisa_request_resources (struct eisa_root_device *root, + struct eisa_device *edev, + int slot) +{ + int i; + + for (i = 0; i < EISA_MAX_RESOURCES; i++) { + /* Don't register resource for slot 0, since this is + * very likely to fail... :-( Instead, grab the EISA + * id, now we can display something in /proc/ioports. + */ + + /* Only one region for mainboard */ + if (!slot && i > 0) { + edev->res[i].start = edev->res[i].end = 0; + continue; + } + + if (slot) { + edev->res[i].name = NULL; + edev->res[i].start = SLOT_ADDRESS (root, slot) + (i * 0x400); + edev->res[i].end = edev->res[i].start + 0xff; + edev->res[i].flags = IORESOURCE_IO; + } else { + edev->res[i].name = NULL; + edev->res[i].start = SLOT_ADDRESS (root, slot) + EISA_VENDOR_ID_OFFSET; + edev->res[i].end = edev->res[i].start + 3; + edev->res[i].flags = IORESOURCE_BUSY; + } + + if (request_resource (root->res, &edev->res[i])) + goto failed; + } return 0; + + failed: + while (--i >= 0) + release_resource (&edev->res[i]); + + return -1; +} + +static void __init eisa_release_resources (struct eisa_device *edev) +{ + int i; + + for (i = 0; i < EISA_MAX_RESOURCES; i++) + if (edev->res[i].start || edev->res[i].end) + release_resource (&edev->res[i]); } static int __init eisa_probe (struct eisa_root_device *root) { int i, c; - char *str; - unsigned long sig_addr; struct eisa_device *edev; printk (KERN_INFO "EISA: Probing bus %d at %s\n", root->bus_nr, root->dev->name); + + /* First try to get hold of slot 0. If there is no device + * here, simply fail, unless root->force_probe is set. */ + + if (!(edev = kmalloc (sizeof (*edev), GFP_KERNEL))) { + printk (KERN_ERR "EISA: Couldn't allocate mainboard slot\n"); + return -ENOMEM; + } + + memset (edev, 0, sizeof (*edev)); + + if (eisa_request_resources (root, edev, 0)) { + printk (KERN_WARNING \ + "EISA: Cannot allocate resource for mainboard\n"); + kfree (edev); + if (!root->force_probe) + return -EBUSY; + goto force_probe; + } + + if (eisa_init_device (root, edev, 0)) { + eisa_release_resources (edev); + kfree (edev); + if (!root->force_probe) + return -ENODEV; + goto force_probe; + } + + printk (KERN_INFO "EISA: Mainboard %s detected.\n", edev->id.sig); + + if (eisa_register_device (edev)) { + printk (KERN_ERR "EISA: Failed to register %s\n", + edev->id.sig); + eisa_release_resources (edev); + kfree (edev); + } - for (c = 0, i = 0; i <= root->slots; i++) { + force_probe: + + for (c = 0, i = 1; i <= root->slots; i++) { if (!(edev = kmalloc (sizeof (*edev), GFP_KERNEL))) { printk (KERN_ERR "EISA: Out of memory for slot %d\n", i); @@ -175,24 +320,7 @@ memset (edev, 0, sizeof (*edev)); - /* Don't register resource for slot 0, since this is - * very likely to fail... :-( Instead, grab the EISA - * id, now we can display something in /proc/ioports. 
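eisa_request_resources() now claims one 256-byte I/O window per 1K step within the slot's address space instead of the old single region. Assuming EISA_MAX_RESOURCES is 4 (the constant is not visible in this hunk), a non-mainboard slot therefore owns base+0x000..0x0ff, +0x400..0x4ff, +0x800..0x8ff and +0xc00..0xcff. A sketch of those ranges, with SLOT_ADDRESS() stood in for by a plain base argument:

/* Illustrative dump of the per-slot I/O ranges requested above. */
#include <stdio.h>

#define EISA_MAX_RESOURCES 4    /* assumption, not shown in this hunk */

static void dump_slot_ranges(unsigned long slot_base)
{
        int i;

        for (i = 0; i < EISA_MAX_RESOURCES; i++)
                printf("0x%04lx-0x%04lx\n",
                       slot_base + i * 0x400, slot_base + i * 0x400 + 0xff);
}

int main(void)
{
        dump_slot_ranges(0x1000);       /* example base address only */
        return 0;
}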
- */ - - if (i) { - edev->res.name = NULL; - edev->res.start = SLOT_ADDRESS (root, i); - edev->res.end = edev->res.start + 0xfff; - edev->res.flags = IORESOURCE_IO; - } else { - edev->res.name = NULL; - edev->res.start = SLOT_ADDRESS (root, i) + EISA_VENDOR_ID_OFFSET; - edev->res.end = edev->res.start + 3; - edev->res.flags = IORESOURCE_BUSY; - } - - if (request_resource (root->res, &edev->res)) { + if (eisa_request_resources (root, edev, i)) { printk (KERN_WARNING \ "Cannot allocate resource for EISA slot %d\n", i); @@ -200,30 +328,41 @@ continue; } - sig_addr = SLOT_ADDRESS (root, i) + EISA_VENDOR_ID_OFFSET; - - if (!(str = decode_eisa_sig (sig_addr))) { - release_resource (&edev->res); + if (eisa_init_device (root, edev, i)) { + eisa_release_resources (edev); kfree (edev); continue; } - if (!i) - printk (KERN_INFO "EISA: Motherboard %s detected\n", - str); - else { - printk (KERN_INFO "EISA: slot %d : %s detected.\n", - i, str); - - c++; + printk (KERN_INFO "EISA: slot %d : %s detected", + i, edev->id.sig); + + switch (edev->state) { + case EISA_CONFIG_ENABLED | EISA_CONFIG_FORCED: + printk (" (forced enabled)"); + break; + + case EISA_CONFIG_FORCED: + printk (" (forced disabled)"); + break; + + case 0: + printk (" (disabled)"); + break; } + + printk (".\n"); + + c++; - if (eisa_register_device (root, edev, str, i)) { - printk (KERN_ERR "EISA: Failed to register %s\n", str); - release_resource (&edev->res); + if (eisa_register_device (edev)) { + printk (KERN_ERR "EISA: Failed to register %s\n", + edev->id.sig); + eisa_release_resources (edev); kfree (edev); } } + printk (KERN_INFO "EISA: Detected %d card%s.\n", c, c == 1 ? "" : "s"); return 0; @@ -273,6 +412,13 @@ printk (KERN_INFO "EISA bus registered\n"); return 0; } + +/* Couldn't use intarray with checking on... :-( */ +#undef param_check_intarray +#define param_check_intarray(name, p) + +module_param(enable_dev, intarray, 0444); +module_param(disable_dev, intarray, 0444); postcore_initcall (eisa_init); diff -Nru a/drivers/eisa/eisa.ids b/drivers/eisa/eisa.ids --- a/drivers/eisa/eisa.ids Sat Jul 5 12:40:31 2003 +++ b/drivers/eisa/eisa.ids Sat Jul 5 12:40:31 2003 @@ -504,6 +504,7 @@ DTK0003 "DTK PLM-3331P EISACACHE486 33/25/50 MHZ" ECS0580 "DI-580A EISA SCSI Host Adapter" ECS0590 "DI-590 EISA SCSI Cache Host Adapter" +EGL0101 "Eagle Technology EP3210 EtherXpert EISA Adapter" ELS8041 "ELSA WINNER 1000 Enhanced VGA" ETI1001 "NE3300 Ethernet Rev. C & D" EVX0002 "PN-3000 System Board" @@ -515,6 +516,9 @@ FSI2001 "ESA-200 ATM" FSI2002 "ESA-200A ATM" FSI2003 "ESA-200E ATM" +GCI0101 "Gateway G/Ethernet 32EB -- 32-Bit EISA Bus Master Ethernet Adpater" +GCI0102 "Gateway G/Ethernet 32EB -- 32-Bit EISA Bus Master Ethernet Adapter" +GCI0103 "Gateway G/Ethernet 32EB -- 32-Bit EISA Bus Master Ethernet Adapter" GDT2001 "GDT2000/GDT2020 Fast-SCSI Cache Controller - Rev. 1.0" GDT3001 "GDT3000/GDT3020 Dual Channel SCSI Controller - Rev. 
1.0" GDT3002 "GDT30x0A Cache Controller" @@ -1138,12 +1142,14 @@ NON0601 "c't Universal 8-Bit Adapter" NSS0011 "Newport Systems Solutions WNIC Adapter" NVL0701 "Novell NE3200 Bus Master Ethernet" +NVL0702 "Novell NE3200T Bus Master Ethernet" NVL0901 "Novell NE2100 Ethernet/Cheapernet Adapter" NVL1001 "Novell NMSL (Netware Mirrored Server Link)" NVL1201 "Novell NE32HUB 32-bit Base EISA Adapter" NVL1301 "Novell NE32HUB 32-bit TPE EISA Adapter" NVL1401 "Novell NE32HUB PME ISA Adapter" NVL1501 "Novell NE2000PLUS Ethernet Adapter" +NVL1801 "Eagle Technology NE3210 EISA Ethernet LAN Adapter" OLC0701 "Olicom ISA 16/4 Token-Ring Network Adapter" OLC0702 "Olicom OC-3117, ISA 16/4 Adapter (NIC)" OLC0801 "OC-3118 Olicom ISA 16/4 Token-Ring Network Adapter" diff -Nru a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c --- a/drivers/eisa/pci_eisa.c Sat Jul 5 12:40:31 2003 +++ b/drivers/eisa/pci_eisa.c Sat Jul 5 12:40:31 2003 @@ -20,7 +20,7 @@ static struct eisa_root_device pci_eisa_root; static int __devinit pci_eisa_init (struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; @@ -35,6 +35,7 @@ pci_eisa_root.res = pdev->bus->resource[0]; pci_eisa_root.bus_base_addr = pdev->bus->resource[0]->start; pci_eisa_root.slots = EISA_MAX_SLOTS; + pci_eisa_root.dma_mask = pdev->dma_mask; if (eisa_root_register (&pci_eisa_root)) { printk (KERN_ERR "pci_eisa : Could not register EISA root\n"); diff -Nru a/drivers/eisa/virtual_root.c b/drivers/eisa/virtual_root.c --- a/drivers/eisa/virtual_root.c Sat Jul 5 12:40:32 2003 +++ b/drivers/eisa/virtual_root.c Sat Jul 5 12:40:32 2003 @@ -7,12 +7,22 @@ * This code is released under the GPL version 2. */ +#include #include #include #include #include +#include #include +#if defined(CONFIG_ALPHA_JENSEN) || defined(CONFIG_EISA_VLB_PRIMING) +#define EISA_FORCE_PROBE_DEFAULT 1 +#else +#define EISA_FORCE_PROBE_DEFAULT 0 +#endif + +static int force_probe = EISA_FORCE_PROBE_DEFAULT; + /* The default EISA device parent (virtual root device). * Now use a platform device, since that's the obvious choice. 
*/ @@ -29,6 +39,7 @@ .bus_base_addr = 0, .res = &ioport_resource, .slots = EISA_MAX_SLOTS, + .dma_mask = 0xffffffff, }; static int virtual_eisa_root_init (void) @@ -39,6 +50,8 @@ return r; } + eisa_bus_root.force_probe = force_probe; + eisa_root_dev.dev.driver_data = &eisa_bus_root; if (eisa_root_register (&eisa_bus_root)) { @@ -50,5 +63,7 @@ return 0; } + +module_param (force_probe, int, 0444); device_initcall (virtual_eisa_root_init); diff -Nru a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c --- a/drivers/ieee1394/sbp2.c Sat Jul 5 12:40:31 2003 +++ b/drivers/ieee1394/sbp2.c Sat Jul 5 12:40:31 2003 @@ -56,6 +56,8 @@ #include #include #include +#include + #include #include #include diff -Nru a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c --- a/drivers/message/i2o/i2o_scsi.c Sat Jul 5 12:40:32 2003 +++ b/drivers/message/i2o/i2o_scsi.c Sat Jul 5 12:40:32 2003 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff -Nru a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c --- a/drivers/mtd/mtd_blkdevs.c Sat Jul 5 12:40:31 2003 +++ b/drivers/mtd/mtd_blkdevs.c Sat Jul 5 12:40:31 2003 @@ -211,9 +211,10 @@ case HDIO_GETGEO: if (tr->getgeo) { struct hd_geometry g; + int ret; memset(&g, 0, sizeof(g)); - int ret = tr->getgeo(dev, &g); + ret = tr->getgeo(dev, &g); if (ret) return ret; diff -Nru a/drivers/net/Kconfig b/drivers/net/Kconfig --- a/drivers/net/Kconfig Sat Jul 5 12:40:32 2003 +++ b/drivers/net/Kconfig Sat Jul 5 12:40:32 2003 @@ -1397,7 +1397,7 @@ config TC35815 tristate "TOSHIBA TC35815 Ethernet support" - depends on NET_PCI && PCI + depends on NET_PCI && PCI && TOSHIBA_JMR3927 config DGRS tristate "Digi Intl. RightSwitch SE-X support" diff -Nru a/drivers/net/e100/e100_main.c b/drivers/net/e100/e100_main.c --- a/drivers/net/e100/e100_main.c Sat Jul 5 12:40:31 2003 +++ b/drivers/net/e100/e100_main.c Sat Jul 5 12:40:31 2003 @@ -1085,9 +1085,9 @@ goto exit1; } - e100_prepare_xmit_buff(bdp, skb); - bdp->drv_stats.net_stats.tx_bytes += skb->len; + + e100_prepare_xmit_buff(bdp, skb); dev->trans_start = jiffies; diff -Nru a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c --- a/drivers/net/irda/irtty-sir.c Sat Jul 5 12:40:31 2003 +++ b/drivers/net/irda/irtty-sir.c Sat Jul 5 12:40:31 2003 @@ -212,13 +212,6 @@ /* called from sir_dev when there is more data to send * context is either netdev->hard_xmit or some transmit-completion bh * i.e. we are under spinlock here and must not sleep. - * - * Note: as of 2.5.44 the usb-serial driver calls down() on a semaphore - * hence we are hitting the might_sleep bugcatcher. IMHO the whole tty-api - * would be pretty pointless if write_room/write would be allowed to sleep. - * Furthermore other tty ldiscs (like ppp) do also require the driver not - * to sleep there. Hence this is considered a current limitation of - * usb-serial. 
*/ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t len) @@ -269,16 +262,15 @@ struct sirtty_cb *priv = tty->disc_data; int i; - if (unlikely(!priv || priv->magic!=IRTTY_MAGIC)) - return; - /* Please use ASSERT - Fix ASSERT as needed - Jean II */ + ASSERT(priv != NULL, return;); + ASSERT(priv->magic == IRTTY_MAGIC, return;); if (unlikely(count==0)) /* yes, this happens */ return; dev = priv->dev; if (!dev) { - printk(KERN_ERR "%s(), not ready yet!\n", __FUNCTION__); + WARNING("%s(), not ready yet!\n", __FUNCTION__); return; } @@ -306,8 +298,8 @@ { struct sirtty_cb *priv = tty->disc_data; - if (unlikely(!priv || priv->magic!=IRTTY_MAGIC)) - return 0; + ASSERT(priv != NULL, return 0;); + ASSERT(priv->magic == IRTTY_MAGIC, return 0;); return 65536; /* We can handle an infinite amount of data. :-) */ } @@ -323,8 +315,8 @@ { struct sirtty_cb *priv = tty->disc_data; - if (unlikely(!priv || priv->magic!=IRTTY_MAGIC)) - return; + ASSERT(priv != NULL, return;); + ASSERT(priv->magic == IRTTY_MAGIC, return;); tty->flags &= ~(1 << TTY_DO_WRITE_WAKEUP); @@ -559,7 +551,7 @@ up(&irtty_sem); - printk(KERN_INFO "%s - done\n", __FUNCTION__); + IRDA_DEBUG(0, "%s - %s: irda line discipline opened\n", __FUNCTION__, tty->name); return 0; @@ -580,8 +572,8 @@ { struct sirtty_cb *priv = tty->disc_data; - if (!priv || priv->magic != IRTTY_MAGIC) - return; + ASSERT(priv != NULL, return;); + ASSERT(priv->magic == IRTTY_MAGIC, return;); /* Hm, with a dongle attached the dongle driver wants * to close the dongle - which requires the use of @@ -610,6 +602,8 @@ tty->driver->stop(tty); kfree(priv); + + IRDA_DEBUG(0, "%s - %s: irda line discipline closed\n", __FUNCTION__, tty->name); } /* ------------------------------------------------------- */ diff -Nru a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c --- a/drivers/net/irda/sir_dev.c Sat Jul 5 12:40:32 2003 +++ b/drivers/net/irda/sir_dev.c Sat Jul 5 12:40:32 2003 @@ -201,14 +201,12 @@ int sirdev_receive(struct sir_dev *dev, const unsigned char *cp, size_t count) { if (!dev || !dev->netdev) { - IRDA_DEBUG(0, "%s(), not ready yet!\n", __FUNCTION__); - /* Use WARNING instead of IRDA_DEBUG */ + WARNING("%s(), not ready yet!\n", __FUNCTION__); return -1; } if (!dev->irlap) { - IRDA_DEBUG(0, "%s - too early: %p / %d!\n", __FUNCTION__, cp, count); - /* Use WARNING instead of IRDA_DEBUG */ + WARNING("%s - too early: %p / %d!\n", __FUNCTION__, cp, count); return -1; } @@ -218,7 +216,7 @@ */ irda_device_set_media_busy(dev->netdev, TRUE); dev->stats.rx_dropped++; - printk(KERN_INFO "%s; rx-drop: %d\n", __FUNCTION__, count); + IRDA_DEBUG(0, "%s; rx-drop: %d\n", __FUNCTION__, count); return 0; } @@ -431,7 +429,6 @@ return -ENOMEM; skb_reserve(dev->rx_buff.skb, 1); dev->rx_buff.head = dev->rx_buff.skb->data; - /* No need to memset the buffer, unless you are really pedantic */ dev->tx_buff.head = kmalloc(dev->tx_buff.truesize, GFP_KERNEL); if (dev->tx_buff.head == NULL) { @@ -439,8 +436,6 @@ dev->rx_buff.skb = NULL; dev->rx_buff.head = NULL; return -ENOMEM; - /* Hu ??? This should not be here, Martin ? 
*/ - memset(dev->tx_buff.head, 0, dev->tx_buff.truesize); } dev->tx_buff.data = dev->tx_buff.head; @@ -492,7 +487,7 @@ netif_wake_queue(ndev); - printk(KERN_INFO "%s - done, speed = %d\n", __FUNCTION__, dev->speed); + IRDA_DEBUG(2, "%s - done, speed = %d\n", __FUNCTION__, dev->speed); return 0; @@ -512,7 +507,7 @@ struct sir_dev *dev = ndev->priv; const struct sir_driver *drv; - printk(KERN_INFO "%s\n", __FUNCTION__); +// IRDA_DEBUG(0, "%s\n", __FUNCTION__); netif_stop_queue(ndev); @@ -570,7 +565,7 @@ struct net_device *ndev; struct sir_dev *dev; - printk(KERN_INFO "%s - %s\n", __FUNCTION__, name); + IRDA_DEBUG(0, "%s - %s\n", __FUNCTION__, name); /* instead of adding tests to protect against drv->do_write==NULL * at several places we refuse to create a sir_dev instance for @@ -584,8 +579,7 @@ */ dev = kmalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) { - printk(KERN_ERR "IrDA: Can't allocate memory for " - "IrDA control block!\n"); + ERROR("%s - Can't allocate memory for IrDA control block!\n", __FUNCTION__); goto out; } memset(dev, 0, sizeof(*dev)); @@ -638,7 +632,7 @@ { int err = 0; - printk(KERN_INFO "%s\n", __FUNCTION__); + IRDA_DEBUG(0, "%s\n", __FUNCTION__); atomic_set(&dev->enable_rx, 0); diff -Nru a/drivers/net/irda/sir_kthread.c b/drivers/net/irda/sir_kthread.c --- a/drivers/net/irda/sir_kthread.c Sat Jul 5 12:40:32 2003 +++ b/drivers/net/irda/sir_kthread.c Sat Jul 5 12:40:32 2003 @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -107,44 +108,12 @@ spin_unlock_irqrestore(&irda_rq_queue.lock, flags); } -static int irda_rt_prio = 0; /* MODULE_PARM? */ - static int irda_thread(void *startup) { DECLARE_WAITQUEUE(wait, current); daemonize("kIrDAd"); - set_fs(KERNEL_DS); - - if (irda_rt_prio > 0) { -#if 0 /* works but requires EXPORT_SYMBOL(setscheduler) */ - struct sched_param param; - - param.sched_priority = irda_rt_prio; - setscheduler(0, SCHED_FIFO, ¶m); -#endif - -#if 0 /* doesn't work - has some tendency to trigger instant reboot! - * looks like we would have to deactivate current on the - * runqueue - which is only possible inside of kernel/sched.h - */ - - /* runqueues are per-cpu and we are current on this cpu. 
Hence - * The tasklist_lock with irq-off protects our runqueue too - * and we don't have to lock it (which would be impossible, - * because it is private in kernel/sched.c) - */ - - read_lock_irq(&tasklist_lock); - current->rt_priority = (irda_rt_priopolicy = SCHED_FIFO; - current->prio = MAX_USER_RT_PRIO-1 - irda_rt_prio; - read_unlock_irq(&tasklist_lock); -#endif - } - irda_rq_queue.thread = current; complete((struct completion *)startup); @@ -166,6 +135,10 @@ set_task_state(current, TASK_RUNNING); remove_wait_queue(&irda_rq_queue.kick, &wait); + /* make swsusp happy with our thread */ + if (current->flags & PF_FREEZE) + refrigerator(PF_IOTHREAD); + run_irda_queue(); } @@ -442,7 +415,6 @@ case SIRDEV_STATE_COMPLETE: /* config change finished, so we are not busy any longer */ sirdev_enable_rx(dev); - printk(KERN_INFO "%s - up\n", __FUNCTION__); up(&fsm->sem); return; } @@ -462,9 +434,7 @@ struct sir_fsm *fsm = &dev->fsm; int xmit_was_down; -// IRDA_DEBUG(2, "%s - state=0x%04x / param=%u\n", __FUNCTION__, initial_state, param); - - printk(KERN_INFO "%s - state=0x%04x / param=%u\n", __FUNCTION__, initial_state, param); + IRDA_DEBUG(2, "%s - state=0x%04x / param=%u\n", __FUNCTION__, initial_state, param); if (in_interrupt()) { if (down_trylock(&fsm->sem)) { @@ -474,12 +444,10 @@ } else down(&fsm->sem); - printk(KERN_INFO "%s - down\n", __FUNCTION__); if (fsm->state == SIRDEV_STATE_DEAD) { /* race with sirdev_close should never happen */ ERROR("%s(), instance staled!\n", __FUNCTION__); - printk(KERN_INFO "%s - up\n", __FUNCTION__); up(&fsm->sem); return -ESTALE; /* or better EPIPE? */ } @@ -501,7 +469,6 @@ atomic_set(&dev->enable_rx, 1); if (!xmit_was_down) netif_wake_queue(dev->netdev); - printk(KERN_INFO "%s - up\n", __FUNCTION__); up(&fsm->sem); return -EAGAIN; } diff -Nru a/drivers/net/wan/comx.c b/drivers/net/wan/comx.c --- a/drivers/net/wan/comx.c Sat Jul 5 12:40:31 2003 +++ b/drivers/net/wan/comx.c Sat Jul 5 12:40:31 2003 @@ -86,7 +86,7 @@ static int comx_mkdir(struct inode *, struct dentry *, int); static int comx_rmdir(struct inode *, struct dentry *); -static struct dentry *comx_lookup(struct inode *, struct dentry *); +static struct dentry *comx_lookup(struct inode *, struct dentry *, struct nameidata *); static struct inode_operations comx_root_inode_ops = { .lookup = comx_lookup, @@ -922,7 +922,7 @@ return 0; } -static struct dentry *comx_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *comx_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct proc_dir_entry *de; struct inode *inode = NULL; diff -Nru a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c --- a/drivers/net/wan/hdlc_generic.c Sat Jul 5 12:40:31 2003 +++ b/drivers/net/wan/hdlc_generic.c Sat Jul 5 12:40:31 2003 @@ -177,11 +177,8 @@ struct packet_type hdlc_packet_type= { - __constant_htons(ETH_P_HDLC), - NULL, - hdlc_rcv, - NULL, - NULL + .type = __constant_htons(ETH_P_HDLC), + .func = hdlc_rcv, }; diff -Nru a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c --- a/drivers/parisc/eisa.c Sat Jul 5 12:40:31 2003 +++ b/drivers/parisc/eisa.c Sat Jul 5 12:40:31 2003 @@ -378,19 +378,21 @@ } } eisa_eeprom_init(eisa_dev.eeprom_addr); - eisa_enumerator(eisa_dev.eeprom_addr, &eisa_dev.hba.io_space, &eisa_dev.hba.lmmio_space); + result = eisa_enumerator(eisa_dev.eeprom_addr, &eisa_dev.hba.io_space, &eisa_dev.hba.lmmio_space); init_eisa_pic(); - /* FIXME : Get the number of slots from the enumerator, not a - * hadcoded value. Also don't enumerate the bus twice. 
*/ - eisa_dev.root.dev = &dev->dev; - dev->dev.driver_data = &eisa_dev.root; - eisa_dev.root.bus_base_addr = 0; - eisa_dev.root.res = &eisa_dev.hba.io_space; - eisa_dev.root.slots = EISA_MAX_SLOTS; - if (eisa_root_register (&eisa_dev.root)) { - printk(KERN_ERR "EISA: Failed to register EISA root\n"); - return -1; + if (result >= 0) { + /* FIXME : Don't enumerate the bus twice. */ + eisa_dev.root.dev = &dev->dev; + dev->dev.driver_data = &eisa_dev.root; + eisa_dev.root.bus_base_addr = 0; + eisa_dev.root.res = &eisa_dev.hba.io_space; + eisa_dev.root.slots = result; + eisa_dev.root.dma_mask = 0xffffffff; /* wild guess */ + if (eisa_root_register (&eisa_dev.root)) { + printk(KERN_ERR "EISA: Failed to register EISA root\n"); + return -1; + } } return 0; diff -Nru a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c --- a/drivers/parisc/eisa_enumerator.c Sat Jul 5 12:40:31 2003 +++ b/drivers/parisc/eisa_enumerator.c Sat Jul 5 12:40:31 2003 @@ -438,6 +438,10 @@ id = le32_to_cpu(inl(SLOT2PORT(slot)+EPI)); if (0xffffffff == id) { + /* Maybe we didn't expect a card to be here... */ + if (es->eisa_slot_id == 0xffffffff) + return -1; + /* this board is not here or it does not * support readid */ @@ -499,8 +503,7 @@ (&eeprom_buf[HPEE_SLOT_INFO(i)]); if (-1==init_slot(i+1, es)) { - return -1; - + continue; } if (es->config_data_offset < HPEE_MAX_LENGTH) { @@ -513,6 +516,6 @@ return -1; } } - return 0; + return eh->num_slots; } diff -Nru a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c --- a/drivers/pci/hotplug/acpiphp_glue.c Sat Jul 5 12:40:31 2003 +++ b/drivers/pci/hotplug/acpiphp_glue.c Sat Jul 5 12:40:31 2003 @@ -385,7 +385,7 @@ bridge->seg = seg; bridge->bus = bus; - bridge->pci_bus = pci_find_bus(bus); + bridge->pci_bus = pci_find_bus(seg, bus); bridge->res_lock = SPIN_LOCK_UNLOCKED; diff -Nru a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c --- a/drivers/pci/hotplug/cpci_hotplug_pci.c Sat Jul 5 12:40:32 2003 +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c Sat Jul 5 12:40:32 2003 @@ -395,7 +395,7 @@ /* Scan behind bridge */ n = pci_scan_bridge(bus, dev, max, 2); - child = pci_find_bus(max + 1); + child = pci_find_bus(0, max + 1); if (!child) return -ENODEV; pci_proc_attach_bus(child); diff -Nru a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c --- a/drivers/pci/hotplug/ibmphp_core.c Sat Jul 5 12:40:34 2003 +++ b/drivers/pci/hotplug/ibmphp_core.c Sat Jul 5 12:40:34 2003 @@ -774,7 +774,7 @@ struct pci_dev *dev; u16 l; - if (pci_find_bus(busno) || !(ibmphp_find_same_bus_num (busno))) + if (pci_find_bus(0, busno) || !(ibmphp_find_same_bus_num (busno))) return 1; bus = kmalloc (sizeof (*bus), GFP_KERNEL); @@ -819,7 +819,7 @@ func->dev = pci_find_slot (func->busno, PCI_DEVFN(func->device, func->function)); if (func->dev == NULL) { - struct pci_bus *bus = pci_find_bus(func->busno); + struct pci_bus *bus = pci_find_bus(0, func->busno); if (!bus) return 0; @@ -1335,7 +1335,7 @@ goto exit; } - bus = pci_find_bus(0); + bus = pci_find_bus(0, 0); if (!bus) { err ("Can't find the root pci bus, can not continue\n"); rc = -ENODEV; diff -Nru a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c --- a/drivers/pci/pci-sysfs.c Sat Jul 5 12:40:32 2003 +++ b/drivers/pci/pci-sysfs.c Sat Jul 5 12:40:32 2003 @@ -3,6 +3,8 @@ * * (C) Copyright 2002 Greg Kroah-Hartman * (C) Copyright 2002 IBM Corp. 
+ * (C) Copyright 2003 Matthew Wilcox + * (C) Copyright 2003 Hewlett-Packard * * File attributes for PCI devices * @@ -60,6 +62,108 @@ static DEVICE_ATTR(resource,S_IRUGO,pci_show_resources,NULL); +static ssize_t +pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = 64; + + /* Several chips lock up trying to read undefined config space */ + if (capable(CAP_SYS_ADMIN)) { + size = 256; + } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { + size = 128; + } + + if (off > size) + return 0; + if (off + count > size) { + size -= off; + count = size; + } else { + size = count; + } + + while (off & 3) { + unsigned char val; + pci_read_config_byte(dev, off, &val); + buf[off] = val; + off++; + if (--size == 0) + break; + } + + while (size > 3) { + unsigned int val; + pci_read_config_dword(dev, off, &val); + buf[off] = val & 0xff; + buf[off + 1] = (val >> 8) & 0xff; + buf[off + 2] = (val >> 16) & 0xff; + buf[off + 3] = (val >> 24) & 0xff; + off += 4; + size -= 4; + } + + while (size > 0) { + unsigned char val; + pci_read_config_byte(dev, off, &val); + buf[off] = val; + off++; + --size; + } + + return count; +} + +static ssize_t +pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = count; + + if (off > 256) + return 0; + if (off + count > 256) { + size = 256 - off; + count = size; + } + + while (off & 3) { + pci_write_config_byte(dev, off, buf[off]); + off++; + if (--size == 0) + break; + } + + while (size > 3) { + unsigned int val = buf[off]; + val |= (unsigned int) buf[off + 1] << 8; + val |= (unsigned int) buf[off + 2] << 16; + val |= (unsigned int) buf[off + 3] << 24; + pci_write_config_dword(dev, off, val); + off += 4; + size -= 4; + } + + while (size > 0) { + pci_write_config_byte(dev, off, buf[off]); + off++; + --size; + } + + return count; +} + +static struct bin_attribute pci_config_attr = { + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, + }, + .size = 256, + .read = pci_read_config, + .write = pci_write_config, +}; + void pci_create_sysfs_dev_files (struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -72,4 +176,5 @@ device_create_file (dev, &dev_attr_class); device_create_file (dev, &dev_attr_irq); device_create_file (dev, &dev_attr_resource); + sysfs_create_bin_file(&dev->kobj, &pci_config_attr); } diff -Nru a/drivers/pci/pci.h b/drivers/pci/pci.h --- a/drivers/pci/pci.h Sat Jul 5 12:40:31 2003 +++ b/drivers/pci/pci.h Sat Jul 5 12:40:31 2003 @@ -29,7 +29,6 @@ extern unsigned char pci_max_busnr(void); extern unsigned char pci_bus_max_busnr(struct pci_bus *bus); extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); -extern struct pci_bus *pci_find_bus(unsigned char busnr); struct pci_dev_wrapped { struct pci_dev *dev; diff -Nru a/drivers/pci/probe.c b/drivers/pci/probe.c --- a/drivers/pci/probe.c Sat Jul 5 12:40:32 2003 +++ b/drivers/pci/probe.c Sat Jul 5 12:40:32 2003 @@ -633,27 +633,10 @@ return max; } -int __devinit pci_bus_exists(const struct list_head *list, int nr) -{ - const struct pci_bus *b; - - list_for_each_entry(b, list, node) { - if (b->number == nr || pci_bus_exists(&b->children, nr)) - return 1; - } - return 0; -} - struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { struct pci_bus *b; - if 
(pci_bus_exists(&pci_root_buses, bus)) { - /* If we already got to this bus through a different bridge, ignore it */ - DBG("PCI: Bus %02x already known\n", bus); - return NULL; - } - b = pci_alloc_bus(); if (!b) return NULL; @@ -666,6 +649,14 @@ b->sysdata = sysdata; b->ops = ops; + + if (pci_find_bus(pci_domain_nr(b), bus)) { + /* If we already got to this bus through a different bridge, ignore it */ + DBG("PCI: Bus %02x already known\n", bus); + kfree(b->dev); + kfree(b); + return NULL; + } list_add_tail(&b->node, &pci_root_buses); diff -Nru a/drivers/pci/search.c b/drivers/pci/search.c --- a/drivers/pci/search.c Sat Jul 5 12:40:31 2003 +++ b/drivers/pci/search.c Sat Jul 5 12:40:31 2003 @@ -7,12 +7,14 @@ * Copyright 2003 -- Greg Kroah-Hartman */ +#include <linux/init.h> #include #include +#include <linux/interrupt.h> spinlock_t pci_bus_lock = SPIN_LOCK_UNLOCKED; -static struct pci_bus * +static struct pci_bus * __devinit pci_do_find_bus(struct pci_bus* bus, unsigned char busnr) { struct pci_bus* child; @@ -30,22 +32,24 @@ } /** - * pci_find_bus - locate PCI bus from a given bus number + * pci_find_bus - locate PCI bus from a given domain and bus number + * @domain: number of PCI domain to search * @busnr: number of desired PCI bus * - * Given a PCI bus number, the desired PCI bus is located in system - * global list of PCI buses. If the bus is found, a pointer to its + * Given a PCI bus number and domain number, the desired PCI bus is located + * in the global list of PCI buses. If the bus is found, a pointer to its * data structure is returned. If no bus is found, %NULL is returned. */ -struct pci_bus * -pci_find_bus(unsigned char busnr) +struct pci_bus * __devinit pci_find_bus(int domain, int busnr) { - struct pci_bus* bus = NULL; - struct pci_bus* tmp_bus; + struct pci_bus *bus = NULL; + struct pci_bus *tmp_bus; while ((bus = pci_find_next_bus(bus)) != NULL) { + if (pci_domain_nr(bus) != domain) + continue; tmp_bus = pci_do_find_bus(bus, busnr); - if(tmp_bus) + if (tmp_bus) return tmp_bus; } return NULL; @@ -66,7 +70,7 @@ struct list_head *n; struct pci_bus *b = NULL; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->node.next : pci_root_buses.next; if (n != &pci_root_buses) @@ -125,7 +129,7 @@ struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->global_list.next : pci_devices.next; @@ -190,7 +194,7 @@ struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->global_list.next : pci_devices.next; @@ -256,7 +260,7 @@ struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ?
from->global_list.prev : pci_devices.prev; diff -Nru a/drivers/pnp/interface.c b/drivers/pnp/interface.c --- a/drivers/pnp/interface.c Sat Jul 5 12:40:31 2003 +++ b/drivers/pnp/interface.c Sat Jul 5 12:40:31 2003 @@ -259,7 +259,10 @@ for (i = 0; i < PNP_MAX_PORT; i++) { if (pnp_port_valid(dev, i)) { pnp_printf(buffer,"io"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_port_start(dev, i), pnp_port_end(dev, i)); } @@ -267,7 +270,10 @@ for (i = 0; i < PNP_MAX_MEM; i++) { if (pnp_mem_valid(dev, i)) { pnp_printf(buffer,"mem"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_mem_start(dev, i), pnp_mem_end(dev, i)); } @@ -275,13 +281,21 @@ for (i = 0; i < PNP_MAX_IRQ; i++) { if (pnp_irq_valid(dev, i)) { pnp_printf(buffer,"irq"); - pnp_printf(buffer," %ld \n", pnp_irq(dev, i)); + if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_irq(dev, i)); } } for (i = 0; i < PNP_MAX_DMA; i++) { if (pnp_dma_valid(dev, i)) { pnp_printf(buffer,"dma"); - pnp_printf(buffer," %ld \n", pnp_dma(dev, i)); + if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_dma(dev, i)); } } ret = (buffer->curr - buf); diff -Nru a/drivers/pnp/manager.c b/drivers/pnp/manager.c --- a/drivers/pnp/manager.c Sat Jul 5 12:40:32 2003 +++ b/drivers/pnp/manager.c Sat Jul 5 12:40:32 2003 @@ -45,9 +45,15 @@ flags = &dev->res.port_resource[idx].flags; /* set the initial values */ + *flags = *flags | rule->flags | IORESOURCE_IO; + + if (!rule->size) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + *start = rule->min; *end = *start + rule->size - 1; - *flags = *flags | rule->flags | IORESOURCE_IO; /* run through until pnp_check_port is happy */ while (!pnp_check_port(dev, idx)) { @@ -81,8 +87,6 @@ flags = &dev->res.mem_resource[idx].flags; /* set the initial values */ - *start = rule->min; - *end = *start + rule->size -1; *flags = *flags | rule->flags | IORESOURCE_MEM; /* convert pnp flags to standard Linux flags */ @@ -95,6 +99,14 @@ if (rule->flags & IORESOURCE_MEM_SHADOWABLE) *flags |= IORESOURCE_SHADOWABLE; + if (!rule->size) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + + *start = rule->min; + *end = *start + rule->size -1; + /* run through until pnp_check_mem is happy */ while (!pnp_check_mem(dev, idx)) { *start += rule->align; @@ -135,6 +147,11 @@ /* set the initial values */ *flags = *flags | rule->flags | IORESOURCE_IRQ; + if (!rule->map) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + for (i = 0; i < 16; i++) { if(rule->map & (1<flags | IORESOURCE_DMA; + if (!rule->map) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + for (i = 0; i < 8; i++) { if(rule->map & (1<res = *res; if (!(mode & PNP_CONFIG_FORCE)) { for (i = 0; i < PNP_MAX_PORT; i++) { - if(pnp_check_port(dev,i)) + if(!pnp_check_port(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_MEM; i++) { - if(pnp_check_mem(dev,i)) + if(!pnp_check_mem(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_IRQ; i++) { - if(pnp_check_irq(dev,i)) + if(!pnp_check_irq(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_DMA; i++) { - 
if(pnp_check_dma(dev,i)) + if(!pnp_check_dma(dev,i)) goto fail; } } up(&pnp_res_mutex); - pnp_auto_config_dev(dev); kfree(bak); return 0; diff -Nru a/drivers/pnp/resource.c b/drivers/pnp/resource.c --- a/drivers/pnp/resource.c Sat Jul 5 12:40:32 2003 +++ b/drivers/pnp/resource.c Sat Jul 5 12:40:32 2003 @@ -286,6 +286,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) { if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) { + if (pnp_port_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; tport = &tdev->res.port_resource[tmp].start; tend = &tdev->res.port_resource[tmp].end; if (ranged_conflict(port,end,tport,tend)) @@ -340,6 +342,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { + if (pnp_mem_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; taddr = &tdev->res.mem_resource[tmp].start; tend = &tdev->res.mem_resource[tmp].end; if (ranged_conflict(addr,end,taddr,tend)) @@ -409,6 +413,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) { + if (pnp_irq_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.irq_resource[tmp].start == *irq)) return 0; } @@ -462,6 +468,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) { + if (pnp_dma_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.dma_resource[tmp].start == *dma)) return 0; } diff -Nru a/drivers/pnp/support.c b/drivers/pnp/support.c --- a/drivers/pnp/support.c Sat Jul 5 12:40:31 2003 +++ b/drivers/pnp/support.c Sat Jul 5 12:40:31 2003 @@ -68,9 +68,13 @@ int i = 0; while ((res->irq_resource[i].flags & IORESOURCE_IRQ) && i < PNP_MAX_IRQ) i++; if (i < PNP_MAX_IRQ) { + res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag + if (irq == -1) { + res->irq_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->irq_resource[i].start = res->irq_resource[i].end = (unsigned long) irq; - res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag } } @@ -79,9 +83,13 @@ int i = 0; while ((res->dma_resource[i].flags & IORESOURCE_DMA) && i < PNP_MAX_DMA) i++; if (i < PNP_MAX_DMA) { + res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag + if (dma == -1) { + res->dma_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->dma_resource[i].start = res->dma_resource[i].end = (unsigned long) dma; - res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag } } @@ -90,9 +98,13 @@ int i = 0; while ((res->port_resource[i].flags & IORESOURCE_IO) && i < PNP_MAX_PORT) i++; if (i < PNP_MAX_PORT) { + res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag + if (len <= 0 || (io + len -1) >= 0x10003) { + res->port_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->port_resource[i].start = (unsigned long) io; res->port_resource[i].end = (unsigned long)(io + len - 1); - res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag } } @@ -101,9 +113,13 @@ int i = 0; while ((res->mem_resource[i].flags & IORESOURCE_MEM) && i < PNP_MAX_MEM) i++; if (i < PNP_MAX_MEM) { + res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag + if (len <= 0) { + res->mem_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->mem_resource[i].start = (unsigned long) mem; res->mem_resource[i].end = (unsigned long)(mem + len - 1); - res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag } } diff -Nru a/drivers/scsi/scsi.c 
b/drivers/scsi/scsi.c --- a/drivers/scsi/scsi.c Sat Jul 5 12:40:32 2003 +++ b/drivers/scsi/scsi.c Sat Jul 5 12:40:32 2003 @@ -582,7 +582,7 @@ local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&cmd->eh_entry, &done_q[cpu]); - cpu_raise_softirq(cpu, SCSI_SOFTIRQ); + raise_softirq_irqoff(SCSI_SOFTIRQ); local_irq_restore(flags); } diff -Nru a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c --- a/drivers/scsi/scsi_scan.c Sat Jul 5 12:40:32 2003 +++ b/drivers/scsi/scsi_scan.c Sat Jul 5 12:40:32 2003 @@ -646,7 +646,7 @@ sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED; sdev->use_10_for_rw = 1; - sdev->use_10_for_ms = 1; + sdev->use_10_for_ms = 0; if(sdev->host->hostt->slave_configure) sdev->host->hostt->slave_configure(sdev); diff -Nru a/fs/adfs/adfs.h b/fs/adfs/adfs.h --- a/fs/adfs/adfs.h Sat Jul 5 12:40:32 2003 +++ b/fs/adfs/adfs.h Sat Jul 5 12:40:32 2003 @@ -88,7 +88,7 @@ #define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) /* namei.c */ -extern struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry); +extern struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); /* super.c */ diff -Nru a/fs/adfs/dir.c b/fs/adfs/dir.c --- a/fs/adfs/dir.c Sat Jul 5 12:40:32 2003 +++ b/fs/adfs/dir.c Sat Jul 5 12:40:32 2003 @@ -269,7 +269,7 @@ .d_compare = adfs_compare, }; -struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry) +struct dentry *adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct object_info obj; diff -Nru a/fs/affs/namei.c b/fs/affs/namei.c --- a/fs/affs/namei.c Sat Jul 5 12:40:31 2003 +++ b/fs/affs/namei.c Sat Jul 5 12:40:31 2003 @@ -210,7 +210,7 @@ } struct dentry * -affs_lookup(struct inode *dir, struct dentry *dentry) +affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct buffer_head *bh; @@ -256,7 +256,7 @@ } int -affs_create(struct inode *dir, struct dentry *dentry, int mode) +affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct inode *inode; diff -Nru a/fs/afs/dir.c b/fs/afs/dir.c --- a/fs/afs/dir.c Sat Jul 5 12:40:31 2003 +++ b/fs/afs/dir.c Sat Jul 5 12:40:31 2003 @@ -23,10 +23,10 @@ #include "super.h" #include "internal.h" -static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry); +static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir); -static int afs_d_revalidate(struct dentry *dentry, int flags); +static int afs_d_revalidate(struct dentry *dentry, struct nameidata *); static int afs_d_delete(struct dentry *dentry); static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos, ino_t ino, unsigned dtype); @@ -414,7 +414,7 @@ /* * look up an entry in a directory */ -static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct afs_dir_lookup_cookie cookie; struct afs_super_info *as; @@ -487,7 +487,7 @@ * - NOTE! 
the hit can be a negative hit too, so we can't assume we have an inode * (derived from nfs_lookup_revalidate) */ -static int afs_d_revalidate(struct dentry *dentry, int flags) +static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct afs_dir_lookup_cookie cookie; struct dentry *parent; @@ -495,7 +495,7 @@ unsigned fpos; int ret; - _enter("%s,%x",dentry->d_name.name,flags); + _enter("%s,%p",dentry->d_name.name,nd); parent = dget_parent(dentry); dir = parent->d_inode; diff -Nru a/fs/afs/mntpt.c b/fs/afs/mntpt.c --- a/fs/afs/mntpt.c Sat Jul 5 12:40:31 2003 +++ b/fs/afs/mntpt.c Sat Jul 5 12:40:31 2003 @@ -21,7 +21,7 @@ #include "internal.h" -static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry); +static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); static int afs_mntpt_open(struct inode *inode, struct file *file); struct file_operations afs_mntpt_file_operations = { @@ -93,7 +93,7 @@ /* * no valid lookup procedure on this sort of dir */ -static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { return ERR_PTR(-EREMOTE); } /* end afs_mntpt_lookup() */ diff -Nru a/fs/attr.c b/fs/attr.c --- a/fs/attr.c Sat Jul 5 12:40:31 2003 +++ b/fs/attr.c Sat Jul 5 12:40:31 2003 @@ -22,8 +22,6 @@ int retval = -EPERM; unsigned int ia_valid = attr->ia_valid; - lock_kernel(); - /* If force is set do it anyway. */ if (ia_valid & ATTR_FORCE) goto fine; @@ -58,7 +56,6 @@ fine: retval = 0; error: - unlock_kernel(); return retval; } diff -Nru a/fs/autofs/root.c b/fs/autofs/root.c --- a/fs/autofs/root.c Sat Jul 5 12:40:32 2003 +++ b/fs/autofs/root.c Sat Jul 5 12:40:32 2003 @@ -18,7 +18,7 @@ #include "autofs_i.h" static int autofs_root_readdir(struct file *,void *,filldir_t); -static struct dentry *autofs_root_lookup(struct inode *,struct dentry *); +static struct dentry *autofs_root_lookup(struct inode *,struct dentry *, struct nameidata *); static int autofs_root_symlink(struct inode *,struct dentry *,const char *); static int autofs_root_unlink(struct inode *,struct dentry *); static int autofs_root_rmdir(struct inode *,struct dentry *); @@ -144,7 +144,7 @@ * yet completely filled in, and revalidate has to delay such * lookups.. 
*/ -static int autofs_revalidate(struct dentry * dentry, int flags) +static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode * dir; struct autofs_sb_info *sbi; @@ -195,7 +195,7 @@ .d_revalidate = autofs_revalidate, }; -static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi; int oz_mode; @@ -230,7 +230,7 @@ d_add(dentry, NULL); up(&dir->i_sem); - autofs_revalidate(dentry, 0); + autofs_revalidate(dentry, nd); down(&dir->i_sem); /* diff -Nru a/fs/autofs4/root.c b/fs/autofs4/root.c --- a/fs/autofs4/root.c Sat Jul 5 12:40:31 2003 +++ b/fs/autofs4/root.c Sat Jul 5 12:40:31 2003 @@ -18,13 +18,13 @@ #include #include "autofs_i.h" -static struct dentry *autofs4_dir_lookup(struct inode *,struct dentry *); +static struct dentry *autofs4_dir_lookup(struct inode *,struct dentry *, struct nameidata *); static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); -static struct dentry *autofs4_root_lookup(struct inode *,struct dentry *); +static struct dentry *autofs4_root_lookup(struct inode *,struct dentry *, struct nameidata *); struct file_operations autofs4_root_operations = { .open = dcache_dir_open, @@ -143,7 +143,7 @@ * yet completely filled in, and revalidate has to delay such * lookups.. */ -static int autofs4_root_revalidate(struct dentry * dentry, int flags) +static int autofs4_root_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode * dir = dentry->d_parent->d_inode; struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); @@ -183,7 +183,7 @@ return 1; } -static int autofs4_revalidate(struct dentry *dentry, int flags) +static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); @@ -225,7 +225,7 @@ /* Lookups in non-root dirs never find anything - if it's there, it's already in the dcache */ /* SMP-safe */ -static struct dentry *autofs4_dir_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *autofs4_dir_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { #if 0 DPRINTK(("autofs_dir_lookup: ignoring lookup of %.*s/%.*s\n", @@ -239,7 +239,7 @@ } /* Lookups in the root directory */ -static struct dentry *autofs4_root_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *autofs4_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct autofs_sb_info *sbi; int oz_mode; @@ -276,7 +276,7 @@ if (dentry->d_op && dentry->d_op->d_revalidate) { up(&dir->i_sem); - (dentry->d_op->d_revalidate)(dentry, 0); + (dentry->d_op->d_revalidate)(dentry, nd); down(&dir->i_sem); } diff -Nru a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c --- a/fs/befs/linuxvfs.c Sat Jul 5 12:40:31 2003 +++ b/fs/befs/linuxvfs.c Sat Jul 5 12:40:31 2003 @@ -33,7 +33,7 @@ static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_readpage(struct file *file, struct page *page); static sector_t befs_bmap(struct address_space *mapping, sector_t block); -static struct dentry *befs_lookup(struct inode *, struct dentry *); +static struct dentry 
*befs_lookup(struct inode *, struct dentry *, struct nameidata *); static void befs_read_inode(struct inode *ino); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); @@ -163,7 +163,7 @@ } static struct dentry * -befs_lookup(struct inode *dir, struct dentry *dentry) +befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct super_block *sb = dir->i_sb; diff -Nru a/fs/bfs/dir.c b/fs/bfs/dir.c --- a/fs/bfs/dir.c Sat Jul 5 12:40:32 2003 +++ b/fs/bfs/dir.c Sat Jul 5 12:40:32 2003 @@ -78,7 +78,8 @@ extern void dump_imap(const char *, struct super_block *); -static int bfs_create(struct inode * dir, struct dentry * dentry, int mode) +static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { int err; struct inode * inode; @@ -127,7 +128,7 @@ return 0; } -static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry) +static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { struct inode * inode = NULL; struct buffer_head * bh; diff -Nru a/fs/block_dev.c b/fs/block_dev.c --- a/fs/block_dev.c Sat Jul 5 12:40:34 2003 +++ b/fs/block_dev.c Sat Jul 5 12:40:34 2003 @@ -155,11 +155,13 @@ */ static loff_t block_llseek(struct file *file, loff_t offset, int origin) { - /* ewww */ - loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size; + struct inode *bd_inode; + loff_t size; loff_t retval; - lock_kernel(); + bd_inode = file->f_dentry->d_inode->i_bdev->bd_inode; + down(&bd_inode->i_sem); + size = bd_inode->i_size; switch (origin) { case 2: @@ -175,7 +177,7 @@ } retval = offset; } - unlock_kernel(); + up(&bd_inode->i_sem); return retval; } diff -Nru a/fs/buffer.c b/fs/buffer.c --- a/fs/buffer.c Sat Jul 5 12:40:32 2003 +++ b/fs/buffer.c Sat Jul 5 12:40:32 2003 @@ -319,6 +319,7 @@ /* We need to protect against concurrent writers.. */ down(&inode->i_sem); + current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(inode->i_mapping); err = file->f_op->fsync(file, dentry, 0); if (!ret) @@ -326,6 +327,7 @@ err = filemap_fdatawait(inode->i_mapping); if (!ret) ret = err; + current->flags &= ~PF_SYNCWRITE; up(&inode->i_sem); out_putf: @@ -354,6 +356,7 @@ goto out_putf; down(&inode->i_sem); + current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(inode->i_mapping); err = file->f_op->fsync(file, dentry, 1); if (!ret) @@ -361,6 +364,7 @@ err = filemap_fdatawait(inode->i_mapping); if (!ret) ret = err; + current->flags &= ~PF_SYNCWRITE; up(&inode->i_sem); out_putf: @@ -1446,6 +1450,28 @@ return bh; } EXPORT_SYMBOL(__getblk); + +/* + * Do async read-ahead on a buffer.. + */ +void +__breadahead(struct block_device *bdev, sector_t block, int size) +{ + struct buffer_head *bh = __getblk(bdev, block, size); + if (!test_set_buffer_locked(bh)) { + if (!buffer_uptodate(bh)) { + /* + * This eats the bh count from __getblk() and + * unlocks when the read is done. + */ + bh->b_end_io = end_buffer_io_sync; + submit_bh(READ, bh); + return; + } + unlock_buffer(bh); + } + brelse(bh); +} /** * __bread() - reads a specified block and returns the bh diff -Nru a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c --- a/fs/cifs/cifsfs.c Sat Jul 5 12:40:32 2003 +++ b/fs/cifs/cifsfs.c Sat Jul 5 12:40:32 2003 @@ -178,7 +178,7 @@ return 0; /* always return success? what if volume is no longer available? 
*/ } -static int cifs_permission(struct inode * inode, int mask) +static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) { /* the server does permission checks, we do not need to do it here */ return 0; diff -Nru a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h --- a/fs/cifs/cifsfs.h Sat Jul 5 12:40:31 2003 +++ b/fs/cifs/cifsfs.h Sat Jul 5 12:40:31 2003 @@ -46,8 +46,8 @@ /* Functions related to inodes */ extern struct inode_operations cifs_dir_inode_ops; -extern int cifs_create(struct inode *, struct dentry *, int); -extern struct dentry *cifs_lookup(struct inode *, struct dentry *); +extern int cifs_create(struct inode *, struct dentry *, int, struct nameidata *); +extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *, struct dentry *); extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); extern int cifs_mkdir(struct inode *, struct dentry *, int); diff -Nru a/fs/cifs/dir.c b/fs/cifs/dir.c --- a/fs/cifs/dir.c Sat Jul 5 12:40:32 2003 +++ b/fs/cifs/dir.c Sat Jul 5 12:40:32 2003 @@ -119,7 +119,8 @@ /* Inode operations in similar order to how they appear in the Linux file fs.h */ int -cifs_create(struct inode *inode, struct dentry *direntry, int mode) +cifs_create(struct inode *inode, struct dentry *direntry, int mode, + struct nameidata *nd) { int rc = -ENOENT; int xid; @@ -178,7 +179,7 @@ } struct dentry * -cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry) +cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, struct nameidata *nd) { int rc, xid; struct cifs_sb_info *cifs_sb; @@ -262,7 +263,7 @@ } static int -cifs_d_revalidate(struct dentry *direntry, int flags) +cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) { int isValid = 1; diff -Nru a/fs/coda/dir.c b/fs/coda/dir.c --- a/fs/coda/dir.c Sat Jul 5 12:40:31 2003 +++ b/fs/coda/dir.c Sat Jul 5 12:40:31 2003 @@ -28,9 +28,9 @@ #include /* dir inode-ops */ -static int coda_create(struct inode *dir, struct dentry *new, int mode); +static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd); static int coda_mknod(struct inode *dir, struct dentry *new, int mode, dev_t rdev); -static struct dentry *coda_lookup(struct inode *dir, struct dentry *target); +static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, struct dentry *entry); static int coda_unlink(struct inode *dir_inode, struct dentry *entry); @@ -45,7 +45,7 @@ static int coda_readdir(struct file *file, void *dirent, filldir_t filldir); /* dentry ops */ -static int coda_dentry_revalidate(struct dentry *de, int); +static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); static int coda_dentry_delete(struct dentry *); /* support routines */ @@ -90,7 +90,7 @@ /* inode operations for directories */ /* access routines: lookup, readlink, permission */ -static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry) +static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) { struct inode *res_inode = NULL; struct ViceFid resfid = {0,0,0}; @@ -147,7 +147,7 @@ } -int coda_permission(struct inode *inode, int mask) +int coda_permission(struct inode *inode, int mask, struct nameidata *nd) { int error = 0; @@ -190,7 +190,7 @@ } /* creation routines: create, mknod, mkdir, link, symlink */ -static int coda_create(struct inode *dir, 
struct dentry *de, int mode) +static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd) { int error=0; const char *name=de->d_name.name; @@ -627,7 +627,7 @@ } /* called when a cache lookup succeeds */ -static int coda_dentry_revalidate(struct dentry *de, int flags) +static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) { struct inode *inode = de->d_inode; struct coda_inode_info *cii; diff -Nru a/fs/coda/file.c b/fs/coda/file.c --- a/fs/coda/file.c Sat Jul 5 12:40:31 2003 +++ b/fs/coda/file.c Sat Jul 5 12:40:31 2003 @@ -153,19 +153,22 @@ struct inode *coda_inode; int err = 0, fcnt; + lock_kernel(); + coda_vfs_stat.flush++; /* last close semantics */ fcnt = file_count(coda_file); - if (fcnt > 1) return 0; + if (fcnt > 1) + goto out; /* No need to make an upcall when we have not made any modifications * to the file */ if ((coda_file->f_flags & O_ACCMODE) == O_RDONLY) - return 0; + goto out; if (use_coda_close) - return 0; + goto out; cfi = CODA_FTOC(coda_file); BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); @@ -180,6 +183,8 @@ err = 0; } +out: + unlock_kernel(); return err; } diff -Nru a/fs/coda/pioctl.c b/fs/coda/pioctl.c --- a/fs/coda/pioctl.c Sat Jul 5 12:40:32 2003 +++ b/fs/coda/pioctl.c Sat Jul 5 12:40:32 2003 @@ -24,7 +24,8 @@ #include /* pioctl ops */ -static int coda_ioctl_permission(struct inode *inode, int mask); +static int coda_ioctl_permission(struct inode *inode, int mask, + struct nameidata *nd); static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data); @@ -41,7 +42,8 @@ }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct inode *inode, int mask) +static int coda_ioctl_permission(struct inode *inode, int mask, + struct nameidata *nd) { return 0; } diff -Nru a/fs/cramfs/inode.c b/fs/cramfs/inode.c --- a/fs/cramfs/inode.c Sat Jul 5 12:40:31 2003 +++ b/fs/cramfs/inode.c Sat Jul 5 12:40:31 2003 @@ -342,7 +342,7 @@ /* * Lookup and fill in the inode data.. 
*/ -static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { unsigned int offset = 0; int sorted; diff -Nru a/fs/devfs/base.c b/fs/devfs/base.c --- a/fs/devfs/base.c Sat Jul 5 12:40:31 2003 +++ b/fs/devfs/base.c Sat Jul 5 12:40:31 2003 @@ -2175,7 +2175,7 @@ .d_iput = devfs_d_iput, }; -static int devfs_d_revalidate_wait (struct dentry *dentry, int flags); +static int devfs_d_revalidate_wait (struct dentry *dentry, struct nameidata *); static struct dentry_operations devfs_wait_dops = { @@ -2212,7 +2212,7 @@ /* XXX: this doesn't handle the case where we got a negative dentry but a devfs entry has been registered in the meanwhile */ -static int devfs_d_revalidate_wait (struct dentry *dentry, int flags) +static int devfs_d_revalidate_wait (struct dentry *dentry, struct nameidata *nd) { struct inode *dir = dentry->d_parent->d_inode; struct fs_info *fs_info = dir->i_sb->s_fs_info; @@ -2265,7 +2265,7 @@ /* Inode operations for device entries follow */ -static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry) +static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct devfs_entry tmp; /* Must stay in scope until devfsd idle again */ struct devfs_lookup_struct lookup_info; diff -Nru a/fs/efs/namei.c b/fs/efs/namei.c --- a/fs/efs/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/efs/namei.c Sat Jul 5 12:40:32 2003 @@ -57,7 +57,7 @@ return(0); } -struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry) { +struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { efs_ino_t inodenum; struct inode * inode = NULL; diff -Nru a/fs/eventpoll.c b/fs/eventpoll.c --- a/fs/eventpoll.c Sat Jul 5 12:40:31 2003 +++ b/fs/eventpoll.c Sat Jul 5 12:40:31 2003 @@ -443,29 +443,17 @@ /* - * This is called from inside fs/file_table.c:__fput() to unlink files - * from the eventpoll interface. We need to have this facility to cleanup - * correctly files that are closed without being removed from the eventpoll - * interface. + * This is called from eventpoll_release() to unlink files from the eventpoll + * interface. We need to have this facility to cleanup correctly files that are + * closed without being removed from the eventpoll interface. */ -void eventpoll_release(struct file *file) +void eventpoll_release_file(struct file *file) { struct list_head *lsthead = &file->f_ep_links; struct eventpoll *ep; struct epitem *epi; /* - * Fast check to avoid the get/release of the semaphore. Since - * we're doing this outside the semaphore lock, it might return - * false negatives, but we don't care. It'll help in 99.99% of cases - * to avoid the semaphore lock. False positives simply cannot happen - * because the file in on the way to be removed and nobody ( but - * eventpoll ) has still a reference to this file. - */ - if (list_empty(lsthead)) - return; - - /* * We don't want to get "file->f_ep_lock" because it is not * necessary. It is not necessary because we're in the "struct file" * cleanup path, and this means that noone is using this file anymore. @@ -541,7 +529,7 @@ /* * The following function implement the controller interface for the eventpoll * file that enable the insertion/removal/change of file descriptors inside - * the interest set. It rapresents the kernel part of the user spcae epoll_ctl(2). + * the interest set. It rapresents the kernel part of the user space epoll_ctl(2). 
*/ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) { @@ -551,8 +539,8 @@ struct epitem *epi; struct epoll_event epds; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u)\n", - current, epfd, op, fd, event->events)); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", + current, epfd, op, fd, event)); error = -EFAULT; if (copy_from_user(&epds, event, sizeof(struct epoll_event))) @@ -633,8 +621,8 @@ eexit_2: fput(file); eexit_1: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u) = %d\n", - current, epfd, op, fd, event->events, error)); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", + current, epfd, op, fd, event, error)); return error; } diff -Nru a/fs/exec.c b/fs/exec.c --- a/fs/exec.c Sat Jul 5 12:40:31 2003 +++ b/fs/exec.c Sat Jul 5 12:40:31 2003 @@ -117,7 +117,8 @@ struct nameidata nd; int error; - error = user_path_walk(library, &nd); + nd.intent.open.flags = O_RDONLY; + error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) goto out; @@ -125,7 +126,7 @@ if (!S_ISREG(nd.dentry->d_inode->i_mode)) goto exit; - error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC); + error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd); if (error) goto exit; @@ -392,7 +393,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -441,9 +442,9 @@ { int i; - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + for (i = 0; i < MAX_ARG_PAGES; i++) { if (bprm->page[i]) - __free_page(bprm->page[i]); + __free_page(bprm->page[i]); bprm->page[i] = NULL; } } @@ -461,7 +462,7 @@ file = ERR_PTR(-EACCES); if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && S_ISREG(inode->i_mode)) { - int err = permission(inode, MAY_EXEC); + int err = permission(inode, MAY_EXEC, &nd); if (!err && !(inode->i_mode & 0111)) err = -EACCES; file = ERR_PTR(err); @@ -758,12 +759,6 @@ char * name; int i, ch, retval; - /* - * Release all of the old mmap stuff - */ - retval = exec_mmap(bprm->mm); - if (retval) - goto out; /* * Make sure we have a private signal table and that * we are unassociated from the previous thread group. @@ -772,6 +767,15 @@ if (retval) goto out; + /* + * Release all of the old mmap stuff + */ + retval = exec_mmap(bprm->mm); + if (retval) + goto out; + + bprm->mm = NULL; /* We're using it now */ + /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -791,7 +795,7 @@ flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || - permission(bprm->file->f_dentry->d_inode,MAY_READ)) + permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL)) current->mm->dumpable = 0; /* An exec changes our domain. 
We are no longer part of the thread @@ -999,7 +1003,7 @@ } read_lock(&binfmt_lock); put_binfmt(fmt); - if (retval != -ENOEXEC) + if (retval != -ENOEXEC || bprm->mm == NULL) break; if (!bprm->file) { read_unlock(&binfmt_lock); @@ -1007,7 +1011,7 @@ } } read_unlock(&binfmt_lock); - if (retval != -ENOEXEC) { + if (retval != -ENOEXEC || bprm->mm == NULL) { break; #ifdef CONFIG_KMOD }else{ @@ -1035,7 +1039,6 @@ struct linux_binprm bprm; struct file *file; int retval; - int i; sched_balance_exec(); @@ -1103,17 +1106,14 @@ out: /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm.page[i]; - if (page) - __free_page(page); - } + free_arg_pages(&bprm); if (bprm.security) security_bprm_free(&bprm); out_mm: - mmdrop(bprm.mm); + if (bprm.mm) + mmdrop(bprm.mm); out_file: if (bprm.file) { diff -Nru a/fs/ext2/acl.c b/fs/ext2/acl.c --- a/fs/ext2/acl.c Sat Jul 5 12:40:32 2003 +++ b/fs/ext2/acl.c Sat Jul 5 12:40:32 2003 @@ -309,7 +309,7 @@ * BKL held [before 2.5.x] */ int -ext2_permission(struct inode *inode, int mask) +ext2_permission(struct inode *inode, int mask, struct nameidata *nd) { return __ext2_permission(inode, mask, 1); } diff -Nru a/fs/ext2/acl.h b/fs/ext2/acl.h --- a/fs/ext2/acl.h Sat Jul 5 12:40:32 2003 +++ b/fs/ext2/acl.h Sat Jul 5 12:40:32 2003 @@ -59,7 +59,7 @@ #define EXT2_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext2_permission (struct inode *, int); +extern int ext2_permission (struct inode *, int, struct nameidata *); extern int ext2_permission_locked (struct inode *, int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff -Nru a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c --- a/fs/ext2/ialloc.c Sat Jul 5 12:40:32 2003 +++ b/fs/ext2/ialloc.c Sat Jul 5 12:40:32 2003 @@ -489,17 +489,18 @@ return group; } -struct inode * ext2_new_inode(struct inode * dir, int mode) +struct inode *ext2_new_inode(struct inode *dir, int mode) { struct super_block *sb; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; int group, i; - ino_t ino; + ino_t ino = 0; struct inode * inode; - struct ext2_group_desc * desc; - struct ext2_super_block * es; + struct ext2_group_desc *gdp; + struct ext2_super_block *es; struct ext2_inode_info *ei; + struct ext2_sb_info *sbi; int err; sb = dir->i_sb; @@ -508,36 +509,62 @@ return ERR_PTR(-ENOMEM); ei = EXT2_I(inode); - es = EXT2_SB(sb)->s_es; + sbi = EXT2_SB(sb); + es = sbi->s_es; repeat: if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) + if (test_opt(sb, OLDALLOC)) group = find_group_dir(sb, dir); else group = find_group_orlov(sb, dir); } else group = find_group_other(sb, dir); - err = -ENOSPC; - if (group == -1) + if (group == -1) { + err = -ENOSPC; goto fail; + } - err = -EIO; - bitmap_bh = read_inode_bitmap(sb, group); - if (!bitmap_bh) - goto fail2; - - i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data, - EXT2_INODES_PER_GROUP(sb)); - if (i >= EXT2_INODES_PER_GROUP(sb)) - goto bad_count; - if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), - i, (void *) bitmap_bh->b_data)) { + for (i = 0; i < sbi->s_groups_count; i++) { + gdp = ext2_get_group_desc(sb, group, &bh2); brelse(bitmap_bh); - ext2_release_inode(sb, group, S_ISDIR(mode)); - goto repeat; + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) { + err = -EIO; + goto fail2; + } + + i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data, + EXT2_INODES_PER_GROUP(sb)); + if (i >= EXT2_INODES_PER_GROUP(sb)) { + /* + * 
Rare race: find_group_xx() decided that there were + * free inodes in this group, but by the time we tried + * to allocate one, they're all gone. This can also + * occur because the counters which find_group_orlov() + * uses are approximate. So just go and search the + * next block group. + */ + if (++group == sbi->s_groups_count) + group = 0; + continue; + } + if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), + i, bitmap_bh->b_data)) { + brelse(bitmap_bh); + bitmap_bh = NULL; + ext2_release_inode(sb, group, S_ISDIR(mode)); + goto repeat; + } + goto got; } + /* + * Scanned all blockgroups. + */ + err = -ENOSPC; + goto fail2; +got: mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); @@ -605,8 +632,9 @@ inode->i_generation = EXT2_SB(sb)->s_next_generation++; insert_inode_hash(inode); - if(DQUOT_ALLOC_INODE(inode)) { + if (DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); + err = -ENOSPC; goto fail3; } err = ext2_init_acl(inode, dir); @@ -631,21 +659,6 @@ make_bad_inode(inode); iput(inode); return ERR_PTR(err); - -bad_count: - brelse(bitmap_bh); - ext2_error (sb, "ext2_new_inode", - "Free inodes count corrupted in group %d", - group); - /* Is it really ENOSPC? */ - err = -ENOSPC; - if (sb->s_flags & MS_RDONLY) - goto fail; - - desc = ext2_get_group_desc (sb, group, &bh2); - desc->bg_free_inodes_count = 0; - mark_buffer_dirty(bh2); - goto repeat; } unsigned long ext2_count_free_inodes (struct super_block * sb) diff -Nru a/fs/ext2/namei.c b/fs/ext2/namei.c --- a/fs/ext2/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/ext2/namei.c Sat Jul 5 12:40:32 2003 @@ -66,7 +66,7 @@ * Methods themselves. */ -static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; ino_t ino; @@ -120,7 +120,7 @@ * If the create succeeds, we fill in the inode information * with d_instantiate(). 
*/ -static int ext2_create (struct inode * dir, struct dentry * dentry, int mode) +static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { struct inode * inode = ext2_new_inode (dir, mode); int err = PTR_ERR(inode); diff -Nru a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c --- a/fs/ext2/xattr_user.c Sat Jul 5 12:40:32 2003 +++ b/fs/ext2/xattr_user.c Sat Jul 5 12:40:32 2003 @@ -47,7 +47,7 @@ #ifdef CONFIG_EXT2_FS_POSIX_ACL error = ext2_permission_locked(inode, MAY_READ); #else - error = permission(inode, MAY_READ); + error = permission(inode, MAY_READ, NULL); #endif if (error) return error; @@ -71,7 +71,7 @@ #ifdef CONFIG_EXT2_FS_POSIX_ACL error = ext2_permission_locked(inode, MAY_WRITE); #else - error = permission(inode, MAY_WRITE); + error = permission(inode, MAY_WRITE, NULL); #endif if (error) return error; diff -Nru a/fs/ext3/acl.c b/fs/ext3/acl.c --- a/fs/ext3/acl.c Sat Jul 5 12:40:31 2003 +++ b/fs/ext3/acl.c Sat Jul 5 12:40:31 2003 @@ -312,7 +312,7 @@ * inode->i_sem: up */ int -ext3_permission(struct inode *inode, int mask) +ext3_permission(struct inode *inode, int mask, struct nameidata *nd) { return __ext3_permission(inode, mask, 1); } diff -Nru a/fs/ext3/acl.h b/fs/ext3/acl.h --- a/fs/ext3/acl.h Sat Jul 5 12:40:32 2003 +++ b/fs/ext3/acl.h Sat Jul 5 12:40:32 2003 @@ -59,7 +59,7 @@ #define EXT3_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext3_permission (struct inode *, int); +extern int ext3_permission (struct inode *, int, struct nameidata *); extern int ext3_permission_locked (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c --- a/fs/ext3/inode.c Sat Jul 5 12:40:32 2003 +++ b/fs/ext3/inode.c Sat Jul 5 12:40:32 2003 @@ -2290,68 +2290,72 @@ ext3_journal_stop(handle); } -/* - * ext3_get_inode_loc returns with an extra refcount against the - * inode's underlying buffer_head on success. 
- */ - -int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) +static unsigned long ext3_get_inode_block(struct super_block *sb, + unsigned long ino, struct ext3_iloc *iloc) { - struct buffer_head *bh = 0; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; - unsigned long desc; - unsigned long offset; + unsigned long desc, group_desc, block_group; + unsigned long offset, block; + struct buffer_head *bh; struct ext3_group_desc * gdp; - if ((inode->i_ino != EXT3_ROOT_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( - EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "bad inode number: %lu", inode->i_ino); - goto bad_inode; - } - block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); - if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", + if ((ino != EXT3_ROOT_INO && + ino != EXT3_JOURNAL_INO && + ino < EXT3_FIRST_INO(sb)) || + ino > le32_to_cpu( + EXT3_SB(sb)->s_es->s_inodes_count)) { + ext3_error (sb, "ext3_get_inode_block", + "bad inode number: %lu", ino); + return 0; + } + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + if (block_group >= EXT3_SB(sb)->s_groups_count) { + ext3_error (sb, "ext3_get_inode_block", "group >= groups count"); - goto bad_inode; + return 0; } - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); - desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); - bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc]; + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); + desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + bh = EXT3_SB(sb)->s_group_desc[group_desc]; if (!bh) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", + ext3_error (sb, "ext3_get_inode_block", "Descriptor not loaded"); - goto bad_inode; + return 0; } gdp = (struct ext3_group_desc *) bh->b_data; /* * Figure out the offset within the block group inode table */ - offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * - EXT3_INODE_SIZE(inode->i_sb); + offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * + EXT3_INODE_SIZE(sb); block = le32_to_cpu(gdp[desc].bg_inode_table) + - (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); - if (!(bh = sb_bread(inode->i_sb, block))) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } - offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); + (offset >> EXT3_BLOCK_SIZE_BITS(sb)); - iloc->bh = bh; - iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); iloc->block_group = block_group; + iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); + return block; +} - return 0; +/* + * ext3_get_inode_loc returns with an extra refcount against the + * inode's underlying buffer_head on success. 
+ */ + +int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) +{ + unsigned long block; - bad_inode: + block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); + if (block) { + struct buffer_head *bh = sb_bread(inode->i_sb, block); + if (bh) { + iloc->bh = bh; + return 0; + } + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%lu", inode->i_ino, block); + } return -EIO; } @@ -2388,7 +2392,7 @@ if (ext3_get_inode_loc(inode, &iloc)) goto bad_inode; bh = iloc.bh; - raw_inode = iloc.raw_inode; + raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); @@ -2454,11 +2458,9 @@ * even on big-endian machines: we do NOT byteswap the block numbers! */ for (block = 0; block < EXT3_N_BLOCKS; block++) - ei->i_data[block] = iloc.raw_inode->i_block[block]; + ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); - brelse (iloc.bh); - if (S_ISREG(inode->i_mode)) { inode->i_op = &ext3_file_inode_operations; inode->i_fop = &ext3_file_operations; @@ -2476,8 +2478,9 @@ } else { inode->i_op = &ext3_special_inode_operations; init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); + le32_to_cpu(raw_inode->i_block[0])); } + brelse (iloc.bh); ext3_set_inode_flags(inode); return; @@ -2497,7 +2500,7 @@ struct inode *inode, struct ext3_iloc *iloc) { - struct ext3_inode *raw_inode = iloc->raw_inode; + struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c --- a/fs/ext3/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/ext3/namei.c Sat Jul 5 12:40:32 2003 @@ -970,7 +970,7 @@ } #endif -static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; struct ext3_dir_entry_2 * de; @@ -1623,7 +1623,8 @@ * If the create succeeds, we fill in the inode information * with d_instantiate(). 
*/ -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode) +static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { handle_t *handle; struct inode * inode; diff -Nru a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c --- a/fs/ext3/xattr_user.c Sat Jul 5 12:40:31 2003 +++ b/fs/ext3/xattr_user.c Sat Jul 5 12:40:31 2003 @@ -49,7 +49,7 @@ #ifdef CONFIG_EXT3_FS_POSIX_ACL error = ext3_permission_locked(inode, MAY_READ); #else - error = permission(inode, MAY_READ); + error = permission(inode, MAY_READ, NULL); #endif if (error) return error; @@ -73,7 +73,7 @@ #ifdef CONFIG_EXT3_FS_POSIX_ACL error = ext3_permission_locked(inode, MAY_WRITE); #else - error = permission(inode, MAY_WRITE); + error = permission(inode, MAY_WRITE, NULL); #endif if (error) return error; diff -Nru a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c --- a/fs/freevxfs/vxfs_lookup.c Sat Jul 5 12:40:34 2003 +++ b/fs/freevxfs/vxfs_lookup.c Sat Jul 5 12:40:34 2003 @@ -51,7 +51,7 @@ #define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize)) -static struct dentry * vxfs_lookup(struct inode *, struct dentry *); +static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *); static int vxfs_readdir(struct file *, void *, filldir_t); struct inode_operations vxfs_dir_inode_ops = { @@ -193,6 +193,7 @@ * vxfs_lookup - lookup pathname component * @dip: dir in which we lookup * @dp: dentry we lookup + * @nd: lookup nameidata * * Description: * vxfs_lookup tries to lookup the pathname component described @@ -203,7 +204,7 @@ * in the return pointer. */ static struct dentry * -vxfs_lookup(struct inode *dip, struct dentry *dp) +vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) { struct inode *ip = NULL; ino_t ino; diff -Nru a/fs/fs-writeback.c b/fs/fs-writeback.c --- a/fs/fs-writeback.c Sat Jul 5 12:40:34 2003 +++ b/fs/fs-writeback.c Sat Jul 5 12:40:34 2003 @@ -516,6 +516,7 @@ int need_write_inode_now = 0; int err2; + current->flags |= PF_SYNCWRITE; if (what & OSYNC_DATA) err = filemap_fdatawrite(inode->i_mapping); if (what & (OSYNC_METADATA|OSYNC_DATA)) { @@ -528,6 +529,7 @@ if (!err) err = err2; } + current->flags &= ~PF_SYNCWRITE; spin_lock(&inode_lock); if ((inode->i_state & I_DIRTY) && diff -Nru a/fs/hfs/dir.c b/fs/hfs/dir.c --- a/fs/hfs/dir.c Sat Jul 5 12:40:32 2003 +++ b/fs/hfs/dir.c Sat Jul 5 12:40:32 2003 @@ -163,7 +163,7 @@ * a directory and return a corresponding inode, given the inode for * the directory and the name (and its length) of the new file. */ -int hfs_create(struct inode * dir, struct dentry *dentry, int mode) +int hfs_create(struct inode * dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct hfs_cat_entry *entry = HFS_I(dir)->entry; struct hfs_cat_entry *new; diff -Nru a/fs/hfs/dir_cap.c b/fs/hfs/dir_cap.c --- a/fs/hfs/dir_cap.c Sat Jul 5 12:40:32 2003 +++ b/fs/hfs/dir_cap.c Sat Jul 5 12:40:32 2003 @@ -28,7 +28,7 @@ /*================ Forward declarations ================*/ -static struct dentry *cap_lookup(struct inode *, struct dentry *); +static struct dentry *cap_lookup(struct inode *, struct dentry *, struct nameidata *); static int cap_readdir(struct file *, void *, filldir_t); /*================ Global variables ================*/ @@ -95,7 +95,7 @@ * inode corresponding to an entry in a directory, given the inode for * the directory and the name (and its length) of the entry. 
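The same mechanical conversion recurs throughout the rest of this patch: ->lookup(), ->create(), ->permission() and d_op->d_revalidate() all gain a struct nameidata * argument, which most filesystems accept and ignore, and which may be NULL (nfsd and the vfs_mknod()/vfs_mkdir() paths below pass NULL). A minimal sketch of the converted prototypes for a filesystem that does not use the intent data, with hypothetical examplefs names:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/namei.h>

static struct dentry *examplefs_lookup(struct inode *dir, struct dentry *dentry,
					struct nameidata *nd)
{
	d_add(dentry, NULL);			/* always negative, like simple_lookup() */
	return NULL;
}

static int examplefs_create(struct inode *dir, struct dentry *dentry, int mode,
			    struct nameidata *nd)
{
	return -EROFS;				/* this sketch creates nothing */
}

static int examplefs_permission(struct inode *inode, int mask,
				struct nameidata *nd)
{
	return vfs_permission(inode, mask);	/* fall back to the generic check */
}

static int examplefs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	return 1;				/* always valid */
}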
*/ -static struct dentry *cap_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *cap_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { ino_t dtype; struct hfs_name cname; diff -Nru a/fs/hfs/dir_dbl.c b/fs/hfs/dir_dbl.c --- a/fs/hfs/dir_dbl.c Sat Jul 5 12:40:32 2003 +++ b/fs/hfs/dir_dbl.c Sat Jul 5 12:40:32 2003 @@ -24,9 +24,9 @@ /*================ Forward declarations ================*/ -static struct dentry *dbl_lookup(struct inode *, struct dentry *); +static struct dentry *dbl_lookup(struct inode *, struct dentry *, struct nameidata *); static int dbl_readdir(struct file *, void *, filldir_t); -static int dbl_create(struct inode *, struct dentry *, int); +static int dbl_create(struct inode *, struct dentry *, int, struct nameidata *); static int dbl_mkdir(struct inode *, struct dentry *, int); static int dbl_unlink(struct inode *, struct dentry *); static int dbl_rmdir(struct inode *, struct dentry *); @@ -108,7 +108,7 @@ * the inode for the directory and the name (and its length) of the * entry. */ -static struct dentry *dbl_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *dbl_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct hfs_name cname; struct hfs_cat_entry *entry; @@ -272,7 +272,7 @@ * the directory and the name (and its length) of the new file. */ static int dbl_create(struct inode * dir, struct dentry *dentry, - int mode) + int mode, struct nameidata *nd) { int error; @@ -280,7 +280,7 @@ if (is_hdr(dir, dentry->d_name.name, dentry->d_name.len)) { error = -EEXIST; } else { - error = hfs_create(dir, dentry, mode); + error = hfs_create(dir, dentry, mode, nd); } unlock_kernel(); return error; diff -Nru a/fs/hfs/dir_nat.c b/fs/hfs/dir_nat.c --- a/fs/hfs/dir_nat.c Sat Jul 5 12:40:32 2003 +++ b/fs/hfs/dir_nat.c Sat Jul 5 12:40:32 2003 @@ -30,7 +30,7 @@ /*================ Forward declarations ================*/ -static struct dentry *nat_lookup(struct inode *, struct dentry *); +static struct dentry *nat_lookup(struct inode *, struct dentry *, struct nameidata *); static int nat_readdir(struct file *, void *, filldir_t); static int nat_rmdir(struct inode *, struct dentry *); static int nat_hdr_unlink(struct inode *, struct dentry *); @@ -97,7 +97,7 @@ * the inode corresponding to an entry in a directory, given the inode * for the directory and the name (and its length) of the entry. 
*/ -static struct dentry *nat_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *nat_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { ino_t dtype; struct hfs_name cname; diff -Nru a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c --- a/fs/hfs/sysdep.c Sat Jul 5 12:40:32 2003 +++ b/fs/hfs/sysdep.c Sat Jul 5 12:40:32 2003 @@ -19,7 +19,7 @@ #include #include -static int hfs_revalidate_dentry(struct dentry *, int); +static int hfs_revalidate_dentry(struct dentry *, struct nameidata *); static int hfs_hash_dentry(struct dentry *, struct qstr *); static int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); static void hfs_dentry_iput(struct dentry *, struct inode *); @@ -90,7 +90,7 @@ iput(inode); } -static int hfs_revalidate_dentry(struct dentry *dentry, int flags) +static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; int diff; diff -Nru a/fs/hpfs/dir.c b/fs/hpfs/dir.c --- a/fs/hpfs/dir.c Sat Jul 5 12:40:31 2003 +++ b/fs/hpfs/dir.c Sat Jul 5 12:40:31 2003 @@ -198,7 +198,7 @@ * to tell read_inode to read fnode or not. */ -struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry) +struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { const char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; diff -Nru a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h --- a/fs/hpfs/hpfs_fn.h Sat Jul 5 12:40:32 2003 +++ b/fs/hpfs/hpfs_fn.h Sat Jul 5 12:40:32 2003 @@ -216,7 +216,7 @@ int hpfs_dir_release(struct inode *, struct file *); loff_t hpfs_dir_lseek(struct file *, loff_t, int); int hpfs_readdir(struct file *, void *, filldir_t); -struct dentry *hpfs_lookup(struct inode *, struct dentry *); +struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *); /* dnode.c */ @@ -285,7 +285,7 @@ /* namei.c */ int hpfs_mkdir(struct inode *, struct dentry *, int); -int hpfs_create(struct inode *, struct dentry *, int); +int hpfs_create(struct inode *, struct dentry *, int, struct nameidata *); int hpfs_mknod(struct inode *, struct dentry *, int, dev_t); int hpfs_symlink(struct inode *, struct dentry *, const char *); int hpfs_unlink(struct inode *, struct dentry *); diff -Nru a/fs/hpfs/namei.c b/fs/hpfs/namei.c --- a/fs/hpfs/namei.c Sat Jul 5 12:40:31 2003 +++ b/fs/hpfs/namei.c Sat Jul 5 12:40:31 2003 @@ -106,7 +106,7 @@ return -ENOSPC; } -int hpfs_create(struct inode *dir, struct dentry *dentry, int mode) +int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { const char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; @@ -374,7 +374,7 @@ d_drop(dentry); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1 || - permission(inode, MAY_WRITE) || + permission(inode, MAY_WRITE, NULL) || get_write_access(inode)) { spin_unlock(&dentry->d_lock); d_rehash(dentry); diff -Nru a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c --- a/fs/hugetlbfs/inode.c Sat Jul 5 12:40:32 2003 +++ b/fs/hugetlbfs/inode.c Sat Jul 5 12:40:32 2003 @@ -462,7 +462,7 @@ return retval; } -static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); } diff -Nru a/fs/intermezzo/dcache.c b/fs/intermezzo/dcache.c --- a/fs/intermezzo/dcache.c Sat Jul 5 12:40:32 2003 +++ b/fs/intermezzo/dcache.c Sat Jul 5 12:40:32 2003 @@ -50,7 +50,7 @@ 
kmem_cache_t * presto_dentry_slab; /* called when a cache lookup succeeds */ -static int presto_d_revalidate(struct dentry *de, int flag) +static int presto_d_revalidate(struct dentry *de, struct nameidata *nd) { struct inode *inode = de->d_inode; struct presto_file_set * root_fset; diff -Nru a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c --- a/fs/intermezzo/dir.c Sat Jul 5 12:40:31 2003 +++ b/fs/intermezzo/dir.c Sat Jul 5 12:40:31 2003 @@ -81,7 +81,7 @@ /* * these are initialized in super.c */ -extern int presto_permission(struct inode *inode, int mask); +extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd); static int izo_authorized_uid = 0; int izo_dentry_is_ilookup(struct dentry *dentry, ino_t *id, @@ -239,7 +239,7 @@ return de; } -struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry) +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { int rc = 0; struct dentry *de; @@ -286,7 +286,7 @@ (dir, dentry, ino, generation); is_ilookup = 1; } else - de = iops->lookup(dir, dentry); + de = iops->lookup(dir, dentry, nd); #if 0 } #endif @@ -412,7 +412,8 @@ return 0; } -static int presto_create(struct inode * dir, struct dentry * dentry, int mode) +static int presto_create(struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { int error; struct presto_cache *cache; @@ -829,7 +830,7 @@ * appropriate permission function. Thus we do not worry here about ACLs * or EAs. -SHP */ -int presto_permission(struct inode *inode, int mask) +int presto_permission(struct inode *inode, int mask, struct nameidata *nd) { unsigned short mode = inode->i_mode; struct presto_cache *cache; @@ -851,11 +852,11 @@ if ( S_ISREG(mode) && fiops && fiops->permission ) { EXIT; - return fiops->permission(inode, mask); + return fiops->permission(inode, mask, nd); } if ( S_ISDIR(mode) && diops && diops->permission ) { EXIT; - return diops->permission(inode, mask); + return diops->permission(inode, mask, nd); } } @@ -866,7 +867,7 @@ * the VFS permission function. 
*/ inode->i_op->permission = NULL; - rc = permission(inode, mask); + rc = permission(inode, mask, nd); inode->i_op->permission = &presto_permission; EXIT; diff -Nru a/fs/intermezzo/file.c b/fs/intermezzo/file.c --- a/fs/intermezzo/file.c Sat Jul 5 12:40:32 2003 +++ b/fs/intermezzo/file.c Sat Jul 5 12:40:32 2003 @@ -53,7 +53,7 @@ /* * these are initialized in super.c */ -extern int presto_permission(struct inode *inode, int mask); +extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd); static int presto_open_upcall(int minor, struct dentry *de) diff -Nru a/fs/intermezzo/intermezzo_fs.h b/fs/intermezzo/intermezzo_fs.h --- a/fs/intermezzo/intermezzo_fs.h Sat Jul 5 12:40:31 2003 +++ b/fs/intermezzo/intermezzo_fs.h Sat Jul 5 12:40:31 2003 @@ -370,7 +370,7 @@ # define PRESTO_ILOOKUP_MAGIC "...ino:" # define PRESTO_ILOOKUP_SEP ':' int izo_dentry_is_ilookup(struct dentry *, ino_t *id, unsigned int *generation); -struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry); +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd); struct presto_dentry_data { int dd_count; /* how mnay dentries are using this dentry */ diff -Nru a/fs/intermezzo/vfs.c b/fs/intermezzo/vfs.c --- a/fs/intermezzo/vfs.c Sat Jul 5 12:40:31 2003 +++ b/fs/intermezzo/vfs.c Sat Jul 5 12:40:31 2003 @@ -134,7 +134,7 @@ int error; if (!victim->d_inode || victim->d_parent->d_inode != dir) return -ENOENT; - error = permission(dir,MAY_WRITE | MAY_EXEC); + error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); if (error) return error; if (IS_APPEND(dir)) @@ -158,7 +158,7 @@ return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - return permission(dir,MAY_WRITE | MAY_EXEC); + return permission(dir,MAY_WRITE | MAY_EXEC, NULL); } #ifdef PRESTO_DEBUG @@ -598,7 +598,7 @@ } DQUOT_INIT(dir->d_inode); lock_kernel(); - error = iops->create(dir->d_inode, dentry, mode); + error = iops->create(dir->d_inode, dentry, mode, NULL); if (error) { EXIT; goto exit_lock; @@ -1840,7 +1840,7 @@ * we'll need to flip '..'. */ if (new_dir != old_dir) { - error = permission(old_dentry->d_inode, MAY_WRITE); + error = permission(old_dentry->d_inode, MAY_WRITE, NULL); } if (error) return error; diff -Nru a/fs/isofs/namei.c b/fs/isofs/namei.c --- a/fs/isofs/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/isofs/namei.c Sat Jul 5 12:40:32 2003 @@ -158,7 +158,7 @@ return 0; } -struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry) +struct dentry *isofs_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { unsigned long ino; struct inode *inode; diff -Nru a/fs/jbd/commit.c b/fs/jbd/commit.c --- a/fs/jbd/commit.c Sat Jul 5 12:40:32 2003 +++ b/fs/jbd/commit.c Sat Jul 5 12:40:32 2003 @@ -169,10 +169,23 @@ * that multiple journal_get_write_access() calls to the same * buffer are perfectly permissable. 
*/ - while (commit_transaction->t_reserved_list) { - jh = commit_transaction->t_reserved_list; - JBUFFER_TRACE(jh, "reserved, unused: refile"); - journal_refile_buffer(journal, jh); + { + int nr = 0; + while (commit_transaction->t_reserved_list) { + jh = commit_transaction->t_reserved_list; + JBUFFER_TRACE(jh, "reserved, unused: refile"); + journal_refile_buffer(journal, jh); + nr++; + } + if (nr) { + static int noisy; + + if (noisy < 10) { + noisy++; + printk("%s: freed %d reserved buffers\n", + __FUNCTION__, nr); + } + } } /* diff -Nru a/fs/jbd/transaction.c b/fs/jbd/transaction.c --- a/fs/jbd/transaction.c Sat Jul 5 12:40:31 2003 +++ b/fs/jbd/transaction.c Sat Jul 5 12:40:31 2003 @@ -1168,37 +1168,24 @@ * journal_release_buffer: undo a get_write_access without any buffer * updates, if the update decided in the end that it didn't need access. * - * journal_get_write_access() can block, so it is quite possible for a - * journaling component to decide after the write access is returned - * that global state has changed and the update is no longer required. - * * The caller passes in the number of credits which should be put back for * this buffer (zero or one). + * + * We leave the buffer attached to t_reserved_list because even though this + * handle doesn't want it, some other concurrent handle may want to journal + * this buffer. If that handle is curently in between get_write_access() and + * journal_dirty_metadata() then it expects the buffer to be reserved. If + * we were to rip it off t_reserved_list here, the other handle will explode + * when journal_dirty_metadata is presented with a non-reserved buffer. + * + * If nobody really wants to journal this buffer then it will be thrown + * away at the start of commit. */ void journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits) { - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); - - JBUFFER_TRACE(jh, "entry"); - - /* If the buffer is reserved but not modified by this - * transaction, then it is safe to release it. In all other - * cases, just leave the buffer as it is. */ - - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction && - !buffer_jbddirty(jh2bh(jh))) { - JBUFFER_TRACE(jh, "unused: refiling it"); - __journal_refile_buffer(jh); - } - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); + BUFFER_TRACE(bh, "entry"); handle->h_buffer_credits += credits; - JBUFFER_TRACE(jh, "exit"); } /** diff -Nru a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c --- a/fs/jffs/inode-v23.c Sat Jul 5 12:40:32 2003 +++ b/fs/jffs/inode-v23.c Sat Jul 5 12:40:32 2003 @@ -642,7 +642,7 @@ /* Find a file in a directory. If the file exists, return its corresponding dentry. */ static struct dentry * -jffs_lookup(struct inode *dir, struct dentry *dentry) +jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct jffs_file *d; struct jffs_file *f; @@ -1273,7 +1273,8 @@ * with d_instantiate(). 
*/ static int -jffs_create(struct inode *dir, struct dentry *dentry, int mode) +jffs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { struct jffs_raw_inode raw_inode; struct jffs_control *c; diff -Nru a/fs/jffs2/dir.c b/fs/jffs2/dir.c --- a/fs/jffs2/dir.c Sat Jul 5 12:40:32 2003 +++ b/fs/jffs2/dir.c Sat Jul 5 12:40:32 2003 @@ -32,8 +32,8 @@ static int jffs2_readdir (struct file *, void *, filldir_t); -static int jffs2_create (struct inode *,struct dentry *,int); -static struct dentry *jffs2_lookup (struct inode *,struct dentry *); +static int jffs2_create (struct inode *,struct dentry *,int, struct nameidata *); +static struct dentry *jffs2_lookup (struct inode *,struct dentry *, struct nameidata *); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); static int jffs2_unlink (struct inode *,struct dentry *); static int jffs2_symlink (struct inode *,struct dentry *,const char *); @@ -73,7 +73,7 @@ and we use the same hash function as the dentries. Makes this nice and simple */ -static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target) +static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, struct nameidata *nd) { struct jffs2_inode_info *dir_f; struct jffs2_sb_info *c; @@ -175,7 +175,8 @@ /***********************************************************************/ -static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode) +static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, + struct nameidata *nd) { struct jffs2_raw_inode *ri; struct jffs2_inode_info *f, *dir_f; diff -Nru a/fs/jfs/acl.c b/fs/jfs/acl.c --- a/fs/jfs/acl.c Sat Jul 5 12:40:31 2003 +++ b/fs/jfs/acl.c Sat Jul 5 12:40:31 2003 @@ -208,7 +208,7 @@ return -EACCES; } -int jfs_permission(struct inode * inode, int mask) +int jfs_permission(struct inode * inode, int mask, struct nameidata *nd) { return __jfs_permission(inode, mask, 0); } diff -Nru a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h --- a/fs/jfs/jfs_acl.h Sat Jul 5 12:40:32 2003 +++ b/fs/jfs/jfs_acl.h Sat Jul 5 12:40:32 2003 @@ -25,7 +25,7 @@ struct posix_acl *jfs_get_acl(struct inode *, int); int jfs_set_acl(struct inode *, int, struct posix_acl *); int jfs_permission_have_sem(struct inode *, int); -int jfs_permission(struct inode *, int); +int jfs_permission(struct inode *, int, struct nameidata *); int jfs_init_acl(struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); diff -Nru a/fs/jfs/namei.c b/fs/jfs/namei.c --- a/fs/jfs/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/jfs/namei.c Sat Jul 5 12:40:32 2003 @@ -54,11 +54,13 @@ * PARAMETER: dip - parent directory vnode * dentry - dentry of new file * mode - create mode (rwxrwxrwx). 
+ * nd- nd struct * * RETURN: Errors from subroutines * */ -int jfs_create(struct inode *dip, struct dentry *dentry, int mode) +int jfs_create(struct inode *dip, struct dentry *dentry, int mode, + struct nameidata *nd) { int rc = 0; tid_t tid; /* transaction id */ @@ -1373,7 +1375,7 @@ return -rc; } -static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry) +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) { struct btstack btstack; ino_t inum; diff -Nru a/fs/jfs/xattr.c b/fs/jfs/xattr.c --- a/fs/jfs/xattr.c Sat Jul 5 12:40:31 2003 +++ b/fs/jfs/xattr.c Sat Jul 5 12:40:31 2003 @@ -731,7 +731,7 @@ #ifdef CONFIG_JFS_POSIX_ACL return jfs_permission_have_sem(inode, MAY_WRITE); #else - return permission(inode, MAY_WRITE); + return permission(inode, MAY_WRITE, NULL); #endif } @@ -893,7 +893,7 @@ else return jfs_permission_have_sem(inode, MAY_READ); #else - return permission(inode, MAY_READ); + return permission(inode, MAY_READ, NULL); #endif } diff -Nru a/fs/libfs.c b/fs/libfs.c --- a/fs/libfs.c Sat Jul 5 12:40:31 2003 +++ b/fs/libfs.c Sat Jul 5 12:40:31 2003 @@ -29,7 +29,7 @@ * exist, we know it is negative. */ -struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry) +struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { d_add(dentry, NULL); return NULL; diff -Nru a/fs/minix/namei.c b/fs/minix/namei.c --- a/fs/minix/namei.c Sat Jul 5 12:40:31 2003 +++ b/fs/minix/namei.c Sat Jul 5 12:40:31 2003 @@ -54,7 +54,7 @@ .d_hash = minix_hash, }; -static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode = NULL; ino_t ino; @@ -89,7 +89,8 @@ return error; } -static int minix_create(struct inode * dir, struct dentry *dentry, int mode) +static int minix_create(struct inode * dir, struct dentry *dentry, int mode, + struct nameidata *nd) { return minix_mknod(dir, dentry, mode, 0); } diff -Nru a/fs/msdos/namei.c b/fs/msdos/namei.c --- a/fs/msdos/namei.c Sat Jul 5 12:40:34 2003 +++ b/fs/msdos/namei.c Sat Jul 5 12:40:34 2003 @@ -193,7 +193,7 @@ */ /***** Get inode using directory and name */ -struct dentry *msdos_lookup(struct inode *dir,struct dentry *dentry) +struct dentry *msdos_lookup(struct inode *dir,struct dentry *dentry, struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct inode *inode = NULL; @@ -261,7 +261,8 @@ */ /***** Create a file */ -int msdos_create(struct inode *dir,struct dentry *dentry,int mode) +int msdos_create(struct inode *dir,struct dentry *dentry,int mode, + struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct buffer_head *bh; diff -Nru a/fs/namei.c b/fs/namei.c --- a/fs/namei.c Sat Jul 5 12:40:31 2003 +++ b/fs/namei.c Sat Jul 5 12:40:31 2003 @@ -203,7 +203,7 @@ return -EACCES; } -int permission(struct inode * inode,int mask) +int permission(struct inode * inode,int mask, struct nameidata *nd) { int retval; int submask; @@ -212,7 +212,7 @@ submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) - retval = inode->i_op->permission(inode, submask); + retval = inode->i_op->permission(inode, submask, nd); else retval = vfs_permission(inode, submask); if (retval) @@ -273,7 +273,7 @@ * Internal lookup() using the new generic dcache. 
* SMP-safe */ -static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) +static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) { struct dentry * dentry = __d_lookup(parent, name); @@ -284,7 +284,7 @@ dentry = d_lookup(parent, name); if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { + if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { dput(dentry); dentry = NULL; } @@ -336,7 +336,7 @@ * make sure that nobody added the entry to the dcache in the meantime.. * SMP-safe */ -static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) +static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) { struct dentry * result; struct inode *dir = parent->d_inode; @@ -361,7 +361,7 @@ struct dentry * dentry = d_alloc(parent, name); result = ERR_PTR(-ENOMEM); if (dentry) { - result = dir->i_op->lookup(dir, dentry); + result = dir->i_op->lookup(dir, dentry, nd); if (result) dput(dentry); else @@ -377,7 +377,7 @@ */ up(&dir->i_sem); if (result->d_op && result->d_op->d_revalidate) { - if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) { + if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); result = ERR_PTR(-ENOENT); } @@ -524,7 +524,7 @@ * It _is_ time-critical. */ static int do_lookup(struct nameidata *nd, struct qstr *name, - struct path *path, int flags) + struct path *path) { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); @@ -539,13 +539,13 @@ return 0; need_lookup: - dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE); + dentry = real_lookup(nd->dentry, name, nd); if (IS_ERR(dentry)) goto fail; goto done; need_revalidate: - if (dentry->d_op->d_revalidate(dentry, flags)) + if (dentry->d_op->d_revalidate(dentry, nd)) goto done; if (d_invalidate(dentry)) goto done; @@ -588,7 +588,7 @@ err = exec_permission_lite(inode); if (err == -EAGAIN) { - err = permission(inode, MAY_EXEC); + err = permission(inode, MAY_EXEC, nd); } if (err) break; @@ -638,8 +638,9 @@ if (err < 0) break; } + nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. */ - err = do_lookup(nd, &this, &next, LOOKUP_CONTINUE); + err = do_lookup(nd, &this, &next); if (err) break; /* Check mountpoints.. */ @@ -681,6 +682,7 @@ last_with_slashes: lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; last_component: + nd->flags &= ~LOOKUP_CONTINUE; if (lookup_flags & LOOKUP_PARENT) goto lookup_parent; if (this.name[0] == '.') switch (this.len) { @@ -700,7 +702,7 @@ if (err < 0) break; } - err = do_lookup(nd, &this, &next, 0); + err = do_lookup(nd, &this, &next); if (err) break; follow_mount(&next.mnt, &next.dentry); @@ -769,6 +771,7 @@ */ nd_root.last_type = LAST_ROOT; nd_root.flags = nd->flags; + memcpy(&nd_root.intent, &nd->intent, sizeof(nd_root.intent)); read_lock(¤t->fs->lock); nd_root.mnt = mntget(current->fs->rootmnt); nd_root.dentry = dget(current->fs->root); @@ -866,14 +869,14 @@ * needs parent already locked. Doesn't follow mounts. * SMP-safe. 
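Note the change of mechanism in the link_path_walk() hunks above: LOOKUP_CONTINUE is now set in nd->flags while intermediate components are resolved and cleared at the last component, instead of being passed to do_lookup() as a separate flags argument, so per-filesystem methods can tell which component they are being called for. The NFS hunks further down rely on exactly this; as a sketch, the test they build on amounts to the following (nd_last_component_create() is a hypothetical name, not part of the patch):

/* Sketch only: true when this call is for the last component of a path walk
 * that carries a create intent (LOOKUP_CREATE set, LOOKUP_CONTINUE clear). */
static inline int nd_last_component_create(struct nameidata *nd)
{
	return nd && (nd->flags & LOOKUP_CREATE) && !(nd->flags & LOOKUP_CONTINUE);
}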
*/ -struct dentry * lookup_hash(struct qstr *name, struct dentry * base) +static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) { struct dentry * dentry; struct inode *inode; int err; inode = base->d_inode; - err = permission(inode, MAY_EXEC); + err = permission(inode, MAY_EXEC, nd); dentry = ERR_PTR(err); if (err) goto out; @@ -889,13 +892,13 @@ goto out; } - dentry = cached_lookup(base, name, 0); + dentry = cached_lookup(base, name, nd); if (!dentry) { struct dentry *new = d_alloc(base, name); dentry = ERR_PTR(-ENOMEM); if (!new) goto out; - dentry = inode->i_op->lookup(inode, new); + dentry = inode->i_op->lookup(inode, new, nd); if (!dentry) dentry = new; else @@ -905,6 +908,11 @@ return dentry; } +struct dentry * lookup_hash(struct qstr *name, struct dentry * base) +{ + return __lookup_hash(name, base, NULL); +} + /* SMP-safe */ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) { @@ -988,12 +996,12 @@ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) +static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) { int error; if (!victim->d_inode || victim->d_parent->d_inode != dir) return -ENOENT; - error = permission(dir,MAY_WRITE | MAY_EXEC); + error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); if (error) return error; if (IS_APPEND(dir)) @@ -1023,12 +1031,14 @@ * 3. We should have write and exec permissions on dir * 4. We can't do it if dir is immutable (done in permission()) */ -static inline int may_create(struct inode *dir, struct dentry *child) { +static inline int may_create(struct inode *dir, struct dentry *child, + struct nameidata *nd) +{ if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - return permission(dir,MAY_WRITE | MAY_EXEC); + return permission(dir,MAY_WRITE | MAY_EXEC, nd); } /* @@ -1097,9 +1107,10 @@ } } -int vfs_create(struct inode *dir, struct dentry *dentry, int mode) +int vfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, nd); if (error) return error; @@ -1112,7 +1123,7 @@ if (error) return error; DQUOT_INIT(dir); - error = dir->i_op->create(dir, dentry, mode); + error = dir->i_op->create(dir, dentry, mode, nd); if (!error) { inode_dir_notify(dir, DN_CREATE); security_inode_post_create(dir, dentry, mode); @@ -1135,7 +1146,7 @@ if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) return -EISDIR; - error = permission(inode, acc_mode); + error = permission(inode, acc_mode, nd); if (error) return error; @@ -1222,11 +1233,15 @@ if (flag & O_APPEND) acc_mode |= MAY_APPEND; + /* Fill in the open() intent data */ + nd->intent.open.flags = flag; + nd->intent.open.create_mode = mode; + /* * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { - error = path_lookup(pathname, lookup_flags(flag), nd); + error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); if (error) return error; dentry = nd->dentry; @@ -1236,7 +1251,7 @@ /* * Create - we need to know the parent. 
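open_namei() above now records the open(2) flags and create mode in nd->intent.open before starting the path walk, and the nd_root initialisation earlier copies nd->intent along when the walk restarts from the root. The declaration of these intent fields is not visible in these hunks; judging from the accesses, it is roughly of the following shape (a reconstruction; field names beyond those actually dereferenced above are assumptions):

struct open_intent {
	int	flags;		/* the open(2) flags, e.g. O_CREAT|O_EXCL */
	int	create_mode;	/* mode argument of open(2)/creat(2) */
};

struct nameidata {
	struct dentry	*dentry;
	struct vfsmount	*mnt;
	struct qstr	last;
	unsigned int	flags;	/* LOOKUP_* bits, now also LOOKUP_OPEN,
				 * LOOKUP_CREATE and LOOKUP_ACCESS */
	int		last_type;
	union {
		struct open_intent open;
	} intent;
};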
*/ - error = path_lookup(pathname, LOOKUP_PARENT, nd); + error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); if (error) return error; @@ -1250,8 +1265,9 @@ goto exit; dir = nd->dentry; + nd->flags &= ~LOOKUP_PARENT; down(&dir->d_inode->i_sem); - dentry = lookup_hash(&nd->last, nd->dentry); + dentry = __lookup_hash(&nd->last, nd->dentry, nd); do_last: error = PTR_ERR(dentry); @@ -1264,7 +1280,7 @@ if (!dentry->d_inode) { if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; - error = vfs_create(dir->d_inode, dentry, mode); + error = vfs_create(dir->d_inode, dentry, mode, nd); up(&dir->d_inode->i_sem); dput(nd->dentry); nd->dentry = dentry; @@ -1354,7 +1370,7 @@ } dir = nd->dentry; down(&dir->d_inode->i_sem); - dentry = lookup_hash(&nd->last, nd->dentry); + dentry = __lookup_hash(&nd->last, nd->dentry, nd); putname(nd->last.name); goto do_last; } @@ -1368,6 +1384,7 @@ dentry = ERR_PTR(-EEXIST); if (nd->last_type != LAST_NORM) goto fail; + nd->flags &= ~LOOKUP_PARENT; dentry = lookup_hash(&nd->last, nd->dentry); if (IS_ERR(dentry)) goto fail; @@ -1383,7 +1400,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, NULL); if (error) return error; @@ -1431,7 +1448,7 @@ if (!IS_ERR(dentry)) { switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(nd.dentry->d_inode,dentry,mode); + error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev); @@ -1454,7 +1471,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, NULL); if (error) return error; @@ -1700,7 +1717,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) { - int error = may_create(dir, dentry); + int error = may_create(dir, dentry, NULL); if (error) return error; @@ -1762,7 +1779,7 @@ if (!inode) return -ENOENT; - error = may_create(dir, new_dentry); + error = may_create(dir, new_dentry, NULL); if (error) return error; @@ -1883,7 +1900,7 @@ * we'll need to flip '..'. 
*/ if (new_dir != old_dir) { - error = permission(old_dentry->d_inode, MAY_WRITE); + error = permission(old_dentry->d_inode, MAY_WRITE, NULL); if (error) return error; } @@ -1961,7 +1978,7 @@ return error; if (!new_dentry->d_inode) - error = may_create(new_dir, new_dentry); + error = may_create(new_dir, new_dentry, NULL); else error = may_delete(new_dir, new_dentry, is_dir); if (error) diff -Nru a/fs/namespace.c b/fs/namespace.c --- a/fs/namespace.c Sat Jul 5 12:40:32 2003 +++ b/fs/namespace.c Sat Jul 5 12:40:32 2003 @@ -403,7 +403,7 @@ if (current->uid != nd->dentry->d_inode->i_uid) return -EPERM; } - if (permission(nd->dentry->d_inode, MAY_WRITE)) + if (permission(nd->dentry->d_inode, MAY_WRITE, nd)) return -EPERM; return 0; #endif diff -Nru a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c --- a/fs/ncpfs/dir.c Sat Jul 5 12:40:32 2003 +++ b/fs/ncpfs/dir.c Sat Jul 5 12:40:32 2003 @@ -34,8 +34,8 @@ static int ncp_readdir(struct file *, void *, filldir_t); -static int ncp_create(struct inode *, struct dentry *, int); -static struct dentry *ncp_lookup(struct inode *, struct dentry *); +static int ncp_create(struct inode *, struct dentry *, int, struct nameidata *); +static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); static int ncp_unlink(struct inode *, struct dentry *); static int ncp_mkdir(struct inode *, struct dentry *, int); static int ncp_rmdir(struct inode *, struct dentry *); @@ -72,7 +72,7 @@ /* * Dentry operations routines */ -static int ncp_lookup_validate(struct dentry *, int); +static int ncp_lookup_validate(struct dentry *, struct nameidata *); static int ncp_hash_dentry(struct dentry *, struct qstr *); static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); static int ncp_delete_dentry(struct dentry *); @@ -264,7 +264,7 @@ static int -__ncp_lookup_validate(struct dentry * dentry, int flags) +__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) { struct ncp_server *server; struct dentry *parent; @@ -333,11 +333,11 @@ } static int -ncp_lookup_validate(struct dentry * dentry, int flags) +ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) { int res; lock_kernel(); - res = __ncp_lookup_validate(dentry, flags); + res = __ncp_lookup_validate(dentry, nd); unlock_kernel(); return res; } @@ -797,7 +797,7 @@ return result; } -static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry) +static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct ncp_server *server = NCP_SERVER(dir); struct inode *inode = NULL; @@ -942,7 +942,8 @@ return error; } -static int ncp_create(struct inode *dir, struct dentry *dentry, int mode) +static int ncp_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { return ncp_create_new(dir, dentry, mode, 0, 0); } diff -Nru a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c --- a/fs/ncpfs/ioctl.c Sat Jul 5 12:40:32 2003 +++ b/fs/ncpfs/ioctl.c Sat Jul 5 12:40:32 2003 @@ -40,7 +40,7 @@ switch (cmd) { case NCP_IOC_NCPREQUEST: - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } @@ -99,7 +99,7 @@ { struct ncp_fs_info info; - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } @@ -127,7 +127,7 @@ { struct ncp_fs_info_v2 info2; - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && 
(current->uid != server->m.mounted_uid)) { return -EACCES; } @@ -155,7 +155,7 @@ { unsigned long tmp = server->m.mounted_uid; - if ( (permission(inode, MAY_READ) != 0) + if ( (permission(inode, MAY_READ, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -169,7 +169,7 @@ { struct ncp_setroot_ioctl sr; - if ( (permission(inode, MAY_READ) != 0) + if ( (permission(inode, MAY_READ, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -249,7 +249,7 @@ #ifdef CONFIG_NCPFS_PACKET_SIGNING case NCP_IOC_SIGN_INIT: - if ((permission(inode, MAY_WRITE) != 0) + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -272,7 +272,7 @@ return 0; case NCP_IOC_SIGN_WANTED: - if ( (permission(inode, MAY_READ) != 0) + if ( (permission(inode, MAY_READ, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -285,7 +285,7 @@ { int newstate; - if ( (permission(inode, MAY_WRITE) != 0) + if ( (permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -306,7 +306,7 @@ #ifdef CONFIG_NCPFS_IOCTL_LOCKING case NCP_IOC_LOCKUNLOCK: - if ( (permission(inode, MAY_WRITE) != 0) + if ( (permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; @@ -608,7 +608,7 @@ } #endif /* CONFIG_NCPFS_NLS */ case NCP_IOC_SETDENTRYTTL: - if ((permission(inode, MAY_WRITE) != 0) && + if ((permission(inode, MAY_WRITE, NULL) != 0) && (current->uid != server->m.mounted_uid)) return -EACCES; { @@ -637,7 +637,7 @@ /* NCP_IOC_GETMOUNTUID may be same as NCP_IOC_GETMOUNTUID2, so we have this out of switch */ if (cmd == NCP_IOC_GETMOUNTUID) { - if ((permission(inode, MAY_READ) != 0) + if ((permission(inode, MAY_READ, NULL) != 0) && (current->uid != server->m.mounted_uid)) { return -EACCES; } diff -Nru a/fs/nfs/dir.c b/fs/nfs/dir.c --- a/fs/nfs/dir.c Sat Jul 5 12:40:31 2003 +++ b/fs/nfs/dir.c Sat Jul 5 12:40:31 2003 @@ -37,10 +37,10 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *); +static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); static int nfs_cached_lookup(struct inode *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); -static int nfs_create(struct inode *, struct dentry *, int); +static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); static int nfs_unlink(struct inode *, struct dentry *); @@ -78,13 +78,9 @@ static int nfs_opendir(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); int res = 0; lock_kernel(); - /* Do cto revalidation */ - if (!(server->flags & NFS_MOUNT_NOCTO)) - res = __nfs_revalidate_inode(server, inode); /* Call generic open code in order to cache credentials */ if (!res) res = nfs_open(inode, filp); @@ -485,9 +481,13 @@ } static inline -int nfs_lookup_verify_inode(struct inode *inode) +int nfs_lookup_verify_inode(struct inode *inode, int isopen) { - return nfs_revalidate_inode(NFS_SERVER(inode), inode); + struct nfs_server *server = NFS_SERVER(inode); + + if (isopen && !(server->flags & NFS_MOUNT_NOCTO)) + return __nfs_revalidate_inode(server, inode); + return nfs_revalidate_inode(server, inode); } /* @@ -497,8 +497,17 @@ * If parent mtime has changed, we 
revalidate, else we wait for a * period corresponding to the parent's attribute cache timeout value. */ -static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry) +static inline +int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) { + int ndflags = 0; + + if (nd) + ndflags = nd->flags; + /* Don't revalidate a negative dentry if we're creating a new file */ + if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) + return 0; if (!nfs_check_verifier(dir, dentry)) return 1; return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); @@ -515,7 +524,7 @@ * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry * dentry, int flags) +static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode *dir; struct inode *inode; @@ -523,14 +532,18 @@ int error; struct nfs_fh fhandle; struct nfs_fattr fattr; + int isopen = 0; parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; inode = dentry->d_inode; + if (nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_OPEN)) + isopen = 1; + if (!inode) { - if (nfs_neg_need_reval(dir, dentry)) + if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; goto out_valid; } @@ -543,7 +556,7 @@ /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode)) + if (nfs_lookup_verify_inode(inode, isopen)) goto out_bad; goto out_valid; } @@ -552,7 +565,7 @@ if (!error) { if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) goto out_bad; - if (nfs_lookup_verify_inode(inode)) + if (nfs_lookup_verify_inode(inode, isopen)) goto out_bad; goto out_valid_renew; } @@ -630,7 +643,17 @@ .d_iput = nfs_dentry_iput, }; -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry) +static inline +int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +{ + if (NFS_PROTO(dir)->version == 2) + return 0; + if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) + return 0; + return (nd->intent.open.flags & O_EXCL) != 0; +} + +static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct inode *inode = NULL; int error; @@ -647,6 +670,10 @@ error = -ENOMEM; dentry->d_op = &nfs_dentry_operations; + /* If we're doing an exclusive create, optimize away the lookup */ + if (nfs_is_exclusive_create(dir, nd)) + return NULL; + lock_kernel(); error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); if (!error) { @@ -787,12 +814,14 @@ * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { struct iattr attr; struct nfs_fattr fattr; struct nfs_fh fhandle; int error; + int open_flags = 0; dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -800,6 +829,9 @@ attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + if (nd && (nd->flags & LOOKUP_CREATE)) + open_flags = nd->intent.open.flags; + /* * The 0 argument passed into the create function should one day * contain the O_EXCL flag if requested. 
This allows NFSv3 to @@ -809,7 +841,7 @@ lock_kernel(); nfs_zap_caches(dir); error = NFS_PROTO(dir)->create(dir, &dentry->d_name, - &attr, 0, &fhandle, &fattr); + &attr, open_flags, &fhandle, &fattr); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -1239,12 +1271,19 @@ } int -nfs_permission(struct inode *inode, int mask) +nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; struct rpc_cred *cred; int mode = inode->i_mode; int res; + + /* Are we checking permissions on anything other than lookup? */ + if (!(mask & MAY_EXEC)) { + /* We only need to check permissions on file open() and access() */ + if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) + return 0; + } if (mask & MAY_WRITE) { /* diff -Nru a/fs/nfs/file.c b/fs/nfs/file.c --- a/fs/nfs/file.c Sat Jul 5 12:40:32 2003 +++ b/fs/nfs/file.c Sat Jul 5 12:40:32 2003 @@ -82,9 +82,6 @@ /* Do NFSv4 open() call */ if ((open = server->rpc_ops->file_open) != NULL) res = open(inode, filp); - /* Do cto revalidation */ - else if (!(server->flags & NFS_MOUNT_NOCTO)) - res = __nfs_revalidate_inode(server, inode); /* Call generic open code in order to cache credentials */ if (!res) res = nfs_open(inode, filp); @@ -104,11 +101,13 @@ dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + lock_kernel(); status = nfs_wb_file(inode, file); if (!status) { status = file->f_error; file->f_error = 0; } + unlock_kernel(); return status; } diff -Nru a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c --- a/fs/nfsd/nfsfh.c Sat Jul 5 12:40:31 2003 +++ b/fs/nfsd/nfsfh.c Sat Jul 5 12:40:31 2003 @@ -56,7 +56,7 @@ /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = permission(parent->d_inode, S_IXOTH); + err = permission(parent->d_inode, S_IXOTH, NULL); if (err < 0) { dput(parent); break; diff -Nru a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c --- a/fs/nfsd/vfs.c Sat Jul 5 12:40:31 2003 +++ b/fs/nfsd/vfs.c Sat Jul 5 12:40:31 2003 @@ -924,7 +924,7 @@ err = nfserr_perm; switch (type) { case S_IFREG: - err = vfs_create(dirp, dchild, iap->ia_mode); + err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: err = vfs_mkdir(dirp, dchild, iap->ia_mode); @@ -1067,7 +1067,7 @@ goto out; } - err = vfs_create(dirp, dchild, iap->ia_mode); + err = vfs_create(dirp, dchild, iap->ia_mode, NULL); if (err < 0) goto out_nfserr; @@ -1584,12 +1584,12 @@ inode->i_uid == current->fsuid) return 0; - err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); + err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && acc == (MAY_READ | MAY_OWNER_OVERRIDE)) - err = permission(inode, MAY_EXEC); + err = permission(inode, MAY_EXEC, NULL); return err? nfserrno(err) : 0; } diff -Nru a/fs/ntfs/namei.c b/fs/ntfs/namei.c --- a/fs/ntfs/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/ntfs/namei.c Sat Jul 5 12:40:32 2003 @@ -29,6 +29,7 @@ * ntfs_lookup - find the inode represented by a dentry in a directory inode * @dir_ino: directory inode in which to look for the inode * @dent: dentry representing the inode to look for + * @nd: lookup nameidata * * In short, ntfs_lookup() looks for the inode represented by the dentry @dent * in the directory inode @dir_ino and if found attaches the inode to the @@ -87,7 +88,7 @@ * name. 
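Taken together, the NFS changes above mean that intent information from open(2) now reaches the filesystem: nfs_lookup() can skip the over-the-wire lookup for an exclusive create, nfs_create() forwards the open flags to the protocol-level create, and nfs_permission() only performs a real check for open()/access() rather than for every path-walk component. A hedged end-to-end illustration in plain userspace C (not part of the patch; assumes an NFSv3 mount):

#include <fcntl.h>
#include <unistd.h>

/* open(O_CREAT|O_EXCL) on the last path component arrives in the kernel with
 * LOOKUP_OPEN|LOOKUP_CREATE set and LOOKUP_CONTINUE clear, and the O_EXCL is
 * stored in nd->intent.open.flags, so nfs_is_exclusive_create() lets
 * nfs_lookup() return early and the exclusive create is sent to the server
 * without a preceding LOOKUP. */
int create_exclusive(const char *path)
{
	int fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0644);

	if (fd < 0)
		return -1;
	close(fd);
	return 0;
}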
We then convert the name to the current NLS code page, and proceed * searching for a dentry with this name, etc, as in case 2), above. */ -static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent) +static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, struct nameidata *nd) { ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); struct inode *dent_inode; diff -Nru a/fs/open.c b/fs/open.c --- a/fs/open.c Sat Jul 5 12:40:31 2003 +++ b/fs/open.c Sat Jul 5 12:40:31 2003 @@ -219,7 +219,7 @@ if (!S_ISREG(inode->i_mode)) goto dput_and_out; - error = permission(inode,MAY_WRITE); + error = permission(inode,MAY_WRITE,&nd); if (error) goto dput_and_out; @@ -365,7 +365,7 @@ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; } else { if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) + (error = permission(inode,MAY_WRITE,&nd)) != 0) goto dput_and_out; } down(&inode->i_sem); @@ -410,7 +410,7 @@ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; } else { if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) + (error = permission(inode,MAY_WRITE,&nd)) != 0) goto dput_and_out; } down(&inode->i_sem); @@ -467,9 +467,9 @@ else current->cap_effective = current->cap_permitted; - res = user_path_walk(filename, &nd); + res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); if (!res) { - res = permission(nd.dentry->d_inode, mode); + res = permission(nd.dentry->d_inode, mode, &nd); /* SuS v2 requires we report a read only fs too */ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) && !special_file(nd.dentry->d_inode->i_mode)) @@ -493,7 +493,7 @@ if (error) goto out; - error = permission(nd.dentry->d_inode,MAY_EXEC); + error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); if (error) goto dput_and_out; @@ -526,7 +526,7 @@ if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = permission(inode, MAY_EXEC); + error = permission(inode, MAY_EXEC, NULL); if (!error) set_fs_pwd(current->fs, mnt, dentry); out_putf: @@ -544,7 +544,7 @@ if (error) goto out; - error = permission(nd.dentry->d_inode,MAY_EXEC); + error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); if (error) goto dput_and_out; @@ -952,11 +952,8 @@ return 0; } retval = 0; - if (filp->f_op && filp->f_op->flush) { - lock_kernel(); + if (filp->f_op && filp->f_op->flush) retval = filp->f_op->flush(filp); - unlock_kernel(); - } dnotify_flush(filp, id); locks_remove_posix(filp, id); fput(filp); diff -Nru a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c --- a/fs/openpromfs/inode.c Sat Jul 5 12:40:31 2003 +++ b/fs/openpromfs/inode.c Sat Jul 5 12:40:31 2003 @@ -59,9 +59,9 @@ #define NODE2INO(node) (node + OPENPROM_FIRST_INO) #define NODEP2INO(no) (no + OPENPROM_FIRST_INO + last_node) -static int openpromfs_create (struct inode *, struct dentry *, int); +static int openpromfs_create (struct inode *, struct dentry *, int, struct nameidata *); static int openpromfs_readdir(struct file *, void *, filldir_t); -static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry); +static struct dentry *openpromfs_lookup(struct inode *, struct dentry *dentry, struct nameidata *nd); static int openpromfs_unlink (struct inode *, struct dentry *dentry); static ssize_t nodenum_read(struct file *file, char *buf, @@ -639,7 +639,7 @@ return 0; } -static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *openpromfs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { int ino = 0; #define 
OPFSL_DIR 0 @@ -854,7 +854,8 @@ return 0; } -static int openpromfs_create (struct inode *dir, struct dentry *dentry, int mode) +static int openpromfs_create (struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { char *p; struct inode *inode; diff -Nru a/fs/proc/base.c b/fs/proc/base.c --- a/fs/proc/base.c Sat Jul 5 12:40:32 2003 +++ b/fs/proc/base.c Sat Jul 5 12:40:32 2003 @@ -334,7 +334,7 @@ goto exit; } -static int proc_permission(struct inode *inode, int mask) +static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) { if (vfs_permission(inode, mask) != 0) return -EACCES; @@ -864,7 +864,7 @@ * directory. In this case, however, we can do it - no aliasing problems * due to the way we treat inodes. */ -static int pid_revalidate(struct dentry * dentry, int flags) +static int pid_revalidate(struct dentry * dentry, struct nameidata *nd) { if (pid_alive(proc_task(dentry->d_inode))) return 1; @@ -872,7 +872,7 @@ return 0; } -static int pid_fd_revalidate(struct dentry * dentry, int flags) +static int pid_fd_revalidate(struct dentry * dentry, struct nameidata *nd) { struct task_struct *task = proc_task(dentry->d_inode); int fd = proc_type(dentry->d_inode) - PROC_PID_FD_DIR; @@ -961,7 +961,7 @@ } /* SMP-safe */ -static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) +static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { struct task_struct *task = proc_task(dir); unsigned fd = name_to_int(dentry); @@ -1219,7 +1219,7 @@ return ERR_PTR(error); } -static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry){ +static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ return proc_pident_lookup(dir, dentry, base_stuff); } @@ -1326,7 +1326,7 @@ } /* SMP-safe */ -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct task_struct *task; struct inode *inode; diff -Nru a/fs/proc/generic.c b/fs/proc/generic.c --- a/fs/proc/generic.c Sat Jul 5 12:40:34 2003 +++ b/fs/proc/generic.c Sat Jul 5 12:40:34 2003 @@ -336,7 +336,7 @@ * Don't create negative dentries here, return -ENOENT by hand * instead. */ -struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry) +struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct proc_dir_entry * de; diff -Nru a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c --- a/fs/proc/proc_misc.c Sat Jul 5 12:40:31 2003 +++ b/fs/proc/proc_misc.c Sat Jul 5 12:40:31 2003 @@ -497,11 +497,10 @@ static int locks_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len; - lock_kernel(); - len = get_locks_status(page, start, off, count); - unlock_kernel(); - if (len < count) *eof = 1; + int len = get_locks_status(page, start, off, count); + + if (len < count) + *eof = 1; return len; } diff -Nru a/fs/proc/root.c b/fs/proc/root.c --- a/fs/proc/root.c Sat Jul 5 12:40:32 2003 +++ b/fs/proc/root.c Sat Jul 5 12:40:32 2003 @@ -79,19 +79,21 @@ proc_bus = proc_mkdir("bus", 0); } -static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry) +static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { - if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... 
*/ - lock_kernel(); + /* + * nr_threads is actually protected by the tasklist_lock; + * however, it's conventional to do reads, especially for + * reporting, without any locking whatsoever. + */ + if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */ dir->i_nlink = proc_root.nlink + nr_threads; - unlock_kernel(); - } - if (!proc_lookup(dir, dentry)) { + if (!proc_lookup(dir, dentry, nd)) { return NULL; } - return proc_pid_lookup(dir, dentry); + return proc_pid_lookup(dir, dentry, nd); } static int proc_root_readdir(struct file * filp, diff -Nru a/fs/qnx4/namei.c b/fs/qnx4/namei.c --- a/fs/qnx4/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/qnx4/namei.c Sat Jul 5 12:40:32 2003 @@ -107,7 +107,7 @@ return NULL; } -struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry) +struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { int ino; struct qnx4_inode_entry *de; @@ -142,7 +142,8 @@ } #ifdef CONFIG_QNX4FS_RW -int qnx4_create(struct inode *dir, struct dentry *dentry, int mode) +int qnx4_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { QNX4DEBUG(("qnx4: qnx4_create\n")); if (dir == NULL) { diff -Nru a/fs/ramfs/inode.c b/fs/ramfs/inode.c --- a/fs/ramfs/inode.c Sat Jul 5 12:40:31 2003 +++ b/fs/ramfs/inode.c Sat Jul 5 12:40:31 2003 @@ -111,7 +111,7 @@ return retval; } -static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode) +static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); } @@ -146,6 +146,7 @@ .mmap = generic_file_mmap, .fsync = simple_sync_file, .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, }; static struct inode_operations ramfs_file_inode_operations = { diff -Nru a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c --- a/fs/reiserfs/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/reiserfs/namei.c Sat Jul 5 12:40:32 2003 @@ -316,7 +316,7 @@ } -static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry) +static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry, struct nameidata *nd) { int retval; struct inode * inode = NULL; @@ -558,7 +558,8 @@ return 0 ; } -static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode) +static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode, + struct nameidata *nd) { int retval; struct inode * inode; diff -Nru a/fs/romfs/inode.c b/fs/romfs/inode.c --- a/fs/romfs/inode.c Sat Jul 5 12:40:32 2003 +++ b/fs/romfs/inode.c Sat Jul 5 12:40:32 2003 @@ -329,7 +329,7 @@ } static struct dentry * -romfs_lookup(struct inode *dir, struct dentry *dentry) +romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { unsigned long offset, maxoff; int fslen, res; diff -Nru a/fs/smbfs/dir.c b/fs/smbfs/dir.c --- a/fs/smbfs/dir.c Sat Jul 5 12:40:32 2003 +++ b/fs/smbfs/dir.c Sat Jul 5 12:40:33 2003 @@ -24,8 +24,8 @@ static int smb_readdir(struct file *, void *, filldir_t); static int smb_dir_open(struct inode *, struct file *); -static struct dentry *smb_lookup(struct inode *, struct dentry *); -static int smb_create(struct inode *, struct dentry *, int); +static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *); +static int smb_create(struct inode *, struct dentry *, int, struct nameidata *); static int smb_mkdir(struct inode *, struct dentry *, int); static int smb_rmdir(struct inode *, struct dentry *); static int smb_unlink(struct 
inode *, struct dentry *); @@ -268,7 +268,7 @@ /* * Dentry operations routines */ -static int smb_lookup_validate(struct dentry *, int); +static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); static int smb_delete_dentry(struct dentry *); @@ -292,7 +292,7 @@ * This is the callback when the dcache has a lookup hit. */ static int -smb_lookup_validate(struct dentry * dentry, int flags) +smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) { struct smb_sb_info *server = server_from_dentry(dentry); struct inode * inode = dentry->d_inode; @@ -420,7 +420,7 @@ } static struct dentry * -smb_lookup(struct inode *dir, struct dentry *dentry) +smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct smb_fattr finfo; struct inode *inode; @@ -510,7 +510,8 @@ /* N.B. How should the mode argument be used? */ static int -smb_create(struct inode *dir, struct dentry *dentry, int mode) +smb_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { struct smb_sb_info *server = server_from_dentry(dentry); __u16 fileid; diff -Nru a/fs/smbfs/file.c b/fs/smbfs/file.c --- a/fs/smbfs/file.c Sat Jul 5 12:40:32 2003 +++ b/fs/smbfs/file.c Sat Jul 5 12:40:32 2003 @@ -367,7 +367,7 @@ * privileges, so we need our own check for this. */ static int -smb_file_permission(struct inode *inode, int mask) +smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; int error = 0; diff -Nru a/fs/sysfs/bin.c b/fs/sysfs/bin.c --- a/fs/sysfs/bin.c Sat Jul 5 12:40:32 2003 +++ b/fs/sysfs/bin.c Sat Jul 5 12:40:32 2003 @@ -2,6 +2,8 @@ * bin.c - binary file operations for sysfs. 
*/ +#undef DEBUG + #include #include #include @@ -42,18 +44,17 @@ ret = fill_read(dentry, buffer, offs, count); if (ret < 0) - goto Done; + return ret; count = ret; - ret = -EFAULT; - if (copy_to_user(userbuf, buffer, count) != 0) - goto Done; + if (copy_to_user(userbuf, buffer + offs, count) != 0) + return -EINVAL; + + pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); *off = offs + count; - ret = count; - Done: - return ret; + return count; } static int @@ -72,7 +73,6 @@ struct dentry *dentry = file->f_dentry; int size = dentry->d_inode->i_size; loff_t offs = *off; - int ret; if (count > PAGE_SIZE) count = PAGE_SIZE; @@ -83,16 +83,13 @@ count = size - offs; } - ret = -EFAULT; - if (copy_from_user(buffer, userbuf, count)) - goto Done; + if (copy_from_user(buffer + offs, userbuf, count)) + return -EFAULT; count = flush_write(dentry, buffer, offs, count); if (count > 0) *off = offs + count; - ret = count; - Done: - return ret; + return count; } static int open(struct inode * inode, struct file * file) diff -Nru a/fs/sysfs/dir.c b/fs/sysfs/dir.c --- a/fs/sysfs/dir.c Sat Jul 5 12:40:31 2003 +++ b/fs/sysfs/dir.c Sat Jul 5 12:40:31 2003 @@ -121,7 +121,29 @@ dput(parent); } +void sysfs_rename_dir(struct kobject * kobj, char *new_name) +{ + struct dentry * new_dentry, * parent; + + if (!strcmp(kobj->name, new_name)) + return; + + if (!kobj->parent) + return; + + parent = kobj->parent->dentry; + + down(&parent->d_inode->i_sem); + + new_dentry = sysfs_get_dentry(parent, new_name); + d_move(kobj->dentry, new_dentry); + + strlcpy(kobj->name, new_name, KOBJ_NAME_LEN); + + up(&parent->d_inode->i_sem); +} EXPORT_SYMBOL(sysfs_create_dir); EXPORT_SYMBOL(sysfs_remove_dir); +EXPORT_SYMBOL(sysfs_rename_dir); diff -Nru a/fs/sysfs/file.c b/fs/sysfs/file.c --- a/fs/sysfs/file.c Sat Jul 5 12:40:31 2003 +++ b/fs/sysfs/file.c Sat Jul 5 12:40:31 2003 @@ -247,6 +247,12 @@ if (!kobj || !attr) goto Einval; + /* Grab the module reference for this attribute if we have one */ + if (!try_module_get(attr->owner)) { + error = -ENODEV; + goto Done; + } + /* if the kobject has no ktype, then we assume that it is a subsystem * itself, and use ops for it. 
*/ @@ -300,6 +306,7 @@ goto Done; Eaccess: error = -EACCES; + module_put(attr->owner); Done: if (error && kobj) kobject_put(kobj); @@ -314,10 +321,12 @@ static int sysfs_release(struct inode * inode, struct file * filp) { struct kobject * kobj = filp->f_dentry->d_parent->d_fsdata; + struct attribute * attr = filp->f_dentry->d_fsdata; struct sysfs_buffer * buffer = filp->private_data; if (kobj) kobject_put(kobj); + module_put(attr->owner); if (buffer) { if (buffer->page) diff -Nru a/fs/sysv/namei.c b/fs/sysv/namei.c --- a/fs/sysv/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/sysv/namei.c Sat Jul 5 12:40:32 2003 @@ -64,7 +64,7 @@ .d_hash = sysv_hash, }; -static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry) +static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) { struct inode * inode = NULL; ino_t ino; @@ -96,7 +96,7 @@ return err; } -static int sysv_create(struct inode * dir, struct dentry * dentry, int mode) +static int sysv_create(struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { return sysv_mknod(dir, dentry, mode, 0); } diff -Nru a/fs/udf/file.c b/fs/udf/file.c --- a/fs/udf/file.c Sat Jul 5 12:40:32 2003 +++ b/fs/udf/file.c Sat Jul 5 12:40:32 2003 @@ -188,7 +188,7 @@ { int result = -EINVAL; - if ( permission(inode, MAY_READ) != 0 ) + if ( permission(inode, MAY_READ, NULL) != 0 ) { udf_debug("no permission to access inode %lu\n", inode->i_ino); diff -Nru a/fs/udf/namei.c b/fs/udf/namei.c --- a/fs/udf/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/udf/namei.c Sat Jul 5 12:40:32 2003 @@ -289,6 +289,7 @@ * PRE-CONDITIONS * dir Pointer to inode of parent directory. * dentry Pointer to dentry to complete. + * nd Pointer to lookup nameidata * * POST-CONDITIONS * Zero on success. @@ -299,7 +300,7 @@ */ static struct dentry * -udf_lookup(struct inode *dir, struct dentry *dentry) +udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode = NULL; struct fileIdentDesc cfi, *fi; @@ -620,7 +621,7 @@ return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); } -static int udf_create(struct inode *dir, struct dentry *dentry, int mode) +static int udf_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct udf_fileident_bh fibh; struct inode *inode; diff -Nru a/fs/ufs/namei.c b/fs/ufs/namei.c --- a/fs/ufs/namei.c Sat Jul 5 12:40:34 2003 +++ b/fs/ufs/namei.c Sat Jul 5 12:40:34 2003 @@ -62,7 +62,7 @@ return err; } -static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry) +static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode = NULL; ino_t ino; @@ -92,7 +92,8 @@ * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ufs_create (struct inode * dir, struct dentry * dentry, int mode) +static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) { struct inode * inode = ufs_new_inode(dir, mode); int err = PTR_ERR(inode); diff -Nru a/fs/umsdos/dir.c b/fs/umsdos/dir.c --- a/fs/umsdos/dir.c Sat Jul 5 12:40:31 2003 +++ b/fs/umsdos/dir.c Sat Jul 5 12:40:31 2003 @@ -30,7 +30,7 @@ */ /* nothing for now ... */ -static int umsdos_dentry_validate(struct dentry *dentry, int flags) +static int umsdos_dentry_validate(struct dentry *dentry, struct nameidata *nd) { return 1; } @@ -564,7 +564,7 @@ * Called by VFS; should fill dentry->d_inode via d_add. 
*/ -struct dentry *UMSDOS_lookup (struct inode *dir, struct dentry *dentry) +struct dentry *UMSDOS_lookup (struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *ret; diff -Nru a/fs/umsdos/emd.c b/fs/umsdos/emd.c --- a/fs/umsdos/emd.c Sat Jul 5 12:40:32 2003 +++ b/fs/umsdos/emd.c Sat Jul 5 12:40:32 2003 @@ -105,7 +105,7 @@ Printk(("umsdos_make_emd: creating EMD %s/%s\n", parent->d_name.name, demd->d_name.name)); - err = msdos_create(parent->d_inode, demd, S_IFREG | 0777); + err = msdos_create(parent->d_inode, demd, S_IFREG | 0777, NULL); if (err) { printk (KERN_WARNING "umsdos_make_emd: create %s/%s failed, err=%d\n", diff -Nru a/fs/umsdos/namei.c b/fs/umsdos/namei.c --- a/fs/umsdos/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/umsdos/namei.c Sat Jul 5 12:40:32 2003 @@ -274,7 +274,7 @@ if (fake->d_inode) goto out_remove_dput; - ret = msdos_create (dir, fake, S_IFREG | 0777); + ret = msdos_create (dir, fake, S_IFREG | 0777, NULL); if (ret) goto out_remove_dput; @@ -311,7 +311,7 @@ * * Return the status of the operation. 0 mean success. */ -int UMSDOS_create (struct inode *dir, struct dentry *dentry, int mode) +int UMSDOS_create (struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return umsdos_create_any (dir, dentry, mode, 0, 0); } diff -Nru a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c --- a/fs/umsdos/rdir.c Sat Jul 5 12:40:31 2003 +++ b/fs/umsdos/rdir.c Sat Jul 5 12:40:31 2003 @@ -101,7 +101,7 @@ goto out; } - ret = msdos_lookup (dir, dentry); + ret = msdos_lookup (dir, dentry, NULL); if (ret) { printk(KERN_WARNING "umsdos_rlookup_x: %s/%s failed, ret=%ld\n", @@ -129,7 +129,7 @@ } -struct dentry *UMSDOS_rlookup ( struct inode *dir, struct dentry *dentry) +struct dentry *UMSDOS_rlookup ( struct inode *dir, struct dentry *dentry, struct nameidata *nd) { return umsdos_rlookup_x (dir, dentry, 0); } diff -Nru a/fs/vfat/namei.c b/fs/vfat/namei.c --- a/fs/vfat/namei.c Sat Jul 5 12:40:32 2003 +++ b/fs/vfat/namei.c Sat Jul 5 12:40:32 2003 @@ -45,7 +45,7 @@ static int vfat_hash(struct dentry *parent, struct qstr *qstr); static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); -static int vfat_revalidate(struct dentry *dentry, int); +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd); static struct dentry_operations vfat_dentry_ops[4] = { { @@ -68,7 +68,7 @@ } }; -static int vfat_revalidate(struct dentry *dentry, int flags) +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) { PRINTK1(("vfat_revalidate: %s\n", dentry->d_name.name)); spin_lock(&dcache_lock); @@ -860,7 +860,7 @@ return res ? 
res : -ENOENT; } -struct dentry *vfat_lookup(struct inode *dir,struct dentry *dentry) +struct dentry *vfat_lookup(struct inode *dir,struct dentry *dentry, struct nameidata *nd) { int res; struct vfat_slot_info sinfo; @@ -912,7 +912,8 @@ return dentry; } -int vfat_create(struct inode *dir,struct dentry* dentry,int mode) +int vfat_create(struct inode *dir,struct dentry* dentry,int mode, + struct nameidata *nd) { struct super_block *sb = dir->i_sb; struct inode *inode = NULL; diff -Nru a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c --- a/fs/xfs/linux/xfs_iops.c Sat Jul 5 12:40:31 2003 +++ b/fs/xfs/linux/xfs_iops.c Sat Jul 5 12:40:31 2003 @@ -175,7 +175,8 @@ linvfs_create( struct inode *dir, struct dentry *dentry, - int mode) + int mode, + struct nameidata *nd) { return linvfs_mknod(dir, dentry, mode, 0); } @@ -192,7 +193,8 @@ STATIC struct dentry * linvfs_lookup( struct inode *dir, - struct dentry *dentry) + struct dentry *dentry, + struct nameidata *nd) { struct inode *ip = NULL; vnode_t *vp, *cvp = NULL; @@ -429,7 +431,8 @@ STATIC int linvfs_permission( struct inode *inode, - int mode) + int mode, + struct nameidata *nd) { vnode_t *vp = LINVFS_GET_VP(inode); int error; diff -Nru a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h --- a/include/asm-alpha/mmzone.h Sat Jul 5 12:40:32 2003 +++ b/include/asm-alpha/mmzone.h Sat Jul 5 12:40:32 2003 @@ -31,7 +31,6 @@ #define pa_to_nid(pa) alpha_pa_to_nid(pa) #define NODE_DATA(nid) (&node_data[(nid)]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) @@ -124,7 +123,7 @@ #define pfn_to_nid(pfn) pa_to_nid(((u64)pfn << PAGE_SHIFT)) #define pfn_valid(pfn) \ (((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \ - node_size(pfn_to_nid(pfn))) \ + node_spanned_pages(pfn_to_nid(pfn))) \ #define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) diff -Nru a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h --- a/include/asm-i386/cacheflush.h Sat Jul 5 12:40:32 2003 +++ b/include/asm-i386/cacheflush.h Sat Jul 5 12:40:32 2003 @@ -17,4 +17,9 @@ void global_flush_tlb(void); int change_page_attr(struct page *page, int numpages, pgprot_t prot); +#ifdef CONFIG_DEBUG_PAGEALLOC +/* internal debugging function */ +void kernel_map_pages(struct page *page, int numpages, int enable); +#endif + #endif /* _I386_CACHEFLUSH_H */ diff -Nru a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h --- a/include/asm-i386/hardirq.h Sat Jul 5 12:40:32 2003 +++ b/include/asm-i386/hardirq.h Sat Jul 5 12:40:32 2003 @@ -7,8 +7,6 @@ typedef struct { unsigned int __softirq_pending; - unsigned int __syscall_count; - struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ diff -Nru a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h --- a/include/asm-i386/mmzone.h Sat Jul 5 12:40:32 2003 +++ b/include/asm-i386/mmzone.h Sat Jul 5 12:40:32 2003 @@ -32,8 +32,7 @@ #define alloc_bootmem_low_pages_node(ignore, x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define node_size(nid) (node_data[nid]->node_size) -#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) +#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) /* * Following are macros that each numa implmentation must define. 
@@ -54,7 +53,7 @@ #define node_end_pfn(nid) \ ({ \ pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_size; \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) #define local_mapnr(kvaddr) \ diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h --- a/include/asm-i386/timer.h Sat Jul 5 12:40:31 2003 +++ b/include/asm-i386/timer.h Sat Jul 5 12:40:31 2003 @@ -21,8 +21,21 @@ #define TICK_SIZE (tick_nsec / 1000) extern struct timer_opts* select_timer(void); +extern void clock_fallback(void); /* Modifiers for buggy PIT handling */ extern int pit_latch_buggy; + +extern struct timer_opts *cur_timer; +extern int timer_ack; + +/* list of externed timers */ +extern struct timer_opts timer_none; +extern struct timer_opts timer_pit; +extern struct timer_opts timer_tsc; +#ifdef CONFIG_X86_CYCLONE_TIMER +extern struct timer_opts timer_cyclone; +#endif + #endif diff -Nru a/include/asm-mips64/mmzone.h b/include/asm-mips64/mmzone.h --- a/include/asm-mips64/mmzone.h Sat Jul 5 12:40:31 2003 +++ b/include/asm-mips64/mmzone.h Sat Jul 5 12:40:31 2003 @@ -24,7 +24,7 @@ #define PHYSADDR_TO_NID(pa) NASID_TO_COMPACT_NODEID(NASID_GET(pa)) #define PLAT_NODE_DATA(n) (plat_node_data[n]) -#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_size) +#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_spanned_pages) #define PLAT_NODE_DATA_LOCALNR(p, n) \ (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) diff -Nru a/include/asm-ppc/serial.h b/include/asm-ppc/serial.h --- a/include/asm-ppc/serial.h Sat Jul 5 12:40:32 2003 +++ b/include/asm-ppc/serial.h Sat Jul 5 12:40:32 2003 @@ -25,11 +25,9 @@ #elif defined(CONFIG_PRPMC800) #include #elif defined(CONFIG_SANDPOINT) -#include +#include #elif defined(CONFIG_SPRUCE) #include -#elif defined(CONFIG_ZX4500) -#include #elif defined(CONFIG_40x) #include #else diff -Nru a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h --- a/include/asm-ppc64/mmzone.h Sat Jul 5 12:40:32 2003 +++ b/include/asm-ppc64/mmzone.h Sat Jul 5 12:40:32 2003 @@ -54,7 +54,6 @@ */ #define NODE_DATA(nid) (&node_data[nid]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) /* diff -Nru a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h --- a/include/asm-x86_64/mmzone.h Sat Jul 5 12:40:32 2003 +++ b/include/asm-x86_64/mmzone.h Sat Jul 5 12:40:32 2003 @@ -40,8 +40,7 @@ #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_size) -#define node_size(nid) (NODE_DATA(nid)->node_size) + NODE_DATA(nid)->node_spanned_pages) #define local_mapnr(kvaddr) \ ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) diff -Nru a/include/linux/affs_fs.h b/include/linux/affs_fs.h --- a/include/linux/affs_fs.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/affs_fs.h Sat Jul 5 12:40:31 2003 @@ -41,9 +41,9 @@ /* namei.c */ extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); -extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry); +extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); extern int affs_unlink(struct inode *dir, struct dentry *dentry); -extern int affs_create(struct inode *dir, struct dentry *dentry, int mode); +extern int affs_create(struct inode *dir, struct dentry *dentry, int mode, struct 
nameidata *); extern int affs_mkdir(struct inode *dir, struct dentry *dentry, int mode); extern int affs_rmdir(struct inode *dir, struct dentry *dentry); extern int affs_link(struct dentry *olddentry, struct inode *dir, diff -Nru a/include/linux/blkdev.h b/include/linux/blkdev.h --- a/include/linux/blkdev.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/blkdev.h Sat Jul 5 12:40:31 2003 @@ -22,11 +22,62 @@ struct request_pm_state; #define BLKDEV_MIN_RQ 4 -#define BLKDEV_MAX_RQ 128 +#define BLKDEV_MAX_RQ 128 /* Default maximum */ + +/* + * This is the per-process anticipatory I/O scheduler state. + */ +struct as_io_context { + spinlock_t lock; + + void (*dtor)(struct as_io_context *aic); /* destructor */ + void (*exit)(struct as_io_context *aic); /* called on task exit */ + + unsigned long state; + atomic_t nr_queued; /* queued reads & sync writes */ + atomic_t nr_dispatched; /* number of requests gone to the drivers */ + + /* IO History tracking */ + /* Thinktime */ + unsigned long last_end_request; + unsigned long ttime_total; + unsigned long ttime_samples; + unsigned long ttime_mean; + /* Layout pattern */ + long seek_samples; + sector_t last_request_pos; + sector_t seek_total; + sector_t seek_mean; +}; + +/* + * This is the per-process I/O subsystem state. It is refcounted and + * kmalloc'ed. Currently all fields are modified in process io context + * (apart from the atomic refcount), so require no locking. + */ +struct io_context { + atomic_t refcount; + pid_t pid; + + /* + * For request batching + */ + unsigned long last_waited; /* Time last woken after wait for request */ + int nr_batch_requests; /* Number of requests left in the batch */ + + struct as_io_context *aic; +}; + +void put_io_context(struct io_context *ioc); +void exit_io_context(void); +struct io_context *get_io_context(int gfp_flags); +void copy_io_context(struct io_context **pdst, struct io_context **psrc); +void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); struct request_list { int count[2]; mempool_t *rq_pool; + wait_queue_head_t wait[2]; }; /* @@ -268,8 +319,15 @@ spinlock_t *queue_lock; /* + * queue kobject + */ + struct kobject kobj; + + /* * queue settings */ + unsigned long nr_requests; /* Max # of requests */ + unsigned short max_sectors; unsigned short max_phys_segments; unsigned short max_hw_segments; @@ -299,6 +357,8 @@ #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ +#define QUEUE_FLAG_READFULL 3 /* write queue has been filled */ +#define QUEUE_FLAG_WRITEFULL 4 /* read queue has been filled */ #define blk_queue_plugged(q) !list_empty(&(q)->plug_list) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) @@ -314,6 +374,30 @@ #define rq_data_dir(rq) ((rq)->flags & 1) +static inline int blk_queue_full(struct request_queue *q, int rw) +{ + if (rw == READ) + return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); +} + +static inline void blk_set_queue_full(struct request_queue *q, int rw) +{ + if (rw == READ) + set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + else + set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); +} + +static inline void blk_clear_queue_full(struct request_queue *q, int rw) +{ + if (rw == READ) + clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + else + clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); +} + + /* * mergeable request must not have _NOMERGE or _BARRIER bit 
set, nor may * it already be started by driver. @@ -397,6 +481,8 @@ unsigned block_size_bits; }; +extern int blk_register_queue(struct gendisk *disk); +extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); @@ -559,6 +645,10 @@ { page_cache_release(p.v); } + +struct work_struct; +int kblockd_schedule_work(struct work_struct *work); +void kblockd_flush(void); #ifdef CONFIG_LBD # include diff -Nru a/include/linux/buffer_head.h b/include/linux/buffer_head.h --- a/include/linux/buffer_head.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/buffer_head.h Sat Jul 5 12:40:31 2003 @@ -167,6 +167,7 @@ struct buffer_head * __getblk(struct block_device *, sector_t, int); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); +void __breadahead(struct block_device *, sector_t block, int size); struct buffer_head *__bread(struct block_device *, sector_t block, int size); struct buffer_head *alloc_buffer_head(int gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -239,6 +240,12 @@ sb_bread(struct super_block *sb, sector_t block) { return __bread(sb->s_bdev, block, sb->s_blocksize); +} + +static inline void +sb_breadahead(struct super_block *sb, sector_t block) +{ + __breadahead(sb->s_bdev, block, sb->s_blocksize); } static inline struct buffer_head * diff -Nru a/include/linux/coda_linux.h b/include/linux/coda_linux.h --- a/include/linux/coda_linux.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/coda_linux.h Sat Jul 5 12:40:31 2003 @@ -38,7 +38,7 @@ int coda_open(struct inode *i, struct file *f); int coda_flush(struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct inode *inode, int mask); +int coda_permission(struct inode *inode, int mask, struct nameidata *nd); int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff -Nru a/include/linux/dcache.h b/include/linux/dcache.h --- a/include/linux/dcache.h Sat Jul 5 12:40:33 2003 +++ b/include/linux/dcache.h Sat Jul 5 12:40:33 2003 @@ -10,6 +10,7 @@ #include #include +struct nameidata; struct vfsmount; /* @@ -106,7 +107,7 @@ #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) struct dentry_operations { - int (*d_revalidate)(struct dentry *, int); + int (*d_revalidate)(struct dentry *, struct nameidata *); int (*d_hash) (struct dentry *, struct qstr *); int (*d_compare) (struct dentry *, struct qstr *, struct qstr *); int (*d_delete)(struct dentry *); diff -Nru a/include/linux/device.h b/include/linux/device.h --- a/include/linux/device.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/device.h Sat Jul 5 12:40:32 2003 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -95,7 +96,7 @@ #define BUS_ATTR(_name,_mode,_show,_store) \ struct bus_attribute bus_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -136,7 +137,7 @@ #define DRIVER_ATTR(_name,_mode,_show,_store) \ struct driver_attribute driver_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -176,7 +177,7 @@ #define 
CLASS_ATTR(_name,_mode,_show,_store) \ struct class_attribute class_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -215,6 +216,8 @@ extern int class_device_add(struct class_device *); extern void class_device_del(struct class_device *); +extern int class_device_rename(struct class_device *, char *); + extern struct class_device * class_device_get(struct class_device *); extern void class_device_put(struct class_device *); @@ -226,7 +229,7 @@ #define CLASS_DEVICE_ATTR(_name,_mode,_show,_store) \ struct class_device_attribute class_device_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -324,7 +327,7 @@ #define DEVICE_ATTR(_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; diff -Nru a/include/linux/efs_fs.h b/include/linux/efs_fs.h --- a/include/linux/efs_fs.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/efs_fs.h Sat Jul 5 12:40:31 2003 @@ -46,7 +46,7 @@ extern void efs_read_inode(struct inode *); extern efs_block_t efs_map_block(struct inode *, efs_block_t); -extern struct dentry *efs_lookup(struct inode *, struct dentry *); +extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int efs_bmap(struct inode *, int); #endif /* __EFS_FS_H__ */ diff -Nru a/include/linux/eisa.h b/include/linux/eisa.h --- a/include/linux/eisa.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/eisa.h Sat Jul 5 12:40:32 2003 @@ -4,6 +4,8 @@ #define EISA_SIG_LEN 8 #define EISA_MAX_SLOTS 8 +#define EISA_MAX_RESOURCES 4 + /* A few EISA constants/offsets... */ #define EISA_DMA1_STATUS 8 @@ -17,6 +19,10 @@ #define EISA_INT1_EDGE_LEVEL 0x4D0 #define EISA_INT2_EDGE_LEVEL 0x4D1 #define EISA_VENDOR_ID_OFFSET 0xC80 +#define EISA_CONFIG_OFFSET 0xC84 + +#define EISA_CONFIG_ENABLED 1 +#define EISA_CONFIG_FORCED 2 /* The EISA signature, in ASCII form, null terminated */ struct eisa_device_id { @@ -26,19 +32,28 @@ /* There is not much we can say about an EISA device, apart from * signature, slot number, and base address. 
dma_mask is set by - * default to 32 bits.*/ + * default to parent device mask..*/ struct eisa_device { struct eisa_device_id id; int slot; + int state; unsigned long base_addr; - struct resource res; + struct resource res[EISA_MAX_RESOURCES]; u64 dma_mask; struct device dev; /* generic device */ }; #define to_eisa_device(n) container_of(n, struct eisa_device, dev) +static inline int eisa_get_region_index (void *addr) +{ + unsigned long x = (unsigned long) addr; + + x &= 0xc00; + return (x >> 12); +} + struct eisa_driver { const struct eisa_device_id *id_table; struct device_driver driver; @@ -69,6 +84,8 @@ struct resource *res; unsigned long bus_base_addr; int slots; /* Max slot number */ + int force_probe; /* Probe even when no slot 0 */ + u64 dma_mask; /* from bridge device */ int bus_nr; /* Set by eisa_root_register */ struct resource eisa_root_res; /* ditto */ }; diff -Nru a/include/linux/elevator.h b/include/linux/elevator.h --- a/include/linux/elevator.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/elevator.h Sat Jul 5 12:40:31 2003 @@ -15,6 +15,9 @@ typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef struct list_head *(elevator_get_sort_head_fn) (request_queue_t *, struct request *); +typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); +typedef int (elevator_may_queue_fn) (request_queue_t *, int); + typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); @@ -32,6 +35,7 @@ elevator_remove_req_fn *elevator_remove_req_fn; elevator_queue_empty_fn *elevator_queue_empty_fn; + elevator_completed_req_fn *elevator_completed_req_fn; elevator_request_list_fn *elevator_former_req_fn; elevator_request_list_fn *elevator_latter_req_fn; @@ -39,6 +43,8 @@ elevator_set_req_fn *elevator_set_req_fn; elevator_put_req_fn *elevator_put_req_fn; + elevator_may_queue_fn *elevator_may_queue_fn; + elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; @@ -62,8 +68,10 @@ extern struct request *elv_next_request(struct request_queue *q); extern struct request *elv_former_request(request_queue_t *, struct request *); extern struct request *elv_latter_request(request_queue_t *, struct request *); -extern int elv_register_queue(struct gendisk *); -extern void elv_unregister_queue(struct gendisk *); +extern int elv_register_queue(request_queue_t *q); +extern void elv_unregister_queue(request_queue_t *q); +extern int elv_may_queue(request_queue_t *, int); +extern void elv_completed_request(request_queue_t *, struct request *); extern int elv_set_request(request_queue_t *, struct request *, int); extern void elv_put_request(request_queue_t *, struct request *); @@ -80,6 +88,11 @@ * starvation */ extern elevator_t iosched_deadline; + +/* + * anticipatory I/O scheduler + */ +extern elevator_t iosched_as; extern int elevator_init(request_queue_t *, elevator_t *); extern void elevator_exit(request_queue_t *); diff -Nru a/include/linux/eventpoll.h b/include/linux/eventpoll.h --- a/include/linux/eventpoll.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/eventpoll.h Sat Jul 5 12:40:31 2003 @@ -14,6 +14,8 @@ #ifndef _LINUX_EVENTPOLL_H #define _LINUX_EVENTPOLL_H +#include + /* Valid opcodes to issue to sys_epoll_ctl() */ #define EPOLL_CTL_ADD 1 @@ -55,8 +57,37 @@ /* Used to initialize the epoll bits inside the "struct file" */ void eventpoll_init_file(struct file *file); 
-/* Used in fs/file_table.c:__fput() to unlink files from the eventpoll interface */ -void eventpoll_release(struct file *file); +/* Used to release the epoll bits inside the "struct file" */ +void eventpoll_release_file(struct file *file); + +/* + * This is called from inside fs/file_table.c:__fput() to unlink files + * from the eventpoll interface. We need to have this facility to cleanup + * correctly files that are closed without being removed from the eventpoll + * interface. + */ +static inline void eventpoll_release(struct file *file) +{ + + /* + * Fast check to avoid the get/release of the semaphore. Since + * we're doing this outside the semaphore lock, it might return + * false negatives, but we don't care. It'll help in 99.99% of cases + * to avoid the semaphore lock. False positives simply cannot happen + * because the file in on the way to be removed and nobody ( but + * eventpoll ) has still a reference to this file. + */ + if (likely(list_empty(&file->f_ep_links))) + return; + + /* + * The file is being closed while it is still linked to an epoll + * descriptor. We need to handle this by correctly unlinking it + * from its containers. + */ + eventpoll_release_file(file); +} + #else diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h --- a/include/linux/ext3_fs.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/ext3_fs.h Sat Jul 5 12:40:32 2003 @@ -636,10 +636,14 @@ struct ext3_iloc { struct buffer_head *bh; - struct ext3_inode *raw_inode; + unsigned long offset; unsigned long block_group; }; +static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc) +{ + return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); +} /* * This structure is stuffed into the struct file's private_data field diff -Nru a/include/linux/fs.h b/include/linux/fs.h --- a/include/linux/fs.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/fs.h Sat Jul 5 12:40:31 2003 @@ -639,7 +639,7 @@ /* * VFS helper functions.. 
*/ -extern int vfs_create(struct inode *, struct dentry *, int); +extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, int); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); @@ -730,8 +730,8 @@ }; struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); - struct dentry * (*lookup) (struct inode *,struct dentry *); + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); @@ -743,7 +743,7 @@ int (*readlink) (struct dentry *, char __user *,int); int (*follow_link) (struct dentry *, struct nameidata *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); + int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1121,7 +1121,7 @@ extern sector_t bmap(struct inode *, sector_t); extern int setattr_mask(unsigned int); extern int notify_change(struct dentry *, struct iattr *); -extern int permission(struct inode *, int); +extern int permission(struct inode *, int, struct nameidata *); extern int vfs_permission(struct inode *, int); extern int get_write_access(struct inode *); extern int deny_write_access(struct file *); @@ -1291,7 +1291,7 @@ extern int simple_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to); -extern struct dentry *simple_lookup(struct inode *, struct dentry *); +extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern struct file_operations simple_dir_operations; extern struct inode_operations simple_dir_inode_operations; diff -Nru a/include/linux/hfs_fs.h b/include/linux/hfs_fs.h --- a/include/linux/hfs_fs.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/hfs_fs.h Sat Jul 5 12:40:32 2003 @@ -234,7 +234,7 @@ const struct hfs_cat_key *); /* dir.c */ -extern int hfs_create(struct inode *, struct dentry *, int); +extern int hfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int hfs_mkdir(struct inode *, struct dentry *, int); extern int hfs_unlink(struct inode *, struct dentry *); extern int hfs_rmdir(struct inode *, struct dentry *); diff -Nru a/include/linux/interrupt.h b/include/linux/interrupt.h --- a/include/linux/interrupt.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/interrupt.h Sat Jul 5 12:40:32 2003 @@ -94,8 +94,8 @@ asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); -#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0) -extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr)); +#define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0) +extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); extern void FASTCALL(raise_softirq(unsigned int nr)); #ifndef invoke_softirq diff -Nru a/include/linux/ioport.h b/include/linux/ioport.h --- 
a/include/linux/ioport.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/ioport.h Sat Jul 5 12:40:31 2003 @@ -43,6 +43,7 @@ #define IORESOURCE_SHADOWABLE 0x00010000 #define IORESOURCE_BUS_HAS_VGA 0x00080000 +#define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ diff -Nru a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h --- a/include/linux/irq_cpustat.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/irq_cpustat.h Sat Jul 5 12:40:32 2003 @@ -29,10 +29,6 @@ /* arch independent irq_stat fields */ #define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) #define local_softirq_pending() softirq_pending(smp_processor_id()) -#define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count) -#define local_syscall_count() syscall_count(smp_processor_id()) -#define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task) -#define local_ksoftirqd_task() ksoftirqd_task(smp_processor_id()) /* arch dependent irq_stat fields */ #define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386 */ diff -Nru a/include/linux/iso_fs.h b/include/linux/iso_fs.h --- a/include/linux/iso_fs.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/iso_fs.h Sat Jul 5 12:40:32 2003 @@ -227,7 +227,7 @@ int get_joliet_filename(struct iso_directory_record *, unsigned char *, struct inode *); int get_acorn_filename(struct iso_directory_record *, char *, struct inode *); -extern struct dentry *isofs_lookup(struct inode *, struct dentry *); +extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct nameidata *); extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); diff -Nru a/include/linux/kobject.h b/include/linux/kobject.h --- a/include/linux/kobject.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/kobject.h Sat Jul 5 12:40:31 2003 @@ -39,6 +39,8 @@ extern int kobject_add(struct kobject *); extern void kobject_del(struct kobject *); +extern void kobject_rename(struct kobject *, char *new_name); + extern int kobject_register(struct kobject *); extern void kobject_unregister(struct kobject *); diff -Nru a/include/linux/mm.h b/include/linux/mm.h --- a/include/linux/mm.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/mm.h Sat Jul 5 12:40:31 2003 @@ -339,9 +339,14 @@ page->flags |= zone_num << ZONE_SHIFT; } -static inline void * lowmem_page_address(struct page *page) +#ifndef CONFIG_DISCONTIGMEM +/* The array of struct pages - for discontigmem use pgdat->lmem_map */ +extern struct page *mem_map; +#endif + +static inline void *lowmem_page_address(struct page *page) { - return __va( ( (page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn) << PAGE_SHIFT); + return __va(page_to_pfn(page) << PAGE_SHIFT); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) @@ -395,11 +400,6 @@ #define VM_FAULT_MINOR 1 #define VM_FAULT_MAJOR 2 -#ifndef CONFIG_DISCONTIGMEM -/* The array of struct pages - for discontigmem use pgdat->lmem_map */ -extern struct page *mem_map; -#endif - extern void show_free_areas(void); struct page *shmem_nopage(struct vm_area_struct * vma, @@ -609,5 +609,13 @@ int write); extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); + +#ifndef CONFIG_DEBUG_PAGEALLOC +static inline void +kernel_map_pages(struct page *page, int numpages, int enable) +{ +} +#endif + #endif /* __KERNEL__ */ 
#endif /* _LINUX_MM_H */ diff -Nru a/include/linux/mman.h b/include/linux/mman.h --- a/include/linux/mman.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/mman.h Sat Jul 5 12:40:31 2003 @@ -9,7 +9,8 @@ #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 -extern int vm_enough_memory(long pages); +extern int sysctl_overcommit_memory; +extern int sysctl_overcommit_ratio; extern atomic_t vm_committed_space; #ifdef CONFIG_SMP diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h --- a/include/linux/mmzone.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/mmzone.h Sat Jul 5 12:40:32 2003 @@ -184,11 +184,16 @@ unsigned long *valid_addr_bitmap; struct bootmem_data *bdata; unsigned long node_start_pfn; - unsigned long node_size; + unsigned long node_present_pages; /* total number of physical pages */ + unsigned long node_spanned_pages; /* total size of physical page + range, including holes */ int node_id; struct pglist_data *pgdat_next; wait_queue_head_t kswapd_wait; } pg_data_t; + +#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) extern int numnodes; extern struct pglist_data *pgdat_list; diff -Nru a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h --- a/include/linux/msdos_fs.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/msdos_fs.h Sat Jul 5 12:40:32 2003 @@ -307,8 +307,8 @@ struct msdos_dir_entry **res_de, loff_t *i_pos); /* msdos/namei.c - these are for Umsdos */ -extern struct dentry *msdos_lookup(struct inode *dir, struct dentry *); -extern int msdos_create(struct inode *dir, struct dentry *dentry, int mode); +extern struct dentry *msdos_lookup(struct inode *dir, struct dentry *, struct nameidata *); +extern int msdos_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *); extern int msdos_rmdir(struct inode *dir, struct dentry *dentry); extern int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode); extern int msdos_unlink(struct inode *dir, struct dentry *dentry); @@ -317,8 +317,8 @@ extern int msdos_fill_super(struct super_block *sb, void *data, int silent); /* vfat/namei.c - these are for dmsdos */ -extern struct dentry *vfat_lookup(struct inode *dir, struct dentry *); -extern int vfat_create(struct inode *dir, struct dentry *dentry, int mode); +extern struct dentry *vfat_lookup(struct inode *dir, struct dentry *, struct nameidata *); +extern int vfat_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *); extern int vfat_rmdir(struct inode *dir, struct dentry *dentry); extern int vfat_unlink(struct inode *dir, struct dentry *dentry); extern int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode); diff -Nru a/include/linux/namei.h b/include/linux/namei.h --- a/include/linux/namei.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/namei.h Sat Jul 5 12:40:31 2003 @@ -5,12 +5,22 @@ struct vfsmount; +struct open_intent { + int flags; + int create_mode; +}; + struct nameidata { struct dentry *dentry; struct vfsmount *mnt; struct qstr last; unsigned int flags; int last_type; + + /* Intent data */ + union { + struct open_intent open; + } intent; }; /* @@ -31,7 +41,12 @@ #define LOOKUP_CONTINUE 4 #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 - +/* + * Intent data + */ +#define LOOKUP_OPEN (0x0100) +#define LOOKUP_CREATE (0x0200) +#define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); #define user_path_walk(name,nd) \ diff -Nru a/include/linux/netdevice.h b/include/linux/netdevice.h 
--- a/include/linux/netdevice.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/netdevice.h Sat Jul 5 12:40:32 2003 @@ -561,7 +561,7 @@ cpu = smp_processor_id(); dev->next_sched = softnet_data[cpu].output_queue; softnet_data[cpu].output_queue = dev; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -612,7 +612,7 @@ cpu = smp_processor_id(); skb->next = softnet_data[cpu].completion_queue; softnet_data[cpu].completion_queue = skb; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -779,7 +779,7 @@ dev->quota += dev->weight; else dev->quota = dev->weight; - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); } @@ -805,7 +805,7 @@ local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&dev->poll_list, &softnet_data[cpu].poll_list); - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); return 1; } diff -Nru a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h --- a/include/linux/nfs_fs.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/nfs_fs.h Sat Jul 5 12:40:31 2003 @@ -240,7 +240,7 @@ struct nfs_fattr *); extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int nfs_permission(struct inode *, int); +extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); diff -Nru a/include/linux/pci.h b/include/linux/pci.h --- a/include/linux/pci.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/pci.h Sat Jul 5 12:40:31 2003 @@ -543,7 +543,7 @@ /* Generic PCI functions used internally */ -int pci_bus_exists(const struct list_head *list, int nr); +extern struct pci_bus *pci_find_bus(int domain, int busnr); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) { diff -Nru a/include/linux/proc_fs.h b/include/linux/proc_fs.h --- a/include/linux/proc_fs.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/proc_fs.h Sat Jul 5 12:40:31 2003 @@ -92,7 +92,7 @@ extern void proc_root_init(void); extern void proc_misc_init(void); -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry); +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); struct dentry *proc_pid_unhash(struct task_struct *p); void proc_pid_flush(struct dentry *proc_dentry); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); @@ -115,7 +115,7 @@ * of the /proc/ subdirectories. 
*/ extern int proc_readdir(struct file *, void *, filldir_t); -extern struct dentry *proc_lookup(struct inode *, struct dentry *); +extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern struct file_operations proc_kcore_operations; extern struct file_operations proc_kmsg_operations; diff -Nru a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h --- a/include/linux/qnx4_fs.h Sat Jul 5 12:40:34 2003 +++ b/include/linux/qnx4_fs.h Sat Jul 5 12:40:34 2003 @@ -110,21 +110,20 @@ struct inode vfs_inode; }; -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry); +extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); extern unsigned long qnx4_count_free_blocks(struct super_block *sb); extern unsigned long qnx4_block_map(struct inode *inode, long iblock); extern struct buffer_head *qnx4_getblk(struct inode *, int, int); extern struct buffer_head *qnx4_bread(struct inode *, int, int); -extern int qnx4_create(struct inode *dir, struct dentry *dentry, int mode); extern struct inode_operations qnx4_file_inode_operations; extern struct inode_operations qnx4_dir_inode_operations; extern struct file_operations qnx4_file_operations; extern struct file_operations qnx4_dir_operations; extern int qnx4_is_free(struct super_block *sb, long block); extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); -extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode); +extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); extern void qnx4_truncate(struct inode *inode); extern void qnx4_free_inode(struct inode *inode); extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); diff -Nru a/include/linux/sched.h b/include/linux/sched.h --- a/include/linux/sched.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/sched.h Sat Jul 5 12:40:31 2003 @@ -321,6 +321,8 @@ }; +struct io_context; /* See blkdev.h */ +void exit_io_context(void); struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ @@ -450,6 +452,8 @@ struct dentry *proc_dentry; struct backing_dev_info *backing_dev_info; + struct io_context *io_context; + unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ }; @@ -481,6 +485,7 @@ #define PF_KSWAPD 0x00040000 /* I am kswapd */ #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #define PF_LESS_THROTTLE 0x01000000 /* Throttle me less: I clena memory */ +#define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #ifdef CONFIG_SMP extern int set_cpus_allowed(task_t *p, unsigned long new_mask); diff -Nru a/include/linux/security.h b/include/linux/security.h --- a/include/linux/security.h Sat Jul 5 12:40:32 2003 +++ b/include/linux/security.h Sat Jul 5 12:40:32 2003 @@ -49,6 +49,7 @@ extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); +extern int cap_vm_enough_memory (long pages); static inline int cap_netlink_send (struct sk_buff *skb) { @@ -958,6 +959,10 @@ * See the syslog(2) manual page for an explanation of the @type values. * @type contains the type of action. * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @pages contains the number of pages. + * Return 0 if permission is granted. * * @register_security: * allow module stacking. 
@@ -989,6 +994,7 @@ int (*quotactl) (int cmds, int type, int id, struct super_block * sb); int (*quota_on) (struct file * f); int (*syslog) (int type); + int (*vm_enough_memory) (long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1238,6 +1244,11 @@ return security_ops->syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return security_ops->vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return security_ops->bprm_alloc_security (bprm); @@ -1896,6 +1907,11 @@ static inline int security_syslog(int type) { return cap_syslog(type); +} + +static inline int security_vm_enough_memory(long pages) +{ + return cap_vm_enough_memory(pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) diff -Nru a/include/linux/sem.h b/include/linux/sem.h --- a/include/linux/sem.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/sem.h Sat Jul 5 12:40:31 2003 @@ -109,7 +109,6 @@ int id; /* internal sem id */ struct sembuf * sops; /* array of pending operations */ int nsops; /* number of operations */ - int alter; /* operation will alter semaphore */ }; /* Each task has a list of undo requests. They are executed automatically diff -Nru a/include/linux/slab.h b/include/linux/slab.h --- a/include/linux/slab.h Sat Jul 5 12:40:33 2003 +++ b/include/linux/slab.h Sat Jul 5 12:40:33 2003 @@ -114,6 +114,10 @@ extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; +void ptrinfo(unsigned long addr); + +extern atomic_t slab_reclaim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff -Nru a/include/linux/sysfs.h b/include/linux/sysfs.h --- a/include/linux/sysfs.h Sat Jul 5 12:40:31 2003 +++ b/include/linux/sysfs.h Sat Jul 5 12:40:31 2003 @@ -10,9 +10,11 @@ #define _SYSFS_H_ struct kobject; +struct module; struct attribute { char * name; + struct module * owner; mode_t mode; }; @@ -36,6 +38,9 @@ extern void sysfs_remove_dir(struct kobject *); + +extern void +sysfs_rename_dir(struct kobject *, char *new_name); extern int sysfs_create_file(struct kobject *, struct attribute *); diff -Nru a/include/linux/umsdos_fs.p b/include/linux/umsdos_fs.p --- a/include/linux/umsdos_fs.p Sat Jul 5 12:40:31 2003 +++ b/include/linux/umsdos_fs.p Sat Jul 5 12:40:31 2003 @@ -10,7 +10,7 @@ void umsdos_lookup_patch_new(struct dentry *, struct umsdos_info *); int umsdos_is_pseudodos (struct inode *dir, struct dentry *dentry); struct dentry *umsdos_lookup_x ( struct inode *dir, struct dentry *dentry, int nopseudo); -struct dentry *UMSDOS_lookup(struct inode *, struct dentry *); +struct dentry *UMSDOS_lookup(struct inode *, struct dentry *, struct nameidata *); struct dentry *umsdos_lookup_dentry(struct dentry *, char *, int, int); struct dentry *umsdos_covered(struct dentry *, char *, int); @@ -92,7 +92,7 @@ /* rdir.c 22/03/95 03.31.42 */ struct dentry *umsdos_rlookup_x (struct inode *dir, struct dentry *dentry, int nopseudo); -struct dentry *UMSDOS_rlookup (struct inode *dir, struct dentry *dentry); +struct dentry *UMSDOS_rlookup (struct inode *dir, struct dentry *dentry, struct nameidata *nd); static inline struct umsdos_inode_info *UMSDOS_I(struct inode *inode) { diff -Nru a/init/Kconfig b/init/Kconfig --- a/init/Kconfig Sat Jul 5 12:40:32 2003 +++ b/init/Kconfig Sat Jul 5 12:40:32 2003 @@ -93,7 +93,8 @@ limited in memory. 
config LOG_BUF_SHIFT - int "Kernel log buffer size" if DEBUG_KERNEL + int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL + range 12 20 default 17 if ARCH_S390 default 16 if X86_NUMAQ || IA64 default 15 if SMP diff -Nru a/ipc/sem.c b/ipc/sem.c --- a/ipc/sem.c Sat Jul 5 12:40:31 2003 +++ b/ipc/sem.c Sat Jul 5 12:40:31 2003 @@ -49,6 +49,10 @@ * increase. If there are decrement operations in the operations * array we do the same as before. * + * With the incarnation of O(1) scheduler, it becomes unnecessary to perform + * check/retry algorithm for waking up blocked processes as the new scheduler + * is better at handling thread switch than the old one. + * * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie * * SMP-threaded, sysctl's added @@ -258,8 +262,7 @@ */ static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, - int nsops, struct sem_undo *un, int pid, - int do_undo) + int nsops, struct sem_undo *un, int pid) { int result, sem_op; struct sembuf *sop; @@ -289,10 +292,6 @@ curr->semval = result; } - if (do_undo) { - result = 0; - goto undo; - } sop--; while (sop >= sops) { sma->sem_base[sop->sem_num].sempid = pid; @@ -334,23 +333,14 @@ for (q = sma->sem_pending; q; q = q->next) { - if (q->status == 1) - continue; /* this one was woken up before */ - error = try_atomic_semop(sma, q->sops, q->nsops, - q->undo, q->pid, q->alter); + q->undo, q->pid); /* Does q->sleeper still need to sleep? */ if (error <= 0) { - /* Found one, wake it up */ - wake_up_process(q->sleeper); - if (error == 0 && q->alter) { - /* if q-> alter let it self try */ - q->status = 1; - return; - } q->status = error; remove_from_queue(sma,q); + wake_up_process(q->sleeper); } } } @@ -1062,7 +1052,7 @@ if (error) goto out_unlock_free; - error = try_atomic_semop (sma, sops, nsops, un, current->pid, 0); + error = try_atomic_semop (sma, sops, nsops, un, current->pid); if (error <= 0) goto update; @@ -1075,55 +1065,46 @@ queue.nsops = nsops; queue.undo = un; queue.pid = current->pid; - queue.alter = decrease; queue.id = semid; if (alter) append_to_queue(sma ,&queue); else prepend_to_queue(sma ,&queue); - for (;;) { - queue.status = -EINTR; - queue.sleeper = current; - current->state = TASK_INTERRUPTIBLE; - sem_unlock(sma); + queue.status = -EINTR; + queue.sleeper = current; + current->state = TASK_INTERRUPTIBLE; + sem_unlock(sma); - if (timeout) - jiffies_left = schedule_timeout(jiffies_left); - else - schedule(); + if (timeout) + jiffies_left = schedule_timeout(jiffies_left); + else + schedule(); - sma = sem_lock(semid); - if(sma==NULL) { - if(queue.prev != NULL) - BUG(); - error = -EIDRM; - goto out_free; - } - /* - * If queue.status == 1 we where woken up and - * have to retry else we simply return. 
- * If an interrupt occurred we have to clean up the - * queue - * - */ - if (queue.status == 1) - { - error = try_atomic_semop (sma, sops, nsops, un, - current->pid,0); - if (error <= 0) - break; - } else { - error = queue.status; - if (error == -EINTR && timeout && jiffies_left == 0) - error = -EAGAIN; - if (queue.prev) /* got Interrupt */ - break; - /* Everything done by update_queue */ - goto out_unlock_free; - } + sma = sem_lock(semid); + if(sma==NULL) { + if(queue.prev != NULL) + BUG(); + error = -EIDRM; + goto out_free; + } + + /* + * If queue.status != -EINTR we are woken up by another process + */ + error = queue.status; + if (queue.status != -EINTR) { + goto out_unlock_free; } + + /* + * If an interrupt occurred we have to clean up the queue + */ + if (timeout && jiffies_left == 0) + error = -EAGAIN; remove_from_queue(sma,&queue); + goto out_unlock_free; + update: if (alter) update_queue (sma); diff -Nru a/kernel/exit.c b/kernel/exit.c --- a/kernel/exit.c Sat Jul 5 12:40:32 2003 +++ b/kernel/exit.c Sat Jul 5 12:40:32 2003 @@ -230,6 +230,7 @@ /* signals? */ security_task_reparent_to_init(current); memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim))); + atomic_inc(&(INIT_USER->__count)); switch_uid(INIT_USER); write_unlock_irq(&tasklist_lock); @@ -651,6 +652,8 @@ if (tsk->exit_signal != -1) { int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; do_notify_parent(tsk, signal); + } else if (tsk->ptrace) { + do_notify_parent(tsk, SIGCHLD); } tsk->state = TASK_ZOMBIE; @@ -680,6 +683,8 @@ panic("Attempted to kill the idle task!"); if (unlikely(tsk->pid == 1)) panic("Attempted to kill init!"); + if (tsk->io_context) + exit_io_context(); tsk->flags |= PF_EXITING; del_timer_sync(&tsk->real_timer); @@ -715,7 +720,7 @@ tsk->exit_code = code; exit_notify(tsk); - if (tsk->exit_signal == -1) + if (tsk->exit_signal == -1 && tsk->ptrace == 0) release_task(tsk); schedule(); @@ -859,7 +864,7 @@ BUG_ON(state != TASK_DEAD); return 0; } - if (unlikely(p->exit_signal == -1)) + if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) /* * This can only happen in a race with a ptraced thread * dying on another processor. @@ -889,8 +894,12 @@ /* Double-check with lock held. */ if (p->real_parent != p->parent) { __ptrace_unlink(p); - do_notify_parent(p, p->exit_signal); p->state = TASK_ZOMBIE; + /* If this is a detached thread, this is where it goes away. 
*/ + if (p->exit_signal == -1) + release_task (p); + else + do_notify_parent(p, p->exit_signal); p = NULL; } write_unlock_irq(&tasklist_lock); diff -Nru a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c Sat Jul 5 12:40:31 2003 +++ b/kernel/fork.c Sat Jul 5 12:40:31 2003 @@ -286,7 +286,7 @@ continue; if (mpnt->vm_flags & VM_ACCOUNT) { unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - if (!vm_enough_memory(len)) + if (security_vm_enough_memory(len)) goto fail_nomem; charge += len; } @@ -864,6 +864,7 @@ p->lock_depth = -1; /* -1 = no lock */ p->start_time = get_jiffies_64(); p->security = NULL; + p->io_context = NULL; retval = -ENOMEM; if ((retval = security_task_alloc(p))) diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c --- a/kernel/ksyms.c Sat Jul 5 12:40:31 2003 +++ b/kernel/ksyms.c Sat Jul 5 12:40:31 2003 @@ -462,6 +462,7 @@ #endif EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(yield); +EXPORT_SYMBOL(io_schedule); EXPORT_SYMBOL(__cond_resched); EXPORT_SYMBOL(set_user_nice); EXPORT_SYMBOL(task_nice); @@ -586,7 +587,7 @@ EXPORT_SYMBOL(do_softirq); EXPORT_SYMBOL(raise_softirq); EXPORT_SYMBOL(open_softirq); -EXPORT_SYMBOL(cpu_raise_softirq); +EXPORT_SYMBOL(raise_softirq_irqoff); EXPORT_SYMBOL(__tasklet_schedule); EXPORT_SYMBOL(__tasklet_hi_schedule); diff -Nru a/kernel/signal.c b/kernel/signal.c --- a/kernel/signal.c Sat Jul 5 12:40:31 2003 +++ b/kernel/signal.c Sat Jul 5 12:40:31 2003 @@ -797,10 +797,11 @@ int ret; spin_lock_irqsave(&t->sighand->siglock, flags); - if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) + if (sigismember(&t->blocked, sig) || t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) { t->sighand->action[sig-1].sa.sa_handler = SIG_DFL; - sigdelset(&t->blocked, sig); - recalc_sigpending_tsk(t); + sigdelset(&t->blocked, sig); + recalc_sigpending_tsk(t); + } ret = specific_send_sig_info(sig, info, t); spin_unlock_irqrestore(&t->sighand->siglock, flags); @@ -2081,7 +2082,7 @@ info.si_signo = sig; info.si_errno = 0; info.si_code = SI_USER; - info.si_pid = current->pid; + info.si_pid = current->tgid; info.si_uid = current->uid; return kill_something_info(sig, &info, pid); @@ -2104,7 +2105,7 @@ info.si_signo = sig; info.si_errno = 0; info.si_code = SI_TKILL; - info.si_pid = current->pid; + info.si_pid = current->tgid; info.si_uid = current->uid; read_lock(&tasklist_lock); diff -Nru a/kernel/softirq.c b/kernel/softirq.c --- a/kernel/softirq.c Sat Jul 5 12:40:31 2003 +++ b/kernel/softirq.c Sat Jul 5 12:40:31 2003 @@ -14,6 +14,7 @@ #include #include #include +#include #include /* @@ -41,15 +42,18 @@ static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); + /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -static inline void wakeup_softirqd(unsigned cpu) +static inline void wakeup_softirqd(void) { - struct task_struct * tsk = ksoftirqd_task(cpu); + /* Interrupts are disabled: no need to stop preemption */ + struct task_struct *tsk = __get_cpu_var(ksoftirqd); if (tsk && tsk->state != TASK_RUNNING) wake_up_process(tsk); @@ -96,7 +100,7 @@ goto restart; } if (pending) - wakeup_softirqd(smp_processor_id()); + wakeup_softirqd(); __local_bh_enable(); } @@ -117,9 +121,9 @@ /* * This function must run with irqs disabled! 
*/ -inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) +inline void raise_softirq_irqoff(unsigned int nr) { - __cpu_raise_softirq(cpu, nr); + __raise_softirq_irqoff(nr); /* * If we're in an interrupt or softirq, we're done @@ -131,7 +135,7 @@ * schedule the softirq soon. */ if (!in_interrupt()) - wakeup_softirqd(cpu); + wakeup_softirqd(); } void raise_softirq(unsigned int nr) @@ -139,7 +143,7 @@ unsigned long flags; local_irq_save(flags); - cpu_raise_softirq(smp_processor_id(), nr); + raise_softirq_irqoff(nr); local_irq_restore(flags); } @@ -168,7 +172,7 @@ local_irq_save(flags); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } @@ -179,7 +183,7 @@ local_irq_save(flags); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } @@ -211,7 +215,7 @@ local_irq_disable(); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + __raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_enable(); } } @@ -244,7 +248,7 @@ local_irq_disable(); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + __raise_softirq_irqoff(HI_SOFTIRQ); local_irq_enable(); } } @@ -325,7 +329,7 @@ __set_current_state(TASK_INTERRUPTIBLE); mb(); - local_ksoftirqd_task() = current; + __get_cpu_var(ksoftirqd) = current; for (;;) { if (!local_softirq_pending()) @@ -354,7 +358,7 @@ return NOTIFY_BAD; } - while (!ksoftirqd_task(hotcpu)) + while (!per_cpu(ksoftirqd, hotcpu)) yield(); } return NOTIFY_OK; diff -Nru a/kernel/sysctl.c b/kernel/sysctl.c --- a/kernel/sysctl.c Sat Jul 5 12:40:31 2003 +++ b/kernel/sysctl.c Sat Jul 5 12:40:31 2003 @@ -130,7 +130,7 @@ static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); -static int proc_sys_permission(struct inode *, int); +static int proc_sys_permission(struct inode *, int, struct nameidata *); struct file_operations proc_sys_file_operations = { .read = proc_readsys, @@ -1177,7 +1177,7 @@ return do_rw_proc(1, file, (char __user *) buf, count, ppos); } -static int proc_sys_permission(struct inode *inode, int op) +static int proc_sys_permission(struct inode *inode, int op, struct nameidata *nd) { return test_perm(inode->i_mode, op); } diff -Nru a/kernel/user.c b/kernel/user.c --- a/kernel/user.c Sat Jul 5 12:40:31 2003 +++ b/kernel/user.c Sat Jul 5 12:40:31 2003 @@ -126,7 +126,6 @@ * we should be checking for it. -DaveM */ old_user = current->user; - atomic_inc(&new_user->__count); atomic_inc(&new_user->processes); atomic_dec(&old_user->processes); current->user = new_user; diff -Nru a/lib/kobject.c b/lib/kobject.c --- a/lib/kobject.c Sat Jul 5 12:40:32 2003 +++ b/lib/kobject.c Sat Jul 5 12:40:32 2003 @@ -314,6 +314,21 @@ } /** + * kobject_rename - change the name of an object + * @kobj: object in question. + * @new_name: object's new name + */ + +void kobject_rename(struct kobject * kobj, char *new_name) +{ + kobj = kobject_get(kobj); + if (!kobj) + return; + sysfs_rename_dir(kobj, new_name); + kobject_put(kobj); +} + +/** * kobject_del - unlink kobject from hierarchy. * @kobj: object. 
*/ diff -Nru a/mm/bootmem.c b/mm/bootmem.c --- a/mm/bootmem.c Sat Jul 5 12:40:31 2003 +++ b/mm/bootmem.c Sat Jul 5 12:40:31 2003 @@ -84,10 +84,6 @@ if (!size) BUG(); - if (sidx < 0) - BUG(); - if (eidx < 0) - BUG(); if (sidx >= eidx) BUG(); if ((addr >> PAGE_SHIFT) >= bdata->node_low_pfn) @@ -202,7 +198,7 @@ ; } - if (preferred) { + if (preferred > offset) { preferred = offset; goto restart_scan; } diff -Nru a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c Sat Jul 5 12:40:32 2003 +++ b/mm/mmap.c Sat Jul 5 12:40:32 2003 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,65 +54,9 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ atomic_t vm_committed_space = ATOMIC_INIT(0); -/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 1 means there is enough memory for the allocation to - * succeed and 0 implies there is not. - * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting - * - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. - * Additional code 2002 Jul 20 by Robert Love. - */ -extern atomic_t slab_reclaim_pages; -int vm_enough_memory(long pages) -{ - unsigned long free, allowed; - - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == 1) - return 1; - - if (sysctl_overcommit_memory == 0) { - free = get_page_cache_size(); - free += nr_free_pages(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for root - */ - if (!capable(CAP_SYS_ADMIN)) - free -= free / 32; - - if (free > pages) - return 1; - vm_unacct_memory(pages); - return 0; - } - - allowed = totalram_pages * sysctl_overcommit_ratio / 100; - allowed += total_swap_pages; - - if (atomic_read(&vm_committed_space) < allowed) - return 1; - - vm_unacct_memory(pages); - - return 0; -} +EXPORT_SYMBOL(sysctl_overcommit_memory); +EXPORT_SYMBOL(sysctl_overcommit_ratio); +EXPORT_SYMBOL(vm_committed_space); /* * Requires inode->i_mapping->i_shared_sem @@ -646,7 +591,7 @@ * Private writable mapping: check memory availability */ charged = len >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; } @@ -950,7 +895,7 @@ grow = (address - vma->vm_end) >> PAGE_SHIFT; /* Overcommit.. */ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1004,7 +949,7 @@ grow = (vma->vm_start - address) >> PAGE_SHIFT; /* Overcommit.. 
*/ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1376,7 +1321,7 @@ if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; diff -Nru a/mm/mprotect.c b/mm/mprotect.c --- a/mm/mprotect.c Sat Jul 5 12:40:31 2003 +++ b/mm/mprotect.c Sat Jul 5 12:40:31 2003 @@ -175,7 +175,7 @@ if (newflags & VM_WRITE) { if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { charged = (end - start) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; newflags |= VM_ACCOUNT; } diff -Nru a/mm/mremap.c b/mm/mremap.c --- a/mm/mremap.c Sat Jul 5 12:40:32 2003 +++ b/mm/mremap.c Sat Jul 5 12:40:32 2003 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -244,9 +245,7 @@ } if (!move_page_tables(vma, new_addr, addr, old_len)) { - unsigned long must_fault_in; - unsigned long fault_in_start; - unsigned long fault_in_end; + unsigned long vm_locked = vma->vm_flags & VM_LOCKED; if (allocated_vma) { *new_vma = *vma; @@ -272,14 +271,8 @@ } else vma = NULL; /* nothing more to do */ - must_fault_in = new_vma->vm_flags & VM_LOCKED; - fault_in_start = new_vma->vm_start; - fault_in_end = new_vma->vm_end; - do_munmap(current->mm, addr, old_len); - /* new_vma could have been invalidated by do_munmap */ - /* Restore VM_ACCOUNT if one or two pieces of vma left */ if (vma) { vma->vm_flags |= VM_ACCOUNT; @@ -288,9 +281,11 @@ } current->mm->total_vm += new_len >> PAGE_SHIFT; - if (must_fault_in) { + if (vm_locked) { current->mm->locked_vm += new_len >> PAGE_SHIFT; - make_pages_present(fault_in_start, fault_in_end); + if (new_len > old_len) + make_pages_present(new_addr + old_len, + new_addr + new_len); } return new_addr; } @@ -391,7 +386,7 @@ if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) goto out_nc; } diff -Nru a/mm/nommu.c b/mm/nommu.c --- a/mm/nommu.c Sat Jul 5 12:40:32 2003 +++ b/mm/nommu.c Sat Jul 5 12:40:32 2003 @@ -62,11 +62,8 @@ inode->i_size = offset; out_truncate: - if (inode->i_op && inode->i_op->truncate) { - lock_kernel(); + if (inode->i_op && inode->i_op->truncate) inode->i_op->truncate(inode); - unlock_kernel(); - } return 0; out_sig: send_sig(SIGXFSZ, current, 0); diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c --- a/mm/page_alloc.c Sat Jul 5 12:40:31 2003 +++ b/mm/page_alloc.c Sat Jul 5 12:40:31 2003 @@ -32,6 +32,8 @@ #include #include +#include + DECLARE_BITMAP(node_online_map, MAX_NUMNODES); DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS); struct pglist_data *pgdat_list; @@ -41,6 +43,9 @@ int numnodes = 1; int sysctl_lower_zone_protection = 0; +EXPORT_SYMBOL(totalram_pages); +EXPORT_SYMBOL(nr_swap_pages); + /* * Used by page_zone() to look up the address of the struct zone whose * id is encoded in the upper bits of page->flags @@ -265,6 +270,7 @@ mod_page_state(pgfree, 1 << order); free_pages_check(__FUNCTION__, page); list_add(&page->list, &list); + kernel_map_pages(page, 1<pageset[get_cpu()].pcp[cold]; @@ -556,7 +563,7 @@ (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += z->pages_low * sysctl_lower_zone_protection; } @@ -579,7 +586,7 @@ (!wait && z->free_pages >= z->pages_high)) { page = 
buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += local_min * sysctl_lower_zone_protection; } @@ -594,7 +601,7 @@ page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } goto nopage; } @@ -622,7 +629,7 @@ (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += z->pages_low * sysctl_lower_zone_protection; } @@ -653,6 +660,9 @@ current->comm, order, gfp_mask); } return NULL; +got_pg: + kernel_map_pages(page, 1 << order, 1); + return page; } /* @@ -726,6 +736,7 @@ return sum; } +EXPORT_SYMBOL(nr_free_pages); unsigned int nr_used_zone_pages(void) { @@ -818,6 +829,7 @@ EXPORT_PER_CPU_SYMBOL(page_states); atomic_t nr_pagecache = ATOMIC_INIT(0); +EXPORT_SYMBOL(nr_pagecache); #ifdef CONFIG_SMP DEFINE_PER_CPU(long, nr_pagecache_local) = 0; #endif @@ -896,7 +908,7 @@ { pg_data_t *pgdat = NODE_DATA(nid); - val->totalram = pgdat->node_size; + val->totalram = pgdat->node_present_pages; val->freeram = nr_free_pages_pgdat(pgdat); val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; @@ -1131,12 +1143,13 @@ for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zones_size[i]; - pgdat->node_size = totalpages; + pgdat->node_spanned_pages = totalpages; realtotalpages = totalpages; if (zholes_size) for (i = 0; i < MAX_NR_ZONES; i++) realtotalpages -= zholes_size[i]; + pgdat->node_present_pages = realtotalpages; printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); } @@ -1342,7 +1355,7 @@ pgdat->node_start_pfn = node_start_pfn; calculate_zone_totalpages(pgdat, zones_size, zholes_size); if (!node_mem_map) { - size = (pgdat->node_size + 1) * sizeof(struct page); + size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); node_mem_map = alloc_bootmem_node(pgdat, size); } pgdat->node_mem_map = node_mem_map; diff -Nru a/mm/shmem.c b/mm/shmem.c --- a/mm/shmem.c Sat Jul 5 12:40:31 2003 +++ b/mm/shmem.c Sat Jul 5 12:40:31 2003 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -507,7 +508,7 @@ */ change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size); if (change > 0) { - if (!vm_enough_memory(change)) + if (security_vm_enough_memory(change)) return -ENOMEM; } else if (attr->ia_size < inode->i_size) { vm_unacct_memory(-change); @@ -1139,7 +1140,7 @@ maxpos = inode->i_size; if (maxpos < pos + count) { maxpos = pos + count; - if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { + if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { err = -ENOMEM; goto out; } @@ -1397,7 +1398,8 @@ return 0; } -static int shmem_create(struct inode *dir, struct dentry *dentry, int mode) +static int shmem_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { return shmem_mknod(dir, dentry, mode | S_IFREG, 0); } @@ -1493,7 +1495,7 @@ memcpy(info, symname, len); inode->i_op = &shmem_symlink_inline_operations; } else { - if (!vm_enough_memory(VM_ACCT(1))) { + if (security_vm_enough_memory(VM_ACCT(1))) { iput(inode); return -ENOMEM; } @@ -1887,7 +1889,7 @@ if (size > SHMEM_MAX_BYTES) return ERR_PTR(-EINVAL); - if ((flags & VM_ACCOUNT) && !vm_enough_memory(VM_ACCT(size))) + if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size))) return ERR_PTR(-ENOMEM); error = -ENOMEM; diff -Nru a/mm/slab.c b/mm/slab.c --- a/mm/slab.c Sat Jul 5 12:40:32 2003 +++ b/mm/slab.c Sat Jul 5 12:40:32 2003 @@ -89,7 +89,12 @@ #include #include 
#include +#include +#include + #include +#include +#include /* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, @@ -351,6 +356,34 @@ #define POISON_AFTER 0x6b /* for use-after-free poisoning */ #define POISON_END 0xa5 /* end-byte of poisoning */ +static inline int obj_dbghead(kmem_cache_t *cachep) +{ + if (cachep->flags & SLAB_RED_ZONE) + return BYTES_PER_WORD; + return 0; +} + +static inline int obj_dbglen(kmem_cache_t *cachep) +{ + int len = 0; + + if (cachep->flags & SLAB_RED_ZONE) { + len += 2*BYTES_PER_WORD; + } + if (cachep->flags & SLAB_STORE_USER) { + len += BYTES_PER_WORD; + } + return len; +} +#else +static inline int obj_dbghead(kmem_cache_t *cachep) +{ + return 0; +} +static inline int obj_dbglen(kmem_cache_t *cachep) +{ + return 0; +} #endif /* @@ -430,6 +463,7 @@ * SLAB_RECLAIM_ACCOUNT turns this on per-slab */ atomic_t slab_reclaim_pages; +EXPORT_SYMBOL(slab_reclaim_pages); /* * chicken and egg problem: delay the per-cpu array allocation @@ -441,7 +475,7 @@ FULL } g_cpucache_up; -static struct timer_list reap_timers[NR_CPUS]; +static DEFINE_PER_CPU(struct timer_list, reap_timers); static void reap_timer_fnc(unsigned long data); @@ -491,7 +525,7 @@ */ static void start_cpu_timer(int cpu) { - struct timer_list *rt = &reap_timers[cpu]; + struct timer_list *rt = &per_cpu(reap_timers, cpu); if (rt->function == NULL) { init_timer(rt); @@ -765,16 +799,45 @@ } #if DEBUG -static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) + +#ifdef CONFIG_DEBUG_PAGEALLOC +static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller) { - int size = cachep->objsize; - if (cachep->flags & SLAB_RED_ZONE) { - addr += BYTES_PER_WORD; - size -= 2*BYTES_PER_WORD; - } - if (cachep->flags & SLAB_STORE_USER) { - size -= BYTES_PER_WORD; + int size = cachep->objsize-obj_dbglen(cachep); + + addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)]; + + if (size < 5*sizeof(unsigned long)) + return; + + *addr++=0x12345678; + *addr++=caller; + *addr++=smp_processor_id(); + size -= 3*sizeof(unsigned long); + { + unsigned long *sptr = &caller; + unsigned long svalue; + + while (((long) sptr & (THREAD_SIZE-1)) != 0) { + svalue = *sptr++; + if (kernel_text_address(svalue)) { + *addr++=svalue; + size -= sizeof(unsigned long); + if (size <= sizeof(unsigned long)) + break; + } + } + } + *addr++=0x87654321; +} +#endif + +static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) +{ + int size = cachep->objsize-obj_dbglen(cachep); + addr = &((char*)addr)[obj_dbghead(cachep)]; + memset(addr, val, size); *(unsigned char *)(addr+size-1) = POISON_END; } @@ -796,15 +859,11 @@ static void check_poison_obj(kmem_cache_t *cachep, void *addr) { - int size = cachep->objsize; void *end; - if (cachep->flags & SLAB_RED_ZONE) { - addr += BYTES_PER_WORD; - size -= 2*BYTES_PER_WORD; - } - if (cachep->flags & SLAB_STORE_USER) { - size -= BYTES_PER_WORD; - } + int size = cachep->objsize-obj_dbglen(cachep); + + addr = &((char*)addr)[obj_dbghead(cachep)]; + end = scan_poisoned_obj(addr, size); if (end) { int s; @@ -858,8 +917,16 @@ void *objp = slabp->s_mem + cachep->objsize * i; int objlen = cachep->objsize; - if (cachep->flags & SLAB_POISON) + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep)) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1); + else + check_poison_obj(cachep, objp); +#else check_poison_obj(cachep, objp); +#endif + } if (cachep->flags & 
SLAB_STORE_USER) objlen -= BYTES_PER_WORD; @@ -952,6 +1019,10 @@ } #if FORCED_DEBUG +#ifdef CONFIG_DEBUG_PAGEALLOC + if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128) + size = PAGE_SIZE-3*BYTES_PER_WORD; +#endif /* * Enable redzoning and last user accounting, except * - for caches with forced alignment: redzoning would violate the @@ -1404,6 +1475,8 @@ slab_error(cachep, "constructor overwrote the" " start of an object"); } + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); #else if (cachep->ctor) cachep->ctor(objp, cachep, ctor_flags); @@ -1584,25 +1657,28 @@ * caller can perform a verify of its state (debugging). * Called without the cache-lock held. */ - if (cachep->flags & SLAB_RED_ZONE) { - cachep->ctor(objp+BYTES_PER_WORD, + cachep->ctor(objp+obj_dbghead(cachep), cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); - } else { - cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); - } } if (cachep->flags & SLAB_POISON && cachep->dtor) { /* we want to cache poison the object, * call the destruction callback */ - if (cachep->flags & SLAB_RED_ZONE) - cachep->dtor(objp+BYTES_PER_WORD, cachep, 0); - else - cachep->dtor(objp, cachep, 0); + cachep->dtor(objp+obj_dbghead(cachep), cachep, 0); } - if (cachep->flags & SLAB_POISON) + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { + store_stackinfo(cachep, objp, POISON_AFTER); + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); + } else { + poison_obj(cachep, objp, POISON_AFTER); + } +#else poison_obj(cachep, objp, POISON_AFTER); #endif + } +#endif return objp; } @@ -1617,6 +1693,7 @@ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { entries++; BUG_ON(entries > cachep->num); + BUG_ON(i < 0 || i >= cachep->num); } BUG_ON(entries != cachep->num - slabp->inuse); #endif @@ -1746,9 +1823,16 @@ if (!objp) return objp; - if (cachep->flags & SLAB_POISON) { + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1); + else + check_poison_obj(cachep, objp); +#else check_poison_obj(cachep, objp); - poison_obj(cachep, objp, POISON_BEFORE); +#endif + poison_obj(cachep, objp, POISON_BEFORE); } if (cachep->flags & SLAB_STORE_USER) { objlen -= BYTES_PER_WORD; @@ -2085,16 +2169,7 @@ unsigned int kmem_cache_size(kmem_cache_t *cachep) { - unsigned int objlen = cachep->objsize; - -#if DEBUG - if (cachep->flags & SLAB_RED_ZONE) - objlen -= 2*BYTES_PER_WORD; - if (cachep->flags & SLAB_STORE_USER) - objlen -= BYTES_PER_WORD; -#endif - - return objlen; + return cachep->objsize-obj_dbglen(cachep); } kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags) @@ -2382,7 +2457,7 @@ static void reap_timer_fnc(unsigned long data) { int cpu = smp_processor_id(); - struct timer_list *rt = &reap_timers[cpu]; + struct timer_list *rt = &__get_cpu_var(reap_timers); cache_reap(); mod_timer(rt, jiffies + REAPTIMEOUT_CPUC + cpu); @@ -2626,3 +2701,70 @@ return size; } +void ptrinfo(unsigned long addr) +{ + struct page *page; + + printk("Dumping data about address %p.\n", (void*)addr); + if (!virt_addr_valid((void*)addr)) { + printk("virt addr invalid.\n"); + return; + } + do { + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd; + if (pgd_none(*pgd)) { + printk("No pgd.\n"); + break; + } + pmd = 
pmd_offset(pgd, addr); + if (pmd_none(*pmd)) { + printk("No pmd.\n"); + break; + } +#ifdef CONFIG_X86 + if (pmd_large(*pmd)) { + printk("Large page.\n"); + break; + } +#endif + printk("normal page, pte_val 0x%llx\n", + (unsigned long long)pte_val(*pte_offset_kernel(pmd, addr))); + } while(0); + + page = virt_to_page((void*)addr); + printk("struct page at %p, flags %lxh.\n", page, page->flags); + if (PageSlab(page)) { + kmem_cache_t *c; + struct slab *s; + unsigned long flags; + int objnr; + void *objp; + + c = GET_PAGE_CACHE(page); + printk("belongs to cache %s.\n",c->name); + + spin_lock_irqsave(&c->spinlock, flags); + s = GET_PAGE_SLAB(page); + printk("slabp %p with %d inuse objects (from %d).\n", + s, s->inuse, c->num); + check_slabp(c,s); + + objnr = (addr-(unsigned long)s->s_mem)/c->objsize; + objp = s->s_mem+c->objsize*objnr; + printk("points into object no %d, starting at %p, len %d.\n", + objnr, objp, c->objsize); + if (objnr >= c->num) { + printk("Bad obj number.\n"); + } else { + kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1); + + printk("redzone: %lxh/%lxh/%lxh.\n", + ((unsigned long*)objp)[0], + ((unsigned long*)(objp+c->objsize))[-2], + ((unsigned long*)(objp+c->objsize))[-1]); + } + spin_unlock_irqrestore(&c->spinlock, flags); + + } +} diff -Nru a/mm/swap.c b/mm/swap.c --- a/mm/swap.c Sat Jul 5 12:40:31 2003 +++ b/mm/swap.c Sat Jul 5 12:40:31 2003 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* for try_to_release_page() */ #include @@ -370,6 +371,7 @@ } preempt_enable(); } +EXPORT_SYMBOL(vm_acct_memory); #endif diff -Nru a/mm/swapfile.c b/mm/swapfile.c --- a/mm/swapfile.c Sat Jul 5 12:40:31 2003 +++ b/mm/swapfile.c Sat Jul 5 12:40:31 2003 @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -30,6 +32,8 @@ int total_swap_pages; static int swap_overflow; +EXPORT_SYMBOL(total_swap_pages); + static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -1042,7 +1046,7 @@ swap_list_unlock(); goto out_dput; } - if (vm_enough_memory(p->pages)) + if (!security_vm_enough_memory(p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; diff -Nru a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c --- a/net/bluetooth/hci_sock.c Sat Jul 5 12:40:32 2003 +++ b/net/bluetooth/hci_sock.c Sat Jul 5 12:40:32 2003 @@ -632,6 +632,7 @@ struct net_proto_family hci_sock_family_ops = { .family = PF_BLUETOOTH, + .owner = THIS_MODULE, .create = hci_sock_create, }; diff -Nru a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c --- a/net/bluetooth/rfcomm/sock.c Sat Jul 5 12:40:32 2003 +++ b/net/bluetooth/rfcomm/sock.c Sat Jul 5 12:40:32 2003 @@ -878,6 +878,7 @@ static struct net_proto_family rfcomm_sock_family_ops = { .family = PF_BLUETOOTH, + .owner = THIS_MODULE, .create = rfcomm_sock_create }; diff -Nru a/net/core/dev.c b/net/core/dev.c --- a/net/core/dev.c Sat Jul 5 12:40:32 2003 +++ b/net/core/dev.c Sat Jul 5 12:40:32 2003 @@ -1712,7 +1712,7 @@ softnet_break: netdev_rx_stat[this_cpu].time_squeeze++; - __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } diff -Nru a/net/unix/af_unix.c b/net/unix/af_unix.c --- a/net/unix/af_unix.c Sat Jul 5 12:40:32 2003 +++ b/net/unix/af_unix.c Sat Jul 5 12:40:32 2003 @@ -594,7 +594,7 @@ err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); if (err) goto fail; - err = 
permission(nd.dentry->d_inode,MAY_WRITE); + err = permission(nd.dentry->d_inode,MAY_WRITE, &nd); if (err) goto put_fail; diff -Nru a/security/capability.c b/security/capability.c --- a/security/capability.c Sat Jul 5 12:40:31 2003 +++ b/security/capability.c Sat Jul 5 12:40:31 2003 @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -275,6 +278,65 @@ return 0; } +/* + * Check that a process has enough memory to allocate a new virtual + * mapping. 0 means there is enough memory for the allocation to + * succeed and -ENOMEM implies there is not. + * + * We currently support three overcommit policies, which are set via the + * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting + * + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. + * Additional code 2002 Jul 20 by Robert Love. + */ +int cap_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (!capable(CAP_SYS_ADMIN)) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + EXPORT_SYMBOL(cap_capable); EXPORT_SYMBOL(cap_ptrace); EXPORT_SYMBOL(cap_capget); @@ -286,6 +348,7 @@ EXPORT_SYMBOL(cap_task_post_setuid); EXPORT_SYMBOL(cap_task_reparent_to_init); EXPORT_SYMBOL(cap_syslog); +EXPORT_SYMBOL(cap_vm_enough_memory); #ifdef CONFIG_SECURITY @@ -307,6 +370,8 @@ .task_reparent_to_init = cap_task_reparent_to_init, .syslog = cap_syslog, + + .vm_enough_memory = cap_vm_enough_memory, }; #if defined(CONFIG_SECURITY_CAPABILITIES_MODULE) diff -Nru a/security/dummy.c b/security/dummy.c --- a/security/dummy.c Sat Jul 5 12:40:31 2003 +++ b/security/dummy.c Sat Jul 5 12:40:31 2003 @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -97,6 +100,54 @@ return 0; } +static int dummy_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. 
The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (current->euid) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + static int dummy_bprm_alloc_security (struct linux_binprm *bprm) { return 0; @@ -793,6 +844,7 @@ set_to_dummy_if_null(ops, quota_on); set_to_dummy_if_null(ops, sysctl); set_to_dummy_if_null(ops, syslog); + set_to_dummy_if_null(ops, vm_enough_memory); set_to_dummy_if_null(ops, bprm_alloc_security); set_to_dummy_if_null(ops, bprm_free_security); set_to_dummy_if_null(ops, bprm_compute_creds);
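[Editorial aside, not part of the patch] The hunks above replace the old open-coded vm_enough_memory() check with the security_vm_enough_memory() hook, whose default policies are the cap_vm_enough_memory() and dummy_vm_enough_memory() bodies shown here. Note the sign flip: the removed helper returned 1 when the allocation could proceed, while the hook returns 0 on success and -ENOMEM on failure, which is why every caller's test changes from "if (!vm_enough_memory(...))" to "if (security_vm_enough_memory(...))". As a hedged illustration only, the standalone C sketch below restates the three overcommit policies in user space; the function name, parameters, and numbers are invented for the example, whereas the kernel versions read totalram_pages, nr_swap_pages, vm_committed_space, slab_reclaim_pages, and the vm.overcommit_* sysctls directly.

    /*
     * Hedged, standalone sketch (not from the patch) of the overcommit
     * decision implemented by cap_vm_enough_memory()/dummy_vm_enough_memory().
     * All inputs are plain parameters here purely for illustration.
     */
    #include <stdio.h>

    static int overcommit_memory = 0;  /* vm.overcommit_memory: 0, 1 or other */
    static int overcommit_ratio  = 50; /* vm.overcommit_ratio, in percent     */

    /* Return 0 if a mapping of 'pages' may be charged, -1 if not. */
    static int vm_enough_memory_sketch(long pages, long free_pages,
                                       long swap_pages, long total_ram,
                                       long total_swap, long committed,
                                       int is_root)
    {
        if (overcommit_memory == 1)         /* mode 1: always allow */
            return 0;

        if (overcommit_memory == 0) {       /* mode 0: heuristic check */
            long free = free_pages + swap_pages;
            if (!is_root)
                free -= free / 32;          /* leave roughly 3% for root */
            return free > pages ? 0 : -1;
        }

        /* any other mode: strict accounting against ram*ratio% + swap */
        long allowed = total_ram * overcommit_ratio / 100 + total_swap;
        return committed + pages < allowed ? 0 : -1;
    }

    int main(void)
    {
        /* e.g. ask for 1000 pages with 5000 pages free and no swap */
        printf("heuristic verdict: %d\n",
               vm_enough_memory_sketch(1000, 5000, 0, 100000, 0, 0, 0));
        return 0;
    }

The kernel callers also keep the accounting symmetric: they charge the pages up front (vm_acct_memory() inside the hook) and call vm_unacct_memory() on the failure paths, as the mm/mmap.c and mm/mremap.c hunks above show.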