# This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet v2.5.66 -> 1.999 # drivers/media/video/saa7110.c 1.9 -> 1.10 # arch/ppc64/kernel/pci.h 1.7 -> 1.8 # drivers/i2c/chips/lm75.c 1.7 -> 1.11 # include/asm-x86_64/hdreg.h 1.2 -> 1.3 # drivers/media/video/tda9887.c 1.3 -> 1.4 # drivers/media/video/bt856.c 1.7 -> 1.8 # arch/x86_64/kernel/traps.c 1.17 -> 1.18 # arch/x86_64/mm/fault.c 1.10 -> 1.11 # include/asm-x86_64/numa.h 1.1 -> 1.2 # arch/alpha/kernel/module.c 1.3 -> 1.4 # arch/x86_64/ia32/fpu32.c 1.5 -> 1.6 # drivers/i2c/chips/adm1021.c 1.7 -> 1.11 # net/ipv4/xfrm_input.c 1.8 -> 1.9 net/xfrm/xfrm_input.c (moved) # include/linux/aio.h 1.7 -> 1.8 # drivers/i2c/i2c-elektor.c 1.14 -> 1.15 # drivers/media/video/saa7185.c 1.9 -> 1.10 # arch/x86_64/kernel/setup.c 1.10 -> 1.11 # net/Kconfig 1.7 -> 1.8 # arch/alpha/kernel/pci.c 1.28 -> 1.29 # include/asm-ppc64/pci-bridge.h 1.5 -> 1.6 # drivers/media/video/bttv-if.c 1.10 -> 1.11 # drivers/i2c/i2c-philips-par.c 1.10 -> 1.11 # drivers/media/video/bt819.c 1.6 -> 1.7 # arch/ppc64/kernel/chrp_setup.c 1.20 -> 1.21 # arch/ppc64/Makefile 1.25 -> 1.26 # include/asm-x86_64/kdebug.h 1.2 -> 1.3 # drivers/i2c/i2c-core.c 1.24 -> 1.29 # arch/x86_64/kernel/apic.c 1.14 -> 1.15 # drivers/media/video/saa7134/saa7134-i2c.c 1.6 -> 1.7 # fs/jfs/jfs_imap.c 1.20 -> 1.21 # include/linux/i2c.h 1.16 -> 1.21 # net/ipv4/Makefile 1.15 -> 1.16 # arch/x86_64/kernel/smp.c 1.13 -> 1.14 # arch/x86_64/kernel/i387.c 1.5 -> 1.6 # include/net/sctp/sctp.h 1.25 -> 1.26 # net/ipv4/tcp.c 1.36 -> 1.37 # arch/x86_64/ia32/ptrace32.c 1.5 -> 1.6 # arch/x86_64/kernel/setup64.c 1.11 -> 1.12 # arch/alpha/kernel/smp.c 1.32 -> 1.33 # include/asm-alpha/pgtable.h 1.17 -> 1.18 # drivers/i2c/busses/i2c-piix4.c 1.7 -> 1.8 # arch/x86_64/kernel/ptrace.c 1.11 -> 1.12 # include/asm-ppc64/machdep.h 1.13 -> 1.14 # 
drivers/media/video/tvaudio.c 1.15 -> 1.16 # arch/x86_64/kernel/entry.S 1.10 -> 1.11 # drivers/ieee1394/pcilynx.c 1.25 -> 1.26 # net/ipv4/xfrm_algo.c 1.6 -> 1.7 net/xfrm/xfrm_algo.c (moved) # drivers/s390/net/ctcmain.c 1.17 -> 1.18 # include/asm-ppc64/pci.h 1.4 -> 1.5 # arch/alpha/kernel/sys_nautilus.c 1.11 -> 1.12 # arch/x86_64/kernel/head64.c 1.6 -> 1.7 # net/netsyms.c 1.56 -> 1.57 # drivers/media/video/saa7111.c 1.7 -> 1.8 # include/asm-x86_64/pci.h 1.5 -> 1.6 # arch/x86_64/boot/compressed/misc.c 1.5 -> 1.6 # net/Makefile 1.21 -> 1.22 # drivers/media/video/tuner-3036.c 1.6 -> 1.7 # arch/x86_64/ia32/sys_ia32.c 1.25 -> 1.26 # arch/x86_64/kernel/pci-gart.c 1.7 -> 1.8 # drivers/media/video/tda9875.c 1.10 -> 1.11 # arch/ppc64/kernel/irq.c 1.22 -> 1.23 # arch/x86_64/ia32/ipc32.c 1.6 -> 1.7 # fs/aio.c 1.29 -> 1.30 # arch/x86_64/kernel/nmi.c 1.9 -> 1.10 # drivers/video/matrox/i2c-matroxfb.c 1.6 -> 1.7 # drivers/i2c/scx200_acb.c 1.3 -> 1.6 # arch/x86_64/kernel/suspend.c 1.2 -> 1.3 # fs/jfs/jfs_xtree.c 1.8 -> 1.9 # drivers/i2c/busses/i2c-i801.c 1.7 -> 1.8 # net/ipv4/xfrm_policy.c 1.21 -> 1.22 net/xfrm/xfrm_policy.c (moved) # include/asm-x86_64/i387.h 1.7 -> 1.8 # drivers/i2c/busses/i2c-ali15x3.c 1.6 -> 1.7 # drivers/i2c/i2c-proc.c 1.18 -> 1.19 # drivers/i2c/i2c-algo-bit.c 1.13 -> 1.14 # arch/x86_64/ia32/ia32_ioctl.c 1.16 -> 1.17 # include/asm-x86_64/suspend.h 1.3 -> 1.4 # fs/jfs/jfs_extent.c 1.7 -> 1.8 # drivers/i2c/busses/i2c-isa.c 1.1 -> 1.2 # arch/x86_64/kernel/process.c 1.15 -> 1.16 # drivers/s390/net/netiucv.c 1.17 -> 1.18 # net/ipv4/xfrm_user.c 1.15 -> 1.17 net/xfrm/xfrm_user.c (moved) # arch/x86_64/pci/irq.c 1.6 -> 1.7 # fs/jfs/jfs_txnmgr.c 1.40 -> 1.42 # arch/ppc64/kernel/process.c 1.29 -> 1.30 # include/asm-alpha/fcntl.h 1.2 -> 1.3 # drivers/media/video/msp3400.c 1.15 -> 1.16 # arch/x86_64/kernel/head.S 1.9 -> 1.10 # include/asm-x86_64/unistd.h 1.13 -> 1.14 # include/asm-x86_64/debugreg.h 1.1 -> 1.2 # arch/x86_64/kernel/signal.c 1.15 -> 1.16 # 
include/asm-x86_64/thread_info.h 1.8 -> 1.9 # fs/jfs/jfs_dtree.c 1.20 -> 1.21 # drivers/media/video/tda7432.c 1.8 -> 1.9 # arch/ppc64/kernel/prom.c 1.21 -> 1.22 # net/ipv4/xfrm_state.c 1.15 -> 1.17 net/xfrm/xfrm_state.c (moved) # include/asm-x86_64/pgtable.h 1.16 -> 1.17 # arch/ppc64/kernel/iSeries_setup.c 1.9 -> 1.10 # arch/x86_64/kernel/Makefile 1.18 -> 1.19 # arch/x86_64/kernel/aperture.c 1.2 -> 1.3 # drivers/media/video/saa5249.c 1.12 -> 1.13 # net/ipv4/Kconfig 1.4 -> 1.5 # arch/ppc64/kernel/stab.c 1.8 -> 1.10 # drivers/i2c/i2c-dev.c 1.23.1.1 -> 1.26 # drivers/i2c/i2c-algo-pcf.c 1.10 -> 1.11 # arch/x86_64/Kconfig 1.16 -> 1.17 # drivers/i2c/i2c-elv.c 1.10 -> 1.12 # drivers/i2c/i2c-velleman.c 1.8 -> 1.9 # drivers/char/drm/drm_drv.h 1.13 -> 1.14 # net/ipv6/udp.c 1.20 -> 1.21 # drivers/i2c/busses/i2c-amd8111.c 1.6 -> 1.7 # arch/x86_64/mm/numa.c 1.1 -> 1.2 # include/asm-x86_64/system.h 1.12 -> 1.13 # include/net/xfrm.h 1.22 -> 1.23 # arch/x86_64/kernel/reboot.c 1.2 -> 1.3 # drivers/media/video/tuner.c 1.15 -> 1.16 # drivers/i2c/busses/i2c-amd756.c 1.5 -> 1.6 # arch/ppc64/kernel/pci.c 1.27 -> 1.29 # arch/ppc64/boot/Makefile 1.12 -> 1.13 # include/asm-x86_64/proto.h 1.8 -> 1.9 # arch/x86_64/Makefile 1.24 -> 1.25 # include/asm-x86_64/apic.h 1.4 -> 1.5 # arch/x86_64/kernel/bluesmoke.c 1.8 -> 1.9 # arch/alpha/kernel/err_titan.c 1.2 -> 1.3 # drivers/media/video/adv7175.c 1.8 -> 1.9 # arch/x86_64/mm/ioremap.c 1.8 -> 1.9 # include/linux/netdevice.h 1.31 -> 1.32 # include/asm-ppc64/pgtable.h 1.17.1.1 -> 1.21 # arch/x86_64/boot/setup.S 1.4 -> 1.5 # arch/x86_64/ia32/ia32_signal.c 1.11 -> 1.12 # include/asm-x86_64/processor.h 1.13 -> 1.14 # arch/x86_64/mm/k8topology.c 1.1 -> 1.2 # arch/ppc64/kernel/iSeries_pci.c 1.9 -> 1.10 # drivers/acpi/Kconfig 1.7 -> 1.8 # arch/ppc64/kernel/pSeries_lpar.c 1.19 -> 1.20 # drivers/acorn/char/i2c.c 1.6 -> 1.7 # arch/x86_64/kernel/smpboot.c 1.15 -> 1.16 # include/asm-x86_64/desc.h 1.8 -> 1.9 # arch/ppc64/kernel/pSeries_pci.c 1.22 -> 1.23 # (new) 
-> 1.1 net/xfrm/Kconfig # (new) -> 1.1 net/xfrm/Makefile # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 03/03/24 torvalds@penguin.transmeta.com 1.985.1.92 # Linux 2.5.66 # -------------------------------------------- # 03/03/25 anton@samba.org 1.989 # Merge samba.org:/scratch/anton/linux-2.5 # into samba.org:/scratch/anton/tmp3 # -------------------------------------------- # 03/03/25 anton@samba.org 1.990 # Merge samba.org:/scratch/anton/linux-2.5 # into samba.org:/scratch/anton/tmp3 # -------------------------------------------- # 03/03/24 greg@kroah.com 1.985.2.4 # [PATCH] i2c: set up a "generic" i2c driver to prevent oopses when devices are registering. # # This is needed as we are still not using the driver core model for # matching up devices to drivers, but doing it by hand. Once that is # changed, this will not be needed. # -------------------------------------------- # 03/03/24 greg@kroah.com 1.985.1.93 # Merge kroah.com:/home/greg/linux/BK/bleed-2.5 # into kroah.com:/home/greg/linux/BK/i2c-2.5 # -------------------------------------------- # 03/03/25 anton@samba.org 1.991 # ppc64: Rework pci probe to be like alpha. # -------------------------------------------- # 03/03/24 davem@nuts.ninka.net 1.985.9.1 # Merge nuts.ninka.net:/home/davem/src/BK/network-2.5 # into nuts.ninka.net:/home/davem/src/BK/net-2.5 # -------------------------------------------- # 03/03/25 paulus@samba.org 1.992 # ppc64: Add missing RELOCs # -------------------------------------------- # 03/03/24 hch@lst.de 1.985.9.2 # [NET]: Kill dev_init_buffers, was scheduled to die in 2.5.x # -------------------------------------------- # 03/03/24 toml@us.ibm.com 1.985.9.3 # [IPSEC]: Fix IPV6 UDP policy checking. 
# -------------------------------------------- # 03/03/25 anton@samba.org 1.993 # ppc64: fix pci probe on large bus systems # -------------------------------------------- # 03/03/25 anton@samba.org 1.985.10.1 # Merge samba.org:/scratch/anton/linux-2.5 # into samba.org:/scratch/anton/linux-2.5_ppc64 # -------------------------------------------- # 03/03/25 anton@samba.org 1.994 # Merge samba.org:/scratch/anton/export # into samba.org:/scratch/anton/linux-2.5_ppc64 # -------------------------------------------- # 03/03/25 anton@samba.org 1.992.1.1 # ppc64: Disable 32bit SLB invalidation optimisation for the moment # -------------------------------------------- # 03/03/25 anton@samba.org 1.993.1.1 # Merge samba.org:/scratch/anton/export into samba.org:/scratch/anton/tmp3 # -------------------------------------------- # 03/03/25 anton@samba.org 1.993.1.2 # ppc64: Fix problem with casting out the segment for our kernel stack # -------------------------------------------- # 03/03/25 anton@samba.org 1.995 # Merge samba.org:/scratch/anton/export # into samba.org:/scratch/anton/linux-2.5_ppc64 # -------------------------------------------- # 03/03/24 rth@are.twiddle.net 1.985.11.1 # Merge are.twiddle.net:/home/rth/BK/linus-2.5 # into are.twiddle.net:/home/rth/BK/axp-2.5 # -------------------------------------------- # 03/03/24 ink@jurassic.park.msu.ru 1.985.11.2 # [PATCH] alpha: file offset in pte # # It's possible to squeeze more bits out of lower half of pte, # but 32 seem to be plenty... # # Ivan. # -------------------------------------------- # 03/03/24 ink@jurassic.park.msu.ru 1.985.11.3 # [PATCH] alpha: handle unaligned REFQUADs produced by BUG() macro # # -------------------------------------------- # 03/03/24 ink@jurassic.park.msu.ru 1.985.11.4 # [PATCH] alpha: pci update # # - Check for parent PCI bus instead of bridge device to break the # loop in common_swizzle(). Functionally it's the same, but it's # cleaner for PC-style host bridges (nautiluses). 
# - Generic PCI setup changes finally went in (thanks to rmk), so that # FIXME in common_init_pci() can go. # # Ivan. # -------------------------------------------- # 03/03/24 ink@jurassic.park.msu.ru 1.985.11.5 # [PATCH] alpha: nautilus_init_pci() cleanup # # -------------------------------------------- # 03/03/24 ink@jurassic.park.msu.ru 1.985.11.6 # [PATCH] alpha: fix jiffies compile warning in smp.c # # Fix more annoying compile problems due to wrong types # for comparing jiffies. This patch applies to alpha arch. # # From Thomas Weyergraf. # # Ivan. # -------------------------------------------- # 03/03/24 torvalds@home.transmeta.com 1.985.1.94 # Merge bk://are.twiddle.net/axp-2.5/ # into home.transmeta.com:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/03/24 davem@nuts.ninka.net 1.985.9.4 # [TCP]: Forward port of 2.4.x bugfix, noticed as missing by davej@codemonkey.org.uk. # # In tcp_sendmsg, make sure we jump to the out label # when seglen is decremented to zero and no more iovecs remain. # This matches the do_tcp_sendpages logic and makes sure that # PSH is set correctly at the end of a write even if the write length # equals the current mss. # -------------------------------------------- # 03/03/24 sfr@canb.auug.org.au 1.985.9.5 # [SCTP]: Fix IRQ flags warnings. 
# -------------------------------------------- # 03/03/24 yoshfuji@linux-ipv6.org 1.985.9.6 # [IPSEC]: Move xfrm_*.c into net/xfrm/ # -------------------------------------------- # 03/03/24 davem@kernel.bkbits.net 1.985.1.95 # Merge davem@nuts.ninka.net:/home/davem/src/BK/net-2.5 # into kernel.bkbits.net:/home/davem/net-2.5 # -------------------------------------------- # 03/03/24 davem@nuts.ninka.net 1.985.12.1 # [DRM]: Fix warnings and build errors introduced by previous changes to drm_drv.h # -------------------------------------------- # 03/03/24 davem@kernel.bkbits.net 1.985.13.1 # Merge davem@nuts.ninka.net:/home/davem/src/BK/sparc-2.5 # into kernel.bkbits.net:/home/davem/sparc-2.5 # -------------------------------------------- # 03/03/25 shaggy@shaggy.austin.ibm.com 1.985.14.1 # Merge jfs@jfs.bkbits.net:linux-2.5 # into shaggy.austin.ibm.com:/shaggy/bk/jfs-2.5 # -------------------------------------------- # 03/03/25 torvalds@home.transmeta.com 1.985.13.2 # Merge bk://kernel.bkbits.net/davem/sparc-2.5 # into home.transmeta.com:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/03/25 torvalds@home.transmeta.com 1.985.1.96 # Merge bk://kernel.bkbits.net/davem/net-2.5 # into home.transmeta.com:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/03/25 torvalds@home.transmeta.com 1.996 # Merge http://ppc.bkbits.net/for-linus-ppc64 # into home.transmeta.com:/home/torvalds/v2.5/linux # -------------------------------------------- # 03/03/25 ak@suse.de 1.997 # [PATCH] aio compat patches # # The new aio emulation in x86-64 needs a few aio symbols exported. # Export them. # -------------------------------------------- # 03/03/25 ak@suse.de 1.998 # [PATCH] ACPI NUMA option fix for x86-64 # # The ACPI NUMA support doesn't work on x86-64 currently (no SRAT # parsing yet). Disable for now. # -------------------------------------------- # 03/03/25 ak@suse.de 1.999 # [PATCH] x86-64 updates # # Lots of x86-64 updates. 
Merge with 2.4 and NUMA works now. Also reenabled # the preemptive kernel. And some other bug fixes. # IOMMU disabled by default now because it has problems. # # - Add more CONFIG options for device driver debugging and iommu # force/debug. (don't enable iommu force currently) # - Some S3/ACPI fixes/cleanups from Pavel. # - Set MSG_COMPAT_* in msg_flags for networking 32bit emulation. # This unfortunately still doesn't fix the fd passing problems. # - Sync PCI IOMMU code with 2.4 (minor fixes, flush less often) # - Really fix UP compilation (Pavel) # - Reenable preempt # - Fix CONFIG_DISCONTIGMEM bootup and enable. Still needs more tuning. # - Fix some bugs in topology discovery and clean code up. # - Don't put unwind tables into object files # - Some kernel debugging hooks # - Move CPU detection into early real mode code to better interact with # vesafb consoles # - Initialize mode in real mode character output # - New 32bit FPU signal save/restore # - Various fixes in FPU handling in ptrace # - Fix security holes in ptrace (32bit and 64bit) # - Fix serial ioctl (including security hole) # - Add bluetooth ioctls to 32bit emu (from sparc64) # - Correctly enable si_val in queued signals in 32bit emulation # - Rework SEM_STAT emulation. LTP still fails unfortunately. # - Fix error case in msg* emulation # - Fix debug register access from ptrace (Michal Ludvig, me) # - Fix handling of NULL arguments in 32bit execve # - Fix some error cases for 32bit readv/writev (LTP still complains) # - Remove rate control from unimplemented syscall warnings # - Fix error message for missing aperture # - Turn some APIC printks into Dprintk to make the bootup more quiet # - Some fixes for no APIC (probably still broken), add disableapic # option (untested) # - Sync K8 MCE handler with 2.4. Should work a lot better now. 
# - Remove never used KDB hooks # - Fix buffer overflow in command line copying # - Merge from i386: use separate status word for lazy FPU state # - Don't force the IOMMU for dma masks < 4GB. # - Print backtrace in Sysrq-T (from Andrea) # - Merge from i386: fix FPU race in fork. # - Disable NX mode by default for now # - Rewrite dump_pagetable # - Fix off by one bug in ioremap (i386 merge) # - Merge from i386: handle VIA pci bridge bugs # - Disable NUMA ACPI support (no SRAT support yet) # - Fix aio 32bit emulation # - Increase 32bit address space to nearly 4GB # - Add exit_group syscall # - Fix TLS setting in clone (Ulrich Drepper) # -------------------------------------------- # diff -Nru a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c --- a/arch/alpha/kernel/err_titan.c Tue Mar 25 18:36:42 2003 +++ b/arch/alpha/kernel/err_titan.c Tue Mar 25 18:36:42 2003 @@ -22,8 +22,10 @@ static int titan_parse_c_misc(u64 c_misc, int print) { +#ifdef CONFIG_VERBOSE_MCHECK char *src; int nxs = 0; +#endif int status = MCHK_DISPOSITION_REPORT; #define TITAN__CCHIP_MISC__NXM (1UL << 28) @@ -263,11 +265,11 @@ static int titan_parse_p_agperror(int which, u64 agperror, int print) { + int status = MCHK_DISPOSITION_REPORT; +#ifdef CONFIG_VERBOSE_MCHECK int cmd, len; unsigned long addr; - int status = MCHK_DISPOSITION_REPORT; -#ifdef CONFIG_VERBOSE_MCHECK char *agperror_cmd[] = { "Read (low-priority)", "Read (high-priority)", "Write (low-priority)", "Write (high-priority)", @@ -575,14 +577,14 @@ static int privateer_process_680_frame(struct el_common *mchk_header, int print) { + int status = MCHK_DISPOSITION_UNKNOWN_ERROR; +#ifdef CONFIG_VERBOSE_MCHECK struct el_PRIVATEER_envdata_mcheck *emchk = (struct el_PRIVATEER_envdata_mcheck *) ((unsigned long)mchk_header + mchk_header->sys_offset); - int status = MCHK_DISPOSITION_UNKNOWN_ERROR; /* TODO - catagorize errors, for now, no error */ -#ifdef CONFIG_VERBOSE_MCHECK if (!print) return status; diff -Nru 
a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c --- a/arch/alpha/kernel/module.c Tue Mar 25 18:36:42 2003 +++ b/arch/alpha/kernel/module.c Tue Mar 25 18:36:42 2003 @@ -199,7 +199,9 @@ case R_ALPHA_NONE: break; case R_ALPHA_REFQUAD: - *(u64 *)location = value; + /* BUG() can produce misaligned relocations. */ + ((u32 *)location)[0] = value; + ((u32 *)location)[1] = value >> 32; break; case R_ALPHA_GPREL32: value -= gp; diff -Nru a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c --- a/arch/alpha/kernel/pci.c Tue Mar 25 18:36:42 2003 +++ b/arch/alpha/kernel/pci.c Tue Mar 25 18:36:42 2003 @@ -285,7 +285,7 @@ pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)); /* Move up the chain of bridges. */ dev = dev->bus->self; - } while (dev->bus->self); + } while (dev->bus->parent); *pinp = pin; /* The slot is the slot of the last bridge. */ @@ -410,10 +410,8 @@ if (pci_probe_only) pcibios_claim_console_setup(); - else /* FIXME: `else' will be removed when - pci_assign_unassigned_resources() is able to work - correctly with [partially] allocated PCI tree. */ - pci_assign_unassigned_resources(); + + pci_assign_unassigned_resources(); pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq); } diff -Nru a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c --- a/arch/alpha/kernel/smp.c Tue Mar 25 18:36:42 2003 +++ b/arch/alpha/kernel/smp.c Tue Mar 25 18:36:42 2003 @@ -112,7 +112,7 @@ static void __init wait_boot_cpu_to_stop(int cpuid) { - long stop = jiffies + 10*HZ; + unsigned long stop = jiffies + 10*HZ; while (time_before(jiffies, stop)) { if (!smp_secondary_alive) @@ -349,7 +349,7 @@ { struct percpu_struct *cpu; struct pcb_struct *hwpcb, *ipcb; - long timeout; + unsigned long timeout; cpu = (struct percpu_struct *) ((char*)hwrpb @@ -428,7 +428,7 @@ smp_boot_one_cpu(int cpuid) { struct task_struct *idle; - long timeout; + unsigned long timeout; /* Cook up an idler for this guy. 
Note that the address we give to kernel_thread is irrelevant -- it's going to start @@ -816,7 +816,7 @@ int wait, unsigned long to_whom) { struct smp_call_struct data; - long timeout; + unsigned long timeout; int num_cpus_to_call; data.func = func; diff -Nru a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c --- a/arch/alpha/kernel/sys_nautilus.c Tue Mar 25 18:36:42 2003 +++ b/arch/alpha/kernel/sys_nautilus.c Tue Mar 25 18:36:42 2003 @@ -183,14 +183,17 @@ extern void free_reserved_mem(void *, void *); +static struct resource irongate_mem = { + .name = "Irongate PCI MEM", + .flags = IORESOURCE_MEM, +}; + void __init nautilus_init_pci(void) { struct pci_controller *hose = hose_head; struct pci_bus *bus; struct pci_dev *irongate; - unsigned long saved_io_start, saved_io_end; - unsigned long saved_mem_start, saved_mem_end; unsigned long bus_align, bus_size, pci_mem; unsigned long memtop = max_low_pfn << PAGE_SHIFT; @@ -199,50 +202,41 @@ hose->bus = bus; hose->last_busno = bus->subordinate; - /* We're going to size the root bus, so we must - - have a non-NULL PCI device associated with the bus - - preserve hose resources. */ irongate = pci_find_slot(0, 0); bus->self = irongate; - saved_io_start = bus->resource[0]->start; - saved_io_end = bus->resource[0]->end; - saved_mem_start = bus->resource[1]->start; - saved_mem_end = bus->resource[1]->end; + bus->resource[1] = &irongate_mem; pci_bus_size_bridges(bus); - /* Don't care about IO. */ - bus->resource[0]->start = saved_io_start; - bus->resource[0]->end = saved_io_end; + /* IO port range. */ + bus->resource[0]->start = 0; + bus->resource[0]->end = 0xffff; + /* Set up PCI memory range - limit is hardwired to 0xffffffff, + base must be at aligned to 16Mb. */ bus_align = bus->resource[1]->start; bus_size = bus->resource[1]->end + 1 - bus_align; - /* Align to 16Mb. */ if (bus_align < 0x1000000UL) bus_align = 0x1000000UL; - /* Restore hose MEM resource. 
*/ - bus->resource[1]->start = saved_mem_start; - bus->resource[1]->end = saved_mem_end; - pci_mem = (0x100000000UL - bus_size) & -bus_align; + bus->resource[1]->start = pci_mem; + bus->resource[1]->end = 0xffffffffUL; + if (request_resource(&iomem_resource, bus->resource[1]) < 0) + printk(KERN_ERR "Failed to request MEM on hose 0\n"); + if (pci_mem < memtop && pci_mem > alpha_mv.min_mem_address) { free_reserved_mem(__va(alpha_mv.min_mem_address), __va(pci_mem)); - printk("nautilus_init_arch: %ldk freed\n", + printk("nautilus_init_pci: %ldk freed\n", (pci_mem - alpha_mv.min_mem_address) >> 10); } - alpha_mv.min_mem_address = pci_mem; if ((IRONGATE0->dev_vendor >> 16) > 0x7006) /* Albacore? */ IRONGATE0->pci_mem = pci_mem; pci_bus_assign_resources(bus); - - /* To break the loop in common_swizzle() */ - bus->self = NULL; - pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq); } diff -Nru a/arch/ppc64/Makefile b/arch/ppc64/Makefile --- a/arch/ppc64/Makefile Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/Makefile Tue Mar 25 18:36:42 2003 @@ -13,10 +13,10 @@ # Adjusted for PPC64 by Tom Gall # -KERNELLOAD = 0xc000000000000000 +KERNELLOAD := 0xc000000000000000 LDFLAGS := -m elf64ppc -LDFLAGS_vmlinux = -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) +LDFLAGS_vmlinux := -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD) LDFLAGS_BLOB := --format binary --oformat elf64-powerpc CFLAGS += -msoft-float -pipe -Wno-uninitialized -mminimal-toc \ -mtraceback=full -mcpu=power4 @@ -29,21 +29,19 @@ core-$(CONFIG_XMON) += arch/ppc64/xmon/ drivers-$(CONFIG_OPROFILE) += arch/ppc64/oprofile/ -makeboot =$(Q)$(MAKE) -f scripts/Makefile.build obj=arch/ppc64/boot $(1) +boot := arch/ppc64/boot boottarget-$(CONFIG_PPC_PSERIES) := zImage zImage.initrd boottarget-$(CONFIG_PPC_ISERIES) := vmlinux.sminitrd vmlinux.initrd vmlinux.sm $(boottarget-y): vmlinux - $(call makeboot,arch/ppc64/boot/$@) + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ %_config: arch/ppc64/configs/%_defconfig rm -f .config 
arch/ppc64/defconfig cp -f arch/ppc64/configs/$(@:config=defconfig) arch/ppc64/defconfig archclean: - $(Q)$(MAKE) -f scripts/Makefile.clean obj=arch/ppc64/boot - -archmrproper: + $(Q)$(MAKE) $(clean)=$(boot) prepare: include/asm-ppc64/offsets.h diff -Nru a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile --- a/arch/ppc64/boot/Makefile Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/boot/Makefile Tue Mar 25 18:36:42 2003 @@ -79,7 +79,7 @@ $(obj)/vmlinux.sminitrd: $(obj)/vmlinux.sm $(obj)/addRamDisk $(obj)/ramdisk.image.gz FORCE $(call if_changed,ramdisk) -$(obj)/sysmap.o: System.map $(obj)/piggyback +$(obj)/sysmap.o: System.map $(obj)/piggyback FORCE $(call if_changed,piggy) addsection = $(BOOTOBJCOPY) $(1) \ @@ -92,7 +92,7 @@ quiet_cmd_piggy = PIGGY $@ cmd_piggy = $(obj)/piggyback $(@:.o=) < $< | $(BOOTAS) -o $@ -$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % +$(call gz-sec, $(required)): $(obj)/kernel-%.gz: % FORCE $(call if_changed,gzip) $(obj)/kernel-initrd.gz: $(obj)/ramdisk.image.gz @@ -101,7 +101,7 @@ $(call src-sec, $(required) $(initrd)): $(obj)/kernel-%.c: $(obj)/kernel-%.gz touch $@ -$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c +$(call obj-sec, $(required) $(initrd)): $(obj)/kernel-%.o: $(obj)/kernel-%.c FORCE $(call if_changed_dep,bootcc) $(call addsection, $@) diff -Nru a/arch/ppc64/kernel/chrp_setup.c b/arch/ppc64/kernel/chrp_setup.c --- a/arch/ppc64/kernel/chrp_setup.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/chrp_setup.c Tue Mar 25 18:36:42 2003 @@ -71,9 +71,6 @@ extern void init_ras_IRQ(void); extern void find_and_init_phbs(void); -extern void pSeries_pcibios_fixup(void); -extern void pSeries_pcibios_fixup_bus(struct pci_bus *bus); -extern void iSeries_pcibios_fixup(void); extern void pSeries_get_rtc_time(struct rtc_time *rtc_time); extern int pSeries_set_rtc_time(struct rtc_time *rtc_time); @@ -201,7 +198,6 @@ hpte_init_pSeries(); tce_init_pSeries(); - pSeries_pcibios_init_early(); #ifdef CONFIG_SMP 
smp_init_pSeries(); @@ -243,15 +239,6 @@ ppc_md.get_irq = xics_get_irq; } ppc_md.init_ras_IRQ = init_ras_IRQ; - - #ifndef CONFIG_PPC_ISERIES - ppc_md.pcibios_fixup = pSeries_pcibios_fixup; - ppc_md.pcibios_fixup_bus = pSeries_pcibios_fixup_bus; - #else - ppc_md.pcibios_fixup = NULL; - // ppc_md.pcibios_fixup = iSeries_pcibios_fixup; - #endif - ppc_md.init = chrp_init2; diff -Nru a/arch/ppc64/kernel/iSeries_pci.c b/arch/ppc64/kernel/iSeries_pci.c --- a/arch/ppc64/kernel/iSeries_pci.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/iSeries_pci.c Tue Mar 25 18:36:42 2003 @@ -84,8 +84,6 @@ struct iSeries_Device_Node* get_Device_Node(struct pci_dev* PciDev); unsigned long find_and_init_phbs(void); -void fixup_resources(struct pci_dev *dev); -void iSeries_pcibios_fixup(void); struct pci_controller* alloc_phb(struct device_node *dev, char *model, unsigned int addr_size_words) ; void iSeries_Scan_PHBs_Slots(struct pci_controller* Phb); @@ -275,7 +273,7 @@ return 0; } /*********************************************************************** - * ppc64_pcibios_init + * iSeries_pcibios_init * * Chance to initialize and structures or variable before PCI Bus walk. * @@ -302,9 +300,9 @@ PPCDBG(PPCDBG_BUSWALK,"iSeries_pcibios_init Exit.\n"); } /*********************************************************************** - * iSeries_pcibios_fixup(void) + * pcibios_final_fixup(void) ***********************************************************************/ -void __init iSeries_pcibios_fixup(void) +void __init pcibios_final_fixup(void) { struct pci_dev* PciDev; struct iSeries_Device_Node* DeviceNode; @@ -328,8 +326,6 @@ iSeries_allocateDeviceBars(PciDev); - PPCDBGCALL(PPCDBG_BUSWALK,dumpPci_Dev(PciDev) ); - iSeries_Device_Information(PciDev,Buffer, sizeof(Buffer) ); printk("%d. 
%s\n",DeviceCount,Buffer); @@ -345,11 +341,7 @@ mf_displaySrc(0xC9000200); } -/*********************************************************************** - * iSeries_pcibios_fixup_bus(int Bus) - * - ***********************************************************************/ -void iSeries_pcibios_fixup_bus(struct pci_bus* PciBus) +void pcibios_fixup_bus(struct pci_bus* PciBus) { PPCDBG(PPCDBG_BUSWALK,"iSeries_pcibios_fixup_bus(0x%04X) Entry.\n",PciBus->number); @@ -357,12 +349,12 @@ /*********************************************************************** - * fixup_resources(struct pci_dev *dev) + * pcibios_fixup_resources(struct pci_dev *dev) * ***********************************************************************/ -void fixup_resources(struct pci_dev *PciDev) +void pcibios_fixup_resources(struct pci_dev *PciDev) { - PPCDBG(PPCDBG_BUSWALK,"fixup_resources PciDev %p\n",PciDev); + PPCDBG(PPCDBG_BUSWALK,"pcibios_fixup_resources PciDev %p\n",PciDev); } @@ -910,18 +902,3 @@ } while (CheckReturnCode("WWL",DevNode, Return.rc) != 0); if(Pci_Trace_Flag == 1) PCIFR("WWL: IoAddress 0x%p = 0x%08X",IoAddress, Data); } -/* - * This is called very early before the page table is setup. - * There are warnings here because of type mismatches.. Okay for now. 
AHT - */ -void -iSeries_pcibios_init_early(void) -{ - //ppc_md.pcibios_read_config_byte = iSeries_Node_read_config_byte; - //ppc_md.pcibios_read_config_word = iSeries_Node_read_config_word; - //ppc_md.pcibios_read_config_dword = iSeries_Node_read_config_dword; - //ppc_md.pcibios_write_config_byte = iSeries_Node_write_config_byte; - //ppc_md.pcibios_write_config_word = iSeries_Node_write_config_word; - //ppc_md.pcibios_write_config_dword = iSeries_Node_write_config_dword; -} - diff -Nru a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c --- a/arch/ppc64/kernel/iSeries_setup.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/iSeries_setup.c Tue Mar 25 18:36:42 2003 @@ -62,8 +62,6 @@ pte_t * ptep, unsigned hpteflags, unsigned bolted ); extern void ppcdbg_initialize(void); extern void iSeries_pcibios_init(void); -extern void iSeries_pcibios_fixup(void); -extern void iSeries_pcibios_fixup_bus(int); static void iSeries_setup_dprofile(void); /* Global Variables */ @@ -316,9 +314,6 @@ ppc_md.init_ras_IRQ = NULL; ppc_md.get_irq = iSeries_get_irq; ppc_md.init = NULL; - - ppc_md.pcibios_fixup = iSeries_pcibios_fixup; - ppc_md.pcibios_fixup_bus = iSeries_pcibios_fixup_bus; ppc_md.restart = iSeries_restart; ppc_md.power_off = iSeries_power_off; diff -Nru a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c --- a/arch/ppc64/kernel/pSeries_lpar.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/pSeries_lpar.c Tue Mar 25 18:36:42 2003 @@ -320,7 +320,6 @@ #ifdef CONFIG_SMP smp_init_pSeries(); #endif - pSeries_pcibios_init_early(); /* The keyboard is not useful in the LPAR environment. * Leave all the interfaces NULL. 
diff -Nru a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c --- a/arch/ppc64/kernel/pSeries_pci.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/pSeries_pci.c Tue Mar 25 18:36:42 2003 @@ -2,6 +2,7 @@ * pSeries_pci.c * * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard , IBM * * pSeries specific routines for PCI. * @@ -51,6 +52,8 @@ static int s7a_workaround; +extern unsigned long pci_probe_only; + static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) { unsigned long returnval = ~0L; @@ -371,9 +374,6 @@ phb->last_busno += (phb->global_number << 8); } - /* Dump PHB information for Debug */ - PPCDBGCALL(PPCDBG_PHBINIT, dumpPci_Controller(phb)); - return phb; } @@ -423,129 +423,96 @@ return 0; } -void -fixup_resources(struct pci_dev *dev) +void pcibios_name_device(struct pci_dev *dev) { - int i; - struct pci_controller *phb = PCI_GET_PHB_PTR(dev); struct device_node *dn; - /* Add IBM loc code (slot) as a prefix to the device names for service */ + /* + * Add IBM loc code (slot) as a prefix to the device names for service + */ dn = pci_device_to_OF_node(dev); if (dn) { char *loc_code = get_property(dn, "ibm,loc-code", 0); if (loc_code) { int loc_len = strlen(loc_code); if (loc_len < sizeof(dev->dev.name)) { - memmove(dev->dev.name+loc_len+1, dev->dev.name, sizeof(dev->dev.name)-loc_len-1); + memmove(dev->dev.name+loc_len+1, dev->dev.name, + sizeof(dev->dev.name)-loc_len-1); memcpy(dev->dev.name, loc_code, loc_len); dev->dev.name[loc_len] = ' '; dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; } } } +} - PPCDBG(PPCDBG_PHBINIT, "fixup_resources:\n"); - PPCDBG(PPCDBG_PHBINIT, "\tphb = 0x%016LX\n", phb); - PPCDBG(PPCDBG_PHBINIT, "\tphb->pci_io_offset = 0x%016LX\n", phb->pci_io_offset); - PPCDBG(PPCDBG_PHBINIT, "\tphb->pci_mem_offset = 0x%016LX\n", phb->pci_mem_offset); - - PPCDBG(PPCDBG_PHBINIT, "\tdev->dev.name = %s\n", dev->dev.name); - PPCDBG(PPCDBG_PHBINIT, "\tdev->vendor:device = 
0x%04X : 0x%04X\n", dev->vendor, dev->device); - - if (phb == NULL) - return; - - for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) { - PPCDBG(PPCDBG_PHBINIT, "\tdevice %x.%x[%d] (flags %x) [%lx..%lx]\n", - dev->bus->number, dev->devfn, i, - dev->resource[i].flags, - dev->resource[i].start, - dev->resource[i].end); - - if ((dev->resource[i].start == 0) && (dev->resource[i].end == 0)) { - continue; - } - if (dev->resource[i].start > dev->resource[i].end) { - /* Bogus resource. Just clear it out. */ - dev->resource[i].start = dev->resource[i].end = 0; - continue; - } +void __init pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus) +{ + /* Update device resources. */ + struct pci_controller *hose = PCI_GET_PHB_PTR(bus); + int i; + for (i = 0; i < PCI_NUM_RESOURCES; i++) { if (dev->resource[i].flags & IORESOURCE_IO) { - unsigned long offset = (unsigned long)phb->io_base_virt - pci_io_base; + unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; dev->resource[i].start += offset; dev->resource[i].end += offset; - PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx .. %lx]\n", - dev->resource[i].start, dev->resource[i].end); - } else if (dev->resource[i].flags & IORESOURCE_MEM) { - if (dev->resource[i].start == 0) { - /* Bogus. Probably an unused bridge. 
*/ - dev->resource[i].end = 0; - } else { - dev->resource[i].start += phb->pci_mem_offset; - dev->resource[i].end += phb->pci_mem_offset; - } - PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx..%lx]\n", - dev->resource[i].start, dev->resource[i].end); - - } else { - continue; } + else if (dev->resource[i].flags & IORESOURCE_MEM) { + dev->resource[i].start += hose->pci_mem_offset; + dev->resource[i].end += hose->pci_mem_offset; + } + } +} - /* zap the 2nd function of the winbond chip */ - if (dev->resource[i].flags & IORESOURCE_IO - && dev->bus->number == 0 && dev->devfn == 0x81) - dev->resource[i].flags &= ~IORESOURCE_IO; - } -} - -void __init pSeries_pcibios_fixup_bus(struct pci_bus *bus) +void __init pcibios_fixup_bus(struct pci_bus *bus) { - struct pci_controller *phb = PCI_GET_PHB_PTR(bus); + struct pci_controller *hose = PCI_GET_PHB_PTR(bus); + struct list_head *ln; + + /* XXX or bus->parent? */ + struct pci_dev *dev = bus->self; struct resource *res; int i; - if (bus->parent == NULL) { - /* This is a host bridge - fill in its resources */ - phb->bus = bus; - bus->resource[0] = res = &phb->io_resource; + if (!dev) { + /* Root bus. */ + + hose->bus = bus; + bus->resource[0] = res = &hose->io_resource; if (!res->flags) BUG(); /* No I/O resource for this PHB? */ + if (request_resource(&ioport_resource, res)) + printk(KERN_ERR "Failed to request IO" + "on hose %d\n", 0 /* FIXME */); + for (i = 0; i < 3; ++i) { - res = &phb->mem_resources[i]; - if (!res->flags) { - if (i == 0) - BUG(); /* No memory resource for this PHB? */ - } + res = &hose->mem_resources[i]; + if (!res->flags && i == 0) + BUG(); /* No memory resource for this PHB? 
*/ bus->resource[i+1] = res; + if (res->flags && request_resource(&iomem_resource, res)) + printk(KERN_ERR "Failed to request MEM" + "on hose %d\n", 0 /* FIXME */); } - } else { + } else if (pci_probe_only && + (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { /* This is a subordinate bridge */ + pci_read_bridge_bases(bus); + pcibios_fixup_device_resources(dev, bus); + } - for (i = 0; i < 4; ++i) { - if ((res = bus->resource[i]) == NULL) - continue; - if (!res->flags) - continue; - if (res == pci_find_parent_resource(bus->self, res)) { - /* Transparent resource -- don't try to "fix" it. */ - continue; - } - if (res->flags & IORESOURCE_IO) { - unsigned long offset = (unsigned long)phb->io_base_virt - pci_io_base; - res->start += offset; - res->end += offset; - } else if (phb->pci_mem_offset - && (res->flags & IORESOURCE_MEM)) { - if (res->start < phb->pci_mem_offset) { - res->start += phb->pci_mem_offset; - res->end += phb->pci_mem_offset; - } - } - } + /* XXX Need to check why Alpha doesnt do this - Anton */ + if (!pci_probe_only) + return; + + for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) { + struct pci_dev *dev = pci_dev_b(ln); + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); } } @@ -562,19 +529,20 @@ } } -void __init -pSeries_pcibios_fixup(void) +extern void chrp_request_regions(void); + +void __init pcibios_final_fixup(void) { struct pci_dev *dev; - PPCDBG(PPCDBG_PHBINIT, "pSeries_pcibios_fixup: start\n"); - check_s7a(); - - pci_for_each_dev(dev) { + + pci_for_each_dev(dev) pci_read_irq_line(dev); - PPCDBGCALL(PPCDBG_PHBINIT, dumpPci_Dev(dev) ); - } + + chrp_request_regions(); + pci_fix_bus_sysdata(); + create_tce_tables(); } /*********************************************************************** @@ -595,14 +563,4 @@ node=node->parent; } return NULL; -} - -/* - * This is called very early before the page table is setup. 
- */ -void -pSeries_pcibios_init_early(void) -{ - ppc_md.pcibios_read_config = rtas_read_config; - ppc_md.pcibios_write_config = rtas_write_config; } diff -Nru a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c --- a/arch/ppc64/kernel/pci.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/pci.c Tue Mar 25 18:36:42 2003 @@ -2,6 +2,9 @@ * Port for PPC64 David Engebretsen, IBM Corp. * Contains common pci routines for ppc64 platform, pSeries and iSeries brands. * + * Copyright (C) 2003 Anton Blanchard , IBM + * Rework, based on alpha PCI code. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -30,31 +33,37 @@ #include #include #include -#include #include "pci.h" +unsigned long pci_probe_only = 1; +unsigned long pci_assign_all_buses = 0; + +unsigned int pcibios_assign_all_busses(void) +{ + return pci_assign_all_buses; +} + /* pci_io_base -- the base address from which io bars are offsets. * This is the lowest I/O base address (so bar values are always positive), * and it *must* be the start of ISA space if an ISA bus exists because * ISA drivers use hard coded offsets. If no ISA bus exists a dummy * page is mapped and isa_io_limit prevents access to it. 
*/ -unsigned long isa_io_base = 0; /* NULL if no ISA bus */ -unsigned long pci_io_base = 0; +unsigned long isa_io_base; /* NULL if no ISA bus */ +unsigned long pci_io_base; -static void pcibios_fixup_resources(struct pci_dev* dev); +void pcibios_name_device(struct pci_dev* dev); +void pcibios_final_fixup(void); static void fixup_broken_pcnet32(struct pci_dev* dev); static void fixup_windbond_82c105(struct pci_dev* dev); -void fixup_resources(struct pci_dev* dev); -void iSeries_pcibios_init(void); +void iSeries_pcibios_init(void); -struct pci_controller* hose_head; -struct pci_controller** hose_tail = &hose_head; +struct pci_controller *hose_head; +struct pci_controller **hose_tail = &hose_head; -int global_phb_number = 0; /* Global phb counter */ -struct pci_controller *phbtab[PCI_MAX_PHB]; +int global_phb_number; /* Global phb counter */ /* Cached ISA bridge dev. */ struct pci_dev *ppc64_isabridge_dev = NULL; @@ -62,8 +71,8 @@ struct pci_fixup pcibios_fixups[] = { { PCI_FIXUP_HEADER, PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32 }, { PCI_FIXUP_HEADER, PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, fixup_windbond_82c105 }, - { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources }, - { 0 } + { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device }, + { 0 } }; static void fixup_broken_pcnet32(struct pci_dev* dev) @@ -81,14 +90,21 @@ * p610. We should probably be more careful in case * someone tries to plug in a similar adapter. 
*/ + int i; unsigned int reg; printk("Using INTC for W82c105 IDE controller.\n"); pci_read_config_dword(dev, 0x40, &reg); /* Enable LEGIRQ to use INTC instead of ISA interrupts */ pci_write_config_dword(dev, 0x40, reg | (1<<11)); -} + for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) { + /* zap the 2nd function of the winbond chip */ + if (dev->resource[i].flags & IORESOURCE_IO + && dev->bus->number == 0 && dev->devfn == 0x81) + dev->resource[i].flags &= ~IORESOURCE_IO; + } +} /* Given an mmio phys address, find a pci device that implements * this address. This is of course expensive, but only used @@ -127,12 +143,30 @@ return NULL; } -static void -pcibios_fixup_resources(struct pci_dev* dev) +void __devinit +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) { - fixup_resources(dev); + unsigned long offset = 0; + struct pci_controller *hose = PCI_GET_PHB_PTR(dev); + + if (!hose) + return; + + if (res->flags & IORESOURCE_IO) + offset = (unsigned long)hose->io_base_virt - pci_io_base; + + if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; } +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +#endif + /* * We need to avoid collisions with `mirrored' VGA ports * and other strange ISA hardware, so we always want the @@ -146,180 +180,38 @@ * but we want to try to avoid allocating at 0x2900-0x2bff * which might have be mirrored at 0x0100-0x03ff.. 
*/ -void -pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) +void pcibios_align_resource(void *data, struct resource *res, + unsigned long size, unsigned long align) { struct pci_dev *dev = data; + struct pci_controller *hose = PCI_GET_PHB_PTR(dev); + unsigned long start = res->start; + unsigned long alignto; if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; - - if (size > 0x100) { - printk(KERN_ERR "PCI: Can not align I/O Region %s %s because size %ld is too large.\n", - dev->slot_name, res->name, size); - } - - if (start & 0x300) { + unsigned long offset = (unsigned long)hose->io_base_virt - + pci_io_base; + /* Make sure we start at our min on all hoses */ + if (start - offset < PCIBIOS_MIN_IO) + start = PCIBIOS_MIN_IO + offset; + + /* + * Put everything into 0x00-0xff region modulo 0x400 + */ + if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } -} -/* - * Handle resources of PCI devices. If the world were perfect, we could - * just allocate all the resource regions and do nothing more. It isn't. - * On the other hand, we cannot just re-allocate all devices, as it would - * require us to know lots of host bridge internals. So we attempt to - * keep as much of the original configuration as possible, but tweak it - * when it's found to be wrong. - * - * Known BIOS problems we have to work around: - * - I/O or memory regions not configured - * - regions configured, but not enabled in the command register - * - bogus I/O addresses above 64K used - * - expansion ROMs left enabled (this may sound harmless, but given - * the fact the PCI specs explicitly allow address decoders to be - * shared between expansion ROMs and other resource regions, it's - * at least dangerous) - * - * Our solution: - * (1) Allocate resources for all buses behind PCI-to-PCI bridges. - * This gives us fixed barriers on where we can allocate. - * (2) Allocate resources for all enabled devices. 
If there is - * a collision, just mark the resource as unallocated. Also - * disable expansion ROMs during this step. - * (3) Try to allocate resources for disabled devices. If the - * resources were assigned correctly, everything goes well, - * if they weren't, they won't disturb allocation of other - * resources. - * (4) Assign new addresses to resources which were either - * not configured at all or misconfigured. If explicitly - * requested by the user, configure expansion ROM address - * as well. - */ + } else if (res->flags & IORESOURCE_MEM) { + /* Make sure we start at our min on all hoses */ + if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM) + start = PCIBIOS_MIN_MEM + hose->pci_mem_offset; -static void __init -pcibios_allocate_bus_resources(struct list_head *bus_list) -{ - struct list_head *ln; - struct pci_bus *bus; - int i; - struct resource *res, *pr; - - /* Depth-First Search on bus tree */ - for (ln=bus_list->next; ln != bus_list; ln=ln->next) { - bus = pci_bus_b(ln); - for (i = 0; i < 4; ++i) { - if ((res = bus->resource[i]) == NULL || !res->flags) - continue; - if (bus->parent == NULL) - pr = (res->flags & IORESOURCE_IO)? - &ioport_resource: &iomem_resource; - else - pr = pci_find_parent_resource(bus->self, res); - - if (pr == res) - continue; /* transparent bus or undefined */ - if (pr && request_resource(pr, res) == 0) - continue; - printk(KERN_ERR "PCI: Cannot allocate resource region " - "%d of PCI bridge %x\n", i, bus->number); - printk(KERN_ERR "PCI: resource is %lx..%lx (%lx), parent %p\n", - res->start, res->end, res->flags, pr); - } - pcibios_allocate_bus_resources(&bus->children); + /* Align to multiple of size of minimum base. 
*/ + alignto = max(0x1000UL, align); + start = ALIGN(start, alignto); } -} - -static void __init -pcibios_allocate_resources(int pass) -{ - struct pci_dev *dev; - int idx, disabled; - u16 command; - struct resource *r, *pr; - pci_for_each_dev(dev) { - pci_read_config_word(dev, PCI_COMMAND, &command); - for(idx = 0; idx < 6; idx++) { - r = &dev->resource[idx]; - if (r->parent) /* Already allocated */ - continue; - if (!r->start) /* Address not assigned at all */ - continue; - - if (r->flags & IORESOURCE_IO) - disabled = !(command & PCI_COMMAND_IO); - else - disabled = !(command & PCI_COMMAND_MEMORY); - if (pass == disabled) { - PPCDBG(PPCDBG_PHBINIT, - "PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n", - r->start, r->end, r->flags, disabled, pass); - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) { - PPCDBG(PPCDBG_PHBINIT, - "PCI: Cannot allocate resource region %d of device %s, pr = 0x%lx\n", idx, dev->slot_name, pr); - if(pr) { - PPCDBG(PPCDBG_PHBINIT, - "PCI: Cannot allocate resource 0x%lx\n", request_resource(pr,r)); - } - /* We'll assign a new address later */ - r->end -= r->start; - r->start = 0; - } - } - } - if (!pass) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (r->flags & PCI_ROM_ADDRESS_ENABLE) { - /* Turn the ROM off, leave the resource region, but keep it unregistered. 
*/ - u32 reg; - r->flags &= ~PCI_ROM_ADDRESS_ENABLE; - pci_read_config_dword(dev, dev->rom_base_reg, &reg); - pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); - } - } - } -} - -static void __init -pcibios_assign_resources(void) -{ - struct pci_dev *dev; - int idx; - struct resource *r; - - pci_for_each_dev(dev) { - int class = dev->class >> 8; - - /* Don't touch classless devices and host bridges */ - if (!class || class == PCI_CLASS_BRIDGE_HOST) - continue; - - for (idx = 0; idx < 6; idx++) { - r = &dev->resource[idx]; - - /* - * We shall assign a new address to this resource, - * either because the BIOS (sic) forgot to do so - * or because we have decided the old address was - * unusable for some reason. - */ - if (!r->start && r->end) - pci_assign_resource(dev, idx); - } - -#if 0 /* don't assign ROMs */ - r = &dev->resource[PCI_ROM_RESOURCE]; - r->end -= r->start; - r->start = 0; - if (r->end) - pci_assign_resource(dev, PCI_ROM_RESOURCE); -#endif - } + res->start = start; } /* @@ -358,20 +250,49 @@ else memcpy(hose->what,model,7); hose->type = controller_type; - hose->global_number = global_phb_number; - phbtab[global_phb_number++] = hose; + hose->global_number = global_phb_number++; *hose_tail = hose; hose_tail = &hose->next; return hose; } -static int __init -pcibios_init(void) +static void __init pcibios_claim_one_bus(struct pci_bus *b) +{ + struct list_head *ld; + struct pci_bus *child_bus; + + for (ld = b->devices.next; ld != &b->devices; ld = ld->next) { + struct pci_dev *dev = pci_dev_b(ld); + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &dev->resource[i]; + + if (r->parent || !r->start || !r->flags) + continue; + pci_claim_resource(dev, i); + } + } + + list_for_each_entry(child_bus, &b->children, node) + pcibios_claim_one_bus(child_bus); +} + +static void __init pcibios_claim_of_setup(void) +{ + struct list_head *lb; + + for (lb = pci_root_buses.next; lb != &pci_root_buses; lb = lb->next) { + 
struct pci_bus *b = pci_bus_b(lb); + pcibios_claim_one_bus(b); + } +} + +static int __init pcibios_init(void) { struct pci_controller *hose; struct pci_bus *bus; - int next_busno; #ifdef CONFIG_PPC_ISERIES iSeries_pcibios_init(); @@ -379,38 +300,26 @@ //ppc64_boot_msg(0x40, "PCI Probe"); printk("PCI: Probing PCI hardware\n"); - PPCDBG(PPCDBG_BUSWALK,"PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. */ - for (next_busno = 0, hose = hose_head; hose; hose = hose->next) { + for (hose = hose_head; hose; hose = hose->next) { hose->last_busno = 0xff; - bus = pci_scan_bus(hose->first_busno, hose->ops, hose->arch_data); + bus = pci_scan_bus(hose->first_busno, hose->ops, + hose->arch_data); hose->bus = bus; hose->last_busno = bus->subordinate; - if (next_busno <= hose->last_busno) - next_busno = hose->last_busno+1; } - /* Call machine dependent fixup */ - if (ppc_md.pcibios_fixup) { - ppc_md.pcibios_fixup(); - } - - /* Allocate and assign resources */ - pcibios_allocate_bus_resources(&pci_root_buses); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - pcibios_assign_resources(); - -#ifndef CONFIG_PPC_ISERIES - void chrp_request_regions(void); - chrp_request_regions(); + if (pci_probe_only) + pcibios_claim_of_setup(); + else + /* FIXME: `else' will be removed when + pci_assign_unassigned_resources() is able to work + correctly with [partially] allocated PCI tree. 
*/ + pci_assign_unassigned_resources(); - pci_fix_bus_sysdata(); - - create_tce_tables(); - PPCDBG(PPCDBG_BUSWALK,"pSeries create_tce_tables()\n"); -#endif + /* Call machine dependent fixup */ + pcibios_final_fixup(); /* Cache the location of the ISA bridge (if we have one) */ ppc64_isabridge_dev = pci_find_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); @@ -418,7 +327,6 @@ printk("ISA bridge at %s\n", ppc64_isabridge_dev->slot_name); printk("PCI: Probing PCI hardware done\n"); - PPCDBG(PPCDBG_BUSWALK,"PCI: Probing PCI hardware done.\n"); //ppc64_boot_msg(0x41, "PCI Done"); return 0; @@ -426,12 +334,6 @@ subsys_initcall(pcibios_init); -void __init pcibios_fixup_bus(struct pci_bus *bus) -{ - if (ppc_md.pcibios_fixup_bus) - ppc_md.pcibios_fixup_bus(bus); -} - char __init *pcibios_setup(char *str) { return str; @@ -439,35 +341,29 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) { - u16 cmd, old_cmd; - int idx; - struct resource *r; - - PPCDBG(PPCDBG_BUSWALK,"PCI: %s for device %s \n", __FUNCTION__, - dev->slot_name); + u16 cmd, oldcmd; + int i; pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx = 0; idx < 6; idx++) { + oldcmd = cmd; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + /* Only set up the requested stuff */ - if (!(mask & (1<resource[idx]; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) + if (res->flags & IORESOURCE_IO) cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) + if (res->flags & IORESOURCE_MEM) cmd |= PCI_COMMAND_MEMORY; } - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", - dev->slot_name, old_cmd, cmd); - PPCDBG(PPCDBG_BUSWALK,"PCI: Enabling device %s \n", - dev->slot_name); + + if (cmd != oldcmd) { + printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n", + dev->slot_name, cmd); + /* Enable the appropriate bits 
in the PCI command register. */ pci_write_config_word(dev, PCI_COMMAND, cmd); } return 0; @@ -608,113 +504,4 @@ vma->vm_end - vma->vm_start, vma->vm_page_prot); return ret; -} - -/***************************************************** - * Dump Resource information - *****************************************************/ -void dumpResources(struct resource* Resource) -{ - if(Resource != NULL) { - int Flags = 0x00000F00 & Resource->flags; - if(Resource->start == 0 && Resource->end == 0) return; - else if(Resource->start == Resource->end ) return; - else { - if (Flags == IORESOURCE_IO) udbg_printf("IO.:"); - else if(Flags == IORESOURCE_MEM) udbg_printf("MEM:"); - else if(Flags == IORESOURCE_IRQ) udbg_printf("IRQ:"); - else udbg_printf("0x%02X:",Resource->flags); - - } - udbg_printf("0x%016LX / 0x%016LX (0x%08X)\n", - Resource->start, Resource->end, Resource->end - Resource->start); - } -} - -int resourceSize(struct resource* Resource) -{ - if(Resource->start == 0 && Resource->end == 0) return 0; - else if(Resource->start == Resource->end ) return 0; - else return (Resource->end-1)-Resource->start; -} - - -/***************************************************** - * Dump PHB information for Debug - *****************************************************/ -void dumpPci_Controller(struct pci_controller* phb) -{ - udbg_printf("\tpci_controller= 0x%016LX\n", phb); - if (phb != NULL) { - udbg_printf("\twhat & type = %s 0x%02X\n ",phb->what,phb->type); - udbg_printf("\tbus = "); - if (phb->bus != NULL) udbg_printf("0x%02X\n", phb->bus->number); - else udbg_printf("\n"); - udbg_printf("\tarch_data = 0x%016LX\n", phb->arch_data); - udbg_printf("\tfirst_busno = 0x%02X\n", phb->first_busno); - udbg_printf("\tlast_busno = 0x%02X\n", phb->last_busno); - udbg_printf("\tio_base_virt* = 0x%016LX\n", phb->io_base_virt); - udbg_printf("\tio_base_phys = 0x%016LX\n", phb->io_base_phys); - udbg_printf("\tpci_mem_offset= 0x%016LX\n", phb->pci_mem_offset); - udbg_printf("\tpci_io_offset = 
0x%016LX\n", phb->pci_io_offset); - - udbg_printf("\tResources\n"); - dumpResources(&phb->io_resource); - if (phb->mem_resource_count > 0) dumpResources(&phb->mem_resources[0]); - if (phb->mem_resource_count > 1) dumpResources(&phb->mem_resources[1]); - if (phb->mem_resource_count > 2) dumpResources(&phb->mem_resources[2]); - - udbg_printf("\tglobal_num = 0x%02X\n", phb->global_number); - udbg_printf("\tlocal_num = 0x%02X\n", phb->local_number); - } -} - -/***************************************************** - * Dump PHB information for Debug - *****************************************************/ -void dumpPci_Bus(struct pci_bus* Pci_Bus) -{ - int i; - udbg_printf("\tpci_bus = 0x%016LX \n",Pci_Bus); - if (Pci_Bus != NULL) { - - udbg_printf("\tnumber = 0x%02X \n",Pci_Bus->number); - udbg_printf("\tprimary = 0x%02X \n",Pci_Bus->primary); - udbg_printf("\tsecondary = 0x%02X \n",Pci_Bus->secondary); - udbg_printf("\tsubordinate = 0x%02X \n",Pci_Bus->subordinate); - - for (i=0;i<4;++i) { - if(Pci_Bus->resource[i] == NULL) continue; - if(Pci_Bus->resource[i]->start == 0 && Pci_Bus->resource[i]->end == 0) break; - udbg_printf("\tResources[%d]",i); - dumpResources(Pci_Bus->resource[i]); - } - } -} - -/***************************************************** - * Dump Device information for Debug - *****************************************************/ -void dumpPci_Dev(struct pci_dev* Pci_Dev) -{ - int i; - udbg_printf("\tpci_dev* = 0x%p\n",Pci_Dev); - if ( Pci_Dev == NULL ) return; - udbg_printf("\tname = %s \n",Pci_Dev->dev.name); - udbg_printf("\tbus* = 0x%p\n",Pci_Dev->bus); - udbg_printf("\tsysdata* = 0x%p\n",Pci_Dev->sysdata); - udbg_printf("\tDevice = 0x%4X%02X:%02X.%02X 0x%04X:%04X\n", - PCI_GET_PHB_NUMBER(Pci_Dev), - PCI_GET_BUS_NUMBER(Pci_Dev), - PCI_SLOT(Pci_Dev->devfn), - PCI_FUNC(Pci_Dev->devfn), - Pci_Dev->vendor, - Pci_Dev->device); - udbg_printf("\tHdr/Irq = 0x%02X/0x%02X \n",Pci_Dev->hdr_type,Pci_Dev->irq); - for (i=0;iresource[i].start == 0 && 
Pci_Dev->resource[i].end == 0) continue; - udbg_printf("\tResources[%d] ",i); - dumpResources(&Pci_Dev->resource[i]); - } - dumpResources(&Pci_Dev->resource[i]); } diff -Nru a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h --- a/arch/ppc64/kernel/pci.h Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/pci.h Tue Mar 25 18:36:42 2003 @@ -19,18 +19,14 @@ extern struct pci_controller* hose_head; extern struct pci_controller** hose_tail; -/* PHB's are also in a table. */ -#define PCI_MAX_PHB 64 -extern int global_phb_number; -extern struct pci_controller *phbtab[]; + +extern int global_phb_number; /******************************************************************* * Platform functions that are brand specific implementation. *******************************************************************/ extern unsigned long find_and_init_phbs(void); -extern void ppc64_pcibios_init(void); - extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */ /******************************************************************* @@ -46,10 +42,6 @@ void pci_fix_bus_sysdata(void); struct device_node *fetch_dev_dn(struct pci_dev *dev); -void iSeries_pcibios_init_early(void); -void pSeries_pcibios_init_early(void); -void pSeries_pcibios_init(void); - /******************************************************************* * Helper macros for extracting data from pci structures. * PCI_GET_PHB_PTR(struct pci_dev*) returns the Phb pointer. @@ -59,13 +51,5 @@ #define PCI_GET_PHB_PTR(dev) (((struct device_node *)(dev)->sysdata)->phb) #define PCI_GET_PHB_NUMBER(dev) (((dev)->bus->number&0x00FFFF00)>>8) #define PCI_GET_BUS_NUMBER(dev) ((dev)->bus->number&0x0000FF) - -/******************************************************************* - * Debugging Routines. 
- *******************************************************************/ -extern void dumpResources(struct resource* Resource); -extern void dumpPci_Controller(struct pci_controller* phb); -extern void dumpPci_Bus(struct pci_bus* Pci_Bus); -extern void dumpPci_Dev(struct pci_dev* Pci_Dev); #endif /* __PPC_KERNEL_PCI_H__ */ diff -Nru a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c --- a/arch/ppc64/kernel/process.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/process.c Tue Mar 25 18:36:42 2003 @@ -208,6 +208,12 @@ } else { childregs->gpr[1] = usp; p->thread.regs = childregs; + if (clone_flags & CLONE_SETTLS) { + if (test_thread_flag(TIF_32BIT)) + childregs->gpr[2] = childregs->gpr[6]; + else + childregs->gpr[13] = childregs->gpr[6]; + } } childregs->gpr[3] = 0; /* Result from fork() */ sp -= STACK_FRAME_OVERHEAD; @@ -304,7 +310,7 @@ if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) { parent_tidptr = p3; - child_tidptr = p4; + child_tidptr = p5; if (test_thread_flag(TIF_32BIT)) { parent_tidptr &= 0xffffffff; child_tidptr &= 0xffffffff; diff -Nru a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c --- a/arch/ppc64/kernel/prom.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/prom.c Tue Mar 25 18:36:42 2003 @@ -1270,8 +1270,8 @@ break; #ifdef CONFIG_LOGO_LINUX_CLUT224 - clut = RELOC(RELOC(&logo_linux_clut224)->clut); - for (i = 0; i < logo_linux_clut224.clutsize; i++, clut += 3) + clut = PTRRELOC(RELOC(logo_linux_clut224.clut)); + for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3) if (prom_set_color(ih, i + 32, clut[0], clut[1], clut[2]) != 0) break; diff -Nru a/arch/ppc64/kernel/stab.c b/arch/ppc64/kernel/stab.c --- a/arch/ppc64/kernel/stab.c Tue Mar 25 18:36:42 2003 +++ b/arch/ppc64/kernel/stab.c Tue Mar 25 18:36:42 2003 @@ -197,11 +197,23 @@ PMC_SW_PROCESSOR(stab_capacity_castouts); + /* + * Never cast out the segment for our kernel stack. 
Since we + * dont invalidate the ERAT we could have a valid translation + * for the kernel stack during the first part of exception exit + * which gets invalidated due to a tlbie from another cpu at a + * non recoverable point (after setting srr0/1) - Anton + */ castout_entry = get_paca()->xStab_data.next_round_robin; - entry = castout_entry; - castout_entry++; - if (castout_entry >= naca->slb_size) - castout_entry = 1; + do { + entry = castout_entry; + castout_entry++; + if (castout_entry >= naca->slb_size) + castout_entry = 1; + asm volatile("slbmfee %0,%1" : "=r" (esid_data) : "r" (entry)); + } while (esid_data.data.esid == GET_ESID((unsigned long)_get_SP()) && + esid_data.data.v); + get_paca()->xStab_data.next_round_robin = castout_entry; /* slbie not needed as the previous mapping is still valid. */ @@ -346,7 +358,12 @@ void flush_stab(struct task_struct *tsk, struct mm_struct *mm) { if (cpu_has_slb()) { - if (!STAB_PRESSURE && test_thread_flag(TIF_32BIT)) { + /* + * XXX disable 32bit slb invalidate optimisation until we fix + * the issue where a 32bit app execed out of a 64bit app can + * cause segments above 4GB not to be flushed - Anton + */ + if (0 && !STAB_PRESSURE && test_thread_flag(TIF_32BIT)) { union { unsigned long word0; slb_dword0 data; diff -Nru a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig --- a/arch/x86_64/Kconfig Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/Kconfig Tue Mar 25 18:36:42 2003 @@ -179,9 +179,7 @@ If you don't know what to do here, say N. -# broken currently config PREEMPT - depends on NOT_WORKING bool "Preemptible Kernel" ---help--- This option reduces the latency of the kernel when reacting to @@ -200,7 +198,7 @@ # someone write a better help text please. config K8_NUMA bool "K8 NUMA support" - depends on SMP && NOT_WORKING + depends on SMP help Enable NUMA (Non Unified Memory Architecture) support for AMD Opteron Multiprocessor systems. 
The kernel will try to allocate @@ -590,10 +588,8 @@ allocation as well as poisoning memory on free to catch use of freed memory. -# bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT config MAGIC_SYSRQ bool "Magic SysRq key" - depends on DEBUG_KERNEL help If you say Y here, you will have some control over the system even if the system crashes for example during kernel debugging (e.g., you @@ -639,13 +635,36 @@ config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL help Compile the kernel with frame pointers. This may help for some debugging with external debuggers. Note the standard oops backtracer - doesn't make use of it and the x86-64 kernel doesn't ensure an consistent + doesn't make use of this and the x86-64 kernel doesn't ensure an consistent frame pointer through inline assembly (semaphores etc.) Normally you should say N. + +config IOMMU_DEBUG + bool "Force IOMMU to on" + help + Force the IOMMU to on even when you have less than 4GB of memory and add + debugging code. + Can be disabled at boot time with iommu=noforce. + +config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL + help + Add a simple leak tracer to the IOMMU code. This is useful when you + are debugging a buggy device driver that leaks IOMMU mappings. + +config MCE_DEBUG + bool "K8 Machine check debugging mode" + default y + help + Turn on all Machine Check debugging for device driver problems. + This can cause panics, but is useful to find device driver problems. 
+ +#config X86_REMOTE_DEBUG +# bool "kgdb debugging stub" endmenu diff -Nru a/arch/x86_64/Makefile b/arch/x86_64/Makefile --- a/arch/x86_64/Makefile Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/Makefile Tue Mar 25 18:36:42 2003 @@ -47,6 +47,10 @@ # should lower this a lot and see how much .text is saves CFLAGS += -finline-limit=2000 #CFLAGS += -g +# don't enable this when you use kgdb: +ifneq ($(CONFIG_X86_REMOTE_DEBUG),y) +CFLAGS += -fno-asynchronous-unwind-tables +endif head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o diff -Nru a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c --- a/arch/x86_64/boot/compressed/misc.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/boot/compressed/misc.c Tue Mar 25 18:36:42 2003 @@ -274,7 +274,7 @@ puts(x); puts("\n\n -- System halted"); - while(1); /* Halt */ + while(1); } void setup_normal_output_buffer(void) @@ -429,8 +429,6 @@ else setup_output_buffer_if_we_run_high(mv); makecrc(); - puts("Checking CPU type..."); - check_cpu(); puts(".\nDecompressing Linux..."); gunzip(); puts("done.\nBooting the kernel.\n"); diff -Nru a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S --- a/arch/x86_64/boot/setup.S Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/boot/setup.S Tue Mar 25 18:36:42 2003 @@ -42,6 +42,7 @@ * if CX/DX have been changed in the e801 call and if so use AX/BX . * Michael Miller, April 2001 * + * Added long mode checking and SSE force. March 2003, Andi Kleen. */ #include @@ -200,10 +201,10 @@ prtsp2: call prtspc # Print double space prtspc: movb $0x20, %al # Print single space (note: fall-thru) -# Part of above routine, this one just prints ascii al -prtchr: pushw %ax +prtchr: + pushw %ax pushw %cx - xorb %bh, %bh + movw $0007,%bx movw $0x01, %cx movb $0x0e, %ah int $0x10 @@ -280,6 +281,75 @@ loader_panic_mess: .string "Wrong loader, giving up..." loader_ok: + /* check for long mode. 
*/ + /* we have to do this before the VESA setup, otherwise the user + can't see the error message. */ + + pushw %ds + movw %cs,%ax + movw %ax,%ds + + /* minimum CPUID flags for x86-64 */ + /* see http://www.x86-64.org/lists/discuss/msg02971.html */ +#define SSE_MASK ((1<<25)|(1<<26)) +#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|(1<<11)| \ + (1<<13)|(1<<15)|(1<<24)|(1<<29)) + + pushfl /* standard way to check for cpuid */ + popl %eax + movl %eax,%ebx + xorl $0x200000,%eax + pushl %eax + popfl + pushfl + popl %eax + cmpl %eax,%ebx + jz no_longmode /* cpu has no cpuid */ + movl $0x80000000,%eax + cpuid + cmpl $0x80000001,%eax + jb no_longmode /* no extended cpuid */ + xor %di,%di + cmpl $0x68747541,%ebx /* AuthenticAMD */ + jnz noamd + cmpl $0x69746e65,%edx + jnz noamd + cmpl $0x444d4163,%ecx + jnz noamd + mov $1,%di /* cpu is from AMD */ +noamd: + movl $0x80000001,%eax + cpuid + andl $REQUIRED_MASK1,%edx + xorl $REQUIRED_MASK1,%edx + jnz no_longmode +sse_test: + movl $1,%eax + cpuid + andl $SSE_MASK,%edx + cmpl $SSE_MASK,%edx + je sse_ok + test %di,%di + jz no_longmode /* only try to force SSE on AMD */ + movl $0xc0010015,%ecx /* HWCR */ + rdmsr + btr $15,%eax /* enable SSE */ + wrmsr + xor %di,%di /* don't loop */ + jmp sse_test /* try again */ +no_longmode: + call beep + lea long_mode_panic,%si + call prtstr +no_longmode_loop: + jmp no_longmode_loop +long_mode_panic: + .string "Your CPU does not support long mode. Use a 32bit distribution." 
+ .byte 0 + +sse_ok: + popw %ds + # Get memory size (extended mem, kB) xorl %eax, %eax diff -Nru a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c --- a/arch/x86_64/ia32/fpu32.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/ia32/fpu32.c Tue Mar 25 18:36:42 2003 @@ -77,17 +77,20 @@ struct _fpxreg *to; struct _fpreg *from; int i; - int err; - __u32 v; + u32 v; + int err = 0; - err = __get_user(fxsave->cwd, &buf->cw); - err |= __get_user(fxsave->swd, &buf->sw); - err |= __get_user(fxsave->twd, &buf->tag); +#define G(num,val) err |= __get_user(val, num + (u32 *)buf) + G(0, fxsave->cwd); + G(1, fxsave->swd); + G(2, fxsave->twd); fxsave->twd = twd_i387_to_fxsr(fxsave->twd); - err |= __get_user(fxsave->rip, &buf->ipoff); - err |= __get_user(fxsave->rdp, &buf->dataoff); - err |= __get_user(v, &buf->cssel); - fxsave->fop = v >> 16; + G(3, fxsave->rip); + G(4, v); + fxsave->fop = v>>16; /* cs ignored */ + G(5, fxsave->rdp); + /* 6: ds ignored */ +#undef G if (err) return -1; @@ -109,21 +112,29 @@ struct _fpreg *to; struct _fpxreg *from; int i; - u32 ds; - int err; + u16 cs,ds; + int err = 0; - err = __put_user((unsigned long)fxsave->cwd | 0xffff0000, &buf->cw); - err |= __put_user((unsigned long)fxsave->swd | 0xffff0000, &buf->sw); - err |= __put_user((u32)fxsave->rip, &buf->ipoff); - err |= __put_user((u32)(regs->cs | ((u32)fxsave->fop << 16)), - &buf->cssel); - err |= __put_user((u32)twd_fxsr_to_i387(fxsave), &buf->tag); - err |= __put_user((u32)fxsave->rdp, &buf->dataoff); - if (tsk == current) - asm("movl %%ds,%0 " : "=r" (ds)); - else /* ptrace. task has stopped. */ + if (tsk == current) { + /* should be actually ds/cs at fpu exception time, + but that information is not available in 64bit mode. */ + asm("movw %%ds,%0 " : "=r" (ds)); + asm("movw %%cs,%0 " : "=r" (cs)); + } else { /* ptrace. task has stopped. 
*/ ds = tsk->thread.ds; - err |= __put_user(ds, &buf->datasel); + cs = regs->cs; + } + +#define P(num,val) err |= __put_user(val, num + (u32 *)buf) + P(0, (u32)fxsave->cwd | 0xffff0000); + P(1, (u32)fxsave->swd | 0xffff0000); + P(2, twd_fxsr_to_i387(fxsave)); + P(3, (u32)fxsave->rip); + P(4, cs | ((u32)fxsave->fop) << 16); + P(5, fxsave->rdp); + P(6, 0xffff0000 | ds); +#undef P + if (err) return -1; @@ -144,9 +155,9 @@ &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct))) return -1; - } tsk->thread.i387.fxsave.mxcsr &= 0xffbf; - current->used_math = 1; + tsk->used_math = 1; + } return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf); } @@ -157,12 +168,11 @@ { int err = 0; - if (!tsk->used_math) - return 0; - tsk->used_math = 0; - unlazy_fpu(tsk); + init_fpu(tsk); if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk)) return -1; + if (fsave) + return 0; err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); if (fsave) return err ? -1 : 1; diff -Nru a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c --- a/arch/x86_64/ia32/ia32_ioctl.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/ia32/ia32_ioctl.c Tue Mar 25 18:36:42 2003 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,8 @@ #include #include #include +#include +#include #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE) /* Ugh. 
This header really is not clean */ #define min min @@ -2906,35 +2909,28 @@ { typedef struct serial_struct SS; struct serial_struct32 *ss32 = ptr; - int err = 0; + int err; struct serial_struct ss; mm_segment_t oldseg = get_fs(); - set_fs(KERNEL_DS); if (cmd == TIOCSSERIAL) { - err = -EFAULT; if (copy_from_user(&ss, ss32, sizeof(struct serial_struct32))) - goto out; + return -EFAULT; memmove(&ss.iomem_reg_shift, ((char*)&ss.iomem_base)+4, sizeof(SS)-offsetof(SS,iomem_reg_shift)); ss.iomem_base = (void *)((unsigned long)ss.iomem_base & 0xffffffff); } - if (!err) + set_fs(KERNEL_DS); err = sys_ioctl(fd,cmd,(unsigned long)(&ss)); + set_fs(oldseg); if (cmd == TIOCGSERIAL && err >= 0) { - __u32 base; if (__copy_to_user(ss32,&ss,offsetof(SS,iomem_base)) || - __copy_to_user(&ss32->iomem_reg_shift, - &ss.iomem_reg_shift, - sizeof(SS) - offsetof(SS, iomem_reg_shift))) - err = -EFAULT; - if (ss.iomem_base > (unsigned char *)0xffffffff) - base = -1; - else - base = (unsigned long)ss.iomem_base; - err |= __put_user(base, &ss32->iomem_base); + __put_user((unsigned long)ss.iomem_base >> 32 ? 
+ 0xffffffff : (unsigned)(unsigned long)ss.iomem_base, + &ss32->iomem_base) || + __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) || + __put_user(ss.port_high, &ss32->port_high)) + return -EFAULT; } - out: - set_fs(oldseg); return err; } @@ -3045,7 +3041,14 @@ return sys_ioctl(fd, BLKGETSIZE64, arg); } +/* Bluetooth ioctls */ +#define HCIUARTSETPROTO _IOW('U', 200, int) +#define HCIUARTGETPROTO _IOR('U', 201, int) +#define BNEPCONNADD _IOW('B', 200, int) +#define BNEPCONNDEL _IOW('B', 201, int) +#define BNEPGETCONNLIST _IOR('B', 210, int) +#define BNEPGETCONNINFO _IOR('B', 211, int) struct usbdevfs_ctrltransfer32 { __u8 bRequestType; @@ -4093,6 +4096,7 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC) COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER) COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) +COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) /* DEVFS */ COMPATIBLE_IOCTL(DEVFSDIOC_GET_PROTO_REV) COMPATIBLE_IOCTL(DEVFSDIOC_SET_EVENT_MASK) @@ -4200,6 +4204,17 @@ COMPATIBLE_IOCTL(HCISETACLMTU) COMPATIBLE_IOCTL(HCISETSCOMTU) COMPATIBLE_IOCTL(HCIINQUIRY) +COMPATIBLE_IOCTL(HCIUARTSETPROTO) +COMPATIBLE_IOCTL(HCIUARTGETPROTO) +COMPATIBLE_IOCTL(RFCOMMCREATEDEV) +COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) +COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) +COMPATIBLE_IOCTL(RFCOMMGETDEVINFO) +COMPATIBLE_IOCTL(RFCOMMSTEALDLC) +COMPATIBLE_IOCTL(BNEPCONNADD) +COMPATIBLE_IOCTL(BNEPCONNDEL) +COMPATIBLE_IOCTL(BNEPGETCONNLIST) +COMPATIBLE_IOCTL(BNEPGETCONNINFO) /* Misc. 
*/ COMPATIBLE_IOCTL(0x41545900) /* ATYIO_CLKR */ COMPATIBLE_IOCTL(0x41545901) /* ATYIO_CLKW */ diff -Nru a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c --- a/arch/x86_64/ia32/ia32_signal.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/ia32/ia32_signal.c Tue Mar 25 18:36:42 2003 @@ -47,9 +47,16 @@ { if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) return -EFAULT; - if (from->si_code < 0) - return __copy_to_user(to, from, sizeof(siginfo_t)); - else { + if (from->si_code < 0) { + /* the only field that's different is the alignment + of the pointer in sigval_t. Move that 4 bytes down including + padding. */ + memmove(&((siginfo_t32 *)&from)->si_int, + &from->si_int, + sizeof(siginfo_t) - offsetof(siginfo_t, si_int)); + /* last 4 bytes stay the same */ + return __copy_to_user(to, from, sizeof(siginfo_t32)); + } else { int err; /* If you change siginfo_t structure, please be sure @@ -59,7 +66,7 @@ 3 ints plus the relevant union member. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user(from->si_code, &to->si_code); /* First 32bits of unions are always present. */ err |= __put_user(from->si_pid, &to->si_pid); switch (from->si_code >> 16) { @@ -108,6 +115,7 @@ mm_segment_t seg; if (uss_ptr) { u32 ptr; + memset(&uss,0,sizeof(stack_t)); if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) || __get_user(ptr, &uss_ptr->ss_sp) || __get_user(uss.ss_flags, &uss_ptr->ss_flags) || @@ -340,8 +348,11 @@ tmp = save_i387_ia32(current, fpstate, regs, 0); if (tmp < 0) err = -EFAULT; - else + else { + current->used_math = 0; + stts(); err |= __put_user((u32)(u64)(tmp ? fpstate : NULL), &sc->fpstate); + } /* non-iBCS2 extensions.. 
*/ err |= __put_user(mask, &sc->oldmask); diff -Nru a/arch/x86_64/ia32/ipc32.c b/arch/x86_64/ia32/ipc32.c --- a/arch/x86_64/ia32/ipc32.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/ia32/ipc32.c Tue Mar 25 18:36:42 2003 @@ -187,12 +187,58 @@ } } +static int put_semid(void *user_semid, struct semid64_ds *s, int version) +{ + int err2; + switch (version) { + case IPC_64: { + struct semid64_ds32 *usp64 = (struct semid64_ds32 *) user_semid; + + if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) { + err2 = -EFAULT; + break; + } + err2 = __put_user(s->sem_perm.key, &usp64->sem_perm.key); + err2 |= __put_user(s->sem_perm.uid, &usp64->sem_perm.uid); + err2 |= __put_user(s->sem_perm.gid, &usp64->sem_perm.gid); + err2 |= __put_user(s->sem_perm.cuid, &usp64->sem_perm.cuid); + err2 |= __put_user(s->sem_perm.cgid, &usp64->sem_perm.cgid); + err2 |= __put_user(s->sem_perm.mode, &usp64->sem_perm.mode); + err2 |= __put_user(s->sem_perm.seq, &usp64->sem_perm.seq); + err2 |= __put_user(s->sem_otime, &usp64->sem_otime); + err2 |= __put_user(s->sem_ctime, &usp64->sem_ctime); + err2 |= __put_user(s->sem_nsems, &usp64->sem_nsems); + break; + } + default: { + struct semid_ds32 *usp32 = (struct semid_ds32 *) user_semid; + + if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) { + err2 = -EFAULT; + break; + } + err2 = __put_user(s->sem_perm.key, &usp32->sem_perm.key); + err2 |= __put_user(s->sem_perm.uid, &usp32->sem_perm.uid); + err2 |= __put_user(s->sem_perm.gid, &usp32->sem_perm.gid); + err2 |= __put_user(s->sem_perm.cuid, &usp32->sem_perm.cuid); + err2 |= __put_user(s->sem_perm.cgid, &usp32->sem_perm.cgid); + err2 |= __put_user(s->sem_perm.mode, &usp32->sem_perm.mode); + err2 |= __put_user(s->sem_perm.seq, &usp32->sem_perm.seq); + err2 |= __put_user(s->sem_otime, &usp32->sem_otime); + err2 |= __put_user(s->sem_ctime, &usp32->sem_ctime); + err2 |= __put_user(s->sem_nsems, &usp32->sem_nsems); + break; + } + } + return err2; +} + static int semctl32 (int first, int second, int third, void 
*uptr) { union semun fourth; u32 pad; - int err = 0, err2; + int err; struct semid64_ds s; mm_segment_t old_fs; int version = ipc_parse_version32(&third); @@ -225,46 +271,10 @@ fourth.__pad = &s; old_fs = get_fs(); set_fs(KERNEL_DS); - err = sys_semctl(first, second|IPC_64, third, fourth); + err = sys_semctl(first, second, third|IPC_64, fourth); set_fs(old_fs); - - if (version == IPC_64) { - struct semid64_ds32 *usp64 = (struct semid64_ds32 *) A(pad); - - if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) { - err = -EFAULT; - break; - } - err2 = __put_user(s.sem_perm.key, &usp64->sem_perm.key); - err2 |= __put_user(s.sem_perm.uid, &usp64->sem_perm.uid); - err2 |= __put_user(s.sem_perm.gid, &usp64->sem_perm.gid); - err2 |= __put_user(s.sem_perm.cuid, &usp64->sem_perm.cuid); - err2 |= __put_user(s.sem_perm.cgid, &usp64->sem_perm.cgid); - err2 |= __put_user(s.sem_perm.mode, &usp64->sem_perm.mode); - err2 |= __put_user(s.sem_perm.seq, &usp64->sem_perm.seq); - err2 |= __put_user(s.sem_otime, &usp64->sem_otime); - err2 |= __put_user(s.sem_ctime, &usp64->sem_ctime); - err2 |= __put_user(s.sem_nsems, &usp64->sem_nsems); - } else { - struct semid_ds32 *usp32 = (struct semid_ds32 *) A(pad); - - if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) { - err = -EFAULT; - break; - } - err2 = __put_user(s.sem_perm.key, &usp32->sem_perm.key); - err2 |= __put_user(s.sem_perm.uid, &usp32->sem_perm.uid); - err2 |= __put_user(s.sem_perm.gid, &usp32->sem_perm.gid); - err2 |= __put_user(s.sem_perm.cuid, &usp32->sem_perm.cuid); - err2 |= __put_user(s.sem_perm.cgid, &usp32->sem_perm.cgid); - err2 |= __put_user(s.sem_perm.mode, &usp32->sem_perm.mode); - err2 |= __put_user(s.sem_perm.seq, &usp32->sem_perm.seq); - err2 |= __put_user(s.sem_otime, &usp32->sem_otime); - err2 |= __put_user(s.sem_ctime, &usp32->sem_ctime); - err2 |= __put_user(s.sem_nsems, &usp32->sem_nsems); - } - if (err2) - err = -EFAULT; + if (!err) + err = put_semid((void *)A(pad), &s, version); break; default: err = 
-EINVAL; @@ -343,6 +353,7 @@ return err; } + static int msgctl32 (int first, int second, void *uptr) { @@ -387,7 +398,6 @@ set_fs(KERNEL_DS); err = sys_msgctl(first, second|IPC_64, (void *) &m64); set_fs(old_fs); - if (version == IPC_64) { if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64))) { err = -EFAULT; @@ -608,7 +618,9 @@ if (err2) err = -EFAULT; break; - + default: + err = -EINVAL; + break; } return err; } diff -Nru a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c --- a/arch/x86_64/ia32/ptrace32.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/ia32/ptrace32.c Tue Mar 25 18:36:42 2003 @@ -8,7 +8,7 @@ * This allows to access 64bit processes too; but there is no way to see the extended * register contents. * - * $Id: linus.patch,v 1.87 2003/03/26 02:45:29 akpm Exp $ + * $Id: linus.patch,v 1.87 2003/03/26 02:45:29 akpm Exp $ */ #include @@ -22,11 +22,9 @@ #include #include #include -#include -#include #include -#include #include +#include #define R32(l,q) \ case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break @@ -39,29 +37,26 @@ switch (regno) { case offsetof(struct user32, regs.fs): if (val && (val & 3) != 3) return -EIO; - child->thread.fs = val; + child->thread.fs = val & 0xffff; break; case offsetof(struct user32, regs.gs): if (val && (val & 3) != 3) return -EIO; - child->thread.gs = val; + child->thread.gs = val & 0xffff; break; case offsetof(struct user32, regs.ds): if (val && (val & 3) != 3) return -EIO; - child->thread.ds = val; + child->thread.ds = val & 0xffff; break; case offsetof(struct user32, regs.es): - if (val && (val & 3) != 3) return -EIO; - child->thread.es = val; + child->thread.es = val & 0xffff; break; - case offsetof(struct user32, regs.ss): if ((val & 3) != 3) return -EIO; - stack[offsetof(struct pt_regs, ss)/8] = val; + stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff; break; - case offsetof(struct user32, regs.cs): if ((val & 3) != 3) return -EIO; - stack[offsetof(struct pt_regs, cs)/8] = val; 
+ stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff; break; R32(ebx, rbx); @@ -79,8 +74,16 @@ stack[offsetof(struct pt_regs, eflags)/8] = val & 0x44dd5; break; - case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[6]): - child->thread.debugreg[(regno-offsetof(struct user32, u_debugreg[0]))/4] = val; + case offsetof(struct user32, u_debugreg[4]): + case offsetof(struct user32, u_debugreg[5]): + return -EIO; + + case offsetof(struct user32, u_debugreg[0]) ... + offsetof(struct user32, u_debugreg[3]): + case offsetof(struct user32, u_debugreg[6]): + child->thread.debugreg + [(regno-offsetof(struct user32, u_debugreg[0]))/4] + = val; break; case offsetof(struct user32, u_debugreg[7]): @@ -170,11 +173,19 @@ if (child) get_task_struct(child); read_unlock(&tasklist_lock); - *err = ptrace_check_attach(child,0); - if (*err == 0) + if (child) { + *err = -EPERM; + if (child->pid == 1) + goto out; + *err = ptrace_check_attach(child, request == PTRACE_KILL); + if (*err < 0) + goto out; return child; + } + out: put_task_struct(child); return NULL; + } extern asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data); @@ -187,6 +198,9 @@ __u32 val; switch (request) { + default: + return sys_ptrace(request, pid, addr, data); + case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: case PTRACE_POKEDATA: @@ -201,9 +215,6 @@ case PTRACE_GETFPXREGS: break; - default: - ret = sys_ptrace(request, pid, addr, data); - return ret; } child = find_target(request, pid, &ret); @@ -261,7 +272,6 @@ ret = -EIO; break; } - empty_fpu(child); ret = 0; for ( i = 0; i <= 16*4; i += sizeof(u32) ) { ret |= __get_user(tmp, (u32 *) (unsigned long) data); @@ -271,33 +281,47 @@ break; } - case PTRACE_SETFPREGS: - empty_fpu(child); + case PTRACE_GETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_READ, (void *)(u64)data, + sizeof(struct user_i387_struct))) + break; save_i387_ia32(child, (void *)(u64)data, childregs, 1); ret = 0; break; - case 
PTRACE_GETFPREGS: - empty_fpu(child); - restore_i387_ia32(child, (void *)(u64)data, 1); + case PTRACE_SETFPREGS: + ret = -EIO; + if (!access_ok(VERIFY_WRITE, (void *)(u64)data, + sizeof(struct user_i387_struct))) + break; ret = 0; + /* don't check EFAULT to be bug-to-bug compatible to i386 */ + restore_i387_ia32(child, (void *)(u64)data, 1); break; case PTRACE_GETFPXREGS: { struct user32_fxsr_struct *u = (void *)(u64)data; - empty_fpu(child); - ret = copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)); - ret |= __put_user(childregs->cs, &u->fcs); - ret |= __put_user(child->thread.ds, &u->fos); - if (ret) + init_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + break; ret = -EFAULT; + if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u))) + break; + ret = __put_user(childregs->cs, &u->fcs); + ret |= __put_user(child->thread.ds, &u->fos); break; } case PTRACE_SETFPXREGS: { struct user32_fxsr_struct *u = (void *)(u64)data; - empty_fpu(child); - /* no error checking to be bug to bug compatible with i386 */ - copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)); + unlazy_fpu(child); + ret = -EIO; + if (!access_ok(VERIFY_READ, u, sizeof(*u))) + break; + /* no checking to be bug-to-bug compatible with i386 */ + __copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)); + child->used_math = 1; child->thread.i387.fxsave.mxcsr &= 0xffbf; ret = 0; break; diff -Nru a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c --- a/arch/x86_64/ia32/sys_ia32.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/ia32/sys_ia32.c Tue Mar 25 18:36:42 2003 @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ #define A(__x) ((unsigned long)(__x)) #define AA(__x) ((unsigned long)(__x)) +#define u32_to_ptr(x) ((void *)(u64)(x)) #define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1))) #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) @@ -738,7 +740,7 @@ asmlinkage 
ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long); static struct iovec * -get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, int type, int *errp) +get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 *count, int type, int *errp) { int i; u32 buf, len; @@ -747,15 +749,18 @@ /* Get the "struct iovec" from user memory */ - if (!count) + *errp = 0; + if (!*count) return 0; - if (count > UIO_MAXIOV) + *errp = -EINVAL; + if (*count > UIO_MAXIOV) return(struct iovec *)0; - if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*count)) + *errp = -EFAULT; + if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*(*count))) return(struct iovec *)0; - if (count > UIO_FASTIOV) { + if (*count > UIO_FASTIOV) { *errp = -ENOMEM; - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); + iov = kmalloc(*count*sizeof(struct iovec), GFP_KERNEL); if (!iov) return((struct iovec *)0); } else @@ -763,14 +768,19 @@ ivp = iov; totlen = 0; - for (i = 0; i < count; i++) { + for (i = 0; i < *count; i++) { *errp = __get_user(len, &iov32->iov_len) | __get_user(buf, &iov32->iov_base); if (*errp) goto error; *errp = verify_area(type, (void *)A(buf), len); - if (*errp) + if (*errp) { + if (i > 0) { + *count = i; + break; + } goto error; + } /* SuS checks: */ *errp = -EINVAL; if ((int)len < 0) @@ -799,7 +809,7 @@ int ret; mm_segment_t old_fs = get_fs(); - if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_WRITE, &ret)) == NULL) + if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_WRITE, &ret)) == NULL) return ret; set_fs(KERNEL_DS); ret = sys_readv(fd, iov, count); @@ -817,7 +827,7 @@ int ret; mm_segment_t old_fs = get_fs(); - if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_READ, &ret)) == NULL) + if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_READ, &ret)) == NULL) return ret; set_fs(KERNEL_DS); ret = sys_writev(fd, iov, count); @@ -1672,21 +1682,26 @@ return cnt; } -long 
sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs) +asmlinkage long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs) { mm_segment_t oldseg; - char **buf; - int na,ne; + char **buf = NULL; + int na = 0,ne = 0; int ret; - unsigned sz; + unsigned sz = 0; + if (argv) { na = nargs(argv, NULL); if (na < 0) return -EFAULT; + } + if (envp) { ne = nargs(envp, NULL); if (ne < 0) return -EFAULT; + } + if (argv || envp) { sz = (na+ne)*sizeof(void *); if (sz > PAGE_SIZE) buf = vmalloc(sz); @@ -1694,14 +1709,19 @@ buf = kmalloc(sz, GFP_KERNEL); if (!buf) return -ENOMEM; + } + if (argv) { ret = nargs(argv, buf); if (ret < 0) goto free; + } + if (envp) { ret = nargs(envp, buf + na); if (ret < 0) goto free; + } name = getname(name); ret = PTR_ERR(name); @@ -1710,7 +1730,7 @@ oldseg = get_fs(); set_fs(KERNEL_DS); - ret = do_execve(name, buf, buf+na, &regs); + ret = do_execve(name, argv ? buf : NULL, envp ? buf+na : NULL, &regs); set_fs(oldseg); if (ret == 0) @@ -1719,10 +1739,12 @@ putname(name); free: + if (argv || envp) { if (sz > PAGE_SIZE) vfree(buf); else kfree(buf); + } return ret; } @@ -2012,12 +2034,8 @@ long sys32_module_warning(void) { - static long warn_time = -(60*HZ); - if (time_before(warn_time + 60*HZ,jiffies) && strcmp(current->comm,"klogd")) { printk(KERN_INFO "%s: 32bit 2.4.x modutils not supported on 64bit kernel\n", current->comm); - warn_time = jiffies; - } return -ENOSYS ; } @@ -2055,6 +2073,7 @@ return err; } + extern long sys_io_setup(unsigned nr_reqs, aio_context_t *ctx); long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p) @@ -2071,48 +2090,47 @@ return ret; } -extern asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr, - struct iocb **iocbpp); - -long sys32_io_submit(aio_context_t ctx_id, unsigned long nr, +asmlinkage long sys32_io_submit(aio_context_t ctx_id, int nr, u32 *iocbpp) { - mm_segment_t oldfs = get_fs(); - int k, err = 0; - struct iocb **iocb64; - if (nr > 128) + struct kioctx *ctx; + long ret = 0; + int i; + + if
(unlikely(nr < 0)) + return -EINVAL; + + if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) + return -EFAULT; + + ctx = lookup_ioctx(ctx_id); + if (unlikely(!ctx)) { + pr_debug("EINVAL: io_submit: invalid context id\n"); return -EINVAL; - iocb64 = kmalloc(sizeof(struct iocb *) * nr, GFP_KERNEL); - if (!iocb64) - return -ENOMEM; - for (k = 0; k < nr && !err; k++) { - u64 val1, val2; - u32 iocb32; - struct iocb *iocb; - err = get_user(iocb32, (u32 *)(u64)iocbpp[k]); - iocb64[k] = iocb = (void *)(u64)iocb32; - - if (get_user(val1, &iocb->aio_buf) || - get_user(val2, &iocb->aio_nbytes)) - err = -EFAULT; - else if (!val1) /* should check cmd */ - ; - else if (verify_area(VERIFY_WRITE, (void*)val1, val2)) - err = -EFAULT; - - /* paranoia check - remove it when you are sure they - are not pointers */ - if (get_user(val1, &iocb->aio_reserved2) || val1 || - get_user(val2, &iocb->aio_reserved2) || val2) - err = -EFAULT; } - if (!err) { - set_fs(KERNEL_DS); - err = sys_io_submit(ctx_id, nr, iocb64); - set_fs(oldfs); + + for (i=0; icomm); - warn_time = jiffies; - } return -ENOSYS ; } diff -Nru a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile --- a/arch/x86_64/kernel/Makefile Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/Makefile Tue Mar 25 18:36:42 2003 @@ -10,14 +10,13 @@ setup64.o bluesmoke.o bootflag.o e820.o reboot.o obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o -obj-$(CONFIG_ACPI) += acpi.o -obj-$(CONFIG_ACPI_SLEEP) += wakeup.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o diff -Nru a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c --- 
a/arch/x86_64/kernel/aperture.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/aperture.c Tue Mar 25 18:36:42 2003 @@ -105,7 +105,8 @@ if (!fix && !fallback_aper_force) return; - printk("Your BIOS is broken and doesn't leave a aperture memory hole\n"); + printk("Your BIOS doesn't leave a aperture memory hole\n"); + printk("Please enable the IOMMU option in the BIOS setup\n"); aper_alloc = allocate_aperture(); if (!aper_alloc) return; diff -Nru a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c --- a/arch/x86_64/kernel/apic.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/apic.c Tue Mar 25 18:36:42 2003 @@ -408,7 +408,7 @@ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); - printk("ESR value before enabling vector: %08x\n", value); + Dprintk("ESR value before enabling vector: %08x\n", value); value = ERROR_APIC_VECTOR; // enables sending errors apic_write_around(APIC_LVTERR, value); @@ -418,7 +418,7 @@ if (maxlvt > 3) apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); - printk("ESR value after enabling vector: %08x\n", value); + Dprintk("ESR value after enabling vector: %08x\n", value); } else { if (esr_disable) /* @@ -1080,9 +1080,10 @@ if (nmi_watchdog == NMI_LOCAL_APIC) check_nmi_watchdog(); #ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); + else + nr_ioapics = 0; #endif setup_boot_APIC_clock(); diff -Nru a/arch/x86_64/kernel/bluesmoke.c b/arch/x86_64/kernel/bluesmoke.c --- a/arch/x86_64/kernel/bluesmoke.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/bluesmoke.c Tue Mar 25 18:36:42 2003 @@ -1,65 +1,87 @@ /* - * arch/x86_64/kernel/bluesmoke.c - x86-64 Machine Check Exception Reporting - * - -RED-PEN: need to add power management to restore after S3 wakeup. - + * Machine check handler. + * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. + * Rest from unknown author(s). 
*/ - +#include #include #include #include -#include -#include -#include -#include +#include +#include +#include #include -#include #include -#include -#include -#include +#include +#include +#include -#ifdef CONFIG_X86_MCE +static int mce_disabled __initdata; +static unsigned long mce_cpus; -static int mce_disabled __initdata = 0; +/* + * Machine Check Handler For PII/PIII/K7 + */ static int banks; +static unsigned long ignored_banks, disabled_banks; +/* Machine Check on everything dubious. This is a good setting + for device driver testing. */ +#define K8_DRIVER_DEBUG ((1<<13)-1) +/* Report RAM errors and Hyper Transport Problems, but ignore Device + aborts and GART errors. */ +#define K8_NORMAL_OP 0xff -/* - * Machine Check Handler For Hammer - */ +#ifdef CONFIG_MCE_DEBUG +static u32 k8_nb_flags __initdata = K8_DRIVER_DEBUG; +#else +static u32 k8_nb_flags __initdata = K8_NORMAL_OP; +#endif -static void hammer_machine_check(struct pt_regs * regs, long error_code) +static void generic_machine_check(struct pt_regs * regs, long error_code) { int recover=1; u32 alow, ahigh, high, low; u32 mcgstl, mcgsth; int i; + preempt_disable(); + rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); if(mcgstl&(1<<0)) /* Recoverable ? 
*/ recover=0; printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); - preempt_disable(); - for (i=0;i:%016lx RSP %016lx\n", + regs->cs, regs->rip, regs->rsp); + + for(i=0;ibus->number==0 && PCI_FUNC(dev->devfn)==3 && + PCI_SLOT(dev->devfn) == (24+cpu)) + return dev; + } + return NULL; +} -static void mce_checkregs (void *info) +static void check_k8_nb(void) { - u32 low, high; - int i; + struct pci_dev *nb; + nb = find_k8_nb(); + if (nb == NULL) + return; - for (i=0; irip, regs->rsp); + + others: + generic_machine_check(regs, error_code); + preempt_enable(); +} + +static struct timer_list mcheck_timer; +int mcheck_interval = 30*HZ; -static void mce_timerfunc (unsigned long data) +#ifndef CONFIG_SMP +static void mcheck_timer_handler(unsigned long data) { - on_each_cpu (mce_checkregs, NULL, 1, 1); + k8_machine_check(NULL,0); + mcheck_timer.expires = jiffies + mcheck_interval; + add_timer(&mcheck_timer); +} +#else + +/* SMP needs a process context trampoline because smp_call_function cannot be + called from interrupt context. */ - /* Refresh the timer. 
*/ - mce_timer.expires = jiffies + MCE_RATE; - add_timer (&mce_timer); +static void mcheck_timer_other(void *data) +{ + k8_machine_check(NULL, 0); +} + +static void mcheck_timer_dist(void *data) +{ + smp_call_function(mcheck_timer_other,0,0,0); + k8_machine_check(NULL, 0); + mcheck_timer.expires = jiffies + mcheck_interval; + add_timer(&mcheck_timer); +} + +static void mcheck_timer_handler(unsigned long data) +{ + static DECLARE_WORK(mcheck_work, mcheck_timer_dist, NULL); + schedule_work(&mcheck_work); } #endif +static int nok8 __initdata; + +static void __init k8_mcheck_init(struct cpuinfo_x86 *c) +{ + u64 cap; + int i; + struct pci_dev *nb; + + if (!test_bit(X86_FEATURE_MCE, &c->x86_capability) || + !test_bit(X86_FEATURE_MCA, &c->x86_capability)) + return; + + rdmsrl(MSR_IA32_MCG_CAP, cap); + banks = cap&0xff; + machine_check_vector = k8_machine_check; + for (i = 0; i < banks; i++) { + u64 val = ((1UL<devfn, reg, reg2); + ignored_banks |= (1UL<<4); + } + + set_in_cr4(X86_CR4_MCE); + + if (mcheck_interval && (smp_processor_id() == 0)) { + init_timer(&mcheck_timer); + mcheck_timer.function = (void (*)(unsigned long))mcheck_timer_handler; + mcheck_timer.expires = jiffies + mcheck_interval; + add_timer(&mcheck_timer); + } + + printk(KERN_INFO "Machine Check Reporting enabled for CPU#%d\n", smp_processor_id()); +} /* - * Set up machine check reporting for processors with Intel style MCE + * Set up machine check reporting for Intel processors */ -static void __init hammer_mcheck_init(struct cpuinfo_x86 *c) +static void __init generic_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; @@ -154,33 +310,36 @@ * Check for MCE support */ - if( !test_bit(X86_FEATURE_MCE, c->x86_capability) ) + if( !test_bit(X86_FEATURE_MCE, &c->x86_capability) ) return; - /* Check for PPro style MCA */ - if( !test_bit(X86_FEATURE_MCA, c->x86_capability) ) + /* + * Check for PPro style MCA + */ + + if( !test_bit(X86_FEATURE_MCA, &c->x86_capability) ) return; /* Ok machine check is available 
*/ - machine_check_vector = hammer_machine_check; + + machine_check_vector = generic_machine_check; wmb(); if(done==0) - printk(KERN_INFO "Machine check architecture supported.\n"); + printk(KERN_INFO "Intel machine check architecture supported.\n"); rdmsr(MSR_IA32_MCG_CAP, l, h); - if(l&(1<<8)) /* Control register present ? */ + if(l&(1<<8)) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); banks = l&0xff; - for(i=0; ix86_vendor) - { + switch(c->x86_vendor) { case X86_VENDOR_AMD: - hammer_mcheck_init(c); -#ifdef CONFIG_X86_MCE_NONFATAL - if (timerset == 0) { - /* Set the timer to check for non-fatal - errors every MCE_RATE seconds */ - init_timer (&mce_timer); - mce_timer.expires = jiffies + MCE_RATE; - mce_timer.data = 0; - mce_timer.function = &mce_timerfunc; - add_timer (&mce_timer); - timerset = 1; - printk(KERN_INFO "Machine check exception polling timer started.\n"); - } -#endif + if (c->x86 == 15 && !nok8) { + k8_mcheck_init(c); break; - + } + /* FALL THROUGH */ default: + case X86_VENDOR_INTEL: + generic_mcheck_init(c); break; } } @@ -224,16 +375,33 @@ return 0; } + +/* mce=off disable machine check + mce=nok8 disable k8 specific features + mce=disable disable bank NUMBER + mce=enable enable bank number + mce=device Enable device driver test reporting in NB + mce=NUMBER mcheck timer interval number seconds. 
+ Can be also comma separated in a single mce= */ static int __init mcheck_enable(char *str) { - mce_disabled = -1; + char *p; + while ((p = strsep(&str,",")) != NULL) { + if (isdigit(*p)) + mcheck_interval = simple_strtol(p,NULL,0) * HZ; + else if (!strcmp(p,"off")) + mce_disabled = 1; + else if (!strncmp(p,"enable",6)) + disabled_banks &= ~(1< level2_kernel_pgt (so that __va works even before pagetable_init) */ .org 0xb000 +ENTRY(wakeup_level4_pgt) + .quad 0x0000000000102007 /* -> level3_ident_pgt */ + .fill 255,8,0 + .quad 0x000000000010a007 + .fill 254,8,0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad 0x0000000000103007 /* -> level3_kernel_pgt */ + +.org 0xc000 .data .align 16 @@ -371,3 +380,4 @@ .quad 0 .quad 0 .endr + diff -Nru a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c --- a/arch/x86_64/kernel/head64.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/head64.c Tue Mar 25 18:36:42 2003 @@ -15,6 +15,7 @@ #include #include #include +#include /* Don't add a printk in there. printk relies on the PDA which is not initialized yet. */ @@ -51,7 +52,7 @@ printk("old bootloader convention, maybe loadlin?\n"); } command_line = (char *) ((u64)(new_data)); - memcpy(saved_command_line, command_line, 2048); + memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); printk("Bootdata ok (command line is %s)\n", saved_command_line); } diff -Nru a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c --- a/arch/x86_64/kernel/i387.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/i387.c Tue Mar 25 18:36:42 2003 @@ -42,7 +42,7 @@ /* clean state in init */ stts(); - clear_thread_flag(TIF_USEDFPU); + current_thread_info()->status = 0; current->used_math = 0; } @@ -51,13 +51,12 @@ * so initialize it and set the mxcsr to its default. * remeber the current task has used the FPU. 
*/ -void init_fpu(void) +void init_fpu(struct task_struct *child) { - struct task_struct *me = current; - memset(&me->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); - me->thread.i387.fxsave.cwd = 0x37f; - me->thread.i387.fxsave.mxcsr = 0x1f80; - me->used_math = 1; + memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); + child->thread.i387.fxsave.cwd = 0x37f; + child->thread.i387.fxsave.mxcsr = 0x1f80; + child->used_math = 1; } /* @@ -81,7 +80,7 @@ if (!tsk->used_math) return 0; tsk->used_math = 0; /* trigger finit */ - if (test_thread_flag(TIF_USEDFPU)) { + if (tsk->thread_info->status & TS_USEDFPU) { err = save_i387_checking((struct i387_fxsave_struct *)buf); if (err) return err; stts(); @@ -99,7 +98,7 @@ int get_fpregs(struct user_i387_struct *buf, struct task_struct *tsk) { - empty_fpu(tsk); + init_fpu(tsk); return __copy_to_user((void *)buf, &tsk->thread.i387.fxsave, sizeof(struct user_i387_struct)) ? -EFAULT : 0; } diff -Nru a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c --- a/arch/x86_64/kernel/nmi.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/nmi.c Tue Mar 25 18:36:42 2003 @@ -25,13 +25,15 @@ #include #include #include +#include +#include extern void default_do_nmi(struct pt_regs *); unsigned int nmi_watchdog = NMI_LOCAL_APIC; static unsigned int nmi_hz = HZ; unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ -extern void show_registers(struct pt_regs *regs); +int nmi_watchdog_disabled; #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) @@ -251,15 +253,13 @@ alert_counter[i] = 0; } -void nmi_watchdog_tick (struct pt_regs * regs) +void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { + if (nmi_watchdog_disabled) + return; + + int sum, cpu = safe_smp_processor_id(); - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). 
- */ - int sum, cpu = smp_processor_id(); sum = read_pda(apic_timer_irqs); if (last_irq_sums[cpu] == sum) { @@ -269,6 +269,10 @@ */ alert_counter[cpu]++; if (alert_counter[cpu] == 5*nmi_hz) { + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_BAD) { + alert_counter[cpu] = 0; + return; + } spin_lock(&nmi_print_lock); /* * We are in trouble anyway, lets at least try diff -Nru a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c --- a/arch/x86_64/kernel/pci-gart.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/pci-gart.c Tue Mar 25 18:36:42 2003 @@ -8,7 +8,7 @@ * See Documentation/DMA-mapping.txt for the interface specification. * * Copyright 2002 Andi Kleen, SuSE Labs. - * $Id: linus.patch,v 1.87 2003/03/26 02:45:29 akpm Exp $ + * $Id: linus.patch,v 1.87 2003/03/26 02:45:29 akpm Exp $ */ /* @@ -19,9 +19,12 @@ possible future tuning: fast path for sg streaming mappings - more intelligent flush strategy - flush only a single NB? + more intelligent flush strategy - flush only a single NB? flush only when + gart area fills up and alloc_iommu wraps. + don't flush on allocation - need to unmap the gart area first to avoid prefetches + by the CPU move boundary between IOMMU and AGP in GART dynamically - could use exact fit in the gart in alloc_consistent, not order of two. 
+ */ #include @@ -49,7 +52,11 @@ int no_iommu; static int no_agp; +#ifdef CONFIG_IOMMU_DEBUG int force_mmu = 1; +#else +int force_mmu = 0; +#endif extern int fallback_aper_order; extern int fallback_aper_force; @@ -58,10 +65,9 @@ static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED; static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ -#define GPTE_MASK 0xfffffff000 #define GPTE_VALID 1 #define GPTE_COHERENT 2 -#define GPTE_ENCODE(x,flag) (((x) & 0xfffffff0) | ((x) >> 28) | GPTE_VALID | (flag)) +#define GPTE_ENCODE(x) (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) #define for_all_nb(dev) \ @@ -72,7 +78,6 @@ #define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP -extern int agp_amdk8_init(void); extern int agp_init(void); #define AGPEXTERN extern #else @@ -130,7 +135,7 @@ { void *memory; int gfp = GFP_ATOMIC; - int order, i; + int i; unsigned long iommu_page; if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu) @@ -140,15 +145,15 @@ * First try to allocate continuous and use directly if already * in lowmem. */ - order = get_order(size); - memory = (void *)__get_free_pages(gfp, order); + size = round_up(size, PAGE_SIZE); + memory = (void *)__get_free_pages(gfp, get_order(size)); if (memory == NULL) { return NULL; } else { int high = (unsigned long)virt_to_bus(memory) + size >= 0xffffffff; int mmu = high; - if (force_mmu) + if (force_mmu && !(gfp & GFP_DMA)) mmu = 1; if (no_iommu) { if (high) goto error; @@ -161,19 +166,21 @@ } } - iommu_page = alloc_iommu(1<>= PAGE_SHIFT; + + iommu_page = alloc_iommu(size); if (iommu_page == -1) goto error; /* Fill in the GATT, allocating pages as needed. 
*/ - for (i = 0; i < 1< 0) atomic_inc(&virt_to_page(mem)->count); phys_mem = virt_to_phys(mem); - BUG_ON(phys_mem & ~PTE_MASK); - iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem,GPTE_COHERENT); + BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK); + iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); } flush_gart(); @@ -181,7 +188,7 @@ return memory; error: - free_pages((unsigned long)memory, order); + free_pages((unsigned long)memory, get_order(size)); return NULL; } @@ -193,30 +200,32 @@ void *vaddr, dma_addr_t bus) { u64 pte; - int order = get_order(size); unsigned long iommu_page; int i; + size = round_up(size, PAGE_SIZE); if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) { - free_pages((unsigned long)vaddr, order); + free_pages((unsigned long)vaddr, get_order(size)); return; } + size >>= PAGE_SHIFT; iommu_page = (bus - iommu_bus_base) / PAGE_SIZE; - for (i = 0; i < 1<>10); return 0; @@ -530,8 +547,10 @@ off don't use the IOMMU leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) memaper[=order] allocate an own aperture over RAM with size 32MB^order. + noforce don't force IOMMU usage. Should be fastest. + force Force IOMMU and turn on unmap debugging. 
*/ -__init int iommu_setup(char *opt, char **end) +__init int iommu_setup(char *opt) { int arg; char *p = opt; @@ -552,17 +571,21 @@ fallback_aper_order = arg; } #ifdef CONFIG_IOMMU_LEAK - if (!memcmp(p,"leak", 4)) + if (!memcmp(p,"leak", 4)) { leak_trace = 1; + p += 4; + if (*p == '=') ++p; + if (isdigit(*p) && get_option(&p, &arg)) + iommu_leak_pages = arg; + } else #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; do { - if (*p == ' ' || *p == 0) { - *end = p; + if (*p == ' ' || *p == 0) return 0; - } } while (*p++ != ','); } + return 1; } diff -Nru a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c --- a/arch/x86_64/kernel/process.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/process.c Tue Mar 25 18:36:42 2003 @@ -192,6 +192,8 @@ fs,fsindex,gs,gsindex,shadowgs); printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); + + show_trace(®s->rsp); } extern void load_gs_index(unsigned); @@ -260,6 +262,14 @@ (((u32)desc->base2) << 24); } +/* + * This gets called before we allocate a new thread and copy + * the current task into it. 
+ */ +void prepare_to_copy(struct task_struct *tsk) +{ + unlazy_fpu(tsk); +} int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, unsigned long unused, @@ -294,9 +304,6 @@ asm("movl %%es,%0" : "=m" (p->thread.es)); asm("movl %%ds,%0" : "=m" (p->thread.ds)); - unlazy_fpu(me); - p->thread.i387 = me->thread.i387; - if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) @@ -314,7 +321,7 @@ err = ia32_child_tls(p, childregs); else #endif - err = do_arch_prctl(p, ARCH_SET_FS, childregs->r10); + err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); if (err) goto out; } diff -Nru a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c --- a/arch/x86_64/kernel/ptrace.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/ptrace.c Tue Mar 25 18:36:42 2003 @@ -240,8 +240,8 @@ unsigned long tmp; ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) + if ((addr & 7) || addr < 0 || + addr > sizeof(struct user) - 7) break; tmp = 0; /* Default return condition */ @@ -250,7 +250,7 @@ if(addr >= (long) &dummy->u_debugreg[0] && addr <= (long) &dummy->u_debugreg[7]){ addr -= (long) &dummy->u_debugreg[0]; - addr = addr >> 2; + addr = addr >> 3; tmp = child->thread.debugreg[addr]; } ret = put_user(tmp,(unsigned long *) data); @@ -268,8 +268,8 @@ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) + if ((addr & 7) || addr < 0 || + addr > sizeof(struct user) - 7) break; if (addr < sizeof(struct user_regs_struct)) { @@ -290,6 +290,11 @@ if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; + if (addr == (long) &dummy->u_debugreg[6]) { + if (data >> 32) + goto out_tsk; + } + if(addr == (long) &dummy->u_debugreg[7]) { data &= ~DR_CONTROL_RESERVED; for(i=0; i<4; i++) @@ -298,7 +303,7 @@ } addr -= (long) &dummy->u_debugreg; - 
addr = addr >> 2; + addr = addr >> 3; child->thread.debugreg[addr] = data; ret = 0; } diff -Nru a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c --- a/arch/x86_64/kernel/reboot.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/reboot.c Tue Mar 25 18:36:42 2003 @@ -101,7 +101,6 @@ * Stop all CPUs and turn off local APICs and the IO-APIC, so * other OSs see a clean IRQ state. */ - if (notify_die(DIE_STOP,"cpustop",0,0) != NOTIFY_BAD) smp_send_stop(); disable_IO_APIC(); #endif diff -Nru a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c --- a/arch/x86_64/kernel/setup.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/setup.c Tue Mar 25 18:36:42 2003 @@ -197,12 +197,15 @@ if (!memcmp(from, "acpi=off", 8)) acpi_disabled = 1; + if (!memcmp(from, "disableapic", 11)) + disable_apic = 1; + if (!memcmp(from, "mem=", 4)) parse_memopt(from+4, &from); #ifdef CONFIG_GART_IOMMU if (!memcmp(from,"iommu=",6)) { - iommu_setup(from+6, &from); + iommu_setup(from+6); } #endif diff -Nru a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c --- a/arch/x86_64/kernel/setup64.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/setup64.c Tue Mar 25 18:36:42 2003 @@ -40,7 +40,7 @@ char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned; unsigned long __supported_pte_mask = ~0UL; -static int do_not_nx = 0; +static int do_not_nx = 1; static int __init nonx_setup(char *str) { @@ -98,6 +98,8 @@ pda->cpudata_offset = 0; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; if (cpu == 0) { /* others are initialized in smpboot.c */ @@ -121,8 +123,6 @@ asm volatile("movq %0,%%cr3" :: "r" (__pa(level4))); pda->irqstackptr += IRQSTACKSIZE-64; - pda->active_mm = &init_mm; - pda->mmu_state = 0; } #define EXCEPTION_STK_ORDER 0 /* >= N_EXCEPTION_STACKS*EXCEPTION_STKSZ */ diff -Nru a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c --- a/arch/x86_64/kernel/signal.c Tue Mar 25 18:36:42 2003 
+++ b/arch/x86_64/kernel/signal.c Tue Mar 25 18:36:42 2003 @@ -225,6 +225,7 @@ rsp = regs->rsp - 128; /* This is the X/Open sanctioned signal stack switching. */ + /* RED-PEN: redzone on that stack? */ if (ka->sa.sa_flags & SA_ONSTACK) { if (sas_ss_flags(rsp) == 0) rsp = current->sas_ss_sp + current->sas_ss_size; @@ -433,7 +434,8 @@ * have been cleared if the watchpoint triggered * inside the kernel. */ - __asm__("movq %0,%%db7" : : "r" (current->thread.debugreg[7])); + if (current->thread.debugreg[7]) + asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg[7])); /* Whee! Actually deliver the signal. */ handle_signal(signr, &info, oldset, regs); diff -Nru a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c --- a/arch/x86_64/kernel/smp.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/smp.c Tue Mar 25 18:36:42 2003 @@ -485,16 +485,16 @@ } } - /* Slow. Should be only used for debugging. */ int slow_smp_processor_id(void) { int stack_location; unsigned long sp = (unsigned long)&stack_location; - int cpu; - unsigned long mask; + int offset = 0, cpu; + + for (offset = 0; (cpu_online_map >> offset); offset = cpu + 1) { + cpu = ffz(~(cpu_online_map >> offset)); - for_each_cpu(cpu, mask) { if (sp >= (u64)cpu_pda[cpu].irqstackptr - IRQSTACKSIZE && sp <= (u64)cpu_pda[cpu].irqstackptr) return cpu; diff -Nru a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c --- a/arch/x86_64/kernel/smpboot.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/smpboot.c Tue Mar 25 18:36:42 2003 @@ -318,8 +318,6 @@ */ smp_store_cpu_info(cpuid); - notify_die(DIE_CPUINIT, "cpuinit", NULL, 0); - local_irq_disable(); /* @@ -898,6 +896,8 @@ */ if (!skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); + else + nr_ioapics = 0; setup_boot_APIC_clock(); diff -Nru a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c --- a/arch/x86_64/kernel/suspend.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/suspend.c Tue Mar 25 18:36:42 2003 @@ -28,7 +28,7 @@ #include #include -static 
struct saved_context saved_context; +struct saved_context saved_context; unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx; unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi; diff -Nru a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c --- a/arch/x86_64/kernel/traps.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/kernel/traps.c Tue Mar 25 18:36:42 2003 @@ -110,7 +110,7 @@ } #endif -static inline unsigned long *in_exception_stack(int cpu, unsigned long stack) +unsigned long *in_exception_stack(int cpu, unsigned long stack) { int k; for (k = 0; k < N_EXCEPTION_STACKS; k++) { @@ -249,7 +249,7 @@ void dump_stack(void) { unsigned long dummy; - show_stack(&dummy); + show_trace(&dummy); } void show_registers(struct pt_regs *regs) @@ -344,7 +344,6 @@ show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); - notify_die(DIE_OOPS, (char *)str, regs, err); do_exit(SIGSEGV); } @@ -419,6 +418,8 @@ #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \ + return; \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ } @@ -430,10 +431,13 @@ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void *)siaddr; \ + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \ + return; \ do_trap(trapnr, signr, str, regs, error_code, &info); \ } DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) +DO_ERROR( 3, SIGTRAP, "int3", int3); DO_ERROR( 4, SIGSEGV, "overflow", overflow) DO_ERROR( 5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip) @@ -446,13 +450,6 @@ DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2()) DO_ERROR(18, SIGSEGV, "reserved", reserved) -asmlinkage void do_int3(struct pt_regs * regs, long 
error_code) -{ - if (notify_die(DIE_INT3, "int3", regs, error_code) == NOTIFY_BAD) - return; - do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); -} - extern void dump_pagetable(unsigned long); asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) @@ -493,6 +490,8 @@ regs->rip = fixup->fixup; return; } + notify_die(DIE_GPF, "general protection fault", regs, error_code, + 13, SIGSEGV); die("general protection fault", regs, error_code); } } @@ -537,14 +536,14 @@ * so it must be the NMI watchdog. */ if (nmi_watchdog) { - nmi_watchdog_tick(regs); + nmi_watchdog_tick(regs,reason); return; } #endif unknown_nmi_error(reason, regs); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason) == NOTIFY_BAD) + if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_BAD) return; if (reason & 0x80) mem_parity_error(reason, regs); @@ -569,6 +568,7 @@ #ifdef CONFIG_CHECKING { + /* RED-PEN interaction with debugger - could destroy gs */ unsigned long gs; struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); rdmsrl(MSR_GS_BASE, gs); @@ -583,9 +583,6 @@ conditional_sti(regs); - if (notify_die(DIE_DEBUG, "debug", regs, error_code) == NOTIFY_BAD) - return; - /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg[7]) { @@ -618,17 +615,22 @@ info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_BRKPT; - info.si_addr = ((regs->cs & 3) == 0) ? 
(void *)tsk->thread.rip : - (void *)regs->rip; + if ((regs->cs & 3) == 0) + goto clear_dr7; + + info.si_addr = (void *)regs->rip; force_sig_info(SIGTRAP, &info, tsk); clear_dr7: - asm("movq %0,%%db7"::"r"(0UL)); + asm volatile("movq %0,%%db7"::"r"(0UL)); + notify_die(DIE_DEBUG, "debug", regs, error_code, 1, SIGTRAP); return; clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); clear_TF: + /* RED-PEN could cause spurious errors */ + if (notify_die(DIE_DEBUG, "debug2", regs, error_code, 1, SIGTRAP) != NOTIFY_BAD) regs->eflags &= ~TF_MASK; return; } @@ -775,9 +777,9 @@ clts(); /* Allow maths ops (or we recurse) */ if (!me->used_math) - init_fpu(); + init_fpu(me); restore_fpu_checking(&me->thread.i387.fxsave); - set_thread_flag(TIF_USEDFPU); + me->thread_info->status |= TS_USEDFPU; } asmlinkage void math_emulate(void) @@ -787,7 +789,7 @@ void do_call_debug(struct pt_regs *regs) { - notify_die(DIE_CALL, "debug call", regs, 0); + notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT); } void __init trap_init(void) @@ -819,8 +821,6 @@ set_intr_gate(KDB_VECTOR, call_debug); - notify_die(DIE_TRAPINIT, "traps initialized", 0, 0); - /* * Should be a barrier for any external CPU state. 
*/ diff -Nru a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c --- a/arch/x86_64/mm/fault.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/mm/fault.c Tue Mar 25 18:36:42 2003 @@ -57,29 +57,41 @@ } } +static int bad_address(void *p) +{ + unsigned long dummy; + return __get_user(dummy, (unsigned long *)p); +} + void dump_pagetable(unsigned long address) { - static char *name[] = { "PML4", "PGD", "PDE", "PTE" }; - int i, shift; - unsigned long page; + pml4_t *pml4; + asm("movq %%cr3,%0" : "=r" (pml4)); + + pml4 = __va((unsigned long)pml4 & PHYSICAL_PAGE_MASK); + pml4 += pml4_index(address); + printk("PML4 %lx ", pml4_val(*pml4)); + if (bad_address(pml4)) goto bad; + if (!pml4_present(*pml4)) goto ret; + + pgd_t *pgd = __pgd_offset_k((pgd_t *)pml4_page(*pml4), address); + if (bad_address(pgd)) goto bad; + printk("PGD %lx ", pgd_val(*pgd)); + if (!pgd_present(*pgd)) goto ret; + + pmd_t *pmd = pmd_offset(pgd, address); + if (bad_address(pmd)) goto bad; + printk("PMD %lx ", pmd_val(*pmd)); + if (!pmd_present(*pmd)) goto ret; - shift = 9+9+9+12; - address &= ~0xFFFF000000000000UL; - asm("movq %%cr3,%0" : "=r" (page)); - for (i = 0; i < 4; i++) { - unsigned long *padr = (unsigned long *) __va(page); - padr += (address >> shift) & 0x1FFU; - if (__get_user(page, padr)) { - printk("%s: bad %p\n", name[i], padr); - break; - } - printk("%s: %016lx ", name[i], page); - if ((page & (1 | (1<<7))) != 1) /* Not present or 2MB page */ - break; - page &= ~0xFFFUL; - shift -= (i == 0) ? 
12 : 9; - } + pte_t *pte = pte_offset_kernel(pmd, address); + if (bad_address(pte)) goto bad; + printk("PTE %lx", pte_val(*pte)); +ret: printk("\n"); + return; +bad: + printk("BAD\n"); } int page_fault_trace; diff -Nru a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c --- a/arch/x86_64/mm/ioremap.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/mm/ioremap.c Tue Mar 25 18:36:42 2003 @@ -150,7 +150,7 @@ */ offset = phys_addr & ~PAGE_MASK; phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; + size = PAGE_ALIGN(last_addr+1) - phys_addr; /* * Ok, go for it.. diff -Nru a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c --- a/arch/x86_64/mm/k8topology.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/mm/k8topology.c Tue Mar 25 18:36:42 2003 @@ -21,7 +21,7 @@ #include #include -static int find_northbridge(void) +static __init int find_northbridge(void) { int num; @@ -45,7 +45,8 @@ { unsigned long prevbase; struct node nodes[MAXNODE]; - int nodeid, numnodes, maxnode, i, nb; + int nodeid, i, nb; + int found = 0; nb = find_northbridge(); if (nb < 0) @@ -53,12 +54,13 @@ printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); - numnodes = (read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3; + numnodes = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3)); + + printk(KERN_INFO "Assuming %d nodes\n", numnodes - 1); memset(&nodes,0,sizeof(nodes)); prevbase = 0; - maxnode = -1; - for (i = 0; i < MAXNODE; i++) { + for (i = 0; i < numnodes; i++) { unsigned long base,limit; base = read_pci_config(0, nb, 1, 0x40 + i*8); @@ -66,18 +68,16 @@ nodeid = limit & 3; if (!limit) { - printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i, base); - continue; + printk(KERN_ERR "Skipping node entry %d (base %lx)\n", i, base); + return -1; } if ((base >> 8) & 3 || (limit >> 8) & 3) { printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", nodeid, (base>>8)&3, (limit>>8) & 3); return -1; } - if (nodeid > maxnode) - maxnode = nodeid; if ((1UL << nodeid) & 
nodes_present) { - printk("Node %d already present. Skipping\n", nodeid); + printk(KERN_INFO "Node %d already present. Skipping\n", nodeid); continue; } @@ -98,17 +98,19 @@ base = start; if (limit > end) limit = end; - if (limit == base) + if (limit == base) { + printk(KERN_ERR "Empty node %d\n", nodeid); continue; + } if (limit < base) { - printk(KERN_INFO"Node %d bogus settings %lx-%lx. Ignored.\n", + printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", nodeid, base, limit); - continue; + return -1; } /* Could sort here, but pun for now. Should not happen anyroads. */ if (prevbase > base) { - printk(KERN_INFO "Node map not sorted %lx,%lx\n", + printk(KERN_ERR "Node map not sorted %lx,%lx\n", prevbase,base); return -1; } @@ -116,23 +118,26 @@ printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", nodeid, base, limit); + found++; + nodes[nodeid].start = base; nodes[nodeid].end = limit; prevbase = base; } - if (maxnode <= 0) + if (!found) return -1; - memnode_shift = compute_hash_shift(nodes,maxnode,end); + memnode_shift = compute_hash_shift(nodes); if (memnode_shift < 0) { printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); return -1; } printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); - early_for_all_nodes(i) { + for (i = 0; i < numnodes; i++) { + if (nodes[i].start != nodes[i].end) setup_node_bootmem(i, nodes[i].start, nodes[i].end); } diff -Nru a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c --- a/arch/x86_64/mm/numa.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/mm/numa.c Tue Mar 25 18:36:42 2003 @@ -26,11 +26,10 @@ static int numa_off __initdata; unsigned long nodes_present; -int maxnode; static int emunodes __initdata; -int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem) +int __init compute_hash_shift(struct node *nodes) { int i; int shift = 24; @@ -39,12 +38,16 @@ /* When in doubt use brute force. 
*/ while (shift < 48) { memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); - early_for_all_nodes (i) { + for (i = 0; i < numnodes; i++) { + if (nodes[i].start == nodes[i].end) + continue; for (addr = nodes[i].start; addr < nodes[i].end; addr += (1UL << shift)) { - if (memnodemap[addr >> shift] != 0xff) { - printk("node %d shift %d addr %Lx conflict %d\n", + if (memnodemap[addr >> shift] != 0xff && + memnodemap[addr >> shift] != i) { + printk(KERN_INFO + "node %d shift %d addr %Lx conflict %d\n", i, shift, addr, memnodemap[addr>>shift]); goto next; } @@ -101,9 +104,8 @@ reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages< maxnode) - maxnode = nodeid; + if (nodeid + 1 > numnodes) + numnodes = nodeid + 1; nodes_present |= (1UL << nodeid); } @@ -151,6 +153,7 @@ int i; if (emunodes > MAXNODE) emunodes = MAXNODE; + memset(&nodes, 0, sizeof(nodes)); printk(KERN_INFO "Faking %d nodes of size %ld MB\n", emunodes, nodesize>>20); for (i = 0; i < emunodes; i++) { unsigned long end = (i+1)*nodesize; @@ -160,7 +163,7 @@ nodes[i].end = end; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } - memnode_shift = compute_hash_shift(nodes, emunodes, nodes[i-1].end); + memnode_shift = compute_hash_shift(nodes); return 0; } diff -Nru a/arch/x86_64/pci/irq.c b/arch/x86_64/pci/irq.c --- a/arch/x86_64/pci/irq.c Tue Mar 25 18:36:42 2003 +++ b/arch/x86_64/pci/irq.c Tue Mar 25 18:36:42 2003 @@ -618,11 +618,20 @@ int pirq_enable_irq(struct pci_dev *dev) { u8 pin; + extern int interrupt_line_quirk; pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { + /* With IDE legacy devices the IRQ lookup failure is not a problem.. 
*/ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) + return 0; + printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.\n", 'A' + pin - 1, dev->slot_name); } + /* VIA bridges use interrupt line for apic/pci steering across + the V-Link */ + else if (interrupt_line_quirk) + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); return 0; } diff -Nru a/drivers/acorn/char/i2c.c b/drivers/acorn/char/i2c.c --- a/drivers/acorn/char/i2c.c Tue Mar 25 18:36:42 2003 +++ b/drivers/acorn/char/i2c.c Tue Mar 25 18:36:42 2003 @@ -303,11 +303,13 @@ } static struct i2c_adapter ioc_ops = { - .name = "IOC/IOMD", .id = I2C_HW_B_IOC, .algo_data = &ioc_data, .client_register = ioc_client_reg, .client_unregister = ioc_client_unreg + .dev = { + .name = "IOC/IOMD", + }, }; static int __init i2c_ioc_init(void) diff -Nru a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig --- a/drivers/acpi/Kconfig Tue Mar 25 18:36:42 2003 +++ b/drivers/acpi/Kconfig Tue Mar 25 18:36:42 2003 @@ -132,7 +132,7 @@ may be damaged without it. 
config ACPI_NUMA - bool "NUMA support" if NUMA && (IA64 && !IA64_HP_SIM || X86 && ACPI && !ACPI_HT_ONLY) + bool "NUMA support" if NUMA && (IA64 && !IA64_HP_SIM || X86 && ACPI && !ACPI_HT_ONLY && !X86_64) default y if IA64 && IA64_SGI_SN config ACPI_TOSHIBA diff -Nru a/drivers/char/drm/drm_drv.h b/drivers/char/drm/drm_drv.h --- a/drivers/char/drm/drm_drv.h Tue Mar 25 18:36:42 2003 +++ b/drivers/char/drm/drm_drv.h Tue Mar 25 18:36:42 2003 @@ -545,9 +545,7 @@ drm_device_t *dev; int i; -#if __HAVE_CTX_BITMAP int retcode; -#endif DRM_DEBUG( "\n" ); #ifdef MODULE @@ -626,9 +624,11 @@ return 0; +#if (__REALLY_HAVE_AGP && __MUST_HAVE_AGP) || __HAVE_CTX_BITMAP fail: DRM(stub_unregister)(DRM(minor)[i]); DRM(takedown)( dev ); +#endif fail_reg: kfree (DRM(device)); diff -Nru a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c --- a/drivers/i2c/busses/i2c-ali15x3.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/busses/i2c-ali15x3.c Tue Mar 25 18:36:42 2003 @@ -474,9 +474,11 @@ static struct i2c_adapter ali15x3_adapter = { .owner = THIS_MODULE, - .name = "unset", .id = I2C_ALGO_SMBUS | I2C_HW_SMBUS_ALI15X3, .algo = &smbus_algorithm, + .dev = { + .name = "unset", + }, }; static struct pci_device_id ali15x3_ids[] __devinitdata = { @@ -500,8 +502,8 @@ /* set up the driverfs linkage to our parent device */ ali15x3_adapter.dev.parent = &dev->dev; - sprintf(ali15x3_adapter.name, "SMBus ALI15X3 adapter at %04x", - ali15x3_smba); + snprintf(ali15x3_adapter.dev.name, DEVICE_NAME_SIZE, + "SMBus ALI15X3 adapter at %04x", ali15x3_smba); return i2c_add_adapter(&ali15x3_adapter); } diff -Nru a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c --- a/drivers/i2c/busses/i2c-amd756.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/busses/i2c-amd756.c Tue Mar 25 18:36:42 2003 @@ -312,9 +312,11 @@ static struct i2c_adapter amd756_adapter = { .owner = THIS_MODULE, - .name = "unset", .id = I2C_ALGO_SMBUS | I2C_HW_SMBUS_AMD756, .algo = &smbus_algorithm, + .dev = { + .name = 
"unset", + }, }; enum chiptype { AMD756, AMD766, AMD768, NFORCE }; @@ -376,7 +378,7 @@ /* set up the driverfs linkage to our parent device */ amd756_adapter.dev.parent = &pdev->dev; - sprintf(amd756_adapter.name, + snprintf(amd756_adapter.dev.name, DEVICE_NAME_SIZE, "SMBus AMD75x adapter at %04x", amd756_ioport); error = i2c_add_adapter(&amd756_adapter); diff -Nru a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c --- a/drivers/i2c/busses/i2c-amd8111.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/busses/i2c-amd8111.c Tue Mar 25 18:36:42 2003 @@ -357,8 +357,8 @@ goto out_kfree; smbus->adapter.owner = THIS_MODULE; - sprintf(smbus->adapter.name, - "SMBus2 AMD8111 adapter at %04x", smbus->base); + snprintf(smbus->adapter.dev.name, DEVICE_NAME_SIZE, + "SMBus2 AMD8111 adapter at %04x", smbus->base); smbus->adapter.id = I2C_ALGO_SMBUS | I2C_HW_SMBUS_AMD8111; smbus->adapter.algo = &smbus_algorithm; smbus->adapter.algo_data = smbus; diff -Nru a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c --- a/drivers/i2c/busses/i2c-i801.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/busses/i2c-i801.c Tue Mar 25 18:36:42 2003 @@ -546,9 +546,11 @@ static struct i2c_adapter i801_adapter = { .owner = THIS_MODULE, - .name = "unset", .id = I2C_ALGO_SMBUS | I2C_HW_SMBUS_I801, .algo = &smbus_algorithm, + .dev = { + .name = "unset", + }, }; static struct pci_device_id i801_ids[] __devinitdata = { @@ -597,8 +599,8 @@ /* set up the driverfs linkage to our parent device */ i801_adapter.dev.parent = &dev->dev; - sprintf(i801_adapter.name, "SMBus I801 adapter at %04x", - i801_smba); + snprintf(i801_adapter.dev.name, DEVICE_NAME_SIZE, + "SMBus I801 adapter at %04x", i801_smba); return i2c_add_adapter(&i801_adapter); } diff -Nru a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c --- a/drivers/i2c/busses/i2c-isa.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/busses/i2c-isa.c Tue Mar 25 18:36:42 2003 @@ -39,9 +39,11 @@ /* There can only be one... 
*/ static struct i2c_adapter isa_adapter = { .owner = THIS_MODULE, - .name = "ISA main adapter", .id = I2C_ALGO_ISA | I2C_HW_ISA, .algo = &isa_algorithm, + .dev = { + .name = "ISA main adapter", + }, }; static int __init i2c_isa_init(void) diff -Nru a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c --- a/drivers/i2c/busses/i2c-piix4.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/busses/i2c-piix4.c Tue Mar 25 18:36:42 2003 @@ -394,9 +394,11 @@ static struct i2c_adapter piix4_adapter = { .owner = THIS_MODULE, - .name = "unset", .id = I2C_ALGO_SMBUS | I2C_HW_SMBUS_PIIX4, .algo = &smbus_algorithm, + .dev = { + .name = "unset", + }, }; static struct pci_device_id piix4_ids[] __devinitdata = { @@ -449,8 +451,8 @@ /* set up the driverfs linkage to our parent device */ piix4_adapter.dev.parent = &dev->dev; - sprintf(piix4_adapter.name, "SMBus PIIX4 adapter at %04x", - piix4_smba); + snprintf(piix4_adapter.dev.name, DEVICE_NAME_SIZE, + "SMBus PIIX4 adapter at %04x", piix4_smba); retval = i2c_add_adapter(&piix4_adapter); diff -Nru a/drivers/i2c/chips/adm1021.c b/drivers/i2c/chips/adm1021.c --- a/drivers/i2c/chips/adm1021.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/chips/adm1021.c Tue Mar 25 18:36:42 2003 @@ -144,7 +144,7 @@ /* This is the driver that will be inserted */ static struct i2c_driver adm1021_driver = { .owner = THIS_MODULE, - .name = "ADM1021, MAX1617 sensor driver", + .name = "ADM1021-MAX1617", .id = I2C_DRIVERID_ADM1021, .flags = I2C_DF_NOTIFY, .attach_adapter = adm1021_attach_adapter, @@ -221,10 +221,12 @@ err = -ENOMEM; goto error0; } + memset(new_client, 0x00, sizeof(struct i2c_client) + + sizeof(struct adm1021_data)); data = (struct adm1021_data *) (new_client + 1); + i2c_set_clientdata(new_client, data); new_client->addr = address; - new_client->data = data; new_client->adapter = adapter; new_client->driver = &adm1021_driver; new_client->flags = 0; @@ -299,7 +301,7 @@ } /* Fill in the remaining client fields and put it into the global list */ 
- strcpy(new_client->name, client_name); + strncpy(new_client->dev.name, client_name, DEVICE_NAME_SIZE); data->type = kind; new_client->id = adm1021_id++; @@ -354,8 +356,7 @@ int err; - i2c_deregister_entry(((struct adm1021_data *) (client->data))-> - sysctl_id); + i2c_deregister_entry(((struct adm1021_data *) (i2c_get_clientdata(client)))->sysctl_id); if ((err = i2c_detach_client(client))) { printk @@ -384,7 +385,7 @@ static void adm1021_update_client(struct i2c_client *client) { - struct adm1021_data *data = client->data; + struct adm1021_data *data = i2c_get_clientdata(client); down(&data->update_lock); @@ -435,7 +436,7 @@ static void adm1021_temp(struct i2c_client *client, int operation, int ctl_name, int *nrels_mag, long *results) { - struct adm1021_data *data = client->data; + struct adm1021_data *data = i2c_get_clientdata(client); if (operation == SENSORS_PROC_REAL_INFO) *nrels_mag = 0; @@ -462,7 +463,7 @@ static void adm1021_remote_temp(struct i2c_client *client, int operation, int ctl_name, int *nrels_mag, long *results) { - struct adm1021_data *data = client->data; + struct adm1021_data *data = i2c_get_clientdata(client); int prec = 0; if (operation == SENSORS_PROC_REAL_INFO) @@ -535,7 +536,7 @@ static void adm1021_die_code(struct i2c_client *client, int operation, int ctl_name, int *nrels_mag, long *results) { - struct adm1021_data *data = client->data; + struct adm1021_data *data = i2c_get_clientdata(client); if (operation == SENSORS_PROC_REAL_INFO) *nrels_mag = 0; @@ -551,7 +552,7 @@ static void adm1021_alarms(struct i2c_client *client, int operation, int ctl_name, int *nrels_mag, long *results) { - struct adm1021_data *data = client->data; + struct adm1021_data *data = i2c_get_clientdata(client); if (operation == SENSORS_PROC_REAL_INFO) *nrels_mag = 0; else if (operation == SENSORS_PROC_REAL_READ) { diff -Nru a/drivers/i2c/chips/lm75.c b/drivers/i2c/chips/lm75.c --- a/drivers/i2c/chips/lm75.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/chips/lm75.c Tue 
Mar 25 18:36:42 2003 @@ -82,7 +82,7 @@ /* This is the driver that will be inserted */ static struct i2c_driver lm75_driver = { .owner = THIS_MODULE, - .name = "LM75 sensor chip driver", + .name = "LM75 sensor", .id = I2C_DRIVERID_LM75, .flags = I2C_DF_NOTIFY, .attach_adapter = lm75_attach_adapter, @@ -140,10 +140,12 @@ err = -ENOMEM; goto error0; } + memset(new_client, 0x00, sizeof(struct i2c_client) + + sizeof(struct lm75_data)); data = (struct lm75_data *) (new_client + 1); + i2c_set_clientdata(new_client, data); new_client->addr = address; - new_client->data = data; new_client->adapter = adapter; new_client->driver = &lm75_driver; new_client->flags = 0; @@ -180,7 +182,7 @@ } /* Fill in the remaining client fields and put it into the global list */ - strcpy(new_client->name, client_name); + strncpy(new_client->dev.name, client_name, DEVICE_NAME_SIZE); new_client->id = lm75_id++; data->valid = 0; @@ -215,7 +217,7 @@ static int lm75_detach_client(struct i2c_client *client) { - struct lm75_data *data = client->data; + struct lm75_data *data = i2c_get_clientdata(client); i2c_deregister_entry(data->sysctl_id); i2c_detach_client(client); @@ -263,7 +265,7 @@ static void lm75_update_client(struct i2c_client *client) { - struct lm75_data *data = client->data; + struct lm75_data *data = i2c_get_clientdata(client); down(&data->update_lock); @@ -286,7 +288,7 @@ static void lm75_temp(struct i2c_client *client, int operation, int ctl_name, int *nrels_mag, long *results) { - struct lm75_data *data = client->data; + struct lm75_data *data = i2c_get_clientdata(client); if (operation == SENSORS_PROC_REAL_INFO) *nrels_mag = 1; else if (operation == SENSORS_PROC_REAL_READ) { diff -Nru a/drivers/i2c/i2c-algo-bit.c b/drivers/i2c/i2c-algo-bit.c --- a/drivers/i2c/i2c-algo-bit.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-algo-bit.c Tue Mar 25 18:36:42 2003 @@ -23,6 +23,8 @@ /* $Id: linus.patch,v 1.87 2003/03/26 02:45:29 akpm Exp $ */ +/* #define DEBUG 1 */ + #include #include 
#include @@ -338,16 +340,14 @@ while (count > 0) { c = *temp; - DEB2(printk(KERN_DEBUG "i2c-algo-bit.o: %s sendbytes: writing %2.2X\n", - i2c_adap->name, c&0xff)); + DEB2(dev_dbg(&i2c_adap->dev, "sendbytes: writing %2.2X\n", c&0xff)); retval = i2c_outb(i2c_adap,c); if ((retval>0) || (nak_ok && (retval==0))) { /* ok or ignored NAK */ count--; temp++; wrcount++; } else { /* arbitration or no acknowledge */ - printk(KERN_ERR "i2c-algo-bit.o: %s sendbytes: error - bailout.\n", - i2c_adap->name); + dev_err(&i2c_adap->dev, "sendbytes: error - bailout.\n"); i2c_stop(adap); return (retval<0)? retval : -EFAULT; /* got a better one ?? */ @@ -527,13 +527,12 @@ struct i2c_algo_bit_data *bit_adap = adap->algo_data; if (bit_test) { - int ret = test_bus(bit_adap, adap->name); + int ret = test_bus(bit_adap, adap->dev.name); if (ret<0) return -ENODEV; } - DEB2(printk(KERN_DEBUG "i2c-algo-bit.o: hw routines for %s registered.\n", - adap->name)); + DEB2(dev_dbg(&adap->dev, "hw routines registered.\n")); /* register new adapter to i2c module... */ diff -Nru a/drivers/i2c/i2c-algo-pcf.c b/drivers/i2c/i2c-algo-pcf.c --- a/drivers/i2c/i2c-algo-pcf.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-algo-pcf.c Tue Mar 25 18:36:42 2003 @@ -27,6 +27,8 @@ messages, proper stop/repstart signaling during receive, added detect code */ +/* #define DEBUG 1 */ /* to pick up dev_dbg calls */ + #include #include #include @@ -222,21 +224,19 @@ int wrcount, status, timeout; for (wrcount=0; wrcountname, buf[wrcount]&0xff)); + DEB2(dev_dbg(&i2c_adap->dev, "i2c_write: writing %2.2X\n", + buf[wrcount]&0xff)); i2c_outb(adap, buf[wrcount]); timeout = wait_for_pin(adap, &status); if (timeout) { i2c_stop(adap); - printk(KERN_ERR "i2c-algo-pcf.o: %s i2c_write: " - "error - timeout.\n", i2c_adap->name); + dev_err(&i2c_adap->dev, "i2c_write: error - timeout.\n"); return -EREMOTEIO; /* got a better one ?? 
*/ } #ifndef STUB_I2C if (status & I2C_PCF_LRB) { i2c_stop(adap); - printk(KERN_ERR "i2c-algo-pcf.o: %s i2c_write: " - "error - no ack.\n", i2c_adap->name); + dev_err(&i2c_adap->dev, "i2c_write: error - no ack.\n"); return -EREMOTEIO; /* got a better one ?? */ } #endif @@ -263,14 +263,14 @@ if (wait_for_pin(adap, &status)) { i2c_stop(adap); - printk(KERN_ERR "i2c-algo-pcf.o: pcf_readbytes timed out.\n"); + dev_err(&i2c_adap->dev, "pcf_readbytes timed out.\n"); return (-1); } #ifndef STUB_I2C if ((status & I2C_PCF_LRB) && (i != count)) { i2c_stop(adap); - printk(KERN_ERR "i2c-algo-pcf.o: i2c_read: i2c_inb, No ack.\n"); + dev_err(&i2c_adap->dev, "i2c_read: i2c_inb, No ack.\n"); return (-1); } #endif @@ -445,8 +445,7 @@ struct i2c_algo_pcf_data *pcf_adap = adap->algo_data; int rval; - DEB2(printk(KERN_DEBUG "i2c-algo-pcf.o: hw routines for %s registered.\n", - adap->name)); + DEB2(dev_dbg(&adap->dev, "hw routines registered.\n")); /* register new adapter to i2c module... */ diff -Nru a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c --- a/drivers/i2c/i2c-core.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-core.c Tue Mar 25 18:36:42 2003 @@ -23,6 +23,8 @@ /* $Id: linus.patch,v 1.87 2003/03/26 02:45:29 akpm Exp $ */ +/* #define DEBUG 1 */ /* needed to pick up the dev_dbg() calls */ + #include #include #include @@ -63,6 +65,14 @@ return 0; } +static struct device_driver i2c_generic_driver = { + .name = "i2c", + .bus = &i2c_bus_type, + .probe = i2c_device_probe, + .remove = i2c_device_remove, +}; + + /* --------------------------------------------------- * registering functions * --------------------------------------------------- @@ -82,9 +92,8 @@ if (NULL == adapters[i]) break; if (I2C_ADAP_MAX == i) { - printk(KERN_WARNING - " i2c-core.o: register_adapter(%s) - enlarge I2C_ADAP_MAX.\n", - adap->name); + dev_warn(&adap->dev, + "register_adapter - enlarge I2C_ADAP_MAX.\n"); res = -ENOMEM; goto out_unlock; } @@ -105,7 +114,7 @@ if (adap->dev.parent == NULL) 
adap->dev.parent = &legacy_bus; sprintf(adap->dev.bus_id, "i2c-%d", i); - strcpy(adap->dev.name, "i2c controller"); + adap->dev.driver = &i2c_generic_driver; device_register(&adap->dev); /* inform drivers of new adapters */ @@ -116,8 +125,7 @@ drivers[j]->attach_adapter(adap); up(&core_lists); - DEB(printk(KERN_DEBUG "i2c-core.o: adapter %s registered as adapter %d.\n", - adap->name,i)); + DEB(dev_dbg(&adap->dev, "registered as adapter %d.\n", i)); out_unlock: up(&core_lists); @@ -134,8 +142,7 @@ if (adap == adapters[i]) break; if (I2C_ADAP_MAX == i) { - printk( KERN_WARNING "i2c-core.o: unregister_adapter adap [%s] not found.\n", - adap->name); + dev_warn(&adap->dev, "unregister_adapter adap not found.\n"); res = -ENODEV; goto out_unlock; } @@ -148,9 +155,9 @@ for (j = 0; j < I2C_DRIVER_MAX; j++) if (drivers[j] && (drivers[j]->flags & I2C_DF_DUMMY)) if ((res = drivers[j]->attach_adapter(adap))) { - printk(KERN_WARNING "i2c-core.o: can't detach adapter %s " + dev_warn(&adap->dev, "can't detach adapter" "while detaching driver %s: driver not " - "detached!",adap->name,drivers[j]->name); + "detached!", drivers[j]->name); goto out_unlock; } @@ -164,10 +171,10 @@ * must be deleted, as this would cause invalid states. */ if ((res=client->driver->detach_client(client))) { - printk(KERN_ERR "i2c-core.o: adapter %s not " + dev_err(&adap->dev, "adapter not " "unregistered, because client at " "address %02x can't be detached. ", - adap->name, client->addr); + client->addr); goto out_unlock; } } @@ -180,7 +187,7 @@ adapters[i] = NULL; - DEB(printk(KERN_DEBUG "i2c-core.o: adapter unregistered: %s\n",adap->name)); + DEB(dev_dbg(&adap->dev, "adapter unregistered\n")); out_unlock: up(&core_lists); @@ -272,8 +279,7 @@ struct i2c_adapter *adap = adapters[k]; if (adap == NULL) /* skip empty entries. 
*/ continue; - DEB2(printk(KERN_DEBUG "i2c-core.o: examining adapter %s:\n", - adap->name)); + DEB2(dev_dbg(&adap->dev, "examining adapter\n")); if (driver->flags & I2C_DF_DUMMY) { /* DUMMY drivers do not register their clients, so we have to * use a trick here: we call driver->attach_adapter to @@ -281,11 +287,10 @@ * this or hell will break loose... */ if ((res = driver->attach_adapter(adap))) { - printk(KERN_WARNING "i2c-core.o: while unregistering " - "dummy driver %s, adapter %s could " + dev_warn(&adap->dev, "while unregistering " + "dummy driver %s, adapter could " "not be detached properly; driver " - "not unloaded!",driver->name, - adap->name); + "not unloaded!",driver->name); goto out_unlock; } } else { @@ -295,20 +300,17 @@ client->driver == driver) { DEB2(printk(KERN_DEBUG "i2c-core.o: " "detaching client %s:\n", - client->name)); - if ((res = driver-> - detach_client(client))) - { - printk(KERN_ERR "i2c-core.o: while " + client->dev.name)); + if ((res = driver->detach_client(client))) { + dev_err(&adap->dev, "while " "unregistering driver " "`%s', the client at " "address %02x of " - "adapter `%s' could not " + "adapter could not " "be detached; driver " "not unloaded!", driver->name, - client->addr, - adap->name); + client->addr); goto out_unlock; } } @@ -362,7 +364,7 @@ printk(KERN_WARNING " i2c-core.o: attach_client(%s) - enlarge I2C_CLIENT_MAX.\n", - client->name); + client->dev.name); out_unlock_list: up(&adapter->list); @@ -374,19 +376,26 @@ if (adapter->client_register) { if (adapter->client_register(client)) { - printk(KERN_DEBUG - "i2c-core.o: warning: client_register seems " - "to have failed for client %02x at adapter %s\n", - client->addr, adapter->name); + dev_warn(&adapter->dev, "warning: client_register " + "seems to have failed for client %02x\n", + client->addr); } } - DEB(printk(KERN_DEBUG - "i2c-core.o: client [%s] registered to adapter [%s] " - "(pos. 
%d).\n", client->name, adapter->name, i)); + DEB(dev_dbg(&adapter->dev, "client [%s] registered to adapter " + "(pos. %d).\n", client->dev.name, i)); if (client->flags & I2C_CLIENT_ALLOW_USE) client->usage_count = 0; + + client->dev.parent = &client->adapter->dev; + client->dev.driver = &client->driver->driver; + client->dev.bus = &i2c_bus_type; + + snprintf(&client->dev.bus_id[0], sizeof(client->dev.bus_id), "i2c_dev_%d", i); + printk("registering %s\n", client->dev.bus_id); + device_register(&client->dev); + return 0; } @@ -404,7 +413,7 @@ if (res) { printk(KERN_ERR "i2c-core.o: client_unregister [%s] failed, " - "client not detached", client->name); + "client not detached", client->dev.name); goto out; } } @@ -419,10 +428,11 @@ printk(KERN_WARNING " i2c-core.o: unregister_client [%s] not found\n", - client->name); + client->dev.name); res = -ENODEV; out_unlock: + device_unregister(&client->dev); up(&adapter->list); out: return res; @@ -531,7 +541,7 @@ client = adapters[i]->clients[order[j]]; len += sprintf(kbuf+len,"%02x\t%-32s\t%-32s\n", client->addr, - client->name, + client->dev.name, client->driver->name); } len = len - file->f_pos; @@ -579,7 +589,7 @@ seq_printf(s, "dummy "); seq_printf(s, "\t%-32s\t%-32s\n", - adapter->name, adapter->algo->name); + adapter->dev.name, adapter->algo->name); } up(&core_lists); @@ -675,7 +685,7 @@ bus_unregister(&i2c_bus_type); } -module_init(i2c_init); +subsys_initcall(i2c_init); module_exit(i2c_exit); /* ---------------------------------------------------- @@ -688,8 +698,7 @@ int ret; if (adap->algo->master_xfer) { - DEB2(printk(KERN_DEBUG "i2c-core.o: master_xfer: %s with %d msgs.\n", - adap->name,num)); + DEB2(dev_dbg(&adap->dev, "master_xfer: with %d msgs.\n", num)); down(&adap->bus); ret = adap->algo->master_xfer(adap,msgs,num); @@ -697,8 +706,7 @@ return ret; } else { - printk(KERN_ERR "i2c-core.o: I2C adapter %04x: I2C level transfers not supported\n", - adap->id); + dev_err(&adap->dev, "I2C level transfers not 
supported\n"); return -ENOSYS; } } @@ -715,8 +723,8 @@ msg.len = count; (const char *)msg.buf = buf; - DEB2(printk(KERN_DEBUG "i2c-core.o: master_send: writing %d bytes on %s.\n", - count,client->adapter->name)); + DEB2(dev_dbg(&client->adapter->dev, "master_send: writing %d bytes.\n", + count)); down(&adap->bus); ret = adap->algo->master_xfer(adap,&msg,1); @@ -745,8 +753,8 @@ msg.len = count; msg.buf = buf; - DEB2(printk(KERN_DEBUG "i2c-core.o: master_recv: reading %d bytes on %s.\n", - count,client->adapter->name)); + DEB2(dev_dbg(&client->adapter->dev, "master_recv: reading %d bytes.\n", + count)); down(&adap->bus); ret = adap->algo->master_xfer(adap,&msg,1); diff -Nru a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c --- a/drivers/i2c/i2c-dev.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-dev.c Tue Mar 25 18:36:42 2003 @@ -30,6 +30,9 @@ /* $Id: linus.patch,v 1.87 2003/03/26 02:45:29 akpm Exp $ */ +/* If you want debugging uncomment: */ +/* #define DEBUG 1 */ + #include #include #include @@ -41,10 +44,6 @@ #include #include -/* If you want debugging uncomment: */ -/* #define DEBUG */ - - /* struct file_operations changed too often in the 2.1 series for nice code */ static ssize_t i2cdev_read (struct file *file, char *buf, size_t count, @@ -87,7 +86,9 @@ }; static struct i2c_client i2cdev_client_template = { - .name = "I2C /dev entry", + .dev = { + .name = "I2C /dev entry", + }, .id = 1, .addr = -1, .driver = &i2cdev_driver, @@ -386,11 +387,11 @@ char name[12]; if ((i = i2c_adapter_id(adap)) < 0) { - printk(KERN_DEBUG "i2c-dev.o: Unknown adapter ?!?\n"); + dev_dbg(&adap->dev, "Unknown adapter ?!?\n"); return -ENODEV; } if (i >= I2CDEV_ADAPS_MAX) { - printk(KERN_DEBUG "i2c-dev.o: Adapter number too large?!? (%d)\n",i); + dev_dbg(&adap->dev, "Adapter number too large?!? 
(%d)\n",i); return -ENODEV; } @@ -401,14 +402,12 @@ DEVFS_FL_DEFAULT, I2C_MAJOR, i, S_IFCHR | S_IRUSR | S_IWUSR, &i2cdev_fops, NULL); - printk(KERN_DEBUG "i2c-dev.o: Registered '%s' as minor %d\n",adap->name,i); + dev_dbg(&adap->dev, "Registered as minor %d\n", i); } else { /* This is actually a detach_adapter call! */ devfs_remove("i2c/%d", i); i2cdev_adaps[i] = NULL; -#ifdef DEBUG - printk(KERN_DEBUG "i2c-dev.o: Adapter unregistered: %s\n",adap->name); -#endif + dev_dbg(&adap->dev, "Adapter unregistered\n"); } return 0; diff -Nru a/drivers/i2c/i2c-elektor.c b/drivers/i2c/i2c-elektor.c --- a/drivers/i2c/i2c-elektor.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-elektor.c Tue Mar 25 18:36:42 2003 @@ -174,10 +174,12 @@ }; static struct i2c_adapter pcf_isa_ops = { - .owner = THIS_MODULE, - .name = "PCF8584 ISA adapter", - .id = I2C_HW_P_ELEK, - .algo_data = &pcf_isa_data, + .owner = THIS_MODULE, + .id = I2C_HW_P_ELEK, + .algo_data = &pcf_isa_data, + .dev = { + .name = "PCF8584 ISA adapter", + }, }; static int __init i2c_pcfisa_init(void) diff -Nru a/drivers/i2c/i2c-elv.c b/drivers/i2c/i2c-elv.c --- a/drivers/i2c/i2c-elv.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-elv.c Tue Mar 25 18:36:42 2003 @@ -129,9 +129,11 @@ static struct i2c_adapter bit_elv_ops = { .owner = THIS_MODULE, - .name = "ELV Parallel port adaptor", .id = I2C_HW_B_ELV, .algo_data = &bit_elv_data, + .dev = { + .name = "ELV Parallel port adaptor", + }, }; static int __init i2c_bitelv_init(void) @@ -148,7 +150,7 @@ return -ENODEV; } } else { - bit_elv_ops.data=(void*)base; + i2c_set_adapdata(&bit_elv_ops, (void *)base); if (bit_elv_init()==0) { if(i2c_bit_add_bus(&bit_elv_ops) < 0) return -ENODEV; diff -Nru a/drivers/i2c/i2c-philips-par.c b/drivers/i2c/i2c-philips-par.c --- a/drivers/i2c/i2c-philips-par.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-philips-par.c Tue Mar 25 18:36:42 2003 @@ -151,8 +151,10 @@ static struct i2c_adapter bit_lp_ops = { .owner = THIS_MODULE, - .name = "Philips 
Parallel port adapter", .id = I2C_HW_B_LP, + .dev = { + .name = "Philips Parallel port adapter", + }, }; static void i2c_parport_attach (struct parport *port) diff -Nru a/drivers/i2c/i2c-proc.c b/drivers/i2c/i2c-proc.c --- a/drivers/i2c/i2c-proc.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-proc.c Tue Mar 25 18:36:42 2003 @@ -23,6 +23,8 @@ This driver puts entries in /proc/sys/dev/sensors for each I2C device */ +/* #define DEBUG 1 */ + #include #include #include @@ -581,9 +583,9 @@ is_isa ? SENSORS_ISA_BUS : i2c_adapter_id(adapter); /* Forget it if we can't probe using SMBUS_QUICK */ - if ((!is_isa) - && !i2c_check_functionality(adapter, - I2C_FUNC_SMBUS_QUICK)) return -1; + if ((!is_isa) && + !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) + return -1; for (addr = 0x00; addr <= (is_isa ? 0xffff : 0x7f); addr++) { /* XXX: WTF is going on here??? */ @@ -594,31 +596,14 @@ /* If it is in one of the force entries, we don't do any detection at all */ found = 0; - for (i = 0; - !found - && (this_force = - address_data->forces + i, this_force->force); i++) { - for (j = 0; - !found - && (this_force->force[j] != SENSORS_I2C_END); - j += 2) { - if ( - ((adapter_id == this_force->force[j]) - || - ((this_force-> - force[j] == SENSORS_ANY_I2C_BUS) - && !is_isa)) - && (addr == this_force->force[j + 1])) { -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found force parameter for adapter %d, addr %04x\n", - adapter_id, addr); -#endif - if ( - (err = - found_proc(adapter, addr, 0, - this_force-> - kind))) return err; + for (i = 0; !found && (this_force = address_data->forces + i, this_force->force); i++) { + for (j = 0; !found && (this_force->force[j] != SENSORS_I2C_END); j += 2) { + if ( ((adapter_id == this_force->force[j]) || + ((this_force->force[j] == SENSORS_ANY_I2C_BUS) && !is_isa)) && + (addr == this_force->force[j + 1]) ) { + dev_dbg(&adapter->dev, "found force parameter for adapter %d, addr %04x\n", adapter_id, addr); + if ((err = found_proc(adapter, addr, 
0, this_force->kind))) + return err; found = 1; } } @@ -628,42 +613,22 @@ /* If this address is in one of the ignores, we can forget about it right now */ - for (i = 0; - !found - && (address_data->ignore[i] != SENSORS_I2C_END); - i += 2) { - if ( - ((adapter_id == address_data->ignore[i]) - || - ((address_data-> - ignore[i] == SENSORS_ANY_I2C_BUS) - && !is_isa)) - && (addr == address_data->ignore[i + 1])) { -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found ignore parameter for adapter %d, " - "addr %04x\n", adapter_id, addr); -#endif + for (i = 0; !found && (address_data->ignore[i] != SENSORS_I2C_END); i += 2) { + if ( ((adapter_id == address_data->ignore[i]) || + ((address_data->ignore[i] == SENSORS_ANY_I2C_BUS) && + !is_isa)) && + (addr == address_data->ignore[i + 1])) { + dev_dbg(&adapter->dev, "found ignore parameter for adapter %d, addr %04x\n", adapter_id, addr); found = 1; } } - for (i = 0; - !found - && (address_data->ignore_range[i] != SENSORS_I2C_END); - i += 3) { - if ( - ((adapter_id == address_data->ignore_range[i]) - || - ((address_data-> - ignore_range[i] == - SENSORS_ANY_I2C_BUS) & !is_isa)) - && (addr >= address_data->ignore_range[i + 1]) - && (addr <= address_data->ignore_range[i + 2])) { -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found ignore_range parameter for adapter %d, " - "addr %04x\n", adapter_id, addr); -#endif + for (i = 0; !found && (address_data->ignore_range[i] != SENSORS_I2C_END); i += 3) { + if ( ((adapter_id == address_data->ignore_range[i]) || + ((address_data-> ignore_range[i] == SENSORS_ANY_I2C_BUS) & + !is_isa)) && + (addr >= address_data->ignore_range[i + 1]) && + (addr <= address_data->ignore_range[i + 2])) { + dev_dbg(&adapter->dev, "found ignore_range parameter for adapter %d, addr %04x\n", adapter_id, addr); found = 1; } } @@ -673,68 +638,31 @@ /* Now, we will do a detection, but only if it is in the normal or probe entries */ if (is_isa) { - for (i = 0; - !found - && (address_data->normal_isa[i] != - 
SENSORS_ISA_END); i += 1) { + for (i = 0; !found && (address_data->normal_isa[i] != SENSORS_ISA_END); i += 1) { if (addr == address_data->normal_isa[i]) { -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found normal isa entry for adapter %d, " - "addr %04x\n", adapter_id, - addr); -#endif + dev_dbg(&adapter->dev, "found normal isa entry for adapter %d, addr %04x\n", adapter_id, addr); found = 1; } } - for (i = 0; - !found - && (address_data->normal_isa_range[i] != - SENSORS_ISA_END); i += 3) { - if ((addr >= - address_data->normal_isa_range[i]) - && (addr <= - address_data->normal_isa_range[i + 1]) - && - ((addr - - address_data->normal_isa_range[i]) % - address_data->normal_isa_range[i + 2] == - 0)) { -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found normal isa_range entry for adapter %d, " - "addr %04x", adapter_id, addr); -#endif + for (i = 0; !found && (address_data->normal_isa_range[i] != SENSORS_ISA_END); i += 3) { + if ((addr >= address_data->normal_isa_range[i]) && + (addr <= address_data->normal_isa_range[i + 1]) && + ((addr - address_data->normal_isa_range[i]) % address_data->normal_isa_range[i + 2] == 0)) { + dev_dbg(&adapter->dev, "found normal isa_range entry for adapter %d, addr %04x", adapter_id, addr); found = 1; } } } else { - for (i = 0; - !found && (address_data->normal_i2c[i] != - SENSORS_I2C_END); i += 1) { + for (i = 0; !found && (address_data->normal_i2c[i] != SENSORS_I2C_END); i += 1) { if (addr == address_data->normal_i2c[i]) { found = 1; -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found normal i2c entry for adapter %d, " - "addr %02x", adapter_id, addr); -#endif + dev_dbg(&adapter->dev, "found normal i2c entry for adapter %d, addr %02x", adapter_id, addr); } } - for (i = 0; - !found - && (address_data->normal_i2c_range[i] != - SENSORS_I2C_END); i += 2) { - if ((addr >= - address_data->normal_i2c_range[i]) - && (addr <= - address_data->normal_i2c_range[i + 1])) - { -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found normal 
i2c_range entry for adapter %d, " - "addr %04x\n", adapter_id, addr); -#endif + for (i = 0; !found && (address_data->normal_i2c_range[i] != SENSORS_I2C_END); i += 2) { + if ((addr >= address_data->normal_i2c_range[i]) && + (addr <= address_data->normal_i2c_range[i + 1])) { + dev_dbg(&adapter->dev, "found normal i2c_range entry for adapter %d, addr %04x\n", adapter_id, addr); found = 1; } } @@ -747,30 +675,17 @@ ((address_data-> probe[i] == SENSORS_ANY_I2C_BUS) & !is_isa)) && (addr == address_data->probe[i + 1])) { -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found probe parameter for adapter %d, " - "addr %04x\n", adapter_id, addr); -#endif + dev_dbg(&adapter->dev, "found probe parameter for adapter %d, addr %04x\n", adapter_id, addr); found = 1; } } - for (i = 0; !found && - (address_data->probe_range[i] != SENSORS_I2C_END); - i += 3) { - if ( - ((adapter_id == address_data->probe_range[i]) - || - ((address_data->probe_range[i] == - SENSORS_ANY_I2C_BUS) & !is_isa)) - && (addr >= address_data->probe_range[i + 1]) - && (addr <= address_data->probe_range[i + 2])) { + for (i = 0; !found && (address_data->probe_range[i] != SENSORS_I2C_END); i += 3) { + if ( ((adapter_id == address_data->probe_range[i]) || + ((address_data->probe_range[i] == SENSORS_ANY_I2C_BUS) & !is_isa)) && + (addr >= address_data->probe_range[i + 1]) && + (addr <= address_data->probe_range[i + 2])) { found = 1; -#ifdef DEBUG - printk - (KERN_DEBUG "i2c-proc.o: found probe_range parameter for adapter %d, " - "addr %04x\n", adapter_id, addr); -#endif + dev_dbg(&adapter->dev, "found probe_range parameter for adapter %d, addr %04x\n", adapter_id, addr); } } if (!found) @@ -779,8 +694,7 @@ /* OK, so we really should examine this address. First check whether there is some client here at all! 
*/ if (is_isa || - (i2c_smbus_xfer - (adapter, addr, 0, 0, 0, I2C_SMBUS_QUICK, NULL) >= 0)) + (i2c_smbus_xfer (adapter, addr, 0, 0, 0, I2C_SMBUS_QUICK, NULL) >= 0)) if ((err = found_proc(adapter, addr, 0, -1))) return err; } diff -Nru a/drivers/i2c/i2c-velleman.c b/drivers/i2c/i2c-velleman.c --- a/drivers/i2c/i2c-velleman.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/i2c-velleman.c Tue Mar 25 18:36:42 2003 @@ -114,9 +114,11 @@ static struct i2c_adapter bit_velle_ops = { .owner = THIS_MODULE, - .name = "Velleman K8000", .id = I2C_HW_B_VELLE, .algo_data = &bit_velle_data, + .dev = { + .name = "Velleman K8000", + }, }; static int __init i2c_bitvelle_init(void) diff -Nru a/drivers/i2c/scx200_acb.c b/drivers/i2c/scx200_acb.c --- a/drivers/i2c/scx200_acb.c Tue Mar 25 18:36:42 2003 +++ b/drivers/i2c/scx200_acb.c Tue Mar 25 18:36:42 2003 @@ -140,8 +140,7 @@ switch (iface->state) { case state_idle: - printk(KERN_WARNING NAME ": %s, interrupt in idle state\n", - iface->adapter.name); + dev_warn(&iface->adapter.dev, "interrupt in idle state\n"); break; case state_address: @@ -226,8 +225,8 @@ return; error: - printk(KERN_ERR NAME ": %s, %s in state %s\n", iface->adapter.name, - errmsg, scx200_acb_state_name[iface->state]); + dev_err(&iface->adapter.dev, "%s in state %s\n", errmsg, + scx200_acb_state_name[iface->state]); iface->state = state_idle; iface->result = -EIO; @@ -236,8 +235,8 @@ static void scx200_acb_timeout(struct scx200_acb_iface *iface) { - printk(KERN_ERR NAME ": %s, timeout in state %s\n", - iface->adapter.name, scx200_acb_state_name[iface->state]); + dev_err(&iface->adapter.dev, "timeout in state %s\n", + scx200_acb_state_name[iface->state]); iface->state = state_idle; iface->result = -EIO; @@ -290,7 +289,7 @@ char rw, u8 command, int size, union i2c_smbus_data *data) { - struct scx200_acb_iface *iface = adapter->data; + struct scx200_acb_iface *iface = i2c_get_adapdata(adapter); int len; u8 *buffer; u16 cur_word; @@ -331,13 +330,12 @@ size, address, command, 
len, rw == I2C_SMBUS_READ); if (!len && rw == I2C_SMBUS_READ) { - printk(KERN_WARNING NAME ": %s, zero length read\n", - adapter->name); + dev_warn(&adapter->dev, "zero length read\n"); return -EINVAL; } if (len && !buffer) { - printk(KERN_WARNING NAME ": %s, nonzero length but no buffer\n", adapter->name); + dev_warn(&adapter->dev, "nonzero length but no buffer\n"); return -EFAULT; } @@ -457,18 +455,18 @@ memset(iface, 0, sizeof(*iface)); adapter = &iface->adapter; - adapter->data = iface; - sprintf(adapter->name, "SCx200 ACB%d", index); + i2c_set_adapdata(adapter, iface); + snprintf(adapter->dev.name, DEVICE_NAME_SIZE, "SCx200 ACB%d", index); adapter->owner = THIS_MODULE; adapter->id = I2C_ALGO_SMBUS; adapter->algo = &scx200_acb_algorithm; init_MUTEX(&iface->sem); - sprintf(description, "NatSemi SCx200 ACCESS.bus [%s]", adapter->name); + snprintf(description, sizeof(description), "NatSemi SCx200 ACCESS.bus [%s]", adapter->dev.name); if (request_region(base, 8, description) == 0) { - printk(KERN_ERR NAME ": %s, can't allocate io 0x%x-0x%x\n", - adapter->name, base, base + 8-1); + dev_err(&adapter->dev, "can't allocate io 0x%x-0x%x\n", + base, base + 8-1); rc = -EBUSY; goto errout; } @@ -476,14 +474,14 @@ rc = scx200_acb_probe(iface); if (rc) { - printk(KERN_WARNING NAME ": %s, probe failed\n", adapter->name); + dev_warn(&adapter->dev, "probe failed\n"); goto errout; } scx200_acb_reset(iface); if (i2c_add_adapter(adapter) < 0) { - printk(KERN_ERR NAME ": %s, failed to register\n", adapter->name); + dev_err(&adapter->dev, "failed to register\n"); rc = -ENODEV; goto errout; } diff -Nru a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c --- a/drivers/ieee1394/pcilynx.c Tue Mar 25 18:36:42 2003 +++ b/drivers/ieee1394/pcilynx.c Tue Mar 25 18:36:42 2003 @@ -138,10 +138,12 @@ }; static struct i2c_adapter bit_ops = { - .name = "PCILynx I2C adapter", .id = 0xAA, //FIXME: probably we should get an id in i2c-id.h .client_register = bit_reg, .client_unregister = 
bit_unreg, + .dev = { + .name = "PCILynx I2C", + }, }; diff -Nru a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c --- a/drivers/media/video/adv7175.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/adv7175.c Tue Mar 25 18:36:42 2003 @@ -170,6 +170,7 @@ client=kmalloc(sizeof(*client), GFP_KERNEL); if(client == NULL) return -ENOMEM; + memset(client, 0, sizeof(*client)); client_template.adapter = adap; client_template.addr = addr; @@ -190,9 +191,10 @@ // We should never get here!!! dname = unknown_name; } - strcpy(client->name, dname); + strncpy(client->dev.name, dname, DEVICE_NAME_SIZE); init_MUTEX(&encoder->lock); encoder->client = client; + i2c_set_clientdata(client, encoder); encoder->addr = addr; encoder->norm = VIDEO_MODE_PAL; encoder->input = 0; @@ -201,7 +203,7 @@ for (i=1; iname, rv); + printk(KERN_ERR "%s_attach: init error %d\n", client->dev.name, rv); break; } } @@ -211,7 +213,7 @@ i2c_smbus_write_byte_data(client,0x07, TR0MODE); i2c_smbus_read_byte_data(client,0x12); printk(KERN_INFO "%s_attach: %s rev. 
%d at 0x%02x\n", - client->name, dname, rv & 1, client->addr); + client->dev.name, dname, rv & 1, client->addr); } i2c_attach_client(client); @@ -229,7 +231,7 @@ static int adv717x_detach(struct i2c_client *client) { i2c_detach_client(client); - kfree(client->data); + i2c_get_clientdata(client); kfree(client); return 0; } @@ -237,7 +239,7 @@ static int adv717x_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct adv7175 *encoder = client->data; + struct adv7175 *encoder = i2c_get_clientdata(client); int i, x_ntsc=13, x_pal=13; /* x_ntsc is number of entries in init_ntsc -1 */ /* x_pal is number of entries in init_pal -1 */ @@ -297,7 +299,7 @@ default: printk(KERN_ERR "%s: illegal norm: %d\n", - client->name, iarg); + client->dev.name, iarg); return -EINVAL; } @@ -353,7 +355,7 @@ default: printk(KERN_ERR "%s: illegal input: %d\n", - client->name, iarg); + client->dev.name, iarg); return -EINVAL; } @@ -419,8 +421,10 @@ }; static struct i2c_client client_template = { - .name = "adv7175_client", - .driver = &i2c_driver_adv7175 + .driver = &i2c_driver_adv7175, + .dev = { + .name = "adv7175_client", + }, }; static int adv717x_init(void) diff -Nru a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c --- a/drivers/media/video/bt819.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/bt819.c Tue Mar 25 18:36:42 2003 @@ -128,7 +128,7 @@ struct timing *timing; - decoder = client->data; + decoder = i2c_get_clientdata(client); timing = &timing_data[decoder->norm]; init[3 * 2 - 1] = (((timing->vdelay >> 8) & 0x03) << 6) | @@ -159,6 +159,7 @@ client = kmalloc(sizeof(*client), GFP_KERNEL); if(client == NULL) return -ENOMEM; + memset(client, 0, sizeof(*client)); client_template.adapter = adap; client_template.addr = addr; memcpy(client, &client_template, sizeof(*client)); @@ -170,8 +171,8 @@ } memset(decoder, 0, sizeof(struct bt819)); - strcpy(client->name, "bt819"); - client->data = decoder; + strncpy(client->dev.name, "bt819", DEVICE_NAME_SIZE); + 
i2c_set_clientdata(client, decoder); decoder->client = client; decoder->addr = addr; decoder->norm = VIDEO_MODE_NTSC; @@ -186,10 +187,10 @@ i = bt819_init(client); if (i < 0) { printk(KERN_ERR "%s: bt819_attach: init status %d\n", - decoder->client->name, i); + decoder->client->dev.name, i); } else { printk(KERN_INFO "%s: bt819_attach: chip version %x\n", - decoder->client->name, i2c_smbus_read_byte_data(client, + decoder->client->dev.name, i2c_smbus_read_byte_data(client, 0x17) & 0x0f); } init_MUTEX(&decoder->lock); @@ -205,7 +206,7 @@ static int bt819_detach(struct i2c_client *client) { i2c_detach_client(client); - kfree(client->data); + i2c_get_clientdata(client); kfree(client); MOD_DEC_USE_COUNT; return 0; @@ -215,7 +216,7 @@ { int temp; - struct bt819 *decoder = client->data; + struct bt819 *decoder = i2c_get_clientdata(client); //return 0; if (!decoder->initialized) { // First call to bt819_init could be @@ -268,7 +269,7 @@ *iarg = res; DEBUG(printk(KERN_INFO "%s-bt819: get status %x\n", - decoder->client->name, *iarg)); + decoder->client->dev.name, *iarg)); } break; @@ -278,7 +279,7 @@ struct timing *timing; DEBUG(printk(KERN_INFO "%s-bt819: set norm %x\n", - decoder->client->name, *iarg)); + decoder->client->dev.name, *iarg)); if (*iarg == VIDEO_MODE_NTSC) { bt819_setbit(decoder, 0x01, 0, 1); @@ -319,7 +320,7 @@ int *iarg = arg; DEBUG(printk(KERN_INFO "%s-bt819: set input %x\n", - decoder->client->name, *iarg)); + decoder->client->dev.name, *iarg)); if (*iarg < 0 || *iarg > 7) { return -EINVAL; @@ -344,7 +345,7 @@ int *iarg = arg; DEBUG(printk(KERN_INFO "%s-bt819: set output %x\n", - decoder->client->name, *iarg)); + decoder->client->dev.name, *iarg)); /* not much choice of outputs */ if (*iarg != 0) { @@ -360,7 +361,7 @@ DEBUG(printk (KERN_INFO "%s-bt819: enable output %x\n", - decoder->client->name, *iarg)); + decoder->client->dev.name, *iarg)); if (decoder->enable != enable) { decoder->enable = enable; @@ -381,7 +382,7 @@ DEBUG(printk (KERN_INFO 
"%s-bt819: set picture brightness %d contrast %d colour %d\n", - decoder->client->name, pic->brightness, + decoder->client->dev.name, pic->brightness, pic->contrast, pic->colour)); @@ -448,9 +449,11 @@ }; static struct i2c_client client_template = { - .name = "bt819_client", .id = -1, - .driver = &i2c_driver_bt819 + .driver = &i2c_driver_bt819, + .dev = { + .name = "bt819_client", + }, }; static int bt819_setup(void) diff -Nru a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c --- a/drivers/media/video/bt856.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/bt856.c Tue Mar 25 18:36:42 2003 @@ -106,6 +106,7 @@ client = kmalloc(sizeof(*client), GFP_KERNEL); if(client == NULL) return -ENOMEM; + memset(client, 0, sizeof(*client)); client_template.adapter = adap; client_template.addr = addr; memcpy(client, &client_template, sizeof(*client)); @@ -123,14 +124,14 @@ memset(encoder, 0, sizeof(struct bt856)); - strcpy(client->name, "bt856"); + strncpy(client->dev.name, "bt856", DEVICE_NAME_SIZE); encoder->client = client; - client->data = encoder; + i2c_set_clientdata(client, encoder); encoder->addr = client->addr; encoder->norm = VIDEO_MODE_NTSC; encoder->enable = 1; - DEBUG(printk(KERN_INFO "%s-bt856: attach\n", encoder->client->name)); + DEBUG(printk(KERN_INFO "%s-bt856: attach\n", encoder->client->dev.name)); i2c_smbus_write_byte_data(client, 0xdc, 0x18); encoder->reg[0xdc] = 0x18; @@ -171,7 +172,7 @@ static int bt856_detach(struct i2c_client *client) { i2c_detach_client(client); - kfree(client->data); + i2c_get_clientdata(client); kfree(client); MOD_DEC_USE_COUNT; return 0; @@ -180,7 +181,7 @@ static int bt856_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct bt856 *encoder = client->data; + struct bt856 *encoder = i2c_get_clientdata(client); switch (cmd) { @@ -190,7 +191,7 @@ DEBUG(printk (KERN_INFO "%s-bt856: get capabilities\n", - encoder->client->name)); + encoder->client->dev.name)); cap->flags = VIDEO_ENCODER_PAL @@ -205,7 
+206,7 @@ int *iarg = arg; DEBUG(printk(KERN_INFO "%s-bt856: set norm %d\n", - encoder->client->name, *iarg)); + encoder->client->dev.name, *iarg)); switch (*iarg) { @@ -232,7 +233,7 @@ int *iarg = arg; DEBUG(printk(KERN_INFO "%s-bt856: set input %d\n", - encoder->client->name, *iarg)); + encoder->client->dev.name, *iarg)); /* We only have video bus. *iarg = 0: input is from bt819 @@ -268,7 +269,7 @@ int *iarg = arg; DEBUG(printk(KERN_INFO "%s-bt856: set output %d\n", - encoder->client->name, *iarg)); + encoder->client->dev.name, *iarg)); /* not much choice of outputs */ if (*iarg != 0) { @@ -285,7 +286,7 @@ DEBUG(printk (KERN_INFO "%s-bt856: enable output %d\n", - encoder->client->name, encoder->enable)); + encoder->client->dev.name, encoder->enable)); } break; @@ -309,9 +310,11 @@ }; static struct i2c_client client_template = { - .name = "bt856_client", .id = -1, - .driver = &i2c_driver_bt856 + .driver = &i2c_driver_bt856, + .dev = { + .name = "bt856_client", + }, }; static int bt856_init(void) diff -Nru a/drivers/media/video/bttv-if.c b/drivers/media/video/bttv-if.c --- a/drivers/media/video/bttv-if.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/bttv-if.c Tue Mar 25 18:36:42 2003 @@ -194,7 +194,7 @@ static int attach_inform(struct i2c_client *client) { - struct bttv *btv = (struct bttv*)client->adapter->data; + struct bttv *btv = i2c_get_adapdata(client->adapter); int i; for (i = 0; i < I2C_CLIENTS_MAX; i++) { @@ -207,13 +207,13 @@ bttv_call_i2c_clients(btv,TUNER_SET_TYPE,&btv->tuner_type); if (bttv_verbose) printk("bttv%d: i2c attach [client=%s,%s]\n",btv->nr, - client->name, (i < I2C_CLIENTS_MAX) ? "ok" : "failed"); + client->dev.name, (i < I2C_CLIENTS_MAX) ? 
"ok" : "failed"); return 0; } static int detach_inform(struct i2c_client *client) { - struct bttv *btv = (struct bttv*)client->adapter->data; + struct bttv *btv = i2c_get_adapdata(client->adapter); int i; for (i = 0; i < I2C_CLIENTS_MAX; i++) { @@ -224,7 +224,7 @@ } if (bttv_verbose) printk("bttv%d: i2c detach [client=%s,%s]\n",btv->nr, - client->name, (i < I2C_CLIENTS_MAX) ? "ok" : "failed"); + client->dev.name, (i < I2C_CLIENTS_MAX) ? "ok" : "failed"); return 0; } @@ -261,15 +261,19 @@ static struct i2c_adapter bttv_i2c_adap_template = { .owner = THIS_MODULE, - .name = "bt848", .id = I2C_HW_B_BT848, .client_register = attach_inform, .client_unregister = detach_inform, + .dev = { + .name = "bt848", + }, }; static struct i2c_client bttv_i2c_client_template = { - .name = "bttv internal use only", - .id = -1, + .id = -1, + .dev = { + .name = "bttv internal", + }, }; @@ -343,10 +347,10 @@ memcpy(&btv->i2c_client, &bttv_i2c_client_template, sizeof(struct i2c_client)); - sprintf(btv->i2c_adap.name+strlen(btv->i2c_adap.name), + sprintf(btv->i2c_adap.dev.name+strlen(btv->i2c_adap.dev.name), " #%d", btv->nr); btv->i2c_algo.data = btv; - btv->i2c_adap.data = btv; + i2c_set_adapdata(&btv->i2c_adap, btv); btv->i2c_adap.algo_data = &btv->i2c_algo; btv->i2c_client.adapter = &btv->i2c_adap; diff -Nru a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c --- a/drivers/media/video/msp3400.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/msp3400.c Tue Mar 25 18:36:42 2003 @@ -349,7 +349,7 @@ static void msp3400c_set_scart(struct i2c_client *client, int in, int out) { - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); if (-1 == scarts[out][in]) return; @@ -411,7 +411,7 @@ static void msp3400c_setmode(struct i2c_client *client, int type) { - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); int i; dprintk("msp3400: setmode: %d\n",type); @@ -471,7 +471,7 @@ { static char *strmode[] = { "0", 
"mono", "stereo", "3", "lang1", "5", "6", "7", "lang2" }; - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); int nicam=0; /* channel source: FM/AM or nicam */ int src=0; @@ -599,7 +599,7 @@ static void msp3400c_restore_dfp(struct i2c_client *client) { - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); int i; for (i = 0; i < DFP_COUNT; i++) { @@ -627,7 +627,7 @@ static int autodetect_stereo(struct i2c_client *client) { - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); int val; int newstereo = msp->stereo; int newnicam = msp->nicam_on; @@ -727,7 +727,7 @@ /* stereo/multilang monitoring */ static void watch_stereo(struct i2c_client *client) { - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); if (autodetect_stereo(client)) { if (msp->stereo & VIDEO_SOUND_STEREO) @@ -746,7 +746,7 @@ static int msp3400c_thread(void *data) { struct i2c_client *client = data; - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); struct CARRIER_DETECT *cd; int count, max1,max2,val1,val2, val,this; @@ -1002,7 +1002,7 @@ static int msp3410d_thread(void *data) { struct i2c_client *client = data; - struct msp3400c *msp = client->data; + struct msp3400c *msp = i2c_get_clientdata(client); int mode,val,i,std; #ifdef CONFIG_SMP @@ -1226,9 +1226,11 @@ static struct i2c_client client_template = { - .name = "(unset)", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, + .dev = { + .name = "(unset)", + }, }; static int msp_attach(struct i2c_adapter *adap, int addr, @@ -1265,7 +1267,7 @@ for (i = 0; i < DFP_COUNT; i++) msp->dfp_regs[i] = -1; - c->data = msp; + i2c_set_clientdata(c, msp); init_waitqueue_head(&msp->wq); if (-1 == msp3400c_reset(c)) { @@ -1291,7 +1293,7 @@ #endif msp3400c_setvolume(c,msp->muted,msp->left,msp->right); - sprintf(c->name,"MSP34%02d%c-%c%d", + snprintf(c->dev.name, 
DEVICE_NAME_SIZE, "MSP34%02d%c-%c%d", (rev2>>8)&0xff, (rev1&0xff)+'@', ((rev1>>8)&0xff)+'@', rev2&0x1f); msp->nicam = (((rev2>>8)&0xff) != 00) ? 1 : 0; @@ -1310,7 +1312,7 @@ msp->wake_stereo.data = (unsigned long)msp; /* hello world :-) */ - printk(KERN_INFO "msp34xx: init: chip=%s",c->name); + printk(KERN_INFO "msp34xx: init: chip=%s",c->dev.name); if (msp->nicam) printk(", has NICAM support"); printk("\n"); @@ -1340,7 +1342,7 @@ static int msp_detach(struct i2c_client *client) { DECLARE_MUTEX_LOCKED(sem); - struct msp3400c *msp = (struct msp3400c*)client->data; + struct msp3400c *msp = i2c_get_clientdata(client); int i; /* shutdown control thread */ @@ -1379,7 +1381,7 @@ static void msp_wake_thread(struct i2c_client *client) { - struct msp3400c *msp = (struct msp3400c*)client->data; + struct msp3400c *msp = i2c_get_clientdata(client); msp3400c_setvolume(client,msp->muted,0,0); msp->watch_stereo=0; @@ -1391,7 +1393,7 @@ static int msp_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct msp3400c *msp = (struct msp3400c*)client->data; + struct msp3400c *msp = i2c_get_clientdata(client); __u16 *sarg = arg; #if 0 int *iarg = (int*)arg; diff -Nru a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c --- a/drivers/media/video/saa5249.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/saa5249.c Tue Mar 25 18:36:42 2003 @@ -171,20 +171,21 @@ return -ENOMEM; } memset(t, 0, sizeof(*t)); - strcpy(client->name, IF_NAME); + strncpy(client->dev.name, IF_NAME, DEVICE_NAME_SIZE); init_MUTEX(&t->lock); /* * Now create a video4linux device */ - client->data = vd=(struct video_device *)kmalloc(sizeof(struct video_device), GFP_KERNEL); + vd = (struct video_device *)kmalloc(sizeof(struct video_device), GFP_KERNEL); if(vd==NULL) { kfree(t); kfree(client); return -ENOMEM; } + i2c_set_clientdata(client, vd); memcpy(vd, &saa_template, sizeof(*vd)); for (pgbuf = 0; pgbuf < NUM_DAUS; pgbuf++) @@ -234,7 +235,7 @@ static int saa5249_detach(struct i2c_client 
*client) { - struct video_device *vd=client->data; + struct video_device *vd = i2c_get_clientdata(client); i2c_detach_client(client); video_unregister_device(vd); kfree(vd->priv); @@ -264,9 +265,11 @@ }; static struct i2c_client client_template = { - .name = "(unset)", .id = -1, - .driver = &i2c_driver_videotext + .driver = &i2c_driver_videotext, + .dev = { + .name = "(unset)", + }, }; /* diff -Nru a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c --- a/drivers/media/video/saa7110.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/saa7110.c Tue Mar 25 18:36:42 2003 @@ -163,6 +163,7 @@ client=kmalloc(sizeof(*client), GFP_KERNEL); if(client == NULL) return -ENOMEM; + memset(client, 0, sizeof(*client)); client_template.adapter = adap; client_template.addr = addr; memcpy(client, &client_template, sizeof(*client)); @@ -175,9 +176,9 @@ /* clear our private data */ memset(decoder, 0, sizeof(*decoder)); - strcpy(client->name, IF_NAME); + strncpy(client->dev.name, IF_NAME, DEVICE_NAME_SIZE); decoder->client = client; - client->data = decoder; + i2c_set_clientdata(client, decoder); decoder->addr = addr; decoder->norm = VIDEO_MODE_PAL; decoder->input = 0; @@ -189,7 +190,7 @@ rv = i2c_master_send(client, initseq, sizeof(initseq)); if (rv < 0) - printk(KERN_ERR "%s_attach: init status %d\n", client->name, rv); + printk(KERN_ERR "%s_attach: init status %d\n", client->dev.name, rv); else { i2c_smbus_write_byte_data(client,0x21,0x16); i2c_smbus_write_byte_data(client,0x0D,0x04); @@ -213,7 +214,7 @@ static int saa7110_detach(struct i2c_client *client) { - struct saa7110* decoder = client->data; + struct saa7110* decoder = i2c_get_clientdata(client); i2c_detach_client(client); @@ -232,7 +233,7 @@ static int saa7110_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct saa7110* decoder = client->data; + struct saa7110* decoder = i2c_get_clientdata(client); int v; switch (cmd) { @@ -251,7 +252,7 @@ case DECODER_GET_STATUS: { - struct saa7110* 
decoder = client->data; + struct saa7110* decoder = i2c_get_clientdata(client); int status; int res = 0; @@ -390,9 +391,11 @@ .command = saa7110_command }; static struct i2c_client client_template = { - .name = "saa7110_client", .id = -1, - .driver = &i2c_driver_saa7110 + .driver = &i2c_driver_saa7110, + .dev = { + .name = "saa7110_client", + }, }; static int saa7110_init(void) diff -Nru a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c --- a/drivers/media/video/saa7111.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/saa7111.c Tue Mar 25 18:36:42 2003 @@ -120,6 +120,7 @@ client = kmalloc(sizeof(*client), GFP_KERNEL); if(client == NULL) return -ENOMEM; + memset(client, 0, sizeof(*client)); client_template.adapter = adap; client_template.addr = addr; memcpy(client, &client_template, sizeof(*client)); @@ -132,9 +133,9 @@ } memset(decoder, 0, sizeof(*decoder)); - strcpy(client->name, "saa7111"); + strncpy(client->dev.name, "saa7111", DEVICE_NAME_SIZE); decoder->client = client; - client->data = decoder; + i2c_set_clientdata(client, decoder); decoder->addr = addr; decoder->norm = VIDEO_MODE_NTSC; decoder->input = 0; @@ -147,10 +148,10 @@ i = i2c_master_send(client, init, sizeof(init)); if (i < 0) { printk(KERN_ERR "%s_attach: init status %d\n", - client->name, i); + client->dev.name, i); } else { printk(KERN_INFO "%s_attach: chip version %x\n", - client->name, i2c_smbus_read_byte_data(client, 0x00) >> 4); + client->dev.name, i2c_smbus_read_byte_data(client, 0x00) >> 4); } init_MUTEX(&decoder->lock); i2c_attach_client(client); @@ -164,7 +165,7 @@ static int saa7111_detach(struct i2c_client *client) { - struct saa7111 *decoder = client->data; + struct saa7111 *decoder = i2c_get_clientdata(client); i2c_detach_client(client); kfree(decoder); kfree(client); @@ -175,7 +176,7 @@ static int saa7111_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct saa7111 *decoder = client->data; + struct saa7111 *decoder = i2c_get_clientdata(client); 
switch (cmd) { @@ -187,7 +188,7 @@ for (i = 0; i < 32; i += 16) { int j; - printk("KERN_DEBUG %s: %03x", client->name, + printk("KERN_DEBUG %s: %03x", client->dev.name, i); for (j = 0; j < 16; ++j) { printk(" %02x", @@ -407,9 +408,11 @@ }; static struct i2c_client client_template = { - .name = "saa7111_client", .id = -1, - .driver = &i2c_driver_saa7111 + .driver = &i2c_driver_saa7111, + .dev = { + .name = "saa7111_client", + }, }; static int saa7111_init(void) diff -Nru a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c --- a/drivers/media/video/saa7134/saa7134-i2c.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/saa7134/saa7134-i2c.c Tue Mar 25 18:36:42 2003 @@ -334,15 +334,19 @@ static struct i2c_adapter saa7134_adap_template = { .owner = THIS_MODULE, - .name = "saa7134", .id = I2C_ALGO_SAA7134, .algo = &saa7134_algo, .client_register = attach_inform, + .dev = { + .name = "saa7134", + }, }; static struct i2c_client saa7134_client_template = { - .name = "saa7134 internal", .id = -1, + .dev = { + .name = "saa7134 internal", + }, }; /* ----------------------------------------------------------- */ @@ -410,7 +414,7 @@ int saa7134_i2c_register(struct saa7134_dev *dev) { dev->i2c_adap = saa7134_adap_template; - strcpy(dev->i2c_adap.name,dev->name); + strncpy(dev->i2c_adap.dev.name, dev->name, DEVICE_NAME_SIZE); dev->i2c_adap.algo_data = dev; i2c_add_adapter(&dev->i2c_adap); diff -Nru a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c --- a/drivers/media/video/saa7185.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/saa7185.c Tue Mar 25 18:36:42 2003 @@ -191,6 +191,7 @@ client = kmalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return -ENOMEM; + memset(client, 0, sizeof(*client)); client_template.adapter = adap; client_template.addr = addr; memcpy(client, &client_template, sizeof(*client)); @@ -202,9 +203,9 @@ memset(encoder, 0, sizeof(*encoder)); - strcpy(client->name, "saa7185"); + 
strncpy(client->dev.name, "saa7185", DEVICE_NAME_SIZE); encoder->client = client; - client->data = encoder; + i2c_set_clientdata(client, encoder); encoder->addr = addr; encoder->norm = VIDEO_MODE_NTSC; encoder->enable = 1; @@ -215,11 +216,11 @@ sizeof(init_ntsc)); } if (i < 0) { - printk(KERN_ERR "%s_attach: init error %d\n", client->name, + printk(KERN_ERR "%s_attach: init error %d\n", client->dev.name, i); } else { printk(KERN_INFO "%s_attach: chip version %d\n", - client->name, i2c_smbus_read_byte(client) >> 5); + client->dev.name, i2c_smbus_read_byte(client) >> 5); } init_MUTEX(&encoder->lock); i2c_attach_client(client); @@ -233,7 +234,7 @@ static int saa7185_detach(struct i2c_client *client) { - struct saa7185 *encoder = client->data; + struct saa7185 *encoder = i2c_get_clientdata(client); i2c_detach_client(client); i2c_smbus_write_byte_data(client, 0x61, (encoder->reg[0x61]) | 0x40); /* SW: output off is active */ //i2c_smbus_write_byte_data(client, 0x3a, (encoder->reg[0x3a]) | 0x80); /* SW: color bar */ @@ -246,7 +247,7 @@ static int saa7185_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct saa7185 *encoder = client->data; + struct saa7185 *encoder = i2c_get_clientdata(client); switch (cmd) { @@ -365,9 +366,11 @@ }; static struct i2c_client client_template = { - .name = "saa7185_client", .id = -1, - .driver = &i2c_driver_saa7185 + .driver = &i2c_driver_saa7185, + .dev = { + .name = "saa7185_client", + }, }; static int saa7185_init(void) diff -Nru a/drivers/media/video/tda7432.c b/drivers/media/video/tda7432.c --- a/drivers/media/video/tda7432.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/tda7432.c Tue Mar 25 18:36:42 2003 @@ -260,7 +260,7 @@ static int tda7432_set(struct i2c_client *client) { - struct tda7432 *t = client->data; + struct tda7432 *t = i2c_get_clientdata(client); unsigned char buf[16]; d2printk("tda7432: In tda7432_set\n"); @@ -287,7 +287,7 @@ static void do_tda7432_init(struct i2c_client *client) { - struct 
tda7432 *t = client->data; + struct tda7432 *t = i2c_get_clientdata(client); d2printk("tda7432: In tda7432_init\n"); t->input = TDA7432_STEREO_IN | /* Main (stereo) input */ @@ -328,11 +328,11 @@ memcpy(client,&client_template,sizeof(struct i2c_client)); client->adapter = adap; client->addr = addr; - client->data = t; + i2c_set_clientdata(client, t); do_tda7432_init(client); MOD_INC_USE_COUNT; - strcpy(client->name,"TDA7432"); + strncpy(client->dev.name, "TDA7432", DEVICE_NAME_SIZE); printk(KERN_INFO "tda7432: init\n"); i2c_attach_client(client); @@ -348,7 +348,7 @@ static int tda7432_detach(struct i2c_client *client) { - struct tda7432 *t = client->data; + struct tda7432 *t = i2c_get_clientdata(client); do_tda7432_init(client); i2c_detach_client(client); @@ -361,7 +361,7 @@ static int tda7432_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct tda7432 *t = client->data; + struct tda7432 *t = i2c_get_clientdata(client); d2printk("tda7432: In tda7432_command\n"); switch (cmd) { @@ -526,9 +526,11 @@ static struct i2c_client client_template = { - .name = "tda7432", .id = -1, .driver = &driver, + .dev = { + .name = "tda7432", + }, }; static int tda7432_init(void) diff -Nru a/drivers/media/video/tda9875.c b/drivers/media/video/tda9875.c --- a/drivers/media/video/tda9875.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/tda9875.c Tue Mar 25 18:36:42 2003 @@ -158,7 +158,7 @@ static void tda9875_set(struct i2c_client *client) { - struct tda9875 *tda = client->data; + struct tda9875 *tda = i2c_get_clientdata(client); unsigned char a; dprintk(KERN_DEBUG "tda9875_set(%04x,%04x,%04x,%04x)\n",tda->lvol,tda->rvol,tda->bass,tda->treble); @@ -176,7 +176,7 @@ static void do_tda9875_init(struct i2c_client *client) { - struct tda9875 *t = client->data; + struct tda9875 *t = i2c_get_clientdata(client); dprintk("In tda9875_init\n"); tda9875_write(client, TDA9875_CFG, 0xd0 ); /*reg de config 0 (reset)*/ tda9875_write(client, TDA9875_MSR, 0x03 ); /* Monitor 
0b00000XXX*/ @@ -256,7 +256,7 @@ memcpy(client,&client_template,sizeof(struct i2c_client)); client->adapter = adap; client->addr = addr; - client->data = t; + i2c_set_clientdata(client, t); if(!tda9875_checkit(adap,addr)) { kfree(t); @@ -265,7 +265,7 @@ do_tda9875_init(client); MOD_INC_USE_COUNT; - strcpy(client->name,"TDA9875"); + strncpy(client->dev.name, "TDA9875", DEVICE_NAME_SIZE); printk(KERN_INFO "tda9875: init\n"); i2c_attach_client(client); @@ -281,7 +281,7 @@ static int tda9875_detach(struct i2c_client *client) { - struct tda9875 *t = client->data; + struct tda9875 *t = i2c_get_clientdata(client); do_tda9875_init(client); i2c_detach_client(client); @@ -294,7 +294,7 @@ static int tda9875_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct tda9875 *t = client->data; + struct tda9875 *t = i2c_get_clientdata(client); dprintk("In tda9875_command...\n"); @@ -396,9 +396,11 @@ static struct i2c_client client_template = { - .name = "tda9875", .id = -1, .driver = &driver, + .dev = { + .name = "tda9875", + }, }; static int tda9875_init(void) diff -Nru a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c --- a/drivers/media/video/tda9887.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/tda9887.c Tue Mar 25 18:36:42 2003 @@ -359,7 +359,7 @@ return -ENOMEM; memset(t,0,sizeof(*t)); t->client = client_template; - t->client.data = t; + i2c_set_clientdata(&t->client, t); t->pinnacle_id = -1; i2c_attach_client(&t->client); @@ -376,12 +376,12 @@ case I2C_ALGO_BIT | I2C_HW_B_RIVA: case I2C_ALGO_SAA7134: printk("tda9887: probing %s i2c adapter [id=0x%x]\n", - adap->name,adap->id); + adap->dev.name,adap->id); rc = i2c_probe(adap, &addr_data, tda9887_attach); break; default: printk("tda9887: ignoring %s i2c adapter [id=0x%x]\n", - adap->name,adap->id); + adap->dev.name,adap->id); rc = 0; /* nothing */ } @@ -390,7 +390,7 @@ static int tda9887_detach(struct i2c_client *client) { - struct tda9887 *t = (struct tda9887*)client->data; + struct 
tda9887 *t = i2c_get_clientdata(client); i2c_detach_client(client); kfree(t); @@ -401,7 +401,7 @@ static int tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct tda9887 *t = (struct tda9887*)client->data; + struct tda9887 *t = i2c_get_clientdata(client); switch (cmd) { @@ -456,9 +456,11 @@ }; static struct i2c_client client_template = { - .name = "tda9887", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, + .dev = { + .name = "tda9887", + }, }; static int tda9887_init_module(void) diff -Nru a/drivers/media/video/tuner-3036.c b/drivers/media/video/tuner-3036.c --- a/drivers/media/video/tuner-3036.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/tuner-3036.c Tue Mar 25 18:36:42 2003 @@ -196,9 +196,11 @@ static struct i2c_client client_template = { - .name = "SAB3036", .id = -1, - .driver = &i2c_driver_tuner + .driver = &i2c_driver_tuner, + .dev = { + .name = "SAB3036", + }, }; int __init diff -Nru a/drivers/media/video/tuner.c b/drivers/media/video/tuner.c --- a/drivers/media/video/tuner.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/tuner.c Tue Mar 25 18:36:42 2003 @@ -226,7 +226,7 @@ { unsigned char byte; - struct tuner *t = (struct tuner*)c->data; + struct tuner *t = i2c_get_clientdata(c); if (t->type == TUNER_MT2032) return 0; @@ -276,7 +276,7 @@ { unsigned char buf[21]; int ret,xogc,xok=0; - struct tuner *t = (struct tuner*)c->data; + struct tuner *t = i2c_get_clientdata(c); buf[0]=0; ret=i2c_master_send(c,buf,1); @@ -517,7 +517,7 @@ { unsigned char buf[21]; int lint_try,ret,sel,lock=0; - struct tuner *t = (struct tuner*)c->data; + struct tuner *t = i2c_get_clientdata(c); dprintk("mt2032_set_if_freq rfin=%d if1=%d if2=%d from=%d to=%d\n",rfin,if1,if2,from,to); @@ -594,7 +594,7 @@ u8 config; u16 div; struct tunertype *tun; - struct tuner *t = c->data; + struct tuner *t = i2c_get_clientdata(c); unsigned char buffer[4]; int rc; @@ -733,7 +733,7 @@ static void set_radio_freq(struct i2c_client *c, int freq) { struct tunertype 
*tun; - struct tuner *t = (struct tuner*)c->data; + struct tuner *t = i2c_get_clientdata(c); unsigned char buffer[4]; int rc,div; @@ -794,16 +794,17 @@ if (NULL == (client = kmalloc(sizeof(struct i2c_client), GFP_KERNEL))) return -ENOMEM; memcpy(client,&client_template,sizeof(struct i2c_client)); - client->data = t = kmalloc(sizeof(struct tuner),GFP_KERNEL); + t = kmalloc(sizeof(struct tuner),GFP_KERNEL); if (NULL == t) { kfree(client); return -ENOMEM; } + i2c_set_clientdata(client, t); memset(t,0,sizeof(struct tuner)); if (type >= 0 && type < TUNERS) { t->type = type; printk("tuner(bttv): type forced to %d (%s) [insmod]\n",t->type,tuners[t->type].name); - strncpy(client->name, tuners[t->type].name, sizeof(client->name)); + strncpy(client->dev.name, tuners[t->type].name, DEVICE_NAME_SIZE); } else { t->type = -1; } @@ -830,12 +831,12 @@ case I2C_ALGO_SAA7134: case I2C_ALGO_SAA7146: printk("tuner: probing %s i2c adapter [id=0x%x]\n", - adap->name,adap->id); + adap->dev.name,adap->id); rc = i2c_probe(adap, &addr_data, tuner_attach); break; default: printk("tuner: ignoring %s i2c adapter [id=0x%x]\n", - adap->name,adap->id); + adap->dev.name,adap->id); rc = 0; /* nothing */ } @@ -844,7 +845,7 @@ static int tuner_detach(struct i2c_client *client) { - struct tuner *t = (struct tuner*)client->data; + struct tuner *t = i2c_get_clientdata(client); i2c_detach_client(client); kfree(t); @@ -856,7 +857,7 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) { - struct tuner *t = (struct tuner*)client->data; + struct tuner *t = i2c_get_clientdata(client); int *iarg = (int*)arg; #if 0 __u16 *sarg = (__u16*)arg; @@ -875,7 +876,7 @@ t->type = *iarg; printk("tuner: type set to %d (%s)\n", t->type,tuners[t->type].name); - strncpy(client->name, tuners[t->type].name, sizeof(client->name)); + strncpy(client->dev.name, tuners[t->type].name, DEVICE_NAME_SIZE); if (t->type == TUNER_MT2032) mt2032_init(client); break; @@ -977,9 +978,11 @@ }; static struct 
i2c_client client_template = { - .name = "(tuner unset)", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, + .dev = { + .name = "(tuner unset)", + }, }; static int tuner_init_module(void) diff -Nru a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c --- a/drivers/media/video/tvaudio.c Tue Mar 25 18:36:42 2003 +++ b/drivers/media/video/tvaudio.c Tue Mar 25 18:36:42 2003 @@ -161,22 +161,22 @@ unsigned char buffer[2]; if (-1 == subaddr) { - dprintk("%s: chip_write: 0x%x\n", chip->c.name, val); + dprintk("%s: chip_write: 0x%x\n", chip->c.dev.name, val); chip->shadow.bytes[1] = val; buffer[0] = val; if (1 != i2c_master_send(&chip->c,buffer,1)) { printk(KERN_WARNING "%s: I/O error (write 0x%x)\n", - chip->c.name, val); + chip->c.dev.name, val); return -1; } } else { - dprintk("%s: chip_write: reg%d=0x%x\n", chip->c.name, subaddr, val); + dprintk("%s: chip_write: reg%d=0x%x\n", chip->c.dev.name, subaddr, val); chip->shadow.bytes[subaddr+1] = val; buffer[0] = subaddr; buffer[1] = val; if (2 != i2c_master_send(&chip->c,buffer,2)) { printk(KERN_WARNING "%s: I/O error (write reg%d=0x%x)\n", - chip->c.name, subaddr, val); + chip->c.dev.name, subaddr, val); return -1; } } @@ -201,10 +201,10 @@ if (1 != i2c_master_recv(&chip->c,&buffer,1)) { printk(KERN_WARNING "%s: I/O error (read)\n", - chip->c.name); + chip->c.dev.name); return -1; } - dprintk("%s: chip_read: 0x%x\n",chip->c.name,buffer); + dprintk("%s: chip_read: 0x%x\n",chip->c.dev.name,buffer); return buffer; } @@ -220,11 +220,11 @@ if (2 != i2c_transfer(chip->c.adapter,msgs,2)) { printk(KERN_WARNING "%s: I/O error (read2)\n", - chip->c.name); + chip->c.dev.name); return -1; } dprintk("%s: chip_read2: reg%d=0x%x\n", - chip->c.name,subaddr,read[0]); + chip->c.dev.name,subaddr,read[0]); return read[0]; } @@ -237,7 +237,7 @@ /* update our shadow register set; print bytes if (debug > 0) */ dprintk("%s: chip_cmd(%s): reg=%d, data:", - chip->c.name,name,cmd->bytes[0]); + chip->c.dev.name,name,cmd->bytes[0]); for (i 
= 1; i < cmd->count; i++) { dprintk(" 0x%x",cmd->bytes[i]); chip->shadow.bytes[i+cmd->bytes[0]] = cmd->bytes[i]; @@ -246,7 +246,7 @@ /* send data to the chip */ if (cmd->count != i2c_master_send(&chip->c,cmd->bytes,cmd->count)) { - printk(KERN_WARNING "%s: I/O error (%s)\n", chip->c.name, name); + printk(KERN_WARNING "%s: I/O error (%s)\n", chip->c.dev.name, name); return -1; } return 0; @@ -273,19 +273,19 @@ #ifdef CONFIG_SMP lock_kernel(); #endif - daemonize("%s", chip->c.name); + daemonize("%s", chip->c.dev.name); chip->thread = current; #ifdef CONFIG_SMP unlock_kernel(); #endif - dprintk("%s: thread started\n", chip->c.name); + dprintk("%s: thread started\n", chip->c.dev.name); if(chip->notify != NULL) up(chip->notify); for (;;) { interruptible_sleep_on(&chip->wq); - dprintk("%s: thread wakeup\n", chip->c.name); + dprintk("%s: thread wakeup\n", chip->c.dev.name); if (chip->done || signal_pending(current)) break; @@ -301,7 +301,7 @@ } chip->thread = NULL; - dprintk("%s: thread exiting\n", chip->c.name); + dprintk("%s: thread exiting\n", chip->c.dev.name); if(chip->notify != NULL) up(chip->notify); @@ -316,7 +316,7 @@ if (mode == chip->prevmode) return; - dprintk("%s: thread checkmode\n", chip->c.name); + dprintk("%s: thread checkmode\n", chip->c.dev.name); chip->prevmode = mode; if (mode & VIDEO_SOUND_STEREO) @@ -1339,7 +1339,7 @@ memcpy(&chip->c,&client_template,sizeof(struct i2c_client)); chip->c.adapter = adap; chip->c.addr = addr; - chip->c.data = chip; + i2c_set_clientdata(&chip->c, chip); /* find description for the chip */ dprintk("tvaudio: chip found @ i2c-addr=0x%x\n", addr<<1); @@ -1364,7 +1364,7 @@ (desc->flags & CHIP_HAS_INPUTSEL) ? 
" audiomux" : ""); /* fill required data structures */ - strcpy(chip->c.name,desc->name); + strncpy(chip->c.dev.name, desc->name, DEVICE_NAME_SIZE); chip->type = desc-chiplist; chip->shadow.count = desc->registers+1; chip->prevmode = -1; @@ -1421,7 +1421,7 @@ static int chip_detach(struct i2c_client *client) { - struct CHIPSTATE *chip = client->data; + struct CHIPSTATE *chip = i2c_get_clientdata(client); del_timer(&chip->wt); if (NULL != chip->thread) { @@ -1447,10 +1447,10 @@ unsigned int cmd, void *arg) { __u16 *sarg = arg; - struct CHIPSTATE *chip = client->data; + struct CHIPSTATE *chip = i2c_get_clientdata(client); struct CHIPDESC *desc = chiplist + chip->type; - dprintk("%s: chip_command 0x%x\n",chip->c.name,cmd); + dprintk("%s: chip_command 0x%x\n",chip->c.dev.name,cmd); switch (cmd) { case AUDC_SET_INPUT: @@ -1558,9 +1558,11 @@ static struct i2c_client client_template = { - .name = "(unset)", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, + .dev = { + .name = "(unset)", + }, }; static int audiochip_init_module(void) diff -Nru a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c --- a/drivers/s390/net/ctcmain.c Tue Mar 25 18:36:42 2003 +++ b/drivers/s390/net/ctcmain.c Tue Mar 25 18:36:42 2003 @@ -2761,7 +2761,6 @@ dev->addr_len = 0; dev->type = ARPHRD_SLIP; dev->tx_queue_len = 100; - dev_init_buffers(dev); dev->flags = IFF_POINTOPOINT | IFF_NOARP; return dev; } diff -Nru a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c --- a/drivers/s390/net/netiucv.c Tue Mar 25 18:36:42 2003 +++ b/drivers/s390/net/netiucv.c Tue Mar 25 18:36:42 2003 @@ -1630,7 +1630,6 @@ dev->addr_len = 0; dev->type = ARPHRD_SLIP; dev->tx_queue_len = NETIUCV_QUEUELEN_DEFAULT; - dev_init_buffers(dev); dev->flags = IFF_POINTOPOINT | IFF_NOARP; return dev; } diff -Nru a/drivers/video/matrox/i2c-matroxfb.c b/drivers/video/matrox/i2c-matroxfb.c --- a/drivers/video/matrox/i2c-matroxfb.c Tue Mar 25 18:36:42 2003 +++ b/drivers/video/matrox/i2c-matroxfb.c Tue Mar 25 18:36:42 2003 @@ 
-111,7 +111,8 @@ b->mask.data = data; b->mask.clock = clock; b->adapter = matrox_i2c_adapter_template; - sprintf(b->adapter.name, name, minor(minfo->fbcon.node)); + snprintf(b->adapter.dev.name, DEVICE_NAME_SIZE, name, + minor(minfo->fbcon.node)); b->adapter.data = b; b->adapter.algo_data = &b->bac; b->bac = matrox_i2c_algo_template; @@ -159,22 +160,22 @@ switch (ACCESS_FBINFO(chip)) { case MGA_2064: case MGA_2164: - err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1B_DATA, DDC1B_CLK, "DDC:fb%u #0 on i2c-matroxfb"); + err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1B_DATA, DDC1B_CLK, "DDC:fb%u #0"); break; default: - err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1_DATA, DDC1_CLK, "DDC:fb%u #0 on i2c-matroxfb"); + err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1_DATA, DDC1_CLK, "DDC:fb%u #0"); break; } if (err) goto fail_ddc1; if (ACCESS_FBINFO(devflags.dualhead)) { - err = i2c_bus_reg(&m2info->ddc2, minfo, DDC2_DATA, DDC2_CLK, "DDC:fb%u #1 on i2c-matroxfb"); + err = i2c_bus_reg(&m2info->ddc2, minfo, DDC2_DATA, DDC2_CLK, "DDC:fb%u #1"); if (err == -ENODEV) { printk(KERN_INFO "i2c-matroxfb: VGA->TV plug detected, DDC unavailable.\n"); } else if (err) printk(KERN_INFO "i2c-matroxfb: Could not register secondary output i2c bus. Continuing anyway.\n"); /* Register maven bus even on G450/G550 */ - err = i2c_bus_reg(&m2info->maven, minfo, MAT_DATA, MAT_CLK, "MAVEN:fb%u on i2c-matroxfb"); + err = i2c_bus_reg(&m2info->maven, minfo, MAT_DATA, MAT_CLK, "MAVEN:fb%u"); if (err) printk(KERN_INFO "i2c-matroxfb: Could not register Maven i2c bus. Continuing anyway.\n"); } diff -Nru a/fs/aio.c b/fs/aio.c --- a/fs/aio.c Tue Mar 25 18:36:42 2003 +++ b/fs/aio.c Tue Mar 25 18:36:42 2003 @@ -522,7 +522,7 @@ /* Lookup an ioctx id. ioctx_list is lockless for reads. * FIXME: this is O(n) and is only suitable for development. 
*/ -static struct kioctx *lookup_ioctx(unsigned long ctx_id) +struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct kioctx *ioctx; struct mm_struct *mm; @@ -984,9 +984,9 @@ return -EINVAL; } -static int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb, +int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb, struct iocb *iocb)); -static int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb, +int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb, struct iocb *iocb) { struct kiocb *req; diff -Nru a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c --- a/fs/jfs/jfs_dtree.c Tue Mar 25 18:36:42 2003 +++ b/fs/jfs/jfs_dtree.c Tue Mar 25 18:36:42 2003 @@ -2978,7 +2978,6 @@ int d_namleft, len, outlen; unsigned long dirent_buf; char *name_ptr; - int dtlhdrdatalen; u32 dir_index; int do_index = 0; uint loop_count = 0; @@ -2998,7 +2997,6 @@ * -1 = End of directory */ do_index = 1; - dtlhdrdatalen = DTLHDRDATALEN; dir_index = (u32) filp->f_pos; @@ -3083,8 +3081,6 @@ * pn > 0: Real entries, pn=1 -> leftmost page * pn = index = -1: No more entries */ - dtlhdrdatalen = DTLHDRDATALEN_LEGACY; - dtpos = filp->f_pos; if (dtpos == 0) { /* build "." 
entry */ diff -Nru a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c --- a/fs/jfs/jfs_extent.c Tue Mar 25 18:36:42 2003 +++ b/fs/jfs/jfs_extent.c Tue Mar 25 18:36:42 2003 @@ -91,7 +91,7 @@ { struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); s64 nxlen, nxaddr, xoff, hint, xaddr = 0; - int rc, nbperpage; + int rc; int xflag; /* This blocks if we are low on resources */ @@ -103,9 +103,6 @@ /* validate extent length */ if (xlen > MAXXLEN) xlen = MAXXLEN; - - /* get the number of blocks per page */ - nbperpage = sbi->nbperpage; /* get the page's starting extent offset */ xoff = pno << sbi->l2nbperpage; diff -Nru a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c --- a/fs/jfs/jfs_imap.c Tue Mar 25 18:36:42 2003 +++ b/fs/jfs/jfs_imap.c Tue Mar 25 18:36:42 2003 @@ -2966,7 +2966,6 @@ struct buffer_head *bh; struct inode *ip; tid_t tid; - int rc; /* if AIT2 ipmap2 is bad, do not try to update it */ if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ @@ -2974,7 +2973,7 @@ ip = diReadSpecial(sb, FILESYSTEM_I, 1); if (ip == NULL) { JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; - if ((rc = readSuper(sb, &bh))) + if (readSuper(sb, &bh)) return; j_sb = (struct jfs_superblock *)bh->b_data; j_sb->s_flag |= JFS_BAD_SAIT; @@ -2988,7 +2987,7 @@ /* start transaction */ tid = txBegin(sb, COMMIT_FORCE); /* update the inode map addressing structure to point to it */ - if ((rc = xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0))) { + if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; txAbort(tid, 1); goto cleanup; @@ -2997,7 +2996,7 @@ /* update the inode map's inode to reflect the extension */ ip->i_size += PSIZE; ip->i_blocks += LBLK2PBLK(sb, xlen); - rc = txCommit(tid, 1, &ip, COMMIT_FORCE); + txCommit(tid, 1, &ip, COMMIT_FORCE); cleanup: txEnd(tid); diFreeSpecial(ip); diff -Nru a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c --- a/fs/jfs/jfs_txnmgr.c Tue Mar 25 18:36:42 2003 +++ b/fs/jfs/jfs_txnmgr.c Tue Mar 25 18:36:42 2003 @@ -380,8 +380,8 @@ tblk = tid_to_tblock(t); - if ((tblk->next 
== 0) && (current != jfsCommitTask)) { - /* Save one tblk for jfsCommit thread */ + if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { + /* Don't let a non-forced transaction take the last tblk */ jfs_info("txBegin: waiting for free tid"); INCREMENT(TxStat.txBegin_freetid); TXN_SLEEP(&TxAnchor.freewait); @@ -1553,12 +1553,10 @@ void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { - struct inode *ip; struct metapage *mp; struct pxd_lock *pxdlock; pxd_t *pxd; - ip = tlck->ip; mp = tlck->mp; /* initialize as REDOPAGE/NOREDOPAGE record format */ @@ -2894,7 +2892,6 @@ struct inode *ip; struct jfs_inode_info *jfs_ip; struct jfs_log *log = JFS_SBI(sb)->log; - int rc; tid_t tid; set_bit(log_QUIESCE, &log->flag); @@ -2914,7 +2911,7 @@ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); down(&jfs_ip->commit_sem); - rc = txCommit(tid, 1, &ip, 0); + txCommit(tid, 1, &ip, 0); txEnd(tid); up(&jfs_ip->commit_sem); /* @@ -2994,8 +2991,7 @@ * when it is committed */ TXN_UNLOCK(); - tid = txBegin(ip->i_sb, - COMMIT_INODE | COMMIT_FORCE); + tid = txBegin(ip->i_sb, COMMIT_INODE); rc = txCommit(tid, 1, &ip, 0); txEnd(tid); up(&jfs_ip->commit_sem); diff -Nru a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c --- a/fs/jfs/jfs_xtree.c Tue Mar 25 18:36:42 2003 +++ b/fs/jfs/jfs_xtree.c Tue Mar 25 18:36:42 2003 @@ -3268,14 +3268,13 @@ void xtInitRoot(tid_t tid, struct inode *ip) { xtpage_t *p; - struct tlock *tlck; /* * acquire a transaction lock on the root * * action: */ - tlck = txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, + txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, tlckXTREE | tlckNEW); p = &JFS_IP(ip)->i_xtroot; diff -Nru a/include/asm-alpha/fcntl.h b/include/asm-alpha/fcntl.h --- a/include/asm-alpha/fcntl.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-alpha/fcntl.h Tue Mar 25 18:36:42 2003 @@ -69,9 +69,6 @@ __kernel_pid_t l_pid; }; -#ifdef __KERNEL__ -#define flock64 flock -#endif #define F_LINUX_SPECIFIC_BASE 
1024 #endif diff -Nru a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h --- a/include/asm-alpha/pgtable.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-alpha/pgtable.h Tue Mar 25 18:36:42 2003 @@ -68,6 +68,7 @@ /* .. and these are ours ... */ #define _PAGE_DIRTY 0x20000 #define _PAGE_ACCESSED 0x40000 +#define _PAGE_FILE 0x80000 /* pagecache or swap? */ /* * NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly @@ -254,6 +255,7 @@ extern inline int pte_exec(pte_t pte) { return !(pte_val(pte) & _PAGE_FOE); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOR; return pte; } @@ -311,11 +313,16 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; pte_val(pte) = (type << 32) | (offset << 40); return pte; } -#define __swp_type(x) (((x).val >> 32) & 0xff) -#define __swp_offset(x) ((x).val >> 40) -#define __swp_entry(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +#define __swp_type(x) (((x).val >> 32) & 0xff) +#define __swp_offset(x) ((x).val >> 40) +#define __swp_entry(type, off) ((swp_entry_t) { pte_val(mk_swap_pte((type), (off))) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define pte_to_pgoff(pte) (pte_val(pte) >> 32) +#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE }) + +#define PTE_FILE_MAX_BITS 32 #ifndef CONFIG_DISCONTIGMEM #define kern_addr_valid(addr) (1) diff -Nru a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h 
--- a/include/asm-ppc64/machdep.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-ppc64/machdep.h Tue Mar 25 18:36:42 2003 @@ -90,22 +90,6 @@ unsigned char (*udbg_getc)(void); int (*udbg_getc_poll)(void); - /* PCI interfaces */ - int (*pcibios_read_config)(struct device_node *dn, int where, int size, - u32 *val); - int (*pcibios_write_config)(struct device_node *dn, int where, - int size, u32 val); - - /* Called after scanning the bus, before allocating - * resources - */ - void (*pcibios_fixup)(void); - - /* Called for each PCI bus in the system - * when it's probed - */ - void (*pcibios_fixup_bus)(struct pci_bus *); - #ifdef CONFIG_SMP /* functions for dealing with other cpus */ struct smp_ops_t smp_ops; diff -Nru a/include/asm-ppc64/pci-bridge.h b/include/asm-ppc64/pci-bridge.h --- a/include/asm-ppc64/pci-bridge.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-ppc64/pci-bridge.h Tue Mar 25 18:36:42 2003 @@ -40,7 +40,7 @@ void *io_base_virt; unsigned long io_base_phys; - /* Some machines (PReP) have a non 1:1 mapping of + /* Some machines have a non 1:1 mapping of * the PCI memory space in the CPU bus space */ unsigned long pci_mem_offset; diff -Nru a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h --- a/include/asm-ppc64/pci.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-ppc64/pci.h Tue Mar 25 18:36:42 2003 @@ -16,11 +16,6 @@ #include #include -static inline int pcibios_assign_all_busses(void) -{ - return 0; -} - #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 @@ -36,7 +31,18 @@ struct pci_dev; -extern char* pci_card_location(struct pci_dev*); +#define HAVE_ARCH_PCI_MWI 1 +static inline int pcibios_prep_mwi(struct pci_dev *dev) +{ + /* + * pSeries firmware sets cacheline size and hardware treats + * MWI the same as memory write, so we dont change cacheline size + * or the MWI bit. 
+ */ + return 1; +} + +extern unsigned int pcibios_assign_all_busses(void); extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle); @@ -52,8 +58,6 @@ extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); -extern void pSeries_pcibios_init_early(void); - static inline void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) @@ -122,9 +126,10 @@ */ #define PCI_DMA_BUS_IS_PHYS (0) -#endif /* __KERNEL__ */ +extern void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res); -/* generic pci stuff */ -#include +#endif /* __KERNEL__ */ #endif /* __PPC64_PCI_H */ diff -Nru a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h --- a/include/asm-x86_64/apic.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/apic.h Tue Mar 25 18:36:42 2003 @@ -75,7 +75,7 @@ extern void setup_boot_APIC_clock (void); extern void setup_secondary_APIC_clock (void); extern void setup_apic_nmi_watchdog (void); -extern inline void nmi_watchdog_tick (struct pt_regs * regs); +extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); extern int APIC_init_uniprocessor (void); extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); diff -Nru a/include/asm-x86_64/debugreg.h b/include/asm-x86_64/debugreg.h --- a/include/asm-x86_64/debugreg.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/debugreg.h Tue Mar 25 18:36:42 2003 @@ -58,7 +58,7 @@ We can slow the instruction pipeline for instructions coming via the gdt or the ldt if we want to. 
I am not sure why this is an advantage */ -#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */ +#define DR_CONTROL_RESERVED (0xFFFFFFFFFC00) /* Reserved by Intel */ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ diff -Nru a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h --- a/include/asm-x86_64/desc.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/desc.h Tue Mar 25 18:36:42 2003 @@ -8,6 +8,7 @@ #ifndef __ASSEMBLY__ #include +#include // 8 byte segment descriptor struct desc_struct { diff -Nru a/include/asm-x86_64/hdreg.h b/include/asm-x86_64/hdreg.h --- a/include/asm-x86_64/hdreg.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/hdreg.h Tue Mar 25 18:36:42 2003 @@ -7,6 +7,4 @@ #ifndef __ASMx86_64_HDREG_H #define __ASMx86_64_HDREG_H -typedef unsigned long ide_ioreg_t; - #endif /* __ASMx86_64_HDREG_H */ diff -Nru a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h --- a/include/asm-x86_64/i387.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/i387.h Tue Mar 25 18:36:42 2003 @@ -19,15 +19,15 @@ #include extern void fpu_init(void); -extern void init_fpu(void); -int save_i387(struct _fpstate *buf); +extern void init_fpu(struct task_struct *child); +extern int save_i387(struct _fpstate *buf); static inline int need_signal_i387(struct task_struct *me) { if (!me->used_math) return 0; me->used_math = 0; - if (me->thread_info->flags & _TIF_USEDFPU) + if (me->thread_info->status & TS_USEDFPU) return 0; return 1; } @@ -39,14 +39,14 @@ #define kernel_fpu_end() stts() #define unlazy_fpu(tsk) do { \ - if ((tsk)->thread_info->flags & _TIF_USEDFPU) \ + if ((tsk)->thread_info->status & TS_USEDFPU) \ save_init_fpu(tsk); \ } while (0) #define clear_fpu(tsk) do { \ - if ((tsk)->thread_info->flags & _TIF_USEDFPU) { \ + if ((tsk)->thread_info->status & TS_USEDFPU) { \ asm volatile("fwait"); \ - (tsk)->thread_info->flags &= ~_TIF_USEDFPU; \ + (tsk)->thread_info->status &= 
~TS_USEDFPU; \ stts(); \ } \ } while (0) @@ -114,11 +114,11 @@ static inline void kernel_fpu_begin(void) { - struct task_struct *me = current; - if (test_tsk_thread_flag(me,TIF_USEDFPU)) { - asm volatile("fxsave %0 ; fnclex" - : "=m" (me->thread.i387.fxsave)); - clear_tsk_thread_flag(me, TIF_USEDFPU); + struct thread_info *me = current_thread_info(); + if (me->status & TS_USEDFPU) { + asm volatile("rex64 ; fxsave %0 ; fnclex" + : "=m" (me->task->thread.i387.fxsave)); + me->status &= ~TS_USEDFPU; return; } clts(); @@ -128,7 +128,7 @@ { asm volatile( "fxsave %0 ; fnclex" : "=m" (tsk->thread.i387.fxsave)); - tsk->thread_info->flags &= ~TIF_USEDFPU; + tsk->thread_info->status &= ~TS_USEDFPU; stts(); } @@ -139,19 +139,5 @@ { return restore_fpu_checking((struct i387_fxsave_struct *)buf); } - - -static inline void empty_fpu(struct task_struct *child) -{ - if (!child->used_math) { - /* Simulate an empty FPU. */ - memset(&child->thread.i387.fxsave,0,sizeof(struct i387_fxsave_struct)); - child->thread.i387.fxsave.cwd = 0x037f; - child->thread.i387.fxsave.swd = 0; - child->thread.i387.fxsave.twd = 0; - child->thread.i387.fxsave.mxcsr = 0x1f80; - } - child->used_math = 1; -} #endif /* __ASM_X86_64_I387_H */ diff -Nru a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h --- a/include/asm-x86_64/kdebug.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/kdebug.h Tue Mar 25 18:36:42 2003 @@ -9,8 +9,13 @@ struct pt_regs *regs; const char *str; long err; + int trapnr; + int signr; }; +/* Note - you should never unregister because that can race with NMIs. + If you really want to do it first unregister - then synchronize_kernel - then free. + */ extern struct notifier_block *die_chain; /* Grossly misnamed. */ @@ -21,15 +26,16 @@ DIE_PANIC, DIE_NMI, DIE_DIE, + DIE_NMIWATCHDOG, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_GPF, DIE_CALL, - DIE_CPUINIT, /* not really a die, but .. */ - DIE_TRAPINIT, /* not really a die, but .. 
*/ - DIE_STOP, }; -static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err) +static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig) { - struct die_args args = { regs: regs, str: str, err: err }; + struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig }; return notifier_call_chain(&die_chain, val, &args); } diff -Nru a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h --- a/include/asm-x86_64/numa.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/numa.h Tue Mar 25 18:36:42 2003 @@ -8,13 +8,11 @@ u64 start,end; }; -#define for_all_nodes(x) for ((x) = 0; (x) <= maxnode; (x)++) \ +#define for_all_nodes(x) for ((x) = 0; (x) < numnodes; (x)++) \ if ((1UL << (x)) & nodes_present) -#define early_for_all_nodes(n) \ - for (n=0; n> PML4_SHIFT) & (PTRS_PER_PML4-1)) #define pml4_offset_k(address) (init_level4_pgt + pml4_index(address)) +#define pml4_present(pml4) (pml4_val(pml4) & _PAGE_PRESENT) #define mk_kernel_pml4(address) ((pml4_t){ (address) | _KERNPG_TABLE }) #define level3_offset_k(dir, address) ((pgd_t *) pml4_page(*(dir)) + pgd_index(address)) diff -Nru a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h --- a/include/asm-x86_64/processor.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/processor.h Tue Mar 25 18:36:42 2003 @@ -269,7 +269,7 @@ extern void release_thread(struct task_struct *); /* Prepare to copy thread state - unlazy all lazy status */ -#define prepare_to_copy(tsk) do { } while (0) +extern void prepare_to_copy(struct task_struct *tsk); /* * create a kernel thread without removing it from tasklists @@ -308,8 +308,8 @@ #define ARCH_HAS_PREFETCHW #define ARCH_HAS_SPINLOCK_PREFETCH -#define prefetch(x) __builtin_prefetch((x),0) -#define prefetchw(x) __builtin_prefetch((x),1) +#define prefetch(x) __builtin_prefetch((x),0,1) +#define prefetchw(x) __builtin_prefetch((x),1,1) #define spin_lock_prefetch(x) prefetchw(x) #define 
cpu_relax() rep_nop() diff -Nru a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h --- a/include/asm-x86_64/proto.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/proto.h Tue Mar 25 18:36:42 2003 @@ -6,6 +6,7 @@ /* misc architecture specific prototypes */ struct cpuinfo_x86; +struct pt_regs; extern void get_cpu_vendor(struct cpuinfo_x86*); extern void start_kernel(void); @@ -41,6 +42,8 @@ extern unsigned long end_pfn_map; extern void show_stack(unsigned long * rsp); +extern void show_trace(unsigned long * rsp); +extern void show_registers(struct pt_regs *regs); extern void exception_table_check(void); diff -Nru a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h --- a/include/asm-x86_64/suspend.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/suspend.h Tue Mar 25 18:36:42 2003 @@ -11,7 +11,7 @@ { } -/* image of the saved processor state */ +/* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */ struct saved_context { u16 ds, es, fs, gs, ss; unsigned long gs_base, gs_kernel_base, fs_base; diff -Nru a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h --- a/include/asm-x86_64/system.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/system.h Tue Mar 25 18:36:42 2003 @@ -22,18 +22,18 @@ struct save_context_frame { unsigned long rbp; unsigned long rbx; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; unsigned long rcx; unsigned long rdx; - unsigned long rsi; - unsigned long rdi; unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; - unsigned long r11; - unsigned long r10; - unsigned long r9; - unsigned long r8; + unsigned long rdi; + unsigned long rsi; }; /* frame pointer must be last for get_wchan */ @@ -43,19 +43,20 @@ rbp needs to be always explicitely saved because gcc cannot clobber the frame pointer and the scheduler is compiled with frame pointers. 
-AK */ #define SAVE_CONTEXT \ - __PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) __PUSH(r12) __PUSH(r13) \ - __PUSH(r14) __PUSH(r15) \ - __PUSH(rdi) __PUSH(rsi) \ - __PUSH(rdx) __PUSH(rcx) \ + __PUSH(rsi) __PUSH(rdi) \ + __PUSH(r12) __PUSH(r13) __PUSH(r14) __PUSH(r15) \ + __PUSH(rdx) __PUSH(rcx) __PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) \ __PUSH(rbx) __PUSH(rbp) #define RESTORE_CONTEXT \ __POP(rbp) __POP(rbx) \ - __POP(rcx) __POP(rdx) \ - __POP(rsi) __POP(rdi) \ - __POP(r15) __POP(r14) __POP(r13) __POP(r12) __POP(r11) __POP(r10) \ - __POP(r9) __POP(r8) + __POP(r11) __POP(r10) __POP(r9) __POP(r8) __POP(rcx) __POP(rdx) \ + __POP(r15) __POP(r14) __POP(r13) __POP(r12) \ + __POP(rdi) __POP(rsi) /* RED-PEN: pipeline stall on ret because it is not predicted */ +/* RED-PEN: the register saving could be optimized */ +/* frame pointer must be last for get_wchan */ + #define switch_to(prev,next,last) \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%[prevrsp]\n\t" \ diff -Nru a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h --- a/include/asm-x86_64/thread_info.h Tue Mar 25 18:36:42 2003 +++ b/include/asm-x86_64/thread_info.h Tue Mar 25 18:36:42 2003 @@ -27,6 +27,7 @@ struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ __u32 flags; /* low level flags */ + __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ int preempt_count; @@ -100,16 +101,14 @@ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_USEDFPU 16 /* FPU was used by this task this quantum */ -#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ -#define TIF_IA32 18 /* 32bit process */ +#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ +#define TIF_IA32 17 /* 32bit process */ #define _TIF_SYSCALL_TRACE (1<users) <= 0)) BUG(); 
atomic_inc(&(kioctx)->users); } while (0) #define put_ioctx(kioctx) do { if (unlikely(atomic_dec_and_test(&(kioctx)->users))) __put_ioctx(kioctx); else if (unlikely(atomic_read(&(kioctx)->users) < 0)) BUG(); } while (0) diff -Nru a/include/linux/i2c.h b/include/linux/i2c.h --- a/include/linux/i2c.h Tue Mar 25 18:36:42 2003 +++ b/include/linux/i2c.h Tue Mar 25 18:36:42 2003 @@ -156,7 +156,6 @@ * function is mainly used for lookup & other admin. functions. */ struct i2c_client { - char name[32]; int id; unsigned int flags; /* div., see below */ unsigned int addr; /* chip address - NOTE: 7bit */ @@ -167,11 +166,21 @@ alignment considerations */ struct i2c_adapter *adapter; /* the adapter we sit on */ struct i2c_driver *driver; /* and our access routines */ - void *data; /* for the clients */ int usage_count; /* How many accesses currently */ /* to the client */ + struct device dev; /* the device structure */ }; +#define to_i2c_client(d) container_of(d, struct i2c_client, dev) +static inline void *i2c_get_clientdata (struct i2c_client *dev) +{ + return dev_get_drvdata (&dev->dev); +} + +static inline void i2c_set_clientdata (struct i2c_client *dev, void *data) +{ + return dev_set_drvdata (&dev->dev, data); +} /* * The following structs are for those who like to implement new bus drivers: @@ -210,7 +219,6 @@ */ struct i2c_adapter { struct module *owner; - char name[32]; /* some useful name to identify the adapter */ unsigned int id;/* == is algo->id | hwdep.struct->id, */ /* for registered values see below */ struct i2c_algorithm *algo;/* the algorithm to access the bus */ @@ -220,12 +228,7 @@ int (*client_register)(struct i2c_client *); int (*client_unregister)(struct i2c_client *); - void *data; /* private data for the adapter */ - /* some data fields that are used by all types */ - /* these data fields are readonly to the public */ - /* and can be set via the i2c_ioctl call */ - - /* data fields that are valid for all devices */ + /* data fields that are valid for 
all devices */ struct semaphore bus; struct semaphore list; unsigned int flags;/* flags specifying div. data */ @@ -242,6 +245,16 @@ #endif /* def CONFIG_PROC_FS */ }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) + +static inline void *i2c_get_adapdata (struct i2c_adapter *dev) +{ + return dev_get_drvdata (&dev->dev); +} + +static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) +{ + return dev_set_drvdata (&dev->dev, data); +} /*flags for the driver struct: */ #define I2C_DF_NOTIFY 0x01 /* notify on bus (de/a)ttaches */ diff -Nru a/include/linux/netdevice.h b/include/linux/netdevice.h --- a/include/linux/netdevice.h Tue Mar 25 18:36:42 2003 +++ b/include/linux/netdevice.h Tue Mar 25 18:36:42 2003 @@ -624,11 +624,6 @@ return err; } -static inline void dev_init_buffers(struct net_device *dev) -{ - /* WILL BE REMOVED IN 2.5.0 */ -} - extern int netdev_finish_unregister(struct net_device *dev); static inline void dev_put(struct net_device *dev) diff -Nru a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h --- a/include/net/sctp/sctp.h Tue Mar 25 18:36:42 2003 +++ b/include/net/sctp/sctp.h Tue Mar 25 18:36:42 2003 @@ -356,7 +356,7 @@ static inline void sctp_skb_list_tail(struct sk_buff_head *list, struct sk_buff_head *head) { - int flags __attribute__ ((unused)); + unsigned long flags; sctp_spin_lock_irqsave(&head->lock, flags); sctp_spin_lock(&list->lock); diff -Nru a/include/net/xfrm.h b/include/net/xfrm.h --- a/include/net/xfrm.h Tue Mar 25 18:36:42 2003 +++ b/include/net/xfrm.h Tue Mar 25 18:36:42 2003 @@ -744,6 +744,7 @@ struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel); struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); void xfrm_policy_flush(void); +u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u16 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, diff -Nru a/net/Kconfig 
b/net/Kconfig --- a/net/Kconfig Tue Mar 25 18:36:42 2003 +++ b/net/Kconfig Tue Mar 25 18:36:42 2003 @@ -224,6 +224,8 @@ source "net/ipv6/Kconfig" +source "net/xfrm/Kconfig" + source "net/sctp/Kconfig" config ATM diff -Nru a/net/Makefile b/net/Makefile --- a/net/Makefile Tue Mar 25 18:36:42 2003 +++ b/net/Makefile Tue Mar 25 18:36:42 2003 @@ -14,7 +14,7 @@ # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ -obj-$(CONFIG_INET) += ipv4/ +obj-$(CONFIG_INET) += ipv4/ xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_IPV6) += ipv6/ obj-$(CONFIG_PACKET) += packet/ diff -Nru a/net/ipv4/Kconfig b/net/ipv4/Kconfig --- a/net/ipv4/Kconfig Tue Mar 25 18:36:42 2003 +++ b/net/ipv4/Kconfig Tue Mar 25 18:36:42 2003 @@ -362,13 +362,5 @@ If unsure, say Y. -config XFRM_USER - tristate "IP: IPsec user configuration interface" - ---help--- - Support for IPsec user configuration interface used - by native Linux tools. - - If unsure, say Y. 
- source "net/ipv4/netfilter/Kconfig" diff -Nru a/net/ipv4/Makefile b/net/ipv4/Makefile --- a/net/ipv4/Makefile Tue Mar 25 18:36:42 2003 +++ b/net/ipv4/Makefile Tue Mar 25 18:36:42 2003 @@ -20,6 +20,5 @@ obj-$(CONFIG_INET_ESP) += esp.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter/ -obj-$(CONFIG_XFRM_USER) += xfrm_user.o -obj-y += xfrm_policy.o xfrm4_policy.o xfrm_state.o xfrm4_state.o xfrm_input.o xfrm4_input.o xfrm_algo.o +obj-y += xfrm4_policy.o xfrm4_state.o xfrm4_input.o diff -Nru a/net/ipv4/tcp.c b/net/ipv4/tcp.c --- a/net/ipv4/tcp.c Tue Mar 25 18:36:42 2003 +++ b/net/ipv4/tcp.c Tue Mar 25 18:36:42 2003 @@ -1189,7 +1189,8 @@ from += copy; copied += copy; - seglen -= copy; + if ((seglen -= copy) == 0 && iovlen == 0) + goto out; if (skb->len != mss_now || (flags & MSG_OOB)) continue; diff -Nru a/net/ipv4/xfrm_algo.c b/net/ipv4/xfrm_algo.c --- a/net/ipv4/xfrm_algo.c Tue Mar 25 18:36:42 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,771 +0,0 @@ -/* - * xfrm algorithm interface - * - * Copyright (c) 2002 James Morris - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ -#include -#include -#include -#include -#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) -#include -#endif -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) -#include -#endif -#include - -/* - * Algorithms supported by IPsec. These entries contain properties which - * are used in key negotiation and xfrm processing, and are used to verify - * that instantiated crypto transforms have correct parameters for IPsec - * purposes. 
- */ -static struct xfrm_algo_desc aalg_list[] = { -{ - .name = "digest_null", - - .uinfo = { - .auth = { - .icv_truncbits = 0, - .icv_fullbits = 0, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_AALG_NULL, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 0, - .sadb_alg_maxbits = 0 - } -}, -{ - .name = "md5", - - .uinfo = { - .auth = { - .icv_truncbits = 96, - .icv_fullbits = 128, - } - }, - - .desc = { - .sadb_alg_id = SADB_AALG_MD5HMAC, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 128 - } -}, -{ - .name = "sha1", - - .uinfo = { - .auth = { - .icv_truncbits = 96, - .icv_fullbits = 160, - } - }, - - .desc = { - .sadb_alg_id = SADB_AALG_SHA1HMAC, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 160, - .sadb_alg_maxbits = 160 - } -}, -{ - .name = "sha256", - - .uinfo = { - .auth = { - .icv_truncbits = 128, - .icv_fullbits = 256, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 256, - .sadb_alg_maxbits = 256 - } -}, -{ - .name = "ripemd160", - - .uinfo = { - .auth = { - .icv_truncbits = 96, - .icv_fullbits = 160, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 160, - .sadb_alg_maxbits = 160 - } -}, -}; - -static struct xfrm_algo_desc ealg_list[] = { -{ - .name = "cipher_null", - - .uinfo = { - .encr = { - .blockbits = 8, - .defkeybits = 0, - } - }, - - .desc = { - .sadb_alg_id = SADB_EALG_NULL, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 0, - .sadb_alg_maxbits = 0 - } -}, -{ - .name = "des", - - .uinfo = { - .encr = { - .blockbits = 64, - .defkeybits = 64, - } - }, - - .desc = { - .sadb_alg_id = SADB_EALG_DESCBC, - .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 64, - .sadb_alg_maxbits = 64 - } -}, -{ - .name = "des3_ede", - - .uinfo = { - .encr = { - .blockbits = 64, - .defkeybits = 192, - } - }, - - .desc = { - .sadb_alg_id = SADB_EALG_3DESCBC, - .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 192, - .sadb_alg_maxbits = 192 - } -}, 
-{ - .name = "cast128", - - .uinfo = { - .encr = { - .blockbits = 64, - .defkeybits = 128, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_EALG_CASTCBC, - .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 40, - .sadb_alg_maxbits = 128 - } -}, -{ - .name = "blowfish", - - .uinfo = { - .encr = { - .blockbits = 64, - .defkeybits = 128, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, - .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 40, - .sadb_alg_maxbits = 448 - } -}, -{ - .name = "aes", - - .uinfo = { - .encr = { - .blockbits = 128, - .defkeybits = 128, - } - }, - - .desc = { - .sadb_alg_id = SADB_X_EALG_AESCBC, - .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256 - } -}, -}; - -static struct xfrm_algo_desc calg_list[] = { -{ - .name = "deflate", - .uinfo = { - .comp = { - .threshold = 90, - } - }, - .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } -}, -{ - .name = "lzs", - .uinfo = { - .comp = { - .threshold = 90, - } - }, - .desc = { .sadb_alg_id = SADB_X_CALG_LZS } -}, -{ - .name = "lzjh", - .uinfo = { - .comp = { - .threshold = 50, - } - }, - .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } -}, -}; - -static inline int aalg_entries(void) -{ - return sizeof(aalg_list) / sizeof(aalg_list[0]); -} - -static inline int ealg_entries(void) -{ - return sizeof(ealg_list) / sizeof(ealg_list[0]); -} - -static inline int calg_entries(void) -{ - return sizeof(calg_list) / sizeof(calg_list[0]); -} - -/* Todo: generic iterators */ -struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) -{ - int i; - - for (i = 0; i < aalg_entries(); i++) { - if (aalg_list[i].desc.sadb_alg_id == alg_id) { - if (aalg_list[i].available) - return &aalg_list[i]; - else - break; - } - } - return NULL; -} - -struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) -{ - int i; - - for (i = 0; i < ealg_entries(); i++) { - if (ealg_list[i].desc.sadb_alg_id == alg_id) { - if (ealg_list[i].available) - return &ealg_list[i]; - else - break; - } - } - return NULL; -} - -struct xfrm_algo_desc 
*xfrm_calg_get_byid(int alg_id) -{ - int i; - - for (i = 0; i < calg_entries(); i++) { - if (calg_list[i].desc.sadb_alg_id == alg_id) { - if (calg_list[i].available) - return &calg_list[i]; - else - break; - } - } - return NULL; -} - -struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name) -{ - int i; - - if (!name) - return NULL; - - for (i=0; i < aalg_entries(); i++) { - if (strcmp(name, aalg_list[i].name) == 0) { - if (aalg_list[i].available) - return &aalg_list[i]; - else - break; - } - } - return NULL; -} - -struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name) -{ - int i; - - if (!name) - return NULL; - - for (i=0; i < ealg_entries(); i++) { - if (strcmp(name, ealg_list[i].name) == 0) { - if (ealg_list[i].available) - return &ealg_list[i]; - else - break; - } - } - return NULL; -} - -struct xfrm_algo_desc *xfrm_calg_get_byname(char *name) -{ - int i; - - if (!name) - return NULL; - - for (i=0; i < calg_entries(); i++) { - if (strcmp(name, calg_list[i].name) == 0) { - if (calg_list[i].available) - return &calg_list[i]; - else - break; - } - } - return NULL; -} - -struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx) -{ - if (idx >= aalg_entries()) - return NULL; - - return &aalg_list[idx]; -} - -struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx) -{ - if (idx >= ealg_entries()) - return NULL; - - return &ealg_list[idx]; -} - -struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx) -{ - if (idx >= calg_entries()) - return NULL; - - return &calg_list[idx]; -} - -/* - * Probe for the availability of crypto algorithms, and set the available - * flag for any algorithms found on the system. This is typically called by - * pfkey during userspace SA add, update or register. 
- */ -void xfrm_probe_algs(void) -{ -#ifdef CONFIG_CRYPTO - int i, status; - - BUG_ON(in_softirq()); - - for (i = 0; i < aalg_entries(); i++) { - status = crypto_alg_available(aalg_list[i].name, 0); - if (aalg_list[i].available != status) - aalg_list[i].available = status; - } - - for (i = 0; i < ealg_entries(); i++) { - status = crypto_alg_available(ealg_list[i].name, 0); - if (ealg_list[i].available != status) - ealg_list[i].available = status; - } - - for (i = 0; i < calg_entries(); i++) { - status = crypto_alg_available(calg_list[i].name, 0); - if (calg_list[i].available != status) - calg_list[i].available = status; - } -#endif -} - -int xfrm_count_auth_supported(void) -{ - int i, n; - - for (i = 0, n = 0; i < aalg_entries(); i++) - if (aalg_list[i].available) - n++; - return n; -} - -int xfrm_count_enc_supported(void) -{ - int i, n; - - for (i = 0, n = 0; i < ealg_entries(); i++) - if (ealg_list[i].available) - n++; - return n; -} - -#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) -void skb_ah_walk(const struct sk_buff *skb, - struct crypto_tfm *tfm, icv_update_fn_t icv_update) -{ - int offset = 0; - int len = skb->len; - int start = skb->len - skb->data_len; - int i, copy = start - offset; - struct scatterlist sg; - - /* Checksum header. 
*/ - if (copy > 0) { - if (copy > len) - copy = len; - - sg.page = virt_to_page(skb->data + offset); - sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; - sg.length = copy; - - icv_update(tfm, &sg, 1); - - if ((len -= copy) == 0) - return; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - - sg.page = frag->page; - sg.offset = frag->page_offset + offset-start; - sg.length = copy; - - icv_update(tfm, &sg, 1); - - if (!(len -= copy)) - return; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - skb_ah_walk(list, tfm, icv_update); - if ((len -= copy) == 0) - return; - offset += copy; - } - start = end; - } - } - if (len) - BUG(); -} -#endif - -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) -/* Move to common area: it is shared with AH. */ - -void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, - int offset, int len, icv_update_fn_t icv_update) -{ - int start = skb->len - skb->data_len; - int i, copy = start - offset; - struct scatterlist sg; - - /* Checksum header. 
*/ - if (copy > 0) { - if (copy > len) - copy = len; - - sg.page = virt_to_page(skb->data + offset); - sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; - sg.length = copy; - - icv_update(tfm, &sg, 1); - - if ((len -= copy) == 0) - return; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - - sg.page = frag->page; - sg.offset = frag->page_offset + offset-start; - sg.length = copy; - - icv_update(tfm, &sg, 1); - - if (!(len -= copy)) - return; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - skb_icv_walk(list, tfm, offset-start, copy, icv_update); - if ((len -= copy) == 0) - return; - offset += copy; - } - start = end; - } - } - if (len) - BUG(); -} - - -/* Looking generic it is not used in another places. 
*/ - -int -skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int start = skb->len - skb->data_len; - int i, copy = start - offset; - int elt = 0; - - if (copy > 0) { - if (copy > len) - copy = len; - sg[elt].page = virt_to_page(skb->data + offset); - sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; - sg[elt].length = copy; - elt++; - if ((len -= copy) == 0) - return elt; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - sg[elt].page = frag->page; - sg[elt].offset = frag->page_offset+offset-start; - sg[elt].length = copy; - elt++; - if (!(len -= copy)) - return elt; - offset += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - elt += skb_to_sgvec(list, sg+elt, offset - start, copy); - if ((len -= copy) == 0) - return elt; - offset += copy; - } - start = end; - } - } - if (len) - BUG(); - return elt; -} - -/* Check that skb data bits are writable. If they are not, copy data - * to newly created private area. If "tailbits" is given, make sure that - * tailbits bytes beyond current end of skb are writable. - * - * Returns amount of elements of scatterlist to load for subsequent - * transformations and pointer to writable trailer skb. 
- */ - -int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) -{ - int copyflag; - int elt; - struct sk_buff *skb1, **skb_p; - - /* If skb is cloned or its head is paged, reallocate - * head pulling out all the pages (pages are considered not writable - * at the moment even if they are anonymous). - */ - if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && - __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) - return -ENOMEM; - - /* Easy case. Most of packets will go this way. */ - if (!skb_shinfo(skb)->frag_list) { - /* A little of trouble, not enough of space for trailer. - * This should not happen, when stack is tuned to generate - * good frames. OK, on miss we reallocate and reserve even more - * space, 128 bytes is fair. */ - - if (skb_tailroom(skb) < tailbits && - pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) - return -ENOMEM; - - /* Voila! */ - *trailer = skb; - return 1; - } - - /* Misery. We are in troubles, going to mincer fragments... */ - - elt = 1; - skb_p = &skb_shinfo(skb)->frag_list; - copyflag = 0; - - while ((skb1 = *skb_p) != NULL) { - int ntail = 0; - - /* The fragment is partially pulled by someone, - * this can happen on input. Copy it and everything - * after it. */ - - if (skb_shared(skb1)) - copyflag = 1; - - /* If the skb is the last, worry about trailer. */ - - if (skb1->next == NULL && tailbits) { - if (skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list || - skb_tailroom(skb1) < tailbits) - ntail = tailbits + 128; - } - - if (copyflag || - skb_cloned(skb1) || - ntail || - skb_shinfo(skb1)->nr_frags || - skb_shinfo(skb1)->frag_list) { - struct sk_buff *skb2; - - /* Fuck, we are miserable poor guys... */ - if (ntail == 0) - skb2 = skb_copy(skb1, GFP_ATOMIC); - else - skb2 = skb_copy_expand(skb1, - skb_headroom(skb1), - ntail, - GFP_ATOMIC); - if (unlikely(skb2 == NULL)) - return -ENOMEM; - - if (skb1->sk) - skb_set_owner_w(skb, skb1->sk); - - /* Looking around. 
Are we still alive? - * OK, link new skb, drop old one */ - - skb2->next = skb1->next; - *skb_p = skb2; - kfree_skb(skb1); - skb1 = skb2; - } - elt++; - *trailer = skb1; - skb_p = &skb1->next; - } - - return elt; -} - -void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) -{ - if (tail != skb) { - skb->data_len += len; - skb->len += len; - } - return skb_put(tail, len); -} -#endif diff -Nru a/net/ipv4/xfrm_input.c b/net/ipv4/xfrm_input.c --- a/net/ipv4/xfrm_input.c Tue Mar 25 18:36:42 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,52 +0,0 @@ -/* - * xfrm_input.c - * - * Changes: - * YOSHIFUJI Hideaki @USAGI - * Split up af-specific portion - * - */ - -#include -#include - -void __secpath_destroy(struct sec_path *sp) -{ - int i; - for (i = 0; i < sp->len; i++) - xfrm_state_put(sp->xvec[i]); - kmem_cache_free(sp->pool, sp); -} - -/* Fetch spi and seq frpm ipsec header */ - -int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) -{ - int offset, offset_seq; - - switch (nexthdr) { - case IPPROTO_AH: - offset = offsetof(struct ip_auth_hdr, spi); - offset_seq = offsetof(struct ip_auth_hdr, seq_no); - break; - case IPPROTO_ESP: - offset = offsetof(struct ip_esp_hdr, spi); - offset_seq = offsetof(struct ip_esp_hdr, seq_no); - break; - case IPPROTO_COMP: - if (!pskb_may_pull(skb, 4)) - return -EINVAL; - *spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2))); - *seq = 0; - return 0; - default: - return 1; - } - - if (!pskb_may_pull(skb, 16)) - return -EINVAL; - - *spi = *(u32*)(skb->h.raw + offset); - *seq = *(u32*)(skb->h.raw + offset_seq); - return 0; -} diff -Nru a/net/ipv4/xfrm_policy.c b/net/ipv4/xfrm_policy.c --- a/net/ipv4/xfrm_policy.c Tue Mar 25 18:36:42 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,1232 +0,0 @@ -/* - * xfrm_policy.c - * - * Changes: - * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro - * IPv6 support - * Kazunori MIYAZAWA @USAGI - * YOSHIFUJI Hideaki - * Split up af-specific portion - * - */ - 
-#include -#include -#include - -DECLARE_MUTEX(xfrm_cfg_sem); - -static u32 xfrm_policy_genid; -static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED; - -struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; - -static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED; -static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; - -kmem_cache_t *xfrm_dst_cache; - -/* Limited flow cache. Its function now is to accelerate search for - * policy rules. - * - * Flow cache is private to cpus, at the moment this is important - * mostly for flows which do not match any rule, so that flow lookups - * are absolultely cpu-local. When a rule exists we do some updates - * to rule (refcnt, stats), so that locality is broken. Later this - * can be repaired. - */ - -struct flow_entry -{ - struct flow_entry *next; - struct flowi fl; - u8 dir; - u32 genid; - struct xfrm_policy *pol; -}; - -static kmem_cache_t *flow_cachep; - -struct flow_entry **flow_table; - -static int flow_lwm = 2*XFRM_FLOWCACHE_HASH_SIZE; -static int flow_hwm = 4*XFRM_FLOWCACHE_HASH_SIZE; - -static int flow_number[NR_CPUS] __cacheline_aligned; - -#define flow_count(cpu) (flow_number[cpu]) - -static void flow_cache_shrink(int cpu) -{ - int i; - struct flow_entry *fle, **flp; - int shrink_to = flow_lwm/XFRM_FLOWCACHE_HASH_SIZE; - - for (i=0; inext; - } - while ((fle=*flp) != NULL) { - *flp = fle->next; - if (fle->pol) - xfrm_pol_put(fle->pol); - kmem_cache_free(flow_cachep, fle); - } - } -} - -struct xfrm_policy *flow_lookup(int dir, struct flowi *fl, - unsigned short family) -{ - struct xfrm_policy *pol = NULL; - struct flow_entry *fle; - u32 hash; - int cpu; - - hash = flow_hash(fl, family); - - local_bh_disable(); - cpu = smp_processor_id(); - - for (fle = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash]; - fle; fle = fle->next) { - if (memcmp(fl, &fle->fl, sizeof(fle->fl)) == 0 && - fle->dir == dir) { - if (fle->genid == xfrm_policy_genid) { - if ((pol = fle->pol) != NULL) - atomic_inc(&pol->refcnt); - 
local_bh_enable(); - return pol; - } - break; - } - } - - pol = xfrm_policy_lookup(dir, fl, family); - - if (fle) { - /* Stale flow entry found. Update it. */ - fle->genid = xfrm_policy_genid; - - if (fle->pol) - xfrm_pol_put(fle->pol); - fle->pol = pol; - if (pol) - atomic_inc(&pol->refcnt); - } else { - if (flow_count(cpu) > flow_hwm) - flow_cache_shrink(cpu); - - fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); - if (fle) { - flow_count(cpu)++; - fle->fl = *fl; - fle->genid = xfrm_policy_genid; - fle->dir = dir; - fle->pol = pol; - if (pol) - atomic_inc(&pol->refcnt); - fle->next = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash]; - flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash] = fle; - } - } - local_bh_enable(); - return pol; -} - -void __init flow_cache_init(void) -{ - int order; - - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_entry), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!flow_cachep) - panic("NET: failed to allocate flow cache slab\n"); - - for (order = 0; - (PAGE_SIZE<type_map; - - write_lock(&typemap->lock); - if (likely(typemap->map[type->proto] == NULL)) - typemap->map[type->proto] = type; - else - err = -EEXIST; - write_unlock(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); - return err; -} - -int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - typemap = afinfo->type_map; - - write_lock(&typemap->lock); - if (unlikely(typemap->map[type->proto] != type)) - err = -ENOENT; - else - typemap->map[type->proto] = NULL; - write_unlock(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); - return err; -} - -struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; - struct xfrm_type *type; - - if (unlikely(afinfo == NULL)) 
- return NULL; - typemap = afinfo->type_map; - - read_lock(&typemap->lock); - type = typemap->map[proto]; - if (unlikely(type && !try_module_get(type->owner))) - type = NULL; - read_unlock(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); - return type; -} - -int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, - unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int err = 0; - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - if (likely(afinfo->dst_lookup != NULL)) - err = afinfo->dst_lookup(dst, fl); - else - err = -EINVAL; - xfrm_policy_put_afinfo(afinfo); - return err; -} - -void xfrm_put_type(struct xfrm_type *type) -{ - module_put(type->owner); -} - -static inline unsigned long make_jiffies(long secs) -{ - if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) - return MAX_SCHEDULE_TIMEOUT-1; - else - return secs*HZ; -} - -static void xfrm_policy_timer(unsigned long data) -{ - struct xfrm_policy *xp = (struct xfrm_policy*)data; - unsigned long now = (unsigned long)xtime.tv_sec; - long next = LONG_MAX; - u32 index; - - if (xp->dead) - goto out; - - if (xp->lft.hard_add_expires_seconds) { - long tmo = xp->lft.hard_add_expires_seconds + - xp->curlft.add_time - now; - if (tmo <= 0) - goto expired; - if (tmo < next) - next = tmo; - } - if (next != LONG_MAX && - !mod_timer(&xp->timer, jiffies + make_jiffies(next))) - atomic_inc(&xp->refcnt); - -out: - xfrm_pol_put(xp); - return; - -expired: - index = xp->index; - xfrm_pol_put(xp); - - /* Not 100% correct. id can be recycled in theory */ - xp = xfrm_policy_byid(0, index, 1); - if (xp) { - xfrm_policy_kill(xp); - xfrm_pol_put(xp); - } -} - - -/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 - * SPD calls. 
- */ - -struct xfrm_policy *xfrm_policy_alloc(int gfp) -{ - struct xfrm_policy *policy; - - policy = kmalloc(sizeof(struct xfrm_policy), gfp); - - if (policy) { - memset(policy, 0, sizeof(struct xfrm_policy)); - atomic_set(&policy->refcnt, 1); - policy->lock = RW_LOCK_UNLOCKED; - init_timer(&policy->timer); - policy->timer.data = (unsigned long)policy; - policy->timer.function = xfrm_policy_timer; - } - return policy; -} - -/* Destroy xfrm_policy: descendant resources must be released to this moment. */ - -void __xfrm_policy_destroy(struct xfrm_policy *policy) -{ - if (!policy->dead) - BUG(); - - if (policy->bundles) - BUG(); - - if (del_timer(&policy->timer)) - BUG(); - - kfree(policy); -} - -/* Rule must be locked. Release descentant resources, announce - * entry dead. The rule must be unlinked from lists to the moment. - */ - -void xfrm_policy_kill(struct xfrm_policy *policy) -{ - struct dst_entry *dst; - - write_lock_bh(&policy->lock); - if (policy->dead) - goto out; - - policy->dead = 1; - - while ((dst = policy->bundles) != NULL) { - policy->bundles = dst->next; - dst_free(dst); - } - - if (del_timer(&policy->timer)) - atomic_dec(&policy->refcnt); - -out: - write_unlock_bh(&policy->lock); -} - -/* Generate new index... KAME seems to generate them ordered by cost - * of an absolute inpredictability of ordering of rules. This will not pass. 
*/ -static u32 xfrm_gen_index(int dir) -{ - u32 idx; - struct xfrm_policy *p; - static u32 idx_generator; - - for (;;) { - idx = (idx_generator | dir); - idx_generator += 8; - if (idx == 0) - idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { - if (p->index == idx) - break; - } - if (!p) - return idx; - } -} - -int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) -{ - struct xfrm_policy *pol, **p; - - write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if (memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) { - if (excl) { - write_unlock_bh(&xfrm_policy_lock); - return -EEXIST; - } - break; - } - } - atomic_inc(&policy->refcnt); - policy->next = pol ? pol->next : NULL; - *p = policy; - xfrm_policy_genid++; - policy->index = pol ? pol->index : xfrm_gen_index(dir); - policy->curlft.add_time = (unsigned long)xtime.tv_sec; - policy->curlft.use_time = 0; - if (policy->lft.hard_add_expires_seconds && - !mod_timer(&policy->timer, jiffies + HZ)) - atomic_inc(&policy->refcnt); - write_unlock_bh(&xfrm_policy_lock); - - if (pol) { - atomic_dec(&pol->refcnt); - xfrm_policy_kill(pol); - xfrm_pol_put(pol); - } - return 0; -} - -struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel) -{ - struct xfrm_policy *pol, **p; - - write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) { - *p = pol->next; - break; - } - } - if (pol) - xfrm_policy_genid++; - write_unlock_bh(&xfrm_policy_lock); - return pol; -} - -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) -{ - struct xfrm_policy *pol, **p; - - write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) { - if (pol->index == id) { - if (delete) - *p = pol->next; - break; - } - } - if (pol) { - if (delete) - xfrm_policy_genid++; - else - 
atomic_inc(&pol->refcnt); - } - write_unlock_bh(&xfrm_policy_lock); - return pol; -} - -void xfrm_policy_flush() -{ - struct xfrm_policy *xp; - int dir; - - write_lock_bh(&xfrm_policy_lock); - for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; - write_unlock_bh(&xfrm_policy_lock); - - xfrm_policy_kill(xp); - xfrm_pol_put(xp); - - write_lock_bh(&xfrm_policy_lock); - } - } - xfrm_policy_genid++; - write_unlock_bh(&xfrm_policy_lock); -} - -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), - void *data) -{ - struct xfrm_policy *xp; - int dir; - int count = 0; - int error = 0; - - read_lock_bh(&xfrm_policy_lock); - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) - count++; - } - - if (count == 0) { - error = -ENOENT; - goto out; - } - - for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { - error = func(xp, dir%XFRM_POLICY_MAX, --count, data); - if (error) - goto out; - } - } - -out: - read_unlock_bh(&xfrm_policy_lock); - return error; -} - - -/* Find policy to apply to this flow. 
*/ - -struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl, - unsigned short family) -{ - struct xfrm_policy *pol; - - read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { - struct xfrm_selector *sel = &pol->selector; - int match; - - if (pol->family != family) - continue; - - match = xfrm_selector_match(sel, fl, family); - if (match) { - atomic_inc(&pol->refcnt); - break; - } - } - read_unlock_bh(&xfrm_policy_lock); - return pol; -} - -struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) -{ - struct xfrm_policy *pol; - - read_lock_bh(&xfrm_policy_lock); - if ((pol = sk->policy[dir]) != NULL) { - int match; - - match = xfrm_selector_match(&pol->selector, fl, sk->family); - if (match) - atomic_inc(&pol->refcnt); - else - pol = NULL; - } - read_unlock_bh(&xfrm_policy_lock); - return pol; -} - -void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir) -{ - pol->next = xfrm_policy_list[XFRM_POLICY_MAX+dir]; - xfrm_policy_list[XFRM_POLICY_MAX+dir] = pol; - atomic_inc(&pol->refcnt); -} - -void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir) -{ - struct xfrm_policy **polp; - - for (polp = &xfrm_policy_list[XFRM_POLICY_MAX+dir]; - *polp != NULL; polp = &(*polp)->next) { - if (*polp == pol) { - *polp = pol->next; - atomic_dec(&pol->refcnt); - return; - } - } -} - -int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) -{ - struct xfrm_policy *old_pol; - - write_lock_bh(&xfrm_policy_lock); - old_pol = sk->policy[dir]; - sk->policy[dir] = pol; - if (pol) { - pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); - xfrm_sk_policy_link(pol, dir); - } - if (old_pol) - xfrm_sk_policy_unlink(old_pol, dir); - write_unlock_bh(&xfrm_policy_lock); - - if (old_pol) { - xfrm_policy_kill(old_pol); - xfrm_pol_put(old_pol); - } - return 0; -} - -static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) -{ - 
struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); - - if (newp) { - newp->selector = old->selector; - newp->lft = old->lft; - newp->curlft = old->curlft; - newp->action = old->action; - newp->flags = old->flags; - newp->xfrm_nr = old->xfrm_nr; - newp->index = old->index; - memcpy(newp->xfrm_vec, old->xfrm_vec, - newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&xfrm_policy_lock); - xfrm_sk_policy_link(newp, dir); - write_unlock_bh(&xfrm_policy_lock); - } - return newp; -} - -int __xfrm_sk_clone_policy(struct sock *sk) -{ - struct xfrm_policy *p0, *p1; - p0 = sk->policy[0]; - p1 = sk->policy[1]; - sk->policy[0] = NULL; - sk->policy[1] = NULL; - if (p0 && (sk->policy[0] = clone_policy(p0, 0)) == NULL) - return -ENOMEM; - if (p1 && (sk->policy[1] = clone_policy(p1, 1)) == NULL) - return -ENOMEM; - return 0; -} - -void __xfrm_sk_free_policy(struct xfrm_policy *pol, int dir) -{ - write_lock_bh(&xfrm_policy_lock); - xfrm_sk_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); - - xfrm_policy_kill(pol); - xfrm_pol_put(pol); -} - -/* Resolve list of templates for the flow, given policy. */ - -static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) -{ - int nx; - int i, error; - xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); - xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); - - for (nx=0, i = 0; i < policy->xfrm_nr; i++) { - struct xfrm_state *x; - xfrm_address_t *remote = daddr; - xfrm_address_t *local = saddr; - struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - - if (tmpl->mode) { - remote = &tmpl->id.daddr; - local = &tmpl->saddr; - } - - x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); - - if (x && x->km.state == XFRM_STATE_VALID) { - xfrm[nx++] = x; - daddr = remote; - saddr = local; - continue; - } - if (x) { - error = (x->km.state == XFRM_STATE_ERROR ? 
- -EINVAL : -EAGAIN); - xfrm_state_put(x); - } - - if (!tmpl->optional) - goto fail; - } - return nx; - -fail: - for (nx--; nx>=0; nx--) - xfrm_state_put(xfrm[nx]); - return error; -} - -/* Check that the bundle accepts the flow and its components are - * still valid. - */ - -static struct dst_entry * -xfrm_find_bundle(struct flowi *fl, struct rtable *rt, struct xfrm_policy *policy, unsigned short family) -{ - struct dst_entry *x; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return ERR_PTR(-EINVAL); - x = afinfo->find_bundle(fl, rt, policy); - xfrm_policy_put_afinfo(afinfo); - return x; -} - -/* Allocate chain of dst_entry's, attach known xfrm's, calculate - * all the metrics... Shortly, bundle a bundle. - */ - -static int -xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, - struct flowi *fl, struct dst_entry **dst_p, - unsigned short family) -{ - int err; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EINVAL; - err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p); - xfrm_policy_put_afinfo(afinfo); - return err; -} - -/* Main function: finds/creates a bundle for given flow. - * - * At the moment we eat a raw IP route. Mostly to speed up lookups - * on interfaces with disabled IPsec. - */ -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) -{ - struct xfrm_policy *policy; - struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; - struct rtable *rt = (struct rtable*)*dst_p; - struct dst_entry *dst; - int nx = 0; - int err; - u32 genid; - u16 family = (*dst_p)->ops->family; - - switch (family) { - case AF_INET: - if (!fl->fl4_src) - fl->fl4_src = rt->rt_src; - if (!fl->fl4_dst) - fl->fl4_dst = rt->rt_dst; - case AF_INET6: - /* Still not clear... 
*/ - default: - /* nothing */; - } - -restart: - genid = xfrm_policy_genid; - policy = NULL; - if (sk && sk->policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); - - if (!policy) { - /* To accelerate a bit... */ - if ((rt->u.dst.flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) - return 0; - - policy = flow_lookup(XFRM_POLICY_OUT, fl, family); - } - - if (!policy) - return 0; - - policy->curlft.use_time = (unsigned long)xtime.tv_sec; - - switch (policy->action) { - case XFRM_POLICY_BLOCK: - /* Prohibit the flow */ - xfrm_pol_put(policy); - return -EPERM; - - case XFRM_POLICY_ALLOW: - if (policy->xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pol_put(policy); - return 0; - } - - /* Try to find matching bundle. - * - * LATER: help from flow cache. It is optional, this - * is required only for output policy. - */ - dst = xfrm_find_bundle(fl, rt, policy, family); - if (IS_ERR(dst)) { - xfrm_pol_put(policy); - return PTR_ERR(dst); - } - - if (dst) - break; - - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); - - if (unlikely(nx<0)) { - err = nx; - if (err == -EAGAIN) { - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - if (!flags) - goto error; - - __set_task_state(tsk, TASK_INTERRUPTIBLE); - add_wait_queue(&km_waitq, &wait); - err = xfrm_tmpl_resolve(policy, fl, xfrm, family); - if (err == -EAGAIN) - schedule(); - __set_task_state(tsk, TASK_RUNNING); - remove_wait_queue(&km_waitq, &wait); - - if (err == -EAGAIN && signal_pending(current)) { - err = -ERESTART; - goto error; - } - if (err == -EAGAIN || - genid != xfrm_policy_genid) - goto restart; - } - if (err) - goto error; - } else if (nx == 0) { - /* Flow passes not transformed. */ - xfrm_pol_put(policy); - return 0; - } - - dst = &rt->u.dst; - err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family); - - if (unlikely(err)) { - int i; - for (i=0; ilock); - if (unlikely(policy->dead)) { - /* Wow! While we worked on resolving, this - * policy has gone. 
Retry. It is not paranoia, - * we just cannot enlist new bundle to dead object. - */ - write_unlock_bh(&policy->lock); - - xfrm_pol_put(policy); - if (dst) - dst_free(dst); - goto restart; - } - dst->next = policy->bundles; - policy->bundles = dst; - dst_hold(dst); - write_unlock_bh(&policy->lock); - } - *dst_p = dst; - ip_rt_put(rt); - xfrm_pol_put(policy); - return 0; - -error: - ip_rt_put(rt); - xfrm_pol_put(policy); - *dst_p = NULL; - return err; -} - -/* When skb is transformed back to its "native" form, we have to - * check policy restrictions. At the moment we make this in maximally - * stupid way. Shame on me. :-) Of course, connected sockets must - * have policy cached at them. - */ - -static inline int -xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, - unsigned short family) -{ - return x->id.proto == tmpl->id.proto && - (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && - x->props.mode == tmpl->mode && - (tmpl->aalgos & (1<props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); -} - -static inline int -xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int idx, - unsigned short family) -{ - for (; idx < sp->len; idx++) { - if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) - return ++idx; - } - return -1; -} - -static int -_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - afinfo->decode_session(skb, fl); - xfrm_policy_put_afinfo(afinfo); - return 0; -} - -int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, - unsigned short family) -{ - struct xfrm_policy *pol; - struct flowi fl; - - if (_decode_session(skb, &fl, family) < 0) - return 0; - - /* First, check used SA against their selectors. 
*/ - if (skb->sp) { - int i; - - for (i=skb->sp->len-1; i>=0; i--) { - if (!xfrm_selector_match(&skb->sp->xvec[i]->sel, &fl, family)) - return 0; - } - } - - pol = NULL; - if (sk && sk->policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl); - - if (!pol) - pol = flow_lookup(dir, &fl, family); - - if (!pol) - return 1; - - pol->curlft.use_time = (unsigned long)xtime.tv_sec; - - if (pol->action == XFRM_POLICY_ALLOW) { - if (pol->xfrm_nr != 0) { - struct sec_path *sp; - static struct sec_path dummy; - int i, k; - - if ((sp = skb->sp) == NULL) - sp = &dummy; - - /* For each tmpl search corresponding xfrm. - * Order is _important_. Later we will implement - * some barriers, but at the moment barriers - * are implied between each two transformations. - */ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); - if (k < 0) - goto reject; - } - } - xfrm_pol_put(pol); - return 1; - } - -reject: - xfrm_pol_put(pol); - return 0; -} - -int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) -{ - struct flowi fl; - - if (_decode_session(skb, &fl, family) < 0) - return 0; - - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; -} - -/* Optimize later using cookies and generation ids. */ - -static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) -{ - struct dst_entry *child = dst; - - while (child) { - if (child->obsolete > 0 || - (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) { - dst_release(dst); - return NULL; - } - child = child->child; - } - - return dst; -} - -static void xfrm_dst_destroy(struct dst_entry *dst) -{ - xfrm_state_put(dst->xfrm); - dst->xfrm = NULL; -} - -static void xfrm_link_failure(struct sk_buff *skb) -{ - /* Impossible. Such dst must be popped before reaches point of failure. 
*/ - return; -} - -static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) -{ - if (dst) { - if (dst->obsolete) { - dst_release(dst); - dst = NULL; - } - } - return dst; -} - -static void __xfrm_garbage_collect(void) -{ - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; - - read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (atomic_read(&dst->__refcnt) == 0) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); - } - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - dst = gc_list; - gc_list = dst->next; - dst_free(dst); - } -} - -static int bundle_depends_on(struct dst_entry *dst, struct xfrm_state *x) -{ - do { - if (dst->xfrm == x) - return 1; - } while ((dst = dst->child) != NULL); - return 0; -} - -int xfrm_flush_bundles(struct xfrm_state *x) -{ - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; - - read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (bundle_depends_on(dst, x)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); - } - } - read_unlock_bh(&xfrm_policy_lock); - - while (gc_list) { - dst = gc_list; - gc_list = dst->next; - dst_free(dst); - } - - return 0; -} - -/* Well... that's _TASK_. We need to scan through transformation - * list and figure out what mss tcp should generate in order to - * final datagram fit to mtu. Mama mia... :-) - * - * Apparently, some easy way exists, but we used to choose the most - * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta. 
- * - * Consider this function as something like dark humour. :-) - */ -static int xfrm_get_mss(struct dst_entry *dst, u32 mtu) -{ - int res = mtu - dst->header_len; - - for (;;) { - struct dst_entry *d = dst; - int m = res; - - do { - struct xfrm_state *x = d->xfrm; - if (x) { - spin_lock_bh(&x->lock); - if (x->km.state == XFRM_STATE_VALID && - x->type && x->type->get_max_size) - m = x->type->get_max_size(d->xfrm, m); - else - m += x->props.header_len; - spin_unlock_bh(&x->lock); - } - } while ((d = d->child) != NULL); - - if (m <= mtu) - break; - res -= (m - mtu); - if (res < 88) - return mtu; - } - - return res + dst->header_len; -} - -int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; - write_lock(&xfrm_policy_afinfo_lock); - if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) - err = -ENOBUFS; - else { - struct dst_ops *dst_ops = afinfo->dst_ops; - if (likely(dst_ops->kmem_cachep == NULL)) - dst_ops->kmem_cachep = xfrm_dst_cache; - if (likely(dst_ops->check == NULL)) - dst_ops->check = xfrm_dst_check; - if (likely(dst_ops->destroy == NULL)) - dst_ops->destroy = xfrm_dst_destroy; - if (likely(dst_ops->negative_advice == NULL)) - dst_ops->negative_advice = xfrm_negative_advice; - if (likely(dst_ops->link_failure == NULL)) - dst_ops->link_failure = xfrm_link_failure; - if (likely(dst_ops->get_mss == NULL)) - dst_ops->get_mss = xfrm_get_mss; - if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = __xfrm_garbage_collect; - xfrm_policy_afinfo[afinfo->family] = afinfo; - } - write_unlock(&xfrm_policy_afinfo_lock); - return err; -} - -int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; - write_lock(&xfrm_policy_afinfo_lock); - if 
(likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) - err = -EINVAL; - else { - struct dst_ops *dst_ops = afinfo->dst_ops; - xfrm_policy_afinfo[afinfo->family] = NULL; - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->destroy = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - dst_ops->get_mss = NULL; - afinfo->garbage_collect = NULL; - } - } - write_unlock(&xfrm_policy_afinfo_lock); - return err; -} - -struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) -{ - struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - read_lock(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; - if (likely(afinfo != NULL)) - read_lock(&afinfo->lock); - read_unlock(&xfrm_policy_afinfo_lock); - return afinfo; -} - -void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - if (unlikely(afinfo == NULL)) - return; - read_unlock(&afinfo->lock); -} - -void __init xfrm_policy_init(void) -{ - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", - sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!xfrm_dst_cache) - panic("XFRM: failed to allocate xfrm_dst_cache\n"); -} - -void __init xfrm_init(void) -{ - xfrm_state_init(); - flow_cache_init(); - xfrm_policy_init(); -} - diff -Nru a/net/ipv4/xfrm_state.c b/net/ipv4/xfrm_state.c --- a/net/ipv4/xfrm_state.c Tue Mar 25 18:36:42 2003 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,728 +0,0 @@ -/* - * xfrm_state.c - * - * Changes: - * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro - * IPv6 support - * YOSHIFUJI Hideaki @USAGI - * Split up af-specific functions - * - */ - -#include -#include -#include - -/* Each xfrm_state may be linked to two tables: - - 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) - 2. Hash table by daddr to find what SAs exist for given - destination/tunnel endpoint. 
(output) - */ - -static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED; - -/* Hash table to find appropriate SA towards given target (endpoint - * of tunnel or destination of transport mode) allowed by selector. - * - * Main use is finding SA after policy selected tunnel or transport mode. - * Also, it can be used by ah/esp icmp error handler to find offending SA. - */ -static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; - -DECLARE_WAIT_QUEUE_HEAD(km_waitq); - -static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED; -static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; - -static void __xfrm_state_delete(struct xfrm_state *x); - -static inline unsigned long make_jiffies(long secs) -{ - if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) - return MAX_SCHEDULE_TIMEOUT-1; - else - return secs*HZ; -} - -static void xfrm_timer_handler(unsigned long data) -{ - struct xfrm_state *x = (struct xfrm_state*)data; - unsigned long now = (unsigned long)xtime.tv_sec; - long next = LONG_MAX; - int warn = 0; - - spin_lock(&x->lock); - if (x->km.state == XFRM_STATE_DEAD) - goto out; - if (x->km.state == XFRM_STATE_EXPIRED) - goto expired; - if (x->lft.hard_add_expires_seconds) { - long tmo = x->lft.hard_add_expires_seconds + - x->curlft.add_time - now; - if (tmo <= 0) - goto expired; - if (tmo < next) - next = tmo; - } - if (x->lft.hard_use_expires_seconds && x->curlft.use_time) { - long tmo = x->lft.hard_use_expires_seconds + - x->curlft.use_time - now; - if (tmo <= 0) - goto expired; - if (tmo < next) - next = tmo; - } - if (x->km.dying) - goto resched; - if (x->lft.soft_add_expires_seconds) { - long tmo = x->lft.soft_add_expires_seconds + - x->curlft.add_time - now; - if (tmo <= 0) - warn = 1; - else if (tmo < next) - next = tmo; - } - if (x->lft.soft_use_expires_seconds && x->curlft.use_time) { - long tmo = x->lft.soft_use_expires_seconds + - x->curlft.use_time - now; - if (tmo <= 0) - warn = 1; - else if (tmo < 
next) - next = tmo; - } - - if (warn) - km_warn_expired(x); -resched: - if (next != LONG_MAX && - !mod_timer(&x->timer, jiffies + make_jiffies(next))) - atomic_inc(&x->refcnt); - goto out; - -expired: - if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { - x->km.state = XFRM_STATE_EXPIRED; - wake_up(&km_waitq); - next = 2; - goto resched; - } - if (x->id.spi != 0) - km_expired(x); - __xfrm_state_delete(x); - -out: - spin_unlock(&x->lock); - xfrm_state_put(x); -} - -struct xfrm_state *xfrm_state_alloc(void) -{ - struct xfrm_state *x; - - x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC); - - if (x) { - memset(x, 0, sizeof(struct xfrm_state)); - atomic_set(&x->refcnt, 1); - INIT_LIST_HEAD(&x->bydst); - INIT_LIST_HEAD(&x->byspi); - init_timer(&x->timer); - x->timer.function = xfrm_timer_handler; - x->timer.data = (unsigned long)x; - x->curlft.add_time = (unsigned long)xtime.tv_sec; - x->lft.soft_byte_limit = XFRM_INF; - x->lft.soft_packet_limit = XFRM_INF; - x->lft.hard_byte_limit = XFRM_INF; - x->lft.hard_packet_limit = XFRM_INF; - x->lock = SPIN_LOCK_UNLOCKED; - } - return x; -} - -void __xfrm_state_destroy(struct xfrm_state *x) -{ - BUG_TRAP(x->km.state == XFRM_STATE_DEAD); - if (del_timer(&x->timer)) - BUG(); - if (x->aalg) - kfree(x->aalg); - if (x->ealg) - kfree(x->ealg); - if (x->calg) - kfree(x->calg); - if (x->type) - xfrm_put_type(x->type); - kfree(x); -} - -static void __xfrm_state_delete(struct xfrm_state *x) -{ - int kill = 0; - - if (x->km.state != XFRM_STATE_DEAD) { - x->km.state = XFRM_STATE_DEAD; - kill = 1; - spin_lock(&xfrm_state_lock); - list_del(&x->bydst); - atomic_dec(&x->refcnt); - if (x->id.spi) { - list_del(&x->byspi); - atomic_dec(&x->refcnt); - } - spin_unlock(&xfrm_state_lock); - if (del_timer(&x->timer)) - atomic_dec(&x->refcnt); - if (atomic_read(&x->refcnt) != 1) - xfrm_flush_bundles(x); - } - - if (kill && x->type) - x->type->destructor(x); - wake_up(&km_waitq); -} - -void xfrm_state_delete(struct xfrm_state *x) -{ - 
spin_lock_bh(&x->lock); - __xfrm_state_delete(x); - spin_unlock_bh(&x->lock); -} - -void xfrm_state_flush(u8 proto) -{ - int i; - struct xfrm_state *x; - - spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { -restart: - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) { - atomic_inc(&x->refcnt); - spin_unlock_bh(&xfrm_state_lock); - - xfrm_state_delete(x); - xfrm_state_put(x); - - spin_lock_bh(&xfrm_state_lock); - goto restart; - } - } - } - spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); -} - -static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) -{ - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); - xfrm_state_put_afinfo(afinfo); - return 0; -} - -struct xfrm_state * -xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, - struct flowi *fl, struct xfrm_tmpl *tmpl, - struct xfrm_policy *pol, int *err, - unsigned short family) -{ - unsigned h = xfrm_dst_hash(daddr, family); - struct xfrm_state *x; - int acquire_in_progress = 0; - int error = 0; - struct xfrm_state *best = NULL; - - spin_lock_bh(&xfrm_state_lock); - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { - if (x->props.family == family && - x->props.reqid == tmpl->reqid && - xfrm_state_addr_check(x, daddr, saddr, family) && - tmpl->mode == x->props.mode && - tmpl->id.proto == x->id.proto) { - /* Resolution logic: - 1. There is a valid state with matching selector. - Done. - 2. Valid state with inappropriate selector. Skip. - - Entering area of "sysdeps". - - 3. If state is not valid, selector is temporary, - it selects only session which triggered - previous resolution. Key manager will do - something to install a state with proper - selector. 
- */ - if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, family)) - continue; - if (!best || - best->km.dying > x->km.dying || - (best->km.dying == x->km.dying && - best->curlft.add_time < x->curlft.add_time)) - best = x; - } else if (x->km.state == XFRM_STATE_ACQ) { - acquire_in_progress = 1; - } else if (x->km.state == XFRM_STATE_ERROR || - x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, family)) - error = 1; - } - } - } - - if (best) { - atomic_inc(&best->refcnt); - spin_unlock_bh(&xfrm_state_lock); - return best; - } - - x = NULL; - if (!error && !acquire_in_progress && - ((x = xfrm_state_alloc()) != NULL)) { - /* Initialize temporary selector matching only - * to current session. */ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); - - if (km_query(x, tmpl, pol) == 0) { - x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->bydst, xfrm_state_bydst+h); - atomic_inc(&x->refcnt); - if (x->id.spi) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - list_add(&x->byspi, xfrm_state_byspi+h); - atomic_inc(&x->refcnt); - } - x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - atomic_inc(&x->refcnt); - mod_timer(&x->timer, XFRM_ACQ_EXPIRES*HZ); - } else { - x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); - x = NULL; - error = 1; - } - } - spin_unlock_bh(&xfrm_state_lock); - if (!x) - *err = acquire_in_progress ? -EAGAIN : - (error ? 
-ESRCH : -ENOMEM); - return x; -} - -void xfrm_state_insert(struct xfrm_state *x) -{ - unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); - - spin_lock_bh(&xfrm_state_lock); - list_add(&x->bydst, xfrm_state_bydst+h); - atomic_inc(&x->refcnt); - - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - - list_add(&x->byspi, xfrm_state_byspi+h); - atomic_inc(&x->refcnt); - - if (!mod_timer(&x->timer, jiffies + HZ)) - atomic_inc(&x->refcnt); - - spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); -} - -int xfrm_state_check_expire(struct xfrm_state *x) -{ - if (!x->curlft.use_time) - x->curlft.use_time = (unsigned long)xtime.tv_sec; - - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - - if (x->curlft.bytes >= x->lft.hard_byte_limit || - x->curlft.packets >= x->lft.hard_packet_limit) { - km_expired(x); - if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ)) - atomic_inc(&x->refcnt); - return -EINVAL; - } - - if (!x->km.dying && - (x->curlft.bytes >= x->lft.soft_byte_limit || - x->curlft.packets >= x->lft.soft_packet_limit)) - km_warn_expired(x); - return 0; -} - -int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) -{ - int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) - - skb_headroom(skb); - - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); - - /* Check tail too... 
*/ - return 0; -} - -struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, - unsigned short family) -{ - struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; - - spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup(daddr, spi, proto); - spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); - return x; -} - -struct xfrm_state * -xfrm_find_acq(u8 mode, u16 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create, unsigned short family) -{ - struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; - - spin_lock_bh(&xfrm_state_lock); - x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); - spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); - return x; -} - -/* Silly enough, but I'm lazy to build resolution list */ - -struct xfrm_state * xfrm_find_acq_byseq(u32 seq) -{ - int i; - struct xfrm_state *x; - - spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq) { - atomic_inc(&x->refcnt); - spin_unlock_bh(&xfrm_state_lock); - return x; - } - } - } - spin_unlock_bh(&xfrm_state_lock); - return NULL; -} - - -void -xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) -{ - u32 h; - struct xfrm_state *x0; - - if (x->id.spi) - return; - - if (minspi == maxspi) { - x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); - if (x0) { - xfrm_state_put(x0); - return; - } - x->id.spi = minspi; - } else { - u32 spi = 0; - minspi = ntohl(minspi); - maxspi = ntohl(maxspi); - for (h=0; hid.daddr, minspi, x->id.proto, x->props.family); - if (x0 == NULL) - break; - xfrm_state_put(x0); - } - x->id.spi = htonl(spi); - } - if (x->id.spi) { - spin_lock_bh(&xfrm_state_lock); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - 
list_add(&x->byspi, xfrm_state_byspi+h); - atomic_inc(&x->refcnt); - spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); - } -} - -int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), - void *data) -{ - int i; - struct xfrm_state *x; - int count = 0; - int err = 0; - - spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) - count++; - } - } - if (count == 0) { - err = -ENOENT; - goto out; - } - - for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) - continue; - err = func(x, --count, data); - if (err) - goto out; - } - } -out: - spin_unlock_bh(&xfrm_state_lock); - return err; -} - - -int xfrm_replay_check(struct xfrm_state *x, u32 seq) -{ - u32 diff; - - seq = ntohl(seq); - - if (unlikely(seq == 0)) - return -EINVAL; - - if (likely(seq > x->replay.seq)) - return 0; - - diff = x->replay.seq - seq; - if (diff >= x->props.replay_window) { - x->stats.replay_window++; - return -EINVAL; - } - - if (x->replay.bitmap & (1U << diff)) { - x->stats.replay++; - return -EINVAL; - } - return 0; -} - -void xfrm_replay_advance(struct xfrm_state *x, u32 seq) -{ - u32 diff; - - seq = ntohl(seq); - - if (seq > x->replay.seq) { - diff = seq - x->replay.seq; - if (diff < x->props.replay_window) - x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; - else - x->replay.bitmap = 1; - x->replay.seq = seq; - } else { - diff = x->replay.seq - seq; - x->replay.bitmap |= (1U << diff); - } -} - -int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl) -{ - int i; - - for (i=0; isel, fl, x[i]->props.family); - if (!match) - return -EINVAL; - } - return 0; -} - -static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); -static rwlock_t xfrm_km_lock = RW_LOCK_UNLOCKED; - -void km_warn_expired(struct xfrm_state *x) -{ - 
struct xfrm_mgr *km; - - x->km.dying = 1; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) - km->notify(x, 0); - read_unlock(&xfrm_km_lock); -} - -void km_expired(struct xfrm_state *x) -{ - struct xfrm_mgr *km; - - x->km.state = XFRM_STATE_EXPIRED; - - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) - km->notify(x, 1); - read_unlock(&xfrm_km_lock); - wake_up(&km_waitq); -} - -int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) -{ - int err = -EINVAL; - struct xfrm_mgr *km; - - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { - err = km->acquire(x, t, pol, XFRM_POLICY_OUT); - if (!err) - break; - } - read_unlock(&xfrm_km_lock); - return err; -} - -int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen) -{ - int err; - u8 *data; - struct xfrm_mgr *km; - struct xfrm_policy *pol = NULL; - - if (optlen <= 0 || optlen > PAGE_SIZE) - return -EMSGSIZE; - - data = kmalloc(optlen, GFP_KERNEL); - if (!data) - return -ENOMEM; - - err = -EFAULT; - if (copy_from_user(data, optval, optlen)) - goto out; - - err = -EINVAL; - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->family, optname, data, optlen, &err); - if (err >= 0) - break; - } - read_unlock(&xfrm_km_lock); - - if (err >= 0) { - xfrm_sk_policy_insert(sk, err, pol); - err = 0; - } - -out: - kfree(data); - return err; -} - -int xfrm_register_km(struct xfrm_mgr *km) -{ - write_lock_bh(&xfrm_km_lock); - list_add_tail(&km->list, &xfrm_km_list); - write_unlock_bh(&xfrm_km_lock); - return 0; -} - -int xfrm_unregister_km(struct xfrm_mgr *km) -{ - write_lock_bh(&xfrm_km_lock); - list_del(&km->list); - write_unlock_bh(&xfrm_km_lock); - return 0; -} - -int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) -{ - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; - 
write_lock(&xfrm_state_afinfo_lock); - if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) - err = -ENOBUFS; - else { - afinfo->state_bydst = xfrm_state_bydst; - afinfo->state_byspi = xfrm_state_byspi; - xfrm_state_afinfo[afinfo->family] = afinfo; - } - write_unlock(&xfrm_state_afinfo_lock); - return err; -} - -int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) -{ - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; - write_lock(&xfrm_state_afinfo_lock); - if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) - err = -EINVAL; - else { - xfrm_state_afinfo[afinfo->family] = NULL; - afinfo->state_byspi = NULL; - afinfo->state_bydst = NULL; - } - } - write_unlock(&xfrm_state_afinfo_lock); - return err; -} - -struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) -{ - struct xfrm_state_afinfo *afinfo; - if (unlikely(family >= NPROTO)) - return NULL; - read_lock(&xfrm_state_afinfo_lock); - afinfo = xfrm_state_afinfo[family]; - if (likely(afinfo != NULL)) - read_lock(&afinfo->lock); - read_unlock(&xfrm_state_afinfo_lock); - return afinfo; -} - -void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) -{ - if (unlikely(afinfo == NULL)) - return; - read_unlock(&afinfo->lock); -} - -void __init xfrm_state_init(void) -{ - int i; - - for (i=0; i -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct sock *xfrm_nl; - -static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) -{ - struct rtattr *rt = xfrma[type - 1]; - struct xfrm_algo *algp; - - if (!rt) - return 0; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp)) - return -EINVAL; - - algp = RTA_DATA(rt); - switch (type) { - case XFRMA_ALG_AUTH: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "digest_null") != 
0) - return -EINVAL; - break; - - case XFRMA_ALG_CRYPT: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "cipher_null") != 0) - return -EINVAL; - break; - - case XFRMA_ALG_COMP: - /* Zero length keys are legal. */ - break; - - default: - return -EINVAL; - }; - - algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; - return 0; -} - -static int verify_newsa_info(struct xfrm_usersa_info *p, - struct rtattr **xfrma) -{ - int err; - - err = -EINVAL; - switch (p->family) { - case AF_INET: - break; - - case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - break; -#else - err = -EAFNOSUPPORT; - goto out; -#endif - - default: - goto out; - }; - - err = -EINVAL; - switch (p->id.proto) { - case IPPROTO_AH: - if (!xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ALG_COMP-1]) - goto out; - break; - - case IPPROTO_ESP: - if ((!xfrma[XFRMA_ALG_AUTH-1] && - !xfrma[XFRMA_ALG_CRYPT-1]) || - xfrma[XFRMA_ALG_COMP-1]) - goto out; - break; - - case IPPROTO_COMP: - if (!xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1]) - goto out; - break; - - default: - goto out; - }; - - if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) - goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT))) - goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP))) - goto out; - - err = -EINVAL; - switch (p->mode) { - case 0: - case 1: - break; - - default: - goto out; - }; - - err = 0; - -out: - return err; -} - -static int attach_one_algo(struct xfrm_algo **algpp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - struct xfrm_algo *p, *ualg; - - if (!rta) - return 0; - - ualg = RTA_DATA(rta); - p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); - if (!p) - return -ENOMEM; - - memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len); - *algpp = p; - return 0; -} - -static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) -{ - memcpy(&x->id, &p->id, sizeof(x->id)); - memcpy(&x->sel, &p->sel, 
sizeof(x->sel)); - memcpy(&x->lft, &p->lft, sizeof(x->lft)); - x->props.mode = p->mode; - x->props.replay_window = p->replay_window; - x->props.reqid = p->reqid; - x->props.family = p->family; - x->props.saddr = x->sel.saddr; -} - -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, - struct rtattr **xfrma, - int *errp) -{ - struct xfrm_state *x = xfrm_state_alloc(); - int err = -ENOMEM; - - if (!x) - goto error_no_put; - - copy_from_user_state(x, p); - - if ((err = attach_one_algo(&x->aalg, xfrma[XFRMA_ALG_AUTH-1]))) - goto error; - if ((err = attach_one_algo(&x->ealg, xfrma[XFRMA_ALG_CRYPT-1]))) - goto error; - if ((err = attach_one_algo(&x->calg, xfrma[XFRMA_ALG_COMP-1]))) - goto error; - - err = -ENOENT; - x->type = xfrm_get_type(x->id.proto, x->props.family); - if (x->type == NULL) - goto error; - - err = x->type->init_state(x, NULL); - if (err) - goto error; - - x->curlft.add_time = (unsigned long) xtime.tv_sec; - x->km.state = XFRM_STATE_VALID; - x->km.seq = p->seq; - - return x; - -error: - xfrm_state_put(x); -error_no_put: - *errp = err; - return NULL; -} - -static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) -{ - struct xfrm_usersa_info *p = NLMSG_DATA(nlh); - struct xfrm_state *x, *x1; - int err; - - err = verify_newsa_info(p, (struct rtattr **) xfrma); - if (err) - return err; - - x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err); - if (!x) - return err; - - x1 = xfrm_state_lookup(&x->props.saddr, x->id.spi, x->id.proto, x->props.family); - if (x1) { - xfrm_state_put(x); - xfrm_state_put(x1); - return -EEXIST; - } - - xfrm_state_insert(x); - - return 0; -} - -static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) -{ - struct xfrm_state *x; - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - - x = xfrm_state_lookup(&p->saddr, p->spi, p->proto, p->family); - if (x == NULL) - return -ESRCH; - - xfrm_state_delete(x); - xfrm_state_put(x); - - return 0; -} - -static void 
copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) -{ - memcpy(&p->id, &x->id, sizeof(p->id)); - memcpy(&p->sel, &x->sel, sizeof(p->sel)); - memcpy(&p->lft, &x->lft, sizeof(p->lft)); - memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); - memcpy(&p->stats, &x->stats, sizeof(p->stats)); - p->mode = x->props.mode; - p->replay_window = x->props.replay_window; - p->reqid = x->props.reqid; - p->family = x->props.family; - p->seq = x->km.seq; -} - -struct xfrm_dump_info { - struct sk_buff *in_skb; - struct sk_buff *out_skb; - u32 nlmsg_seq; - int start_idx; - int this_idx; -}; - -static int dump_one_state(struct xfrm_state *x, int count, void *ptr) -{ - struct xfrm_dump_info *sp = ptr; - struct sk_buff *in_skb = sp->in_skb; - struct sk_buff *skb = sp->out_skb; - struct xfrm_usersa_info *p; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - if (sp->this_idx < sp->start_idx) - goto out; - - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWSA, sizeof(*p)); - nlh->nlmsg_flags = 0; - - p = NLMSG_DATA(nlh); - copy_to_user_state(x, p); - - if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); - if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); - if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); - - nlh->nlmsg_len = skb->tail - b; -out: - sp->this_idx++; - return 0; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct xfrm_dump_info info; - - info.in_skb = cb->skb; - info.out_skb = skb; - info.nlmsg_seq = cb->nlh->nlmsg_seq; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); - cb->args[0] = info.this_idx; - - return skb->len; -} - -static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, - struct xfrm_state 
*x, u32 seq) -{ - struct xfrm_dump_info info; - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); - if (!skb) - return ERR_PTR(-ENOMEM); - - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - info.in_skb = in_skb; - info.out_skb = skb; - info.nlmsg_seq = seq; - info.this_idx = info.start_idx = 0; - - if (dump_one_state(x, 0, &info)) { - kfree_skb(skb); - return NULL; - } - - return skb; -} - -static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) -{ - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - struct xfrm_state *x; - struct sk_buff *resp_skb; - int err; - - x = xfrm_state_lookup(&p->saddr, p->spi, p->proto, p->family); - err = -ESRCH; - if (x == NULL) - goto out_noput; - - resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); - if (IS_ERR(resp_skb)) { - err = PTR_ERR(resp_skb); - } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); - } - xfrm_state_put(x); -out_noput: - return err; -} - -static int verify_userspi_info(struct xfrm_userspi_info *p) -{ - switch (p->info.id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - break; - - case IPPROTO_COMP: - /* IPCOMP spi is 16-bits. 
*/ - if (p->min >= 0x10000 || - p->max >= 0x10000) - return -EINVAL; - - default: - return -EINVAL; - }; - - if (p->min > p->max) - return -EINVAL; - - return 0; -} - -static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) -{ - struct xfrm_state *x; - struct xfrm_userspi_info *p; - struct sk_buff *resp_skb; - int err; - - p = NLMSG_DATA(nlh); - err = verify_userspi_info(p); - if (err) - goto out_noput; - x = xfrm_find_acq(p->info.mode, p->info.reqid, p->info.id.proto, - &p->info.sel.daddr, - &p->info.sel.saddr, 1, - p->info.family); - err = -ENOENT; - if (x == NULL) - goto out_noput; - - resp_skb = ERR_PTR(-ENOENT); - - spin_lock_bh(&x->lock); - if (x->km.state != XFRM_STATE_DEAD) { - xfrm_alloc_spi(x, p->min, p->max); - if (x->id.spi) - resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); - } - spin_unlock_bh(&x->lock); - - if (IS_ERR(resp_skb)) { - err = PTR_ERR(resp_skb); - goto out; - } - - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); - -out: - xfrm_state_put(x); -out_noput: - return err; -} - -static int verify_policy_dir(__u8 dir) -{ - switch (dir) { - case XFRM_POLICY_IN: - case XFRM_POLICY_OUT: - case XFRM_POLICY_FWD: - break; - - default: - return -EINVAL; - }; - - return 0; -} - -static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) -{ - switch (p->share) { - case XFRM_SHARE_ANY: - case XFRM_SHARE_SESSION: - case XFRM_SHARE_USER: - case XFRM_SHARE_UNIQUE: - break; - - default: - return -EINVAL; - }; - - switch (p->action) { - case XFRM_POLICY_ALLOW: - case XFRM_POLICY_BLOCK: - break; - - default: - return -EINVAL; - }; - - switch (p->family) { - case AF_INET: - break; - - case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - break; -#else - return -EAFNOSUPPORT; -#endif - - default: - return -EINVAL; - }; - - return verify_policy_dir(p->dir); -} - -static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, - int nr) -{ - int i; - - 
xp->xfrm_nr = nr; - for (i = 0; i < nr; i++, ut++) { - struct xfrm_tmpl *t = &xp->xfrm_vec[i]; - - memcpy(&t->id, &ut->id, sizeof(struct xfrm_id)); - memcpy(&t->saddr, &ut->saddr, - sizeof(xfrm_address_t)); - t->reqid = ut->reqid; - t->mode = ut->mode; - t->share = ut->share; - t->optional = ut->optional; - t->aalgos = ut->aalgos; - t->ealgos = ut->ealgos; - t->calgos = ut->calgos; - } -} - -static int copy_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; - struct xfrm_user_tmpl *utmpl; - int nr; - - if (!rt) { - pol->xfrm_nr = 0; - } else { - nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); - - if (nr > XFRM_MAX_DEPTH) - return -EINVAL; - - copy_templates(pol, RTA_DATA(rt), nr); - } - return 0; -} - -static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) -{ - xp->priority = p->priority; - xp->index = p->index; - memcpy(&xp->selector, &p->sel, sizeof(xp->selector)); - memcpy(&xp->lft, &p->lft, sizeof(xp->lft)); - xp->action = p->action; - xp->flags = p->flags; - xp->family = p->family; - /* XXX xp->share = p->share; */ -} - -static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) -{ - memcpy(&p->sel, &xp->selector, sizeof(p->sel)); - memcpy(&p->lft, &xp->lft, sizeof(p->lft)); - memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); - p->priority = xp->priority; - p->index = xp->index; - p->family = xp->family; - p->dir = dir; - p->action = xp->action; - p->flags = xp->flags; - p->share = XFRM_SHARE_ANY; /* XXX xp->share */ -} - -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp) -{ - struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); - int err; - - if (!xp) { - *errp = -ENOMEM; - return NULL; - } - - copy_from_user_policy(xp, p); - err = copy_user_tmpl(xp, xfrma); - if (err) { - *errp = err; - kfree(xp); - xp = NULL; - } - - return xp; -} - -static int 
xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) -{ - struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); - struct xfrm_policy *xp; - int err; - - err = verify_newpolicy_info(p); - if (err) - return err; - - xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err); - if (!xp) - return err; - - err = xfrm_policy_insert(p->dir, xp, 1); - if (err) { - kfree(xp); - return err; - } - - xfrm_pol_put(xp); - - return 0; -} - -static int xfrm_del_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) -{ - struct xfrm_policy *xp; - struct xfrm_userpolicy_id *p; - int err; - - p = NLMSG_DATA(nlh); - - err = verify_policy_dir(p->dir); - if (err) - return err; - - xp = xfrm_policy_delete(p->dir, &p->sel); - if (xp == NULL) - return -ENOENT; - xfrm_policy_kill(xp); - xfrm_pol_put(xp); - return 0; -} - -static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) -{ - struct xfrm_dump_info *sp = ptr; - struct xfrm_userpolicy_info *p; - struct sk_buff *in_skb = sp->in_skb; - struct sk_buff *skb = sp->out_skb; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - if (sp->this_idx < sp->start_idx) - goto out; - - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWPOLICY, sizeof(*p)); - p = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; - - copy_to_user_policy(xp, p, dir); - - if (xp->xfrm_nr) { - struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH]; - int i; - - for (i = 0; i < xp->xfrm_nr; i++) { - struct xfrm_user_tmpl *up = &vec[i]; - struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; - - memcpy(&up->id, &kp->id, sizeof(up->id)); - memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); - up->reqid = kp->reqid; - up->mode = kp->mode; - up->share = kp->share; - up->optional = kp->optional; - up->aalgos = kp->aalgos; - up->ealgos = kp->ealgos; - up->calgos = kp->calgos; - } - RTA_PUT(skb, XFRMA_TMPL, - (sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), - vec); - } - - nlh->nlmsg_len = skb->tail - b; -out: - sp->this_idx++; - 
return 0; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct xfrm_dump_info info; - - info.in_skb = cb->skb; - info.out_skb = skb; - info.nlmsg_seq = cb->nlh->nlmsg_seq; - info.this_idx = 0; - info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(dump_one_policy, &info); - cb->args[0] = info.this_idx; - - return skb->len; -} - -static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, - struct xfrm_policy *xp, - int dir, u32 seq) -{ - struct xfrm_dump_info info; - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - info.in_skb = in_skb; - info.out_skb = skb; - info.nlmsg_seq = seq; - info.this_idx = info.start_idx = 0; - - if (dump_one_policy(xp, dir, 0, &info) < 0) { - kfree_skb(skb); - return NULL; - } - - return skb; -} - -static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) -{ - struct xfrm_policy *xp; - struct xfrm_userpolicy_id *p; - struct sk_buff *resp_skb; - int err; - - p = NLMSG_DATA(nlh); - xp = xfrm_policy_byid(p->dir, p->index, 0); - if (xp == NULL) - return -ENOENT; - - resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq); - if (IS_ERR(resp_skb)) { - err = PTR_ERR(resp_skb); - } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); - } - - xfrm_pol_put(xp); - - return err; -} - -static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = { - NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)), /* NEW SA */ - NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* DEL SA */ - NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* GET SA */ - NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */ - NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* DEL POLICY */ - NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* GET POLICY */ - 
NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)), /* ALLOC SPI */ - NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)), /* ACQUIRE */ - NLMSG_LENGTH(sizeof(struct xfrm_user_expire)), /* EXPIRE */ -}; - -static struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, void **); - int (*dump)(struct sk_buff *, struct netlink_callback *); -} xfrm_dispatch[] = { - { .doit = xfrm_add_sa, }, - { .doit = xfrm_del_sa, }, - { - .doit = xfrm_get_sa, - .dump = xfrm_dump_sa, - }, - { .doit = xfrm_add_policy }, - { .doit = xfrm_del_policy }, - { - .doit = xfrm_get_policy, - .dump = xfrm_dump_policy, - }, - { .doit = xfrm_alloc_userspi }, -}; - -static int xfrm_done(struct netlink_callback *cb) -{ - return 0; -} - -static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) -{ - struct rtattr *xfrma[XFRMA_MAX]; - struct xfrm_link *link; - int type, min_len; - - if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) - return 0; - - type = nlh->nlmsg_type; - - /* A control message: ignore them */ - if (type < XFRM_MSG_BASE) - return 0; - - /* Unknown message: reply with EINVAL */ - if (type > XFRM_MSG_MAX) - goto err_einval; - - type -= XFRM_MSG_BASE; - link = &xfrm_dispatch[type]; - - /* All operations require privileges, even GET */ - if (security_netlink_recv(skb)) { - *errp = -EPERM; - return -1; - } - - if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) { - u32 rlen; - - if (link->dump == NULL) - goto err_einval; - - if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, - link->dump, - xfrm_done)) != 0) { - return -1; - } - rlen = NLMSG_ALIGN(nlh->nlmsg_len); - if (rlen > skb->len) - rlen = skb->len; - skb_pull(skb, rlen); - return -1; - } - - memset(xfrma, 0, sizeof(xfrma)); - - if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) - goto err_einval; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned 
short flavor = attr->rta_type; - if (flavor) { - if (flavor > XFRMA_MAX) - goto err_einval; - xfrma[flavor - 1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } - - if (link->doit == NULL) - goto err_einval; - *errp = link->doit(skb, nlh, (void **) &xfrma); - - return *errp; - -err_einval: - *errp = -EINVAL; - return -1; -} - -static int xfrm_user_rcv_skb(struct sk_buff *skb) -{ - int err; - struct nlmsghdr *nlh; - - while (skb->len >= NLMSG_SPACE(0)) { - u32 rlen; - - nlh = (struct nlmsghdr *) skb->data; - if (nlh->nlmsg_len < sizeof(*nlh) || - skb->len < nlh->nlmsg_len) - return 0; - rlen = NLMSG_ALIGN(nlh->nlmsg_len); - if (rlen > skb->len) - rlen = skb->len; - if (xfrm_user_rcv_msg(skb, nlh, &err)) { - if (err == 0) - return -1; - netlink_ack(skb, nlh, err); - } else if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(skb, nlh, 0); - skb_pull(skb, rlen); - } - - return 0; -} - -static void xfrm_netlink_rcv(struct sock *sk, int len) -{ - do { - struct sk_buff *skb; - - down(&xfrm_cfg_sem); - - while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { - if (xfrm_user_rcv_skb(skb)) { - if (skb->len) - skb_queue_head(&sk->receive_queue, skb); - else - kfree_skb(skb); - break; - } - kfree_skb(skb); - } - - up(&xfrm_cfg_sem); - - } while (xfrm_nl && xfrm_nl->receive_queue.qlen); -} - -static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard) -{ - struct xfrm_user_expire *ue; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE, - sizeof(*ue)); - ue = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; - - copy_to_user_state(x, &ue->state); - ue->hard = (hard != 0) ? 
1 : 0; - - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -static int xfrm_send_notify(struct xfrm_state *x, int hard) -{ - struct sk_buff *skb; - - skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC); - if (skb == NULL) - return -ENOMEM; - - if (build_expire(skb, x, hard) < 0) - BUG(); - - NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); -} - -/* XXX Make this xfrm_state.c:xfrm_get_acqseq() */ -static u32 get_acqseq(void) -{ - u32 res; - static u32 acqseq; - static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED; - - spin_lock_bh(&acqseq_lock); - res = (++acqseq ? : ++acqseq); - spin_unlock_bh(&acqseq_lock); - return res; -} - -static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, - struct xfrm_tmpl *xt, struct xfrm_policy *xp, - int dir) -{ - struct xfrm_user_acquire *ua; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - __u32 seq = get_acqseq(); - - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, - sizeof(*ua)); - ua = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; - - memcpy(&ua->id, &x->id, sizeof(ua->id)); - memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); - copy_to_user_policy(xp, &ua->policy, dir); - ua->aalgos = xt->aalgos; - ua->ealgos = xt->ealgos; - ua->calgos = xt->calgos; - ua->seq = x->km.seq = seq; - - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, - struct xfrm_policy *xp, int dir) -{ - struct sk_buff *skb; - - skb = alloc_skb(sizeof(struct xfrm_user_acquire) + 16, GFP_ATOMIC); - if (skb == NULL) - return -ENOMEM; - - if (build_acquire(skb, x, xt, xp, dir) < 0) - BUG(); - - NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC); -} - -/* User gives us 
xfrm_user_policy_info followed by an array of 0 - * or more templates. - */ -struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, - u8 *data, int len, int *dir) -{ - struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; - struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); - struct xfrm_policy *xp; - int nr; - - switch (family) { - case AF_INET: - if (opt != IP_XFRM_POLICY) { - *dir = -EOPNOTSUPP; - return NULL; - } - break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case AF_INET6: - if (opt != IPV6_XFRM_POLICY) { - *dir = -EOPNOTSUPP; - return NULL; - } - break; -#endif - default: - *dir = -EINVAL; - return NULL; - } - - *dir = -EINVAL; - - if (len < sizeof(*p) || - verify_newpolicy_info(p)) - return NULL; - - nr = ((len - sizeof(*p)) / sizeof(*ut)); - if (nr > XFRM_MAX_DEPTH) - return NULL; - - xp = xfrm_policy_alloc(GFP_KERNEL); - if (xp == NULL) { - *dir = -ENOBUFS; - return NULL; - } - - copy_from_user_policy(xp, p); - copy_templates(xp, ut, nr); - - *dir = p->dir; - - return xp; -} - -static struct xfrm_mgr netlink_mgr = { - .id = "netlink", - .notify = xfrm_send_notify, - .acquire = xfrm_send_acquire, - .compile_policy = xfrm_compile_policy, -}; - -static int __init xfrm_user_init(void) -{ - printk(KERN_INFO "Initializing IPsec netlink socket\n"); - - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); - if (xfrm_nl == NULL) - panic("xfrm_user_init: cannot initialize xfrm_nl\n"); - - - xfrm_register_km(&netlink_mgr); - - return 0; -} - -static void __exit xfrm_user_exit(void) -{ - xfrm_unregister_km(&netlink_mgr); - sock_release(xfrm_nl->socket); -} - -module_init(xfrm_user_init); -module_exit(xfrm_user_exit); diff -Nru a/net/ipv6/udp.c b/net/ipv6/udp.c --- a/net/ipv6/udp.c Tue Mar 25 18:36:42 2003 +++ b/net/ipv6/udp.c Tue Mar 25 18:36:42 2003 @@ -653,9 +653,6 @@ if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto short_packet; - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto 
discard; - saddr = &skb->nh.ipv6h->saddr; daddr = &skb->nh.ipv6h->daddr; uh = skb->h.uh; @@ -713,6 +710,9 @@ sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex); if (sk == NULL) { + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto discard; + if (skb->ip_summed != CHECKSUM_UNNECESSARY && (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) goto discard; diff -Nru a/net/netsyms.c b/net/netsyms.c --- a/net/netsyms.c Tue Mar 25 18:36:42 2003 +++ b/net/netsyms.c Tue Mar 25 18:36:42 2003 @@ -316,6 +316,7 @@ EXPORT_SYMBOL(xfrm_replay_advance); EXPORT_SYMBOL(xfrm_check_selectors); EXPORT_SYMBOL(__secpath_destroy); +EXPORT_SYMBOL(xfrm_get_acqseq); EXPORT_SYMBOL(xfrm_parse_spi); EXPORT_SYMBOL(xfrm4_rcv); EXPORT_SYMBOL(xfrm_register_type); diff -Nru a/net/xfrm/Kconfig b/net/xfrm/Kconfig --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/net/xfrm/Kconfig Tue Mar 25 18:36:42 2003 @@ -0,0 +1,12 @@ +# +# XFRM configuration +# +config XFRM_USER + tristate "IPsec user configuration interface" + depends on INET + ---help--- + Support for IPsec user configuration interface used + by native Linux tools. + + If unsure, say Y. + diff -Nru a/net/xfrm/Makefile b/net/xfrm/Makefile --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/net/xfrm/Makefile Tue Mar 25 18:36:42 2003 @@ -0,0 +1,7 @@ +# +# Makefile for the XFRM subsystem. +# + +obj-y := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM_USER) += xfrm_user.o + diff -Nru a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/net/xfrm/xfrm_algo.c Tue Mar 25 18:36:42 2003 @@ -0,0 +1,771 @@ +/* + * xfrm algorithm interface + * + * Copyright (c) 2002 James Morris + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ */ +#include +#include +#include +#include +#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) +#include +#endif +#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) +#include +#endif +#include + +/* + * Algorithms supported by IPsec. These entries contain properties which + * are used in key negotiation and xfrm processing, and are used to verify + * that instantiated crypto transforms have correct parameters for IPsec + * purposes. + */ +static struct xfrm_algo_desc aalg_list[] = { +{ + .name = "digest_null", + + .uinfo = { + .auth = { + .icv_truncbits = 0, + .icv_fullbits = 0, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_NULL, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 0, + .sadb_alg_maxbits = 0 + } +}, +{ + .name = "md5", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_AALG_MD5HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 128 + } +}, +{ + .name = "sha1", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 160, + } + }, + + .desc = { + .sadb_alg_id = SADB_AALG_SHA1HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 160 + } +}, +{ + .name = "sha256", + + .uinfo = { + .auth = { + .icv_truncbits = 128, + .icv_fullbits = 256, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 256, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "ripemd160", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 160, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 160 + } +}, +}; + +static struct xfrm_algo_desc ealg_list[] = { +{ + .name = "cipher_null", + + .uinfo = { + .encr = { + .blockbits = 8, + .defkeybits = 0, + } + 
}, + + .desc = { + .sadb_alg_id = SADB_EALG_NULL, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 0, + .sadb_alg_maxbits = 0 + } +}, +{ + .name = "des", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 64, + } + }, + + .desc = { + .sadb_alg_id = SADB_EALG_DESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 64, + .sadb_alg_maxbits = 64 + } +}, +{ + .name = "des3_ede", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 192, + } + }, + + .desc = { + .sadb_alg_id = SADB_EALG_3DESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 192, + .sadb_alg_maxbits = 192 + } +}, +{ + .name = "cast128", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_CASTCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 40, + .sadb_alg_maxbits = 128 + } +}, +{ + .name = "blowfish", + + .uinfo = { + .encr = { + .blockbits = 64, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 40, + .sadb_alg_maxbits = 448 + } +}, +{ + .name = "aes", + + .uinfo = { + .encr = { + .blockbits = 128, + .defkeybits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +}; + +static struct xfrm_algo_desc calg_list[] = { +{ + .name = "deflate", + .uinfo = { + .comp = { + .threshold = 90, + } + }, + .desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE } +}, +{ + .name = "lzs", + .uinfo = { + .comp = { + .threshold = 90, + } + }, + .desc = { .sadb_alg_id = SADB_X_CALG_LZS } +}, +{ + .name = "lzjh", + .uinfo = { + .comp = { + .threshold = 50, + } + }, + .desc = { .sadb_alg_id = SADB_X_CALG_LZJH } +}, +}; + +static inline int aalg_entries(void) +{ + return sizeof(aalg_list) / sizeof(aalg_list[0]); +} + +static inline int ealg_entries(void) +{ + return sizeof(ealg_list) / sizeof(ealg_list[0]); +} + +static inline int calg_entries(void) +{ + return sizeof(calg_list) / 
sizeof(calg_list[0]); +} + +/* Todo: generic iterators */ +struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id) +{ + int i; + + for (i = 0; i < aalg_entries(); i++) { + if (aalg_list[i].desc.sadb_alg_id == alg_id) { + if (aalg_list[i].available) + return &aalg_list[i]; + else + break; + } + } + return NULL; +} + +struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id) +{ + int i; + + for (i = 0; i < ealg_entries(); i++) { + if (ealg_list[i].desc.sadb_alg_id == alg_id) { + if (ealg_list[i].available) + return &ealg_list[i]; + else + break; + } + } + return NULL; +} + +struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) +{ + int i; + + for (i = 0; i < calg_entries(); i++) { + if (calg_list[i].desc.sadb_alg_id == alg_id) { + if (calg_list[i].available) + return &calg_list[i]; + else + break; + } + } + return NULL; +} + +struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name) +{ + int i; + + if (!name) + return NULL; + + for (i=0; i < aalg_entries(); i++) { + if (strcmp(name, aalg_list[i].name) == 0) { + if (aalg_list[i].available) + return &aalg_list[i]; + else + break; + } + } + return NULL; +} + +struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name) +{ + int i; + + if (!name) + return NULL; + + for (i=0; i < ealg_entries(); i++) { + if (strcmp(name, ealg_list[i].name) == 0) { + if (ealg_list[i].available) + return &ealg_list[i]; + else + break; + } + } + return NULL; +} + +struct xfrm_algo_desc *xfrm_calg_get_byname(char *name) +{ + int i; + + if (!name) + return NULL; + + for (i=0; i < calg_entries(); i++) { + if (strcmp(name, calg_list[i].name) == 0) { + if (calg_list[i].available) + return &calg_list[i]; + else + break; + } + } + return NULL; +} + +struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx) +{ + if (idx >= aalg_entries()) + return NULL; + + return &aalg_list[idx]; +} + +struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx) +{ + if (idx >= ealg_entries()) + return NULL; + + return &ealg_list[idx]; +} + +struct xfrm_algo_desc 
*xfrm_calg_get_byidx(unsigned int idx) +{ + if (idx >= calg_entries()) + return NULL; + + return &calg_list[idx]; +} + +/* + * Probe for the availability of crypto algorithms, and set the available + * flag for any algorithms found on the system. This is typically called by + * pfkey during userspace SA add, update or register. + */ +void xfrm_probe_algs(void) +{ +#ifdef CONFIG_CRYPTO + int i, status; + + BUG_ON(in_softirq()); + + for (i = 0; i < aalg_entries(); i++) { + status = crypto_alg_available(aalg_list[i].name, 0); + if (aalg_list[i].available != status) + aalg_list[i].available = status; + } + + for (i = 0; i < ealg_entries(); i++) { + status = crypto_alg_available(ealg_list[i].name, 0); + if (ealg_list[i].available != status) + ealg_list[i].available = status; + } + + for (i = 0; i < calg_entries(); i++) { + status = crypto_alg_available(calg_list[i].name, 0); + if (calg_list[i].available != status) + calg_list[i].available = status; + } +#endif +} + +int xfrm_count_auth_supported(void) +{ + int i, n; + + for (i = 0, n = 0; i < aalg_entries(); i++) + if (aalg_list[i].available) + n++; + return n; +} + +int xfrm_count_enc_supported(void) +{ + int i, n; + + for (i = 0, n = 0; i < ealg_entries(); i++) + if (ealg_list[i].available) + n++; + return n; +} + +#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE) +void skb_ah_walk(const struct sk_buff *skb, + struct crypto_tfm *tfm, icv_update_fn_t icv_update) +{ + int offset = 0; + int len = skb->len; + int start = skb->len - skb->data_len; + int i, copy = start - offset; + struct scatterlist sg; + + /* Checksum header. 
*/ + if (copy > 0) { + if (copy > len) + copy = len; + + sg.page = virt_to_page(skb->data + offset); + sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; + sg.length = copy; + + icv_update(tfm, &sg, 1); + + if ((len -= copy) == 0) + return; + offset += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + + sg.page = frag->page; + sg.offset = frag->page_offset + offset-start; + sg.length = copy; + + icv_update(tfm, &sg, 1); + + if (!(len -= copy)) + return; + offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + skb_ah_walk(list, tfm, icv_update); + if ((len -= copy) == 0) + return; + offset += copy; + } + start = end; + } + } + if (len) + BUG(); +} +#endif + +#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) +/* Move to common area: it is shared with AH. */ + +void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, + int offset, int len, icv_update_fn_t icv_update) +{ + int start = skb->len - skb->data_len; + int i, copy = start - offset; + struct scatterlist sg; + + /* Checksum header. 
*/ + if (copy > 0) { + if (copy > len) + copy = len; + + sg.page = virt_to_page(skb->data + offset); + sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; + sg.length = copy; + + icv_update(tfm, &sg, 1); + + if ((len -= copy) == 0) + return; + offset += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + + sg.page = frag->page; + sg.offset = frag->page_offset + offset-start; + sg.length = copy; + + icv_update(tfm, &sg, 1); + + if (!(len -= copy)) + return; + offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + skb_icv_walk(list, tfm, offset-start, copy, icv_update); + if ((len -= copy) == 0) + return; + offset += copy; + } + start = end; + } + } + if (len) + BUG(); +} + + +/* Looking generic it is not used in another places. 
*/ + +int +skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +{ + int start = skb->len - skb->data_len; + int i, copy = start - offset; + int elt = 0; + + if (copy > 0) { + if (copy > len) + copy = len; + sg[elt].page = virt_to_page(skb->data + offset); + sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; + sg[elt].length = copy; + elt++; + if ((len -= copy) == 0) + return elt; + offset += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + sg[elt].page = frag->page; + sg[elt].offset = frag->page_offset+offset-start; + sg[elt].length = copy; + elt++; + if (!(len -= copy)) + return elt; + offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + elt += skb_to_sgvec(list, sg+elt, offset - start, copy); + if ((len -= copy) == 0) + return elt; + offset += copy; + } + start = end; + } + } + if (len) + BUG(); + return elt; +} + +/* Check that skb data bits are writable. If they are not, copy data + * to newly created private area. If "tailbits" is given, make sure that + * tailbits bytes beyond current end of skb are writable. + * + * Returns amount of elements of scatterlist to load for subsequent + * transformations and pointer to writable trailer skb. 
+ */ + +int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) +{ + int copyflag; + int elt; + struct sk_buff *skb1, **skb_p; + + /* If skb is cloned or its head is paged, reallocate + * head pulling out all the pages (pages are considered not writable + * at the moment even if they are anonymous). + */ + if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && + __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) + return -ENOMEM; + + /* Easy case. Most of packets will go this way. */ + if (!skb_shinfo(skb)->frag_list) { + /* A little of trouble, not enough of space for trailer. + * This should not happen, when stack is tuned to generate + * good frames. OK, on miss we reallocate and reserve even more + * space, 128 bytes is fair. */ + + if (skb_tailroom(skb) < tailbits && + pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) + return -ENOMEM; + + /* Voila! */ + *trailer = skb; + return 1; + } + + /* Misery. We are in troubles, going to mincer fragments... */ + + elt = 1; + skb_p = &skb_shinfo(skb)->frag_list; + copyflag = 0; + + while ((skb1 = *skb_p) != NULL) { + int ntail = 0; + + /* The fragment is partially pulled by someone, + * this can happen on input. Copy it and everything + * after it. */ + + if (skb_shared(skb1)) + copyflag = 1; + + /* If the skb is the last, worry about trailer. */ + + if (skb1->next == NULL && tailbits) { + if (skb_shinfo(skb1)->nr_frags || + skb_shinfo(skb1)->frag_list || + skb_tailroom(skb1) < tailbits) + ntail = tailbits + 128; + } + + if (copyflag || + skb_cloned(skb1) || + ntail || + skb_shinfo(skb1)->nr_frags || + skb_shinfo(skb1)->frag_list) { + struct sk_buff *skb2; + + /* Fuck, we are miserable poor guys... */ + if (ntail == 0) + skb2 = skb_copy(skb1, GFP_ATOMIC); + else + skb2 = skb_copy_expand(skb1, + skb_headroom(skb1), + ntail, + GFP_ATOMIC); + if (unlikely(skb2 == NULL)) + return -ENOMEM; + + if (skb1->sk) + skb_set_owner_w(skb, skb1->sk); + + /* Looking around. 
Are we still alive? + * OK, link new skb, drop old one */ + + skb2->next = skb1->next; + *skb_p = skb2; + kfree_skb(skb1); + skb1 = skb2; + } + elt++; + *trailer = skb1; + skb_p = &skb1->next; + } + + return elt; +} + +void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) +{ + if (tail != skb) { + skb->data_len += len; + skb->len += len; + } + return skb_put(tail, len); +} +#endif diff -Nru a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/net/xfrm/xfrm_input.c Tue Mar 25 18:36:42 2003 @@ -0,0 +1,52 @@ +/* + * xfrm_input.c + * + * Changes: + * YOSHIFUJI Hideaki @USAGI + * Split up af-specific portion + * + */ + +#include +#include + +void __secpath_destroy(struct sec_path *sp) +{ + int i; + for (i = 0; i < sp->len; i++) + xfrm_state_put(sp->xvec[i]); + kmem_cache_free(sp->pool, sp); +} + +/* Fetch spi and seq frpm ipsec header */ + +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +{ + int offset, offset_seq; + + switch (nexthdr) { + case IPPROTO_AH: + offset = offsetof(struct ip_auth_hdr, spi); + offset_seq = offsetof(struct ip_auth_hdr, seq_no); + break; + case IPPROTO_ESP: + offset = offsetof(struct ip_esp_hdr, spi); + offset_seq = offsetof(struct ip_esp_hdr, seq_no); + break; + case IPPROTO_COMP: + if (!pskb_may_pull(skb, 4)) + return -EINVAL; + *spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2))); + *seq = 0; + return 0; + default: + return 1; + } + + if (!pskb_may_pull(skb, 16)) + return -EINVAL; + + *spi = *(u32*)(skb->h.raw + offset); + *seq = *(u32*)(skb->h.raw + offset_seq); + return 0; +} diff -Nru a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/net/xfrm/xfrm_policy.c Tue Mar 25 18:36:42 2003 @@ -0,0 +1,1232 @@ +/* + * xfrm_policy.c + * + * Changes: + * Mitsuru KANDA @USAGI + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro + * IPv6 support + * Kazunori MIYAZAWA @USAGI + * YOSHIFUJI Hideaki + * Split up af-specific portion + * + */ + 
+#include +#include +#include + +DECLARE_MUTEX(xfrm_cfg_sem); + +static u32 xfrm_policy_genid; +static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED; + +struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; + +static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED; +static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; + +kmem_cache_t *xfrm_dst_cache; + +/* Limited flow cache. Its function now is to accelerate search for + * policy rules. + * + * Flow cache is private to cpus, at the moment this is important + * mostly for flows which do not match any rule, so that flow lookups + * are absolultely cpu-local. When a rule exists we do some updates + * to rule (refcnt, stats), so that locality is broken. Later this + * can be repaired. + */ + +struct flow_entry +{ + struct flow_entry *next; + struct flowi fl; + u8 dir; + u32 genid; + struct xfrm_policy *pol; +}; + +static kmem_cache_t *flow_cachep; + +struct flow_entry **flow_table; + +static int flow_lwm = 2*XFRM_FLOWCACHE_HASH_SIZE; +static int flow_hwm = 4*XFRM_FLOWCACHE_HASH_SIZE; + +static int flow_number[NR_CPUS] __cacheline_aligned; + +#define flow_count(cpu) (flow_number[cpu]) + +static void flow_cache_shrink(int cpu) +{ + int i; + struct flow_entry *fle, **flp; + int shrink_to = flow_lwm/XFRM_FLOWCACHE_HASH_SIZE; + + for (i=0; inext; + } + while ((fle=*flp) != NULL) { + *flp = fle->next; + if (fle->pol) + xfrm_pol_put(fle->pol); + kmem_cache_free(flow_cachep, fle); + } + } +} + +struct xfrm_policy *flow_lookup(int dir, struct flowi *fl, + unsigned short family) +{ + struct xfrm_policy *pol = NULL; + struct flow_entry *fle; + u32 hash; + int cpu; + + hash = flow_hash(fl, family); + + local_bh_disable(); + cpu = smp_processor_id(); + + for (fle = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash]; + fle; fle = fle->next) { + if (memcmp(fl, &fle->fl, sizeof(fle->fl)) == 0 && + fle->dir == dir) { + if (fle->genid == xfrm_policy_genid) { + if ((pol = fle->pol) != NULL) + atomic_inc(&pol->refcnt); + 
local_bh_enable(); + return pol; + } + break; + } + } + + pol = xfrm_policy_lookup(dir, fl, family); + + if (fle) { + /* Stale flow entry found. Update it. */ + fle->genid = xfrm_policy_genid; + + if (fle->pol) + xfrm_pol_put(fle->pol); + fle->pol = pol; + if (pol) + atomic_inc(&pol->refcnt); + } else { + if (flow_count(cpu) > flow_hwm) + flow_cache_shrink(cpu); + + fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); + if (fle) { + flow_count(cpu)++; + fle->fl = *fl; + fle->genid = xfrm_policy_genid; + fle->dir = dir; + fle->pol = pol; + if (pol) + atomic_inc(&pol->refcnt); + fle->next = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash]; + flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash] = fle; + } + } + local_bh_enable(); + return pol; +} + +void __init flow_cache_init(void) +{ + int order; + + flow_cachep = kmem_cache_create("flow_cache", + sizeof(struct flow_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + + if (!flow_cachep) + panic("NET: failed to allocate flow cache slab\n"); + + for (order = 0; + (PAGE_SIZE<type_map; + + write_lock(&typemap->lock); + if (likely(typemap->map[type->proto] == NULL)) + typemap->map[type->proto] = type; + else + err = -EEXIST; + write_unlock(&typemap->lock); + xfrm_policy_put_afinfo(afinfo); + return err; +} + +int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct xfrm_type_map *typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_map; + + write_lock(&typemap->lock); + if (unlikely(typemap->map[type->proto] != type)) + err = -ENOENT; + else + typemap->map[type->proto] = NULL; + write_unlock(&typemap->lock); + xfrm_policy_put_afinfo(afinfo); + return err; +} + +struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct xfrm_type_map *typemap; + struct xfrm_type *type; + + if (unlikely(afinfo == NULL)) 
+ return NULL; + typemap = afinfo->type_map; + + read_lock(&typemap->lock); + type = typemap->map[proto]; + if (unlikely(type && !try_module_get(type->owner))) + type = NULL; + read_unlock(&typemap->lock); + xfrm_policy_put_afinfo(afinfo); + return type; +} + +int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, + unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + if (likely(afinfo->dst_lookup != NULL)) + err = afinfo->dst_lookup(dst, fl); + else + err = -EINVAL; + xfrm_policy_put_afinfo(afinfo); + return err; +} + +void xfrm_put_type(struct xfrm_type *type) +{ + module_put(type->owner); +} + +static inline unsigned long make_jiffies(long secs) +{ + if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) + return MAX_SCHEDULE_TIMEOUT-1; + else + return secs*HZ; +} + +static void xfrm_policy_timer(unsigned long data) +{ + struct xfrm_policy *xp = (struct xfrm_policy*)data; + unsigned long now = (unsigned long)xtime.tv_sec; + long next = LONG_MAX; + u32 index; + + if (xp->dead) + goto out; + + if (xp->lft.hard_add_expires_seconds) { + long tmo = xp->lft.hard_add_expires_seconds + + xp->curlft.add_time - now; + if (tmo <= 0) + goto expired; + if (tmo < next) + next = tmo; + } + if (next != LONG_MAX && + !mod_timer(&xp->timer, jiffies + make_jiffies(next))) + atomic_inc(&xp->refcnt); + +out: + xfrm_pol_put(xp); + return; + +expired: + index = xp->index; + xfrm_pol_put(xp); + + /* Not 100% correct. id can be recycled in theory */ + xp = xfrm_policy_byid(0, index, 1); + if (xp) { + xfrm_policy_kill(xp); + xfrm_pol_put(xp); + } +} + + +/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 + * SPD calls. 
+ */ + +struct xfrm_policy *xfrm_policy_alloc(int gfp) +{ + struct xfrm_policy *policy; + + policy = kmalloc(sizeof(struct xfrm_policy), gfp); + + if (policy) { + memset(policy, 0, sizeof(struct xfrm_policy)); + atomic_set(&policy->refcnt, 1); /* initial reference, returned to the caller */ + policy->lock = RW_LOCK_UNLOCKED; + init_timer(&policy->timer); + policy->timer.data = (unsigned long)policy; /* xfrm_policy_timer() casts this back to the policy */ + policy->timer.function = xfrm_policy_timer; + } + return policy; /* NULL if kmalloc() failed */ +} + +/* Destroy xfrm_policy: descendant resources must already have been released. + * BUG()s if the policy is not marked dead, still owns bundles, or still has + * a pending timer; otherwise frees it. + */ + +void __xfrm_policy_destroy(struct xfrm_policy *policy) +{ + if (!policy->dead) + BUG(); + + if (policy->bundles) + BUG(); + + if (del_timer(&policy->timer)) + BUG(); + + kfree(policy); +} + +/* Mark the policy dead and release its descendant resources: every cached + * bundle is dst_free()d and a still-pending timer is cancelled. Runs under + * policy->lock, which is taken here; a policy that is already dead is left + * untouched. The rule must already be unlinked from the lists by this point. + */ + +void xfrm_policy_kill(struct xfrm_policy *policy) +{ + struct dst_entry *dst; + + write_lock_bh(&policy->lock); + if (policy->dead) + goto out; + + policy->dead = 1; + + while ((dst = policy->bundles) != NULL) { + policy->bundles = dst->next; + dst_free(dst); + } + + if (del_timer(&policy->timer)) /* timer was pending: drop the reference taken when it was armed */ + atomic_dec(&policy->refcnt); + +out: + write_unlock_bh(&policy->lock); +} + +/* Generate new index... KAME seems to generate them ordered by cost + * of an absolute inpredictability of ordering of rules. This will not pass. 
*/ +/* Return an index that no policy on xfrm_policy_list[dir] currently uses. + * The generator advances in steps of 8 and dir is OR-ed into the low bits, + * which is what lets xfrm_policy_byid() pick the list with (id & 7). + */ +static u32 xfrm_gen_index(int dir) +{ + u32 idx; + struct xfrm_policy *p; + static u32 idx_generator; + + for (;;) { + idx = (idx_generator | dir); + idx_generator += 8; + if (idx == 0) + idx = 8; + for (p = xfrm_policy_list[dir]; p; p = p->next) { + if (p->index == idx) + break; + } + if (!p) + return idx; + } +} + +/* Link policy into xfrm_policy_list[dir]. A policy whose selector compares + * equal (memcmp) is replaced and inherits its index, unless excl is set, in + * which case -EEXIST is returned and nothing changes. Takes a reference for + * the list, bumps xfrm_policy_genid and arms the timer when a hard add + * lifetime is configured. The replaced policy is killed after + * xfrm_policy_lock has been dropped. Returns 0 on success. + */ +int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) +{ + struct xfrm_policy *pol, **p; + + write_lock_bh(&xfrm_policy_lock); + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + if (memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) { + if (excl) { + write_unlock_bh(&xfrm_policy_lock); + return -EEXIST; + } + break; + } + } + atomic_inc(&policy->refcnt); + policy->next = pol ? pol->next : NULL; + *p = policy; + xfrm_policy_genid++; + policy->index = pol ? pol->index : xfrm_gen_index(dir); + policy->curlft.add_time = (unsigned long)xtime.tv_sec; + policy->curlft.use_time = 0; + if (policy->lft.hard_add_expires_seconds && + !mod_timer(&policy->timer, jiffies + HZ)) + atomic_inc(&policy->refcnt); + write_unlock_bh(&xfrm_policy_lock); + + if (pol) { + atomic_dec(&pol->refcnt); + xfrm_policy_kill(pol); + xfrm_pol_put(pol); + } + return 0; +} + +/* Unlink and return the first policy on xfrm_policy_list[dir] whose selector + * compares equal (memcmp) to *sel, bumping xfrm_policy_genid; NULL if there + * is none. No reference count is changed here. + */ +struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel) +{ + struct xfrm_policy *pol, **p; + + write_lock_bh(&xfrm_policy_lock); + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) { + *p = pol->next; + break; + } + } + if (pol) + xfrm_policy_genid++; + write_unlock_bh(&xfrm_policy_lock); + return pol; +} + +/* Find a policy by index. The list is chosen from (id & 7); dir is not used. + * With delete set the policy is unlinked and xfrm_policy_genid bumped, + * otherwise a reference is taken. Returns NULL if no policy has that index. + */ +struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +{ + struct xfrm_policy *pol, **p; + + write_lock_bh(&xfrm_policy_lock); + for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) { + if (pol->index == id) { + if (delete) + *p = pol->next; + break; + } + } + if (pol) { + if (delete) + xfrm_policy_genid++; + else + 
atomic_inc(&pol->refcnt); + } + write_unlock_bh(&xfrm_policy_lock); + return pol; +} + +/* Unlink every policy on lists 0..XFRM_POLICY_MAX-1. Each one is killed and + * put with xfrm_policy_lock dropped, then the lock is retaken and the list + * head re-read. xfrm_policy_genid is bumped once at the end. + */ +void xfrm_policy_flush(void) +{ + struct xfrm_policy *xp; + int dir; + + write_lock_bh(&xfrm_policy_lock); + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { + while ((xp = xfrm_policy_list[dir]) != NULL) { + xfrm_policy_list[dir] = xp->next; + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_kill(xp); + xfrm_pol_put(xp); + + write_lock_bh(&xfrm_policy_lock); + } + } + xfrm_policy_genid++; + write_unlock_bh(&xfrm_policy_lock); +} + +/* Call func(policy, dir % XFRM_POLICY_MAX, remaining, data) for every policy + * on all 2*XFRM_POLICY_MAX lists while holding xfrm_policy_lock for reading; + * remaining counts down and is 0 for the last policy. Returns -ENOENT when + * there are no policies, otherwise the first non-zero value func returns, + * or 0. + */ +int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), + void *data) +{ + struct xfrm_policy *xp; + int dir; + int count = 0; + int error = 0; + + read_lock_bh(&xfrm_policy_lock); + for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { + for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + count++; + } + + if (count == 0) { + error = -ENOENT; + goto out; + } + + for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { + for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { + error = func(xp, dir%XFRM_POLICY_MAX, --count, data); + if (error) + goto out; + } + } + +out: + read_unlock_bh(&xfrm_policy_lock); + return error; +} + + +/* Find policy to apply to this flow. 
*/ + +struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl, + unsigned short family) +{ + struct xfrm_policy *pol; + + read_lock_bh(&xfrm_policy_lock); + for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { + struct xfrm_selector *sel = &pol->selector; + int match; + + if (pol->family != family) + continue; + + match = xfrm_selector_match(sel, fl, family); + if (match) { + atomic_inc(&pol->refcnt); + break; + } + } + read_unlock_bh(&xfrm_policy_lock); + return pol; +} + +struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +{ + struct xfrm_policy *pol; + + read_lock_bh(&xfrm_policy_lock); + if ((pol = sk->policy[dir]) != NULL) { + int match; + + match = xfrm_selector_match(&pol->selector, fl, sk->family); + if (match) + atomic_inc(&pol->refcnt); + else + pol = NULL; + } + read_unlock_bh(&xfrm_policy_lock); + return pol; +} + +void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir) +{ + pol->next = xfrm_policy_list[XFRM_POLICY_MAX+dir]; + xfrm_policy_list[XFRM_POLICY_MAX+dir] = pol; + atomic_inc(&pol->refcnt); +} + +void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir) +{ + struct xfrm_policy **polp; + + for (polp = &xfrm_policy_list[XFRM_POLICY_MAX+dir]; + *polp != NULL; polp = &(*polp)->next) { + if (*polp == pol) { + *polp = pol->next; + atomic_dec(&pol->refcnt); + return; + } + } +} + +int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) +{ + struct xfrm_policy *old_pol; + + write_lock_bh(&xfrm_policy_lock); + old_pol = sk->policy[dir]; + sk->policy[dir] = pol; + if (pol) { + pol->curlft.add_time = (unsigned long)xtime.tv_sec; + pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + xfrm_sk_policy_link(pol, dir); + } + if (old_pol) + xfrm_sk_policy_unlink(old_pol, dir); + write_unlock_bh(&xfrm_policy_lock); + + if (old_pol) { + xfrm_policy_kill(old_pol); + xfrm_pol_put(old_pol); + } + return 0; +} + +static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) +{ + 
struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + + if (newp) { + newp->selector = old->selector; + newp->lft = old->lft; + newp->curlft = old->curlft; + newp->action = old->action; + newp->flags = old->flags; + newp->xfrm_nr = old->xfrm_nr; + newp->index = old->index; + memcpy(newp->xfrm_vec, old->xfrm_vec, + newp->xfrm_nr*sizeof(struct xfrm_tmpl)); + write_lock_bh(&xfrm_policy_lock); + xfrm_sk_policy_link(newp, dir); + write_unlock_bh(&xfrm_policy_lock); + } + return newp; +} + +int __xfrm_sk_clone_policy(struct sock *sk) +{ + struct xfrm_policy *p0, *p1; + p0 = sk->policy[0]; + p1 = sk->policy[1]; + sk->policy[0] = NULL; + sk->policy[1] = NULL; + if (p0 && (sk->policy[0] = clone_policy(p0, 0)) == NULL) + return -ENOMEM; + if (p1 && (sk->policy[1] = clone_policy(p1, 1)) == NULL) + return -ENOMEM; + return 0; +} + +void __xfrm_sk_free_policy(struct xfrm_policy *pol, int dir) +{ + write_lock_bh(&xfrm_policy_lock); + xfrm_sk_policy_unlink(pol, dir); + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_kill(pol); + xfrm_pol_put(pol); +} + +/* Resolve list of templates for the flow, given policy. */ + +static int +xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + int nx; + int i, error; + xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); + xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + + for (nx=0, i = 0; i < policy->xfrm_nr; i++) { + struct xfrm_state *x; + xfrm_address_t *remote = daddr; + xfrm_address_t *local = saddr; + struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; + + if (tmpl->mode) { + remote = &tmpl->id.daddr; + local = &tmpl->saddr; + } + + x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); + + if (x && x->km.state == XFRM_STATE_VALID) { + xfrm[nx++] = x; + daddr = remote; + saddr = local; + continue; + } + if (x) { + error = (x->km.state == XFRM_STATE_ERROR ? 
+ -EINVAL : -EAGAIN); + xfrm_state_put(x); + } + + if (!tmpl->optional) + goto fail; + } + return nx; + +fail: + for (nx--; nx>=0; nx--) + xfrm_state_put(xfrm[nx]); + return error; +} + +/* Check that the bundle accepts the flow and its components are + * still valid. + */ + +static struct dst_entry * +xfrm_find_bundle(struct flowi *fl, struct rtable *rt, struct xfrm_policy *policy, unsigned short family) +{ + struct dst_entry *x; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return ERR_PTR(-EINVAL); + x = afinfo->find_bundle(fl, rt, policy); + xfrm_policy_put_afinfo(afinfo); + return x; +} + +/* Allocate chain of dst_entry's, attach known xfrm's, calculate + * all the metrics... Shortly, bundle a bundle. + */ + +static int +xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, + struct flowi *fl, struct dst_entry **dst_p, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p); + xfrm_policy_put_afinfo(afinfo); + return err; +} + +/* Main function: finds/creates a bundle for given flow. + * + * At the moment we eat a raw IP route. Mostly to speed up lookups + * on interfaces with disabled IPsec. + */ +int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, + struct sock *sk, int flags) +{ + struct xfrm_policy *policy; + struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct rtable *rt = (struct rtable*)*dst_p; + struct dst_entry *dst; + int nx = 0; + int err; + u32 genid; + u16 family = (*dst_p)->ops->family; + + switch (family) { + case AF_INET: + if (!fl->fl4_src) + fl->fl4_src = rt->rt_src; + if (!fl->fl4_dst) + fl->fl4_dst = rt->rt_dst; + case AF_INET6: + /* Still not clear... 
*/ + default: + /* nothing */; + } + +restart: + genid = xfrm_policy_genid; + policy = NULL; + if (sk && sk->policy[1]) + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + + if (!policy) { + /* To accelerate a bit... */ + if ((rt->u.dst.flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + return 0; + + policy = flow_lookup(XFRM_POLICY_OUT, fl, family); + } + + if (!policy) + return 0; + + policy->curlft.use_time = (unsigned long)xtime.tv_sec; + + switch (policy->action) { + case XFRM_POLICY_BLOCK: + /* Prohibit the flow */ + xfrm_pol_put(policy); + return -EPERM; + + case XFRM_POLICY_ALLOW: + if (policy->xfrm_nr == 0) { + /* Flow passes not transformed. */ + xfrm_pol_put(policy); + return 0; + } + + /* Try to find matching bundle. + * + * LATER: help from flow cache. It is optional, this + * is required only for output policy. + */ + dst = xfrm_find_bundle(fl, rt, policy, family); + if (IS_ERR(dst)) { + xfrm_pol_put(policy); + return PTR_ERR(dst); + } + + if (dst) + break; + + nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + + if (unlikely(nx<0)) { + err = nx; + if (err == -EAGAIN) { + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + if (!flags) + goto error; + + __set_task_state(tsk, TASK_INTERRUPTIBLE); + add_wait_queue(&km_waitq, &wait); + err = xfrm_tmpl_resolve(policy, fl, xfrm, family); + if (err == -EAGAIN) + schedule(); + __set_task_state(tsk, TASK_RUNNING); + remove_wait_queue(&km_waitq, &wait); + + if (err == -EAGAIN && signal_pending(current)) { + err = -ERESTART; + goto error; + } + if (err == -EAGAIN || + genid != xfrm_policy_genid) + goto restart; + } + if (err) + goto error; + } else if (nx == 0) { + /* Flow passes not transformed. */ + xfrm_pol_put(policy); + return 0; + } + + dst = &rt->u.dst; + err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family); + + if (unlikely(err)) { + int i; + for (i=0; ilock); + if (unlikely(policy->dead)) { + /* Wow! While we worked on resolving, this + * policy has gone. 
Retry. It is not paranoia, + * we just cannot enlist new bundle to dead object. + */ + write_unlock_bh(&policy->lock); + + xfrm_pol_put(policy); + if (dst) + dst_free(dst); + goto restart; + } + dst->next = policy->bundles; + policy->bundles = dst; + dst_hold(dst); + write_unlock_bh(&policy->lock); + } + *dst_p = dst; + ip_rt_put(rt); + xfrm_pol_put(policy); + return 0; + +error: + ip_rt_put(rt); + xfrm_pol_put(policy); + *dst_p = NULL; + return err; +} + +/* When skb is transformed back to its "native" form, we have to + * check policy restrictions. At the moment we make this in maximally + * stupid way. Shame on me. :-) Of course, connected sockets must + * have policy cached at them. + */ + +static inline int +xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, + unsigned short family) +{ + return x->id.proto == tmpl->id.proto && + (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && + x->props.mode == tmpl->mode && + (tmpl->aalgos & (1<props.aalgo)) && + !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); +} + +static inline int +xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int idx, + unsigned short family) +{ + for (; idx < sp->len; idx++) { + if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) + return ++idx; + } + return -1; +} + +static int +_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + afinfo->decode_session(skb, fl); + xfrm_policy_put_afinfo(afinfo); + return 0; +} + +int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, + unsigned short family) +{ + struct xfrm_policy *pol; + struct flowi fl; + + if (_decode_session(skb, &fl, family) < 0) + return 0; + + /* First, check used SA against their selectors. 
*/ + if (skb->sp) { + int i; + + for (i=skb->sp->len-1; i>=0; i--) { + if (!xfrm_selector_match(&skb->sp->xvec[i]->sel, &fl, family)) + return 0; + } + } + + pol = NULL; + if (sk && sk->policy[dir]) + pol = xfrm_sk_policy_lookup(sk, dir, &fl); + + if (!pol) + pol = flow_lookup(dir, &fl, family); + + if (!pol) + return 1; + + pol->curlft.use_time = (unsigned long)xtime.tv_sec; + + if (pol->action == XFRM_POLICY_ALLOW) { + if (pol->xfrm_nr != 0) { + struct sec_path *sp; + static struct sec_path dummy; + int i, k; + + if ((sp = skb->sp) == NULL) + sp = &dummy; + + /* For each tmpl search corresponding xfrm. + * Order is _important_. Later we will implement + * some barriers, but at the moment barriers + * are implied between each two transformations. + */ + for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); + if (k < 0) + goto reject; + } + } + xfrm_pol_put(pol); + return 1; + } + +reject: + xfrm_pol_put(pol); + return 0; +} + +int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) +{ + struct flowi fl; + + if (_decode_session(skb, &fl, family) < 0) + return 0; + + return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; +} + +/* Optimize later using cookies and generation ids. */ + +static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) +{ + struct dst_entry *child = dst; + + while (child) { + if (child->obsolete > 0 || + (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) { + dst_release(dst); + return NULL; + } + child = child->child; + } + + return dst; +} + +static void xfrm_dst_destroy(struct dst_entry *dst) +{ + xfrm_state_put(dst->xfrm); + dst->xfrm = NULL; +} + +static void xfrm_link_failure(struct sk_buff *skb) +{ + /* Impossible. Such dst must be popped before reaches point of failure. 
*/ + return; +} + +static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) +{ + if (dst) { + if (dst->obsolete) { + dst_release(dst); + dst = NULL; + } + } + return dst; +} + +static void __xfrm_garbage_collect(void) +{ + int i; + struct xfrm_policy *pol; + struct dst_entry *dst, **dstp, *gc_list = NULL; + + read_lock_bh(&xfrm_policy_lock); + for (i=0; i<2*XFRM_POLICY_MAX; i++) { + for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (atomic_read(&dst->__refcnt) == 0) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); + } + } + read_unlock_bh(&xfrm_policy_lock); + + while (gc_list) { + dst = gc_list; + gc_list = dst->next; + dst_free(dst); + } +} + +static int bundle_depends_on(struct dst_entry *dst, struct xfrm_state *x) +{ + do { + if (dst->xfrm == x) + return 1; + } while ((dst = dst->child) != NULL); + return 0; +} + +int xfrm_flush_bundles(struct xfrm_state *x) +{ + int i; + struct xfrm_policy *pol; + struct dst_entry *dst, **dstp, *gc_list = NULL; + + read_lock_bh(&xfrm_policy_lock); + for (i=0; i<2*XFRM_POLICY_MAX; i++) { + for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (bundle_depends_on(dst, x)) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); + } + } + read_unlock_bh(&xfrm_policy_lock); + + while (gc_list) { + dst = gc_list; + gc_list = dst->next; + dst_free(dst); + } + + return 0; +} + +/* Well... that's _TASK_. We need to scan through transformation + * list and figure out what mss tcp should generate in order to + * final datagram fit to mtu. Mama mia... :-) + * + * Apparently, some easy way exists, but we used to choose the most + * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta. 
+ * + * Consider this function as something like dark humour. :-) + */ +static int xfrm_get_mss(struct dst_entry *dst, u32 mtu) +{ + int res = mtu - dst->header_len; + + for (;;) { + struct dst_entry *d = dst; + int m = res; + + do { + struct xfrm_state *x = d->xfrm; + if (x) { + spin_lock_bh(&x->lock); + if (x->km.state == XFRM_STATE_VALID && + x->type && x->type->get_max_size) + m = x->type->get_max_size(d->xfrm, m); + else + m += x->props.header_len; + spin_unlock_bh(&x->lock); + } + } while ((d = d->child) != NULL); + + if (m <= mtu) + break; + res -= (m - mtu); + if (res < 88) + return mtu; + } + + return res + dst->header_len; +} + +int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_policy_afinfo_lock); + if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else { + struct dst_ops *dst_ops = afinfo->dst_ops; + if (likely(dst_ops->kmem_cachep == NULL)) + dst_ops->kmem_cachep = xfrm_dst_cache; + if (likely(dst_ops->check == NULL)) + dst_ops->check = xfrm_dst_check; + if (likely(dst_ops->destroy == NULL)) + dst_ops->destroy = xfrm_dst_destroy; + if (likely(dst_ops->negative_advice == NULL)) + dst_ops->negative_advice = xfrm_negative_advice; + if (likely(dst_ops->link_failure == NULL)) + dst_ops->link_failure = xfrm_link_failure; + if (likely(dst_ops->get_mss == NULL)) + dst_ops->get_mss = xfrm_get_mss; + if (likely(afinfo->garbage_collect == NULL)) + afinfo->garbage_collect = __xfrm_garbage_collect; + xfrm_policy_afinfo[afinfo->family] = afinfo; + } + write_unlock(&xfrm_policy_afinfo_lock); + return err; +} + +int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_policy_afinfo_lock); + if 
(likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else { + struct dst_ops *dst_ops = afinfo->dst_ops; + xfrm_policy_afinfo[afinfo->family] = NULL; + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->destroy = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; + dst_ops->get_mss = NULL; + afinfo->garbage_collect = NULL; + } + } + write_unlock(&xfrm_policy_afinfo_lock); + return err; +} + +struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +{ + struct xfrm_policy_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + read_lock(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[family]; + if (likely(afinfo != NULL)) + read_lock(&afinfo->lock); + read_unlock(&xfrm_policy_afinfo_lock); + return afinfo; +} + +void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + if (unlikely(afinfo == NULL)) + return; + read_unlock(&afinfo->lock); +} + +void __init xfrm_policy_init(void) +{ + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + sizeof(struct xfrm_dst), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!xfrm_dst_cache) + panic("XFRM: failed to allocate xfrm_dst_cache\n"); +} + +void __init xfrm_init(void) +{ + xfrm_state_init(); + flow_cache_init(); + xfrm_policy_init(); +} + diff -Nru a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/net/xfrm/xfrm_state.c Tue Mar 25 18:36:42 2003 @@ -0,0 +1,739 @@ +/* + * xfrm_state.c + * + * Changes: + * Mitsuru KANDA @USAGI + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro + * IPv6 support + * YOSHIFUJI Hideaki @USAGI + * Split up af-specific functions + * + */ + +#include +#include +#include + +/* Each xfrm_state may be linked to two tables: + + 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) + 2. Hash table by daddr to find what SAs exist for given + destination/tunnel endpoint. 
(output) + */ + +static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED; + +/* Hash table to find appropriate SA towards given target (endpoint + * of tunnel or destination of transport mode) allowed by selector. + * + * Main use is finding SA after policy selected tunnel or transport mode. + * Also, it can be used by ah/esp icmp error handler to find offending SA. + */ +static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; + +DECLARE_WAIT_QUEUE_HEAD(km_waitq); + +static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED; +static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; + +static void __xfrm_state_delete(struct xfrm_state *x); + +static inline unsigned long make_jiffies(long secs) +{ + if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) + return MAX_SCHEDULE_TIMEOUT-1; + else + return secs*HZ; +} + +static void xfrm_timer_handler(unsigned long data) +{ + struct xfrm_state *x = (struct xfrm_state*)data; + unsigned long now = (unsigned long)xtime.tv_sec; + long next = LONG_MAX; + int warn = 0; + + spin_lock(&x->lock); + if (x->km.state == XFRM_STATE_DEAD) + goto out; + if (x->km.state == XFRM_STATE_EXPIRED) + goto expired; + if (x->lft.hard_add_expires_seconds) { + long tmo = x->lft.hard_add_expires_seconds + + x->curlft.add_time - now; + if (tmo <= 0) + goto expired; + if (tmo < next) + next = tmo; + } + if (x->lft.hard_use_expires_seconds && x->curlft.use_time) { + long tmo = x->lft.hard_use_expires_seconds + + x->curlft.use_time - now; + if (tmo <= 0) + goto expired; + if (tmo < next) + next = tmo; + } + if (x->km.dying) + goto resched; + if (x->lft.soft_add_expires_seconds) { + long tmo = x->lft.soft_add_expires_seconds + + x->curlft.add_time - now; + if (tmo <= 0) + warn = 1; + else if (tmo < next) + next = tmo; + } + if (x->lft.soft_use_expires_seconds && x->curlft.use_time) { + long tmo = x->lft.soft_use_expires_seconds + + x->curlft.use_time - now; + if (tmo <= 0) + warn = 1; + else if (tmo < 
next) + next = tmo; + } + + if (warn) + km_warn_expired(x); +resched: + if (next != LONG_MAX && + !mod_timer(&x->timer, jiffies + make_jiffies(next))) + atomic_inc(&x->refcnt); + goto out; + +expired: + if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { + x->km.state = XFRM_STATE_EXPIRED; + wake_up(&km_waitq); + next = 2; + goto resched; + } + if (x->id.spi != 0) + km_expired(x); + __xfrm_state_delete(x); + +out: + spin_unlock(&x->lock); + xfrm_state_put(x); +} + +struct xfrm_state *xfrm_state_alloc(void) +{ + struct xfrm_state *x; + + x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC); + + if (x) { + memset(x, 0, sizeof(struct xfrm_state)); + atomic_set(&x->refcnt, 1); + INIT_LIST_HEAD(&x->bydst); + INIT_LIST_HEAD(&x->byspi); + init_timer(&x->timer); + x->timer.function = xfrm_timer_handler; + x->timer.data = (unsigned long)x; + x->curlft.add_time = (unsigned long)xtime.tv_sec; + x->lft.soft_byte_limit = XFRM_INF; + x->lft.soft_packet_limit = XFRM_INF; + x->lft.hard_byte_limit = XFRM_INF; + x->lft.hard_packet_limit = XFRM_INF; + x->lock = SPIN_LOCK_UNLOCKED; + } + return x; +} + +void __xfrm_state_destroy(struct xfrm_state *x) +{ + BUG_TRAP(x->km.state == XFRM_STATE_DEAD); + if (del_timer(&x->timer)) + BUG(); + if (x->aalg) + kfree(x->aalg); + if (x->ealg) + kfree(x->ealg); + if (x->calg) + kfree(x->calg); + if (x->type) + xfrm_put_type(x->type); + kfree(x); +} + +static void __xfrm_state_delete(struct xfrm_state *x) +{ + int kill = 0; + + if (x->km.state != XFRM_STATE_DEAD) { + x->km.state = XFRM_STATE_DEAD; + kill = 1; + spin_lock(&xfrm_state_lock); + list_del(&x->bydst); + atomic_dec(&x->refcnt); + if (x->id.spi) { + list_del(&x->byspi); + atomic_dec(&x->refcnt); + } + spin_unlock(&xfrm_state_lock); + if (del_timer(&x->timer)) + atomic_dec(&x->refcnt); + if (atomic_read(&x->refcnt) != 1) + xfrm_flush_bundles(x); + } + + if (kill && x->type) + x->type->destructor(x); + wake_up(&km_waitq); +} + +void xfrm_state_delete(struct xfrm_state *x) +{ + 
spin_lock_bh(&x->lock); + __xfrm_state_delete(x); + spin_unlock_bh(&x->lock); +} + +void xfrm_state_flush(u8 proto) +{ + int i; + struct xfrm_state *x; + + spin_lock_bh(&xfrm_state_lock); + for (i = 0; i < XFRM_DST_HSIZE; i++) { +restart: + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) { + atomic_inc(&x->refcnt); + spin_unlock_bh(&xfrm_state_lock); + + xfrm_state_delete(x); + xfrm_state_put(x); + + spin_lock_bh(&xfrm_state_lock); + goto restart; + } + } + } + spin_unlock_bh(&xfrm_state_lock); + wake_up(&km_waitq); +} + +static int +xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, + struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -1; + afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + xfrm_state_put_afinfo(afinfo); + return 0; +} + +struct xfrm_state * +xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, + struct flowi *fl, struct xfrm_tmpl *tmpl, + struct xfrm_policy *pol, int *err, + unsigned short family) +{ + unsigned h = xfrm_dst_hash(daddr, family); + struct xfrm_state *x; + int acquire_in_progress = 0; + int error = 0; + struct xfrm_state *best = NULL; + + spin_lock_bh(&xfrm_state_lock); + list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == tmpl->reqid && + xfrm_state_addr_check(x, daddr, saddr, family) && + tmpl->mode == x->props.mode && + tmpl->id.proto == x->id.proto) { + /* Resolution logic: + 1. There is a valid state with matching selector. + Done. + 2. Valid state with inappropriate selector. Skip. + + Entering area of "sysdeps". + + 3. If state is not valid, selector is temporary, + it selects only session which triggered + previous resolution. Key manager will do + something to install a state with proper + selector. 
+ */ + if (x->km.state == XFRM_STATE_VALID) { + if (!xfrm_selector_match(&x->sel, fl, family)) + continue; + if (!best || + best->km.dying > x->km.dying || + (best->km.dying == x->km.dying && + best->curlft.add_time < x->curlft.add_time)) + best = x; + } else if (x->km.state == XFRM_STATE_ACQ) { + acquire_in_progress = 1; + } else if (x->km.state == XFRM_STATE_ERROR || + x->km.state == XFRM_STATE_EXPIRED) { + if (xfrm_selector_match(&x->sel, fl, family)) + error = 1; + } + } + } + + if (best) { + atomic_inc(&best->refcnt); + spin_unlock_bh(&xfrm_state_lock); + return best; + } + + x = NULL; + if (!error && !acquire_in_progress && + ((x = xfrm_state_alloc()) != NULL)) { + /* Initialize temporary selector matching only + * to current session. */ + xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + + if (km_query(x, tmpl, pol) == 0) { + x->km.state = XFRM_STATE_ACQ; + list_add_tail(&x->bydst, xfrm_state_bydst+h); + atomic_inc(&x->refcnt); + if (x->id.spi) { + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); + list_add(&x->byspi, xfrm_state_byspi+h); + atomic_inc(&x->refcnt); + } + x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + atomic_inc(&x->refcnt); + mod_timer(&x->timer, XFRM_ACQ_EXPIRES*HZ); + } else { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + error = 1; + } + } + spin_unlock_bh(&xfrm_state_lock); + if (!x) + *err = acquire_in_progress ? -EAGAIN : + (error ? 
-ESRCH : -ENOMEM); + return x; +} + +void xfrm_state_insert(struct xfrm_state *x) +{ + unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + + spin_lock_bh(&xfrm_state_lock); + list_add(&x->bydst, xfrm_state_bydst+h); + atomic_inc(&x->refcnt); + + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + + list_add(&x->byspi, xfrm_state_byspi+h); + atomic_inc(&x->refcnt); + + if (!mod_timer(&x->timer, jiffies + HZ)) + atomic_inc(&x->refcnt); + + spin_unlock_bh(&xfrm_state_lock); + wake_up(&km_waitq); +} + +int xfrm_state_check_expire(struct xfrm_state *x) +{ + if (!x->curlft.use_time) + x->curlft.use_time = (unsigned long)xtime.tv_sec; + + if (x->km.state != XFRM_STATE_VALID) + return -EINVAL; + + if (x->curlft.bytes >= x->lft.hard_byte_limit || + x->curlft.packets >= x->lft.hard_packet_limit) { + km_expired(x); + if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ)) + atomic_inc(&x->refcnt); + return -EINVAL; + } + + if (!x->km.dying && + (x->curlft.bytes >= x->lft.soft_byte_limit || + x->curlft.packets >= x->lft.soft_packet_limit)) + km_warn_expired(x); + return 0; +} + +int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) +{ + int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev) + - skb_headroom(skb); + + if (nhead > 0) + return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + + /* Check tail too... 
*/ + return 0; +} + +struct xfrm_state * +xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, + unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->state_lookup(daddr, spi, proto); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} + +struct xfrm_state * +xfrm_find_acq(u8 mode, u16 reqid, u8 proto, + xfrm_address_t *daddr, xfrm_address_t *saddr, + int create, unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} + +/* Silly enough, but I'm lazy to build resolution list */ + +struct xfrm_state * xfrm_find_acq_byseq(u32 seq) +{ + int i; + struct xfrm_state *x; + + spin_lock_bh(&xfrm_state_lock); + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq) { + atomic_inc(&x->refcnt); + spin_unlock_bh(&xfrm_state_lock); + return x; + } + } + } + spin_unlock_bh(&xfrm_state_lock); + return NULL; +} + +u32 xfrm_get_acqseq(void) +{ + u32 res; + static u32 acqseq; + static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED; + + spin_lock_bh(&acqseq_lock); + res = (++acqseq ? 
: ++acqseq); + spin_unlock_bh(&acqseq_lock); + return res; +} + +void +xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) +{ + u32 h; + struct xfrm_state *x0; + + if (x->id.spi) + return; + + if (minspi == maxspi) { + x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + if (x0) { + xfrm_state_put(x0); + return; + } + x->id.spi = minspi; + } else { + u32 spi = 0; + minspi = ntohl(minspi); + maxspi = ntohl(maxspi); + for (h=0; hid.daddr, minspi, x->id.proto, x->props.family); + if (x0 == NULL) + break; + xfrm_state_put(x0); + } + x->id.spi = htonl(spi); + } + if (x->id.spi) { + spin_lock_bh(&xfrm_state_lock); + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + list_add(&x->byspi, xfrm_state_byspi+h); + atomic_inc(&x->refcnt); + spin_unlock_bh(&xfrm_state_lock); + wake_up(&km_waitq); + } +} + +int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), + void *data) +{ + int i; + struct xfrm_state *x; + int count = 0; + int err = 0; + + spin_lock_bh(&xfrm_state_lock); + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) + count++; + } + } + if (count == 0) { + err = -ENOENT; + goto out; + } + + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) + continue; + err = func(x, --count, data); + if (err) + goto out; + } + } +out: + spin_unlock_bh(&xfrm_state_lock); + return err; +} + + +int xfrm_replay_check(struct xfrm_state *x, u32 seq) +{ + u32 diff; + + seq = ntohl(seq); + + if (unlikely(seq == 0)) + return -EINVAL; + + if (likely(seq > x->replay.seq)) + return 0; + + diff = x->replay.seq - seq; + if (diff >= x->props.replay_window) { + x->stats.replay_window++; + return -EINVAL; + } + + if (x->replay.bitmap & (1U << diff)) { + x->stats.replay++; + return -EINVAL; + } + return 0; +} + +void 
xfrm_replay_advance(struct xfrm_state *x, u32 seq) +{ + u32 diff; + + seq = ntohl(seq); + + if (seq > x->replay.seq) { + diff = seq - x->replay.seq; + if (diff < x->props.replay_window) + x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + else + x->replay.bitmap = 1; + x->replay.seq = seq; + } else { + diff = x->replay.seq - seq; + x->replay.bitmap |= (1U << diff); + } +} + +int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl) +{ + int i; + + for (i=0; isel, fl, x[i]->props.family); + if (!match) + return -EINVAL; + } + return 0; +} + +static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); +static rwlock_t xfrm_km_lock = RW_LOCK_UNLOCKED; + +void km_warn_expired(struct xfrm_state *x) +{ + struct xfrm_mgr *km; + + x->km.dying = 1; + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) + km->notify(x, 0); + read_unlock(&xfrm_km_lock); +} + +void km_expired(struct xfrm_state *x) +{ + struct xfrm_mgr *km; + + x->km.state = XFRM_STATE_EXPIRED; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) + km->notify(x, 1); + read_unlock(&xfrm_km_lock); + wake_up(&km_waitq); +} + +int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) +{ + int err = -EINVAL; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + err = km->acquire(x, t, pol, XFRM_POLICY_OUT); + if (!err) + break; + } + read_unlock(&xfrm_km_lock); + return err; +} + +int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen) +{ + int err; + u8 *data; + struct xfrm_mgr *km; + struct xfrm_policy *pol = NULL; + + if (optlen <= 0 || optlen > PAGE_SIZE) + return -EMSGSIZE; + + data = kmalloc(optlen, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = -EFAULT; + if (copy_from_user(data, optval, optlen)) + goto out; + + err = -EINVAL; + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + pol = km->compile_policy(sk->family, 
optname, data, optlen, &err); + if (err >= 0) + break; + } + read_unlock(&xfrm_km_lock); + + if (err >= 0) { + xfrm_sk_policy_insert(sk, err, pol); + err = 0; + } + +out: + kfree(data); + return err; +} + +int xfrm_register_km(struct xfrm_mgr *km) +{ + write_lock_bh(&xfrm_km_lock); + list_add_tail(&km->list, &xfrm_km_list); + write_unlock_bh(&xfrm_km_lock); + return 0; +} + +int xfrm_unregister_km(struct xfrm_mgr *km) +{ + write_lock_bh(&xfrm_km_lock); + list_del(&km->list); + write_unlock_bh(&xfrm_km_lock); + return 0; +} + +int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_state_afinfo_lock); + if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) + err = -ENOBUFS; + else { + afinfo->state_bydst = xfrm_state_bydst; + afinfo->state_byspi = xfrm_state_byspi; + xfrm_state_afinfo[afinfo->family] = afinfo; + } + write_unlock(&xfrm_state_afinfo_lock); + return err; +} + +int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) +{ + int err = 0; + if (unlikely(afinfo == NULL)) + return -EINVAL; + if (unlikely(afinfo->family >= NPROTO)) + return -EAFNOSUPPORT; + write_lock(&xfrm_state_afinfo_lock); + if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { + if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) + err = -EINVAL; + else { + xfrm_state_afinfo[afinfo->family] = NULL; + afinfo->state_byspi = NULL; + afinfo->state_bydst = NULL; + } + } + write_unlock(&xfrm_state_afinfo_lock); + return err; +} + +struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + read_lock(&xfrm_state_afinfo_lock); + afinfo = xfrm_state_afinfo[family]; + if (likely(afinfo != NULL)) + read_lock(&afinfo->lock); + read_unlock(&xfrm_state_afinfo_lock); + return afinfo; +} + +void xfrm_state_put_afinfo(struct 
xfrm_state_afinfo *afinfo) +{ + if (unlikely(afinfo == NULL)) + return; + read_unlock(&afinfo->lock); +} + +void __init xfrm_state_init(void) +{ + int i; + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct sock *xfrm_nl; + +static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) +{ + struct rtattr *rt = xfrma[type - 1]; + struct xfrm_algo *algp; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp)) + return -EINVAL; + + algp = RTA_DATA(rt); + switch (type) { + case XFRMA_ALG_AUTH: + if (!algp->alg_key_len && + strcmp(algp->alg_name, "digest_null") != 0) + return -EINVAL; + break; + + case XFRMA_ALG_CRYPT: + if (!algp->alg_key_len && + strcmp(algp->alg_name, "cipher_null") != 0) + return -EINVAL; + break; + + case XFRMA_ALG_COMP: + /* Zero length keys are legal. */ + break; + + default: + return -EINVAL; + }; + + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + return 0; +} + +static int verify_newsa_info(struct xfrm_usersa_info *p, + struct rtattr **xfrma) +{ + int err; + + err = -EINVAL; + switch (p->family) { + case AF_INET: + break; + + case AF_INET6: +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + break; +#else + err = -EAFNOSUPPORT; + goto out; +#endif + + default: + goto out; + }; + + err = -EINVAL; + switch (p->id.proto) { + case IPPROTO_AH: + if (!xfrma[XFRMA_ALG_AUTH-1] || + xfrma[XFRMA_ALG_CRYPT-1] || + xfrma[XFRMA_ALG_COMP-1]) + goto out; + break; + + case IPPROTO_ESP: + if ((!xfrma[XFRMA_ALG_AUTH-1] && + !xfrma[XFRMA_ALG_CRYPT-1]) || + xfrma[XFRMA_ALG_COMP-1]) + goto out; + break; + + case IPPROTO_COMP: + if (!xfrma[XFRMA_ALG_COMP-1] || + xfrma[XFRMA_ALG_AUTH-1] || + xfrma[XFRMA_ALG_CRYPT-1]) + goto out; + break; + + default: + goto out; + }; + + if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) + goto out; + if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT))) + goto 
out; + if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP))) + goto out; + + err = -EINVAL; + switch (p->mode) { + case 0: + case 1: + break; + + default: + goto out; + }; + + err = 0; + +out: + return err; +} + +static int attach_one_algo(struct xfrm_algo **algpp, struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + struct xfrm_algo *p, *ualg; + + if (!rta) + return 0; + + ualg = RTA_DATA(rta); + p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len); + *algpp = p; + return 0; +} + +static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) +{ + memcpy(&x->id, &p->id, sizeof(x->id)); + memcpy(&x->sel, &p->sel, sizeof(x->sel)); + memcpy(&x->lft, &p->lft, sizeof(x->lft)); + x->props.mode = p->mode; + x->props.replay_window = p->replay_window; + x->props.reqid = p->reqid; + x->props.family = p->family; + x->props.saddr = x->sel.saddr; +} + +static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, + struct rtattr **xfrma, + int *errp) +{ + struct xfrm_state *x = xfrm_state_alloc(); + int err = -ENOMEM; + + if (!x) + goto error_no_put; + + copy_from_user_state(x, p); + + if ((err = attach_one_algo(&x->aalg, xfrma[XFRMA_ALG_AUTH-1]))) + goto error; + if ((err = attach_one_algo(&x->ealg, xfrma[XFRMA_ALG_CRYPT-1]))) + goto error; + if ((err = attach_one_algo(&x->calg, xfrma[XFRMA_ALG_COMP-1]))) + goto error; + + err = -ENOENT; + x->type = xfrm_get_type(x->id.proto, x->props.family); + if (x->type == NULL) + goto error; + + err = x->type->init_state(x, NULL); + if (err) + goto error; + + x->curlft.add_time = (unsigned long) xtime.tv_sec; + x->km.state = XFRM_STATE_VALID; + x->km.seq = p->seq; + + return x; + +error: + xfrm_state_put(x); +error_no_put: + *errp = err; + return NULL; +} + +static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_usersa_info *p = NLMSG_DATA(nlh); + struct xfrm_state *x, *x1; 
+ int err; + + err = verify_newsa_info(p, (struct rtattr **) xfrma); + if (err) + return err; + + x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err); + if (!x) + return err; + + x1 = xfrm_state_lookup(&x->props.saddr, x->id.spi, x->id.proto, x->props.family); + if (x1) { + xfrm_state_put(x); + xfrm_state_put(x1); + return -EEXIST; + } + + xfrm_state_insert(x); + + return 0; +} + +static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_state *x; + struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + + x = xfrm_state_lookup(&p->saddr, p->spi, p->proto, p->family); + if (x == NULL) + return -ESRCH; + + xfrm_state_delete(x); + xfrm_state_put(x); + + return 0; +} + +static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) +{ + memcpy(&p->id, &x->id, sizeof(p->id)); + memcpy(&p->sel, &x->sel, sizeof(p->sel)); + memcpy(&p->lft, &x->lft, sizeof(p->lft)); + memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); + memcpy(&p->stats, &x->stats, sizeof(p->stats)); + p->mode = x->props.mode; + p->replay_window = x->props.replay_window; + p->reqid = x->props.reqid; + p->family = x->props.family; + p->seq = x->km.seq; +} + +struct xfrm_dump_info { + struct sk_buff *in_skb; + struct sk_buff *out_skb; + u32 nlmsg_seq; + int start_idx; + int this_idx; +}; + +static int dump_one_state(struct xfrm_state *x, int count, void *ptr) +{ + struct xfrm_dump_info *sp = ptr; + struct sk_buff *in_skb = sp->in_skb; + struct sk_buff *skb = sp->out_skb; + struct xfrm_usersa_info *p; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + if (sp->this_idx < sp->start_idx) + goto out; + + nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, + sp->nlmsg_seq, + XFRM_MSG_NEWSA, sizeof(*p)); + nlh->nlmsg_flags = 0; + + p = NLMSG_DATA(nlh); + copy_to_user_state(x, p); + + if (x->aalg) + RTA_PUT(skb, XFRMA_ALG_AUTH, + sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); + if (x->ealg) + RTA_PUT(skb, XFRMA_ALG_CRYPT, + 
sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); + if (x->calg) + RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + + nlh->nlmsg_len = skb->tail - b; +out: + sp->this_idx++; + return 0; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct xfrm_dump_info info; + + info.in_skb = cb->skb; + info.out_skb = skb; + info.nlmsg_seq = cb->nlh->nlmsg_seq; + info.this_idx = 0; + info.start_idx = cb->args[0]; + (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); + cb->args[0] = info.this_idx; + + return skb->len; +} + +static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, + struct xfrm_state *x, u32 seq) +{ + struct xfrm_dump_info info; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + info.in_skb = in_skb; + info.out_skb = skb; + info.nlmsg_seq = seq; + info.this_idx = info.start_idx = 0; + + if (dump_one_state(x, 0, &info)) { + kfree_skb(skb); + return NULL; + } + + return skb; +} + +static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_state *x; + struct sk_buff *resp_skb; + int err; + + x = xfrm_state_lookup(&p->saddr, p->spi, p->proto, p->family); + err = -ESRCH; + if (x == NULL) + goto out_noput; + + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); + if (IS_ERR(resp_skb)) { + err = PTR_ERR(resp_skb); + } else { + err = netlink_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid, MSG_DONTWAIT); + } + xfrm_state_put(x); +out_noput: + return err; +} + +static int verify_userspi_info(struct xfrm_userspi_info *p) +{ + switch (p->info.id.proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + break; + + case IPPROTO_COMP: + /* IPCOMP spi is 16-bits. 
*/ + if (p->min >= 0x10000 || + p->max >= 0x10000) + return -EINVAL; + + default: + return -EINVAL; + }; + + if (p->min > p->max) + return -EINVAL; + + return 0; +} + +static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_state *x; + struct xfrm_userspi_info *p; + struct sk_buff *resp_skb; + int err; + + p = NLMSG_DATA(nlh); + err = verify_userspi_info(p); + if (err) + goto out_noput; + x = xfrm_find_acq(p->info.mode, p->info.reqid, p->info.id.proto, + &p->info.sel.daddr, + &p->info.sel.saddr, 1, + p->info.family); + err = -ENOENT; + if (x == NULL) + goto out_noput; + + resp_skb = ERR_PTR(-ENOENT); + + spin_lock_bh(&x->lock); + if (x->km.state != XFRM_STATE_DEAD) { + xfrm_alloc_spi(x, p->min, p->max); + if (x->id.spi) + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); + } + spin_unlock_bh(&x->lock); + + if (IS_ERR(resp_skb)) { + err = PTR_ERR(resp_skb); + goto out; + } + + err = netlink_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid, MSG_DONTWAIT); + +out: + xfrm_state_put(x); +out_noput: + return err; +} + +static int verify_policy_dir(__u8 dir) +{ + switch (dir) { + case XFRM_POLICY_IN: + case XFRM_POLICY_OUT: + case XFRM_POLICY_FWD: + break; + + default: + return -EINVAL; + }; + + return 0; +} + +static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) +{ + switch (p->share) { + case XFRM_SHARE_ANY: + case XFRM_SHARE_SESSION: + case XFRM_SHARE_USER: + case XFRM_SHARE_UNIQUE: + break; + + default: + return -EINVAL; + }; + + switch (p->action) { + case XFRM_POLICY_ALLOW: + case XFRM_POLICY_BLOCK: + break; + + default: + return -EINVAL; + }; + + switch (p->family) { + case AF_INET: + break; + + case AF_INET6: +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + break; +#else + return -EAFNOSUPPORT; +#endif + + default: + return -EINVAL; + }; + + return verify_policy_dir(p->dir); +} + +static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, + int nr) +{ + int i; + + 
xp->xfrm_nr = nr; + for (i = 0; i < nr; i++, ut++) { + struct xfrm_tmpl *t = &xp->xfrm_vec[i]; + + memcpy(&t->id, &ut->id, sizeof(struct xfrm_id)); + memcpy(&t->saddr, &ut->saddr, + sizeof(xfrm_address_t)); + t->reqid = ut->reqid; + t->mode = ut->mode; + t->share = ut->share; + t->optional = ut->optional; + t->aalgos = ut->aalgos; + t->ealgos = ut->ealgos; + t->calgos = ut->calgos; + } +} + +static int copy_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct xfrm_user_tmpl *utmpl; + int nr; + + if (!rt) { + pol->xfrm_nr = 0; + } else { + nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + + if (nr > XFRM_MAX_DEPTH) + return -EINVAL; + + copy_templates(pol, RTA_DATA(rt), nr); + } + return 0; +} + +static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) +{ + xp->priority = p->priority; + xp->index = p->index; + memcpy(&xp->selector, &p->sel, sizeof(xp->selector)); + memcpy(&xp->lft, &p->lft, sizeof(xp->lft)); + xp->action = p->action; + xp->flags = p->flags; + xp->family = p->family; + /* XXX xp->share = p->share; */ +} + +static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) +{ + memcpy(&p->sel, &xp->selector, sizeof(p->sel)); + memcpy(&p->lft, &xp->lft, sizeof(p->lft)); + memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); + p->priority = xp->priority; + p->index = xp->index; + p->family = xp->family; + p->dir = dir; + p->action = xp->action; + p->flags = xp->flags; + p->share = XFRM_SHARE_ANY; /* XXX xp->share */ +} + +static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp) +{ + struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); + int err; + + if (!xp) { + *errp = -ENOMEM; + return NULL; + } + + copy_from_user_policy(xp, p); + err = copy_user_tmpl(xp, xfrma); + if (err) { + *errp = err; + kfree(xp); + xp = NULL; + } + + return xp; +} + +static int 
xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); + struct xfrm_policy *xp; + int err; + + err = verify_newpolicy_info(p); + if (err) + return err; + + xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err); + if (!xp) + return err; + + err = xfrm_policy_insert(p->dir, xp, 1); + if (err) { + kfree(xp); + return err; + } + + xfrm_pol_put(xp); + + return 0; +} + +static int xfrm_del_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_policy *xp; + struct xfrm_userpolicy_id *p; + int err; + + p = NLMSG_DATA(nlh); + + err = verify_policy_dir(p->dir); + if (err) + return err; + + xp = xfrm_policy_delete(p->dir, &p->sel); + if (xp == NULL) + return -ENOENT; + xfrm_policy_kill(xp); + xfrm_pol_put(xp); + return 0; +} + +static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) +{ + struct xfrm_dump_info *sp = ptr; + struct xfrm_userpolicy_info *p; + struct sk_buff *in_skb = sp->in_skb; + struct sk_buff *skb = sp->out_skb; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + if (sp->this_idx < sp->start_idx) + goto out; + + nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, + sp->nlmsg_seq, + XFRM_MSG_NEWPOLICY, sizeof(*p)); + p = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + copy_to_user_policy(xp, p, dir); + + if (xp->xfrm_nr) { + struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH]; + int i; + + for (i = 0; i < xp->xfrm_nr; i++) { + struct xfrm_user_tmpl *up = &vec[i]; + struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + + memcpy(&up->id, &kp->id, sizeof(up->id)); + memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); + up->reqid = kp->reqid; + up->mode = kp->mode; + up->share = kp->share; + up->optional = kp->optional; + up->aalgos = kp->aalgos; + up->ealgos = kp->ealgos; + up->calgos = kp->calgos; + } + RTA_PUT(skb, XFRMA_TMPL, + (sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), + vec); + } + + nlh->nlmsg_len = skb->tail - b; +out: + sp->this_idx++; + 
return 0; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct xfrm_dump_info info; + + info.in_skb = cb->skb; + info.out_skb = skb; + info.nlmsg_seq = cb->nlh->nlmsg_seq; + info.this_idx = 0; + info.start_idx = cb->args[0]; + (void) xfrm_policy_walk(dump_one_policy, &info); + cb->args[0] = info.this_idx; + + return skb->len; +} + +static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, + struct xfrm_policy *xp, + int dir, u32 seq) +{ + struct xfrm_dump_info info; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + info.in_skb = in_skb; + info.out_skb = skb; + info.nlmsg_seq = seq; + info.this_idx = info.start_idx = 0; + + if (dump_one_policy(xp, dir, 0, &info) < 0) { + kfree_skb(skb); + return NULL; + } + + return skb; +} + +static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) +{ + struct xfrm_policy *xp; + struct xfrm_userpolicy_id *p; + struct sk_buff *resp_skb; + int err; + + p = NLMSG_DATA(nlh); + xp = xfrm_policy_byid(p->dir, p->index, 0); + if (xp == NULL) + return -ENOENT; + + resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq); + if (IS_ERR(resp_skb)) { + err = PTR_ERR(resp_skb); + } else { + err = netlink_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid, MSG_DONTWAIT); + } + + xfrm_pol_put(xp); + + return err; +} + +static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = { + NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)), /* NEW SA */ + NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* DEL SA */ + NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)), /* GET SA */ + NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */ + NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* DEL POLICY */ + NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)), /* GET POLICY */ + 
NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)), /* ALLOC SPI */ + NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)), /* ACQUIRE */ + NLMSG_LENGTH(sizeof(struct xfrm_user_expire)), /* EXPIRE */ +}; + +static struct xfrm_link { + int (*doit)(struct sk_buff *, struct nlmsghdr *, void **); + int (*dump)(struct sk_buff *, struct netlink_callback *); +} xfrm_dispatch[] = { + { .doit = xfrm_add_sa, }, + { .doit = xfrm_del_sa, }, + { + .doit = xfrm_get_sa, + .dump = xfrm_dump_sa, + }, + { .doit = xfrm_add_policy }, + { .doit = xfrm_del_policy }, + { + .doit = xfrm_get_policy, + .dump = xfrm_dump_policy, + }, + { .doit = xfrm_alloc_userspi }, +}; + +static int xfrm_done(struct netlink_callback *cb) +{ + return 0; +} + +static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) +{ + struct rtattr *xfrma[XFRMA_MAX]; + struct xfrm_link *link; + int type, min_len; + + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) + return 0; + + type = nlh->nlmsg_type; + + /* A control message: ignore them */ + if (type < XFRM_MSG_BASE) + return 0; + + /* Unknown message: reply with EINVAL */ + if (type > XFRM_MSG_MAX) + goto err_einval; + + type -= XFRM_MSG_BASE; + link = &xfrm_dispatch[type]; + + /* All operations require privileges, even GET */ + if (security_netlink_recv(skb)) { + *errp = -EPERM; + return -1; + } + + if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) { + u32 rlen; + + if (link->dump == NULL) + goto err_einval; + + if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, + link->dump, + xfrm_done)) != 0) { + return -1; + } + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return -1; + } + + memset(xfrma, 0, sizeof(xfrma)); + + if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) + goto err_einval; + + if (nlh->nlmsg_len > min_len) { + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len); + + while (RTA_OK(attr, attrlen)) { + unsigned 
short flavor = attr->rta_type; + if (flavor) { + if (flavor > XFRMA_MAX) + goto err_einval; + xfrma[flavor - 1] = attr; + } + attr = RTA_NEXT(attr, attrlen); + } + } + + if (link->doit == NULL) + goto err_einval; + *errp = link->doit(skb, nlh, (void **) &xfrma); + + return *errp; + +err_einval: + *errp = -EINVAL; + return -1; +} + +static int xfrm_user_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr *nlh; + + while (skb->len >= NLMSG_SPACE(0)) { + u32 rlen; + + nlh = (struct nlmsghdr *) skb->data; + if (nlh->nlmsg_len < sizeof(*nlh) || + skb->len < nlh->nlmsg_len) + return 0; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + if (xfrm_user_rcv_msg(skb, nlh, &err)) { + if (err == 0) + return -1; + netlink_ack(skb, nlh, err); + } else if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + skb_pull(skb, rlen); + } + + return 0; +} + +static void xfrm_netlink_rcv(struct sock *sk, int len) +{ + do { + struct sk_buff *skb; + + down(&xfrm_cfg_sem); + + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + if (xfrm_user_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->receive_queue, skb); + else + kfree_skb(skb); + break; + } + kfree_skb(skb); + } + + up(&xfrm_cfg_sem); + + } while (xfrm_nl && xfrm_nl->receive_queue.qlen); +} + +static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard) +{ + struct xfrm_user_expire *ue; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE, + sizeof(*ue)); + ue = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + copy_to_user_state(x, &ue->state); + ue->hard = (hard != 0) ? 
1 : 0; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_notify(struct xfrm_state *x, int hard) +{ + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_expire(skb, x, hard) < 0) + BUG(); + + NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; + + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); +} + +static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, + struct xfrm_tmpl *xt, struct xfrm_policy *xp, + int dir) +{ + struct xfrm_user_acquire *ua; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + __u32 seq = xfrm_get_acqseq(); + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, + sizeof(*ua)); + ua = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + memcpy(&ua->id, &x->id, sizeof(ua->id)); + memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); + copy_to_user_policy(xp, &ua->policy, dir); + ua->aalgos = xt->aalgos; + ua->ealgos = xt->ealgos; + ua->calgos = xt->calgos; + ua->seq = x->km.seq = seq; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, + struct xfrm_policy *xp, int dir) +{ + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct xfrm_user_acquire) + 16, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_acquire(skb, x, xt, xp, dir) < 0) + BUG(); + + NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE; + + return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC); +} + +/* User gives us xfrm_user_policy_info followed by an array of 0 + * or more templates. 
+ */ +struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, + u8 *data, int len, int *dir) +{ + struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; + struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); + struct xfrm_policy *xp; + int nr; + + switch (family) { + case AF_INET: + if (opt != IP_XFRM_POLICY) { + *dir = -EOPNOTSUPP; + return NULL; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + if (opt != IPV6_XFRM_POLICY) { + *dir = -EOPNOTSUPP; + return NULL; + } + break; +#endif + default: + *dir = -EINVAL; + return NULL; + } + + *dir = -EINVAL; + + if (len < sizeof(*p) || + verify_newpolicy_info(p)) + return NULL; + + nr = ((len - sizeof(*p)) / sizeof(*ut)); + if (nr > XFRM_MAX_DEPTH) + return NULL; + + xp = xfrm_policy_alloc(GFP_KERNEL); + if (xp == NULL) { + *dir = -ENOBUFS; + return NULL; + } + + copy_from_user_policy(xp, p); + copy_templates(xp, ut, nr); + + *dir = p->dir; + + return xp; +} + +static struct xfrm_mgr netlink_mgr = { + .id = "netlink", + .notify = xfrm_send_notify, + .acquire = xfrm_send_acquire, + .compile_policy = xfrm_compile_policy, +}; + +static int __init xfrm_user_init(void) +{ + printk(KERN_INFO "Initializing IPsec netlink socket\n"); + + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); + if (xfrm_nl == NULL) + panic("xfrm_user_init: cannot initialize xfrm_nl\n"); + + + xfrm_register_km(&netlink_mgr); + + return 0; +} + +static void __exit xfrm_user_exit(void) +{ + xfrm_unregister_km(&netlink_mgr); + sock_release(xfrm_nl->socket); +} + +module_init(xfrm_user_init); +module_exit(xfrm_user_exit);