diff options
author | jdike <jdike> | 2003-09-14 03:02:53 +0000 |
---|---|---|
committer | jdike <jdike> | 2003-09-14 03:02:53 +0000 |
commit | cace4203102952daf57845d3a522e46d6a12081d (patch) | |
tree | ef4f8621cae50eca66fc4adb573fc638987afa09 | |
parent | 1fbf59b4136161b4bbc9cbf502f0b7c87a777d21 (diff) | |
download | uml-history-cace4203102952daf57845d3a522e46d6a12081d.tar.gz |
Added the skeleton of /dev/anon.
-rw-r--r-- | arch/um/config_char.in | 1 | ||||
-rw-r--r-- | drivers/char/Config.in | 350 | ||||
-rw-r--r-- | drivers/char/Makefile | 2 | ||||
-rw-r--r-- | drivers/char/anon.c | 36 | ||||
-rw-r--r-- | drivers/char/mem.c | 774 | ||||
-rw-r--r-- | mm/shmem.c | 1701 |
6 files changed, 2864 insertions, 0 deletions
diff --git a/arch/um/config_char.in b/arch/um/config_char.in index dfb87d6..1b164c7 100644 --- a/arch/um/config_char.in +++ b/arch/um/config_char.in @@ -4,6 +4,7 @@ comment 'Character Devices' define_bool CONFIG_STDIO_CONSOLE y bool 'Virtual serial line' CONFIG_SSL +bool 'Anonymous memory device' CONFIG_DEV_ANON bool 'file descriptor channel support' CONFIG_FD_CHAN bool 'null channel support' CONFIG_NULL_CHAN diff --git a/drivers/char/Config.in b/drivers/char/Config.in new file mode 100644 index 0000000..824968f --- /dev/null +++ b/drivers/char/Config.in @@ -0,0 +1,350 @@ +# +# Character device configuration +# +mainmenu_option next_comment +comment 'Character devices' + +bool 'Virtual terminal' CONFIG_VT +bool 'Anonymous memory device' CONFIG_DEV_ANON + +if [ "$CONFIG_VT" = "y" ]; then + bool ' Support for console on virtual terminal' CONFIG_VT_CONSOLE + if [ "$CONFIG_GSC_LASI" = "y" ]; then + bool ' Support for Lasi/Dino PS2 port' CONFIG_GSC_PS2 + fi +fi +tristate 'Standard/generic (8250/16550 and compatible UARTs) serial support' CONFIG_SERIAL +if [ "$CONFIG_SERIAL" = "y" ]; then + bool ' Support for console on serial port' CONFIG_SERIAL_CONSOLE + if [ "$CONFIG_GSC_LASI" = "y" ]; then + bool ' serial port on GSC support' CONFIG_SERIAL_GSC + fi + if [ "$CONFIG_IA64" = "y" ]; then + bool ' Support for serial port described by EFI HCDP table' CONFIG_SERIAL_HCDP + fi + if [ "$CONFIG_ARCH_ACORN" = "y" ]; then + tristate ' Atomwide serial port support' CONFIG_ATOMWIDE_SERIAL + tristate ' Dual serial port support' CONFIG_DUALSP_SERIAL + fi +fi +dep_mbool 'Extended dumb serial driver options' CONFIG_SERIAL_EXTENDED $CONFIG_SERIAL +if [ "$CONFIG_SERIAL_EXTENDED" = "y" ]; then + bool ' Support more than 4 serial ports' CONFIG_SERIAL_MANY_PORTS + bool ' Support for sharing serial interrupts' CONFIG_SERIAL_SHARE_IRQ + bool ' Autodetect IRQ on standard ports (unsafe)' CONFIG_SERIAL_DETECT_IRQ + bool ' Support special multiport boards' CONFIG_SERIAL_MULTIPORT + bool ' Support 
the Bell Technologies HUB6 card' CONFIG_HUB6 +fi +bool 'Non-standard serial port support' CONFIG_SERIAL_NONSTANDARD +if [ "$CONFIG_SERIAL_NONSTANDARD" = "y" ]; then + tristate ' Computone IntelliPort Plus serial support' CONFIG_COMPUTONE + tristate ' Comtrol Rocketport support' CONFIG_ROCKETPORT + tristate ' Cyclades async mux support' CONFIG_CYCLADES + if [ "$CONFIG_EXPERIMENTAL" = "y" -a "$CONFIG_CYCLADES" != "n" ]; then + bool ' Cyclades-Z interrupt mode operation (EXPERIMENTAL)' CONFIG_CYZ_INTR + fi + if [ "$CONFIG_X86_64" != "y" ]; then + tristate ' Digiboard Intelligent Async Support' CONFIG_DIGIEPCA + if [ "$CONFIG_DIGIEPCA" = "n" ]; then + tristate ' Digiboard PC/Xx Support' CONFIG_DIGI + fi + fi + dep_tristate ' Hayes ESP serial port support' CONFIG_ESPSERIAL $CONFIG_ISA + tristate ' Moxa Intellio support' CONFIG_MOXA_INTELLIO + tristate ' Moxa SmartIO support' CONFIG_MOXA_SMARTIO + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_tristate ' Multi-Tech multiport card support (EXPERIMENTAL)' CONFIG_ISI m + fi + tristate ' Microgate SyncLink card support' CONFIG_SYNCLINK + tristate ' SyncLink Multiport support' CONFIG_SYNCLINKMP + tristate ' HDLC line discipline support' CONFIG_N_HDLC + tristate ' SDL RISCom/8 card support' CONFIG_RISCOM8 + if [ "$CONFIG_X86_64" != "y" ]; then + tristate ' Specialix IO8+ card support' CONFIG_SPECIALIX + if [ "$CONFIG_SPECIALIX" != "n" ]; then + bool ' Specialix DTR/RTS pin is RTS' CONFIG_SPECIALIX_RTSCTS + fi + tristate ' Specialix SX (and SI) card support' CONFIG_SX + tristate ' Specialix RIO system support' CONFIG_RIO + if [ "$CONFIG_RIO" != "n" ]; then + bool ' Support really old RIO/PCI cards' CONFIG_RIO_OLDPCI + fi + fi + bool ' Stallion multiport serial support' CONFIG_STALDRV + if [ "$CONFIG_STALDRV" = "y" ]; then + tristate ' Stallion EasyIO or EC8/32 support' CONFIG_STALLION + tristate ' Stallion EC8/64, ONboard, Brumby support' CONFIG_ISTALLION + fi + if [ "$CONFIG_PARISC" = "y" ]; then + if [ "$CONFIG_PDC_CONSOLE" 
!= "y" ]; then + bool ' Serial MUX support' CONFIG_SERIAL_MUX CONFIG_SERIAL_NONSTANDARD + fi + if [ "$CONFIG_SERIAL_MUX" != "y" ]; then + bool ' PDC software console support' CONFIG_PDC_CONSOLE CONFIG_SERIAL_NONSTANDARD + fi + fi + if [ "$CONFIG_MIPS" = "y" ]; then + bool ' TX3912/PR31700 serial port support' CONFIG_SERIAL_TX3912 + dep_bool ' Console on TX3912/PR31700 serial port' CONFIG_SERIAL_TX3912_CONSOLE $CONFIG_SERIAL_TX3912 + bool ' Enable Au1000 UART Support' CONFIG_AU1000_UART + if [ "$CONFIG_AU1000_UART" = "y" ]; then + bool ' Enable Au1000 serial console' CONFIG_AU1000_SERIAL_CONSOLE + fi + bool 'TXx927 SIO support' CONFIG_TXX927_SERIAL + if [ "$CONFIG_TXX927_SERIAL" = "y" ]; then + bool 'TXx927 SIO Console support' CONFIG_TXX927_SERIAL_CONSOLE + fi + if [ "$CONFIG_SIBYTE_SB1250" = "y" ]; then + bool ' Support for sb1250 onchip DUART' CONFIG_SIBYTE_SB1250_DUART + if [ "$CONFIG_SIBYTE_SB1250_DUART" = "y" ]; then + bool ' Console on SB1250 DUART' CONFIG_SIBYTE_SB1250_DUART_CONSOLE + if [ "$CONFIG_SIBYTE_SB1250_DUART_CONSOLE" = "y" ]; then + define_bool CONFIG_SERIAL_CONSOLE y + fi + int ' Output buffers size (in bytes)' CONFIG_SB1250_DUART_OUTPUT_BUF_SIZE 1024 + bool ' Leave port 1 alone (for kgdb or audio)' CONFIG_SIBYTE_SB1250_DUART_NO_PORT_1 + fi + fi + fi +fi +if [ "$CONFIG_EXPERIMENTAL" = "y" -a "$CONFIG_ZORRO" = "y" ]; then + tristate 'Commodore A2232 serial support (EXPERIMENTAL)' CONFIG_A2232 +fi +if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then + bool 'DC21285 serial port support' CONFIG_SERIAL_21285 + if [ "$CONFIG_SERIAL_21285" = "y" ]; then + if [ "$CONFIG_OBSOLETE" = "y" ]; then + bool ' Use /dev/ttyS0 device (OBSOLETE)' CONFIG_SERIAL_21285_OLD + fi + bool ' Console on DC21285 serial port' CONFIG_SERIAL_21285_CONSOLE + fi + if [ "$CONFIG_MIPS" = "y" ]; then + bool ' TMPTX3912/PR31700 serial port support' CONFIG_SERIAL_TX3912 + dep_bool ' Console on TMPTX3912/PR31700 serial port' CONFIG_SERIAL_TX3912_CONSOLE $CONFIG_SERIAL_TX3912 + bool ' Enable Au1000 
UART Support' CONFIG_AU1000_UART + if [ "$CONFIG_AU1000_UART" = "y" ]; then + bool ' Enable Au1000 serial console' CONFIG_AU1000_SERIAL_CONSOLE + fi + fi + if [ "$CONFIG_PARISC" = "y" ]; then + bool ' PDC software console support' CONFIG_PDC_CONSOLE + fi +fi +if [ "$CONFIG_MIPS_ITE8172" = "y" ]; then + bool 'Enable Qtronix 990P Keyboard Support' CONFIG_QTRONIX_KEYBOARD + if [ "$CONFIG_QTRONIX_KEYBOARD" = "y" ]; then + define_bool CONFIG_IT8172_CIR y + else + bool ' Enable PS2 Keyboard Support' CONFIG_PC_KEYB + fi + bool 'Enable Smart Card Reader 0 Support ' CONFIG_IT8172_SCR0 + bool 'Enable Smart Card Reader 1 Support ' CONFIG_IT8172_SCR1 +fi +if [ "$CONFIG_MIPS_IVR" = "y" ]; then + bool 'Enable Qtronix 990P Keyboard Support' CONFIG_QTRONIX_KEYBOARD + if [ "$CONFIG_QTRONIX_KEYBOARD" = "y" ]; then + define_bool CONFIG_IT8172_CIR y + fi + bool 'Enable Smart Card Reader 0 Support ' CONFIG_IT8172_SCR0 +fi +bool 'Unix98 PTY support' CONFIG_UNIX98_PTYS +if [ "$CONFIG_UNIX98_PTYS" = "y" ]; then + int 'Maximum number of Unix98 PTYs in use (0-2048)' CONFIG_UNIX98_PTY_COUNT 256 +fi +if [ "$CONFIG_PARPORT" != "n" ]; then + dep_tristate 'Parallel printer support' CONFIG_PRINTER $CONFIG_PARPORT + if [ "$CONFIG_PRINTER" != "n" ]; then + bool ' Support for console on line printer' CONFIG_LP_CONSOLE + fi + dep_tristate 'Support for user-space parallel port device drivers' CONFIG_PPDEV $CONFIG_PARPORT + dep_tristate 'Texas Instruments parallel link cable support' CONFIG_TIPAR $CONFIG_PARPORT +fi + +if [ "$CONFIG_PPC64" ] ; then + bool 'pSeries Hypervisor Virtual Console support' CONFIG_HVC_CONSOLE +fi +if [ "$CONFIG_ALL_PPC" = "y" ]; then + tristate 'Total Impact briQ front panel driver' CONFIG_BRIQ_PANEL +fi + +source drivers/i2c/Config.in + +mainmenu_option next_comment +comment 'Mice' +tristate 'Bus Mouse Support' CONFIG_BUSMOUSE +if [ "$CONFIG_BUSMOUSE" != "n" ]; then + dep_tristate ' ATIXL busmouse support' CONFIG_ATIXL_BUSMOUSE $CONFIG_BUSMOUSE + dep_tristate ' Logitech 
busmouse support' CONFIG_LOGIBUSMOUSE $CONFIG_BUSMOUSE + dep_tristate ' Microsoft busmouse support' CONFIG_MS_BUSMOUSE $CONFIG_BUSMOUSE + if [ "$CONFIG_ADB" = "y" -a "$CONFIG_ADB_KEYBOARD" = "y" ]; then + dep_tristate ' Apple Desktop Bus mouse support (old driver)' CONFIG_ADBMOUSE $CONFIG_BUSMOUSE + fi +fi + +tristate 'Mouse Support (not serial and bus mice)' CONFIG_MOUSE +if [ "$CONFIG_MOUSE" != "n" ]; then + bool ' PS/2 mouse (aka "auxiliary device") support' CONFIG_PSMOUSE + tristate ' C&T 82C710 mouse port support (as on TI Travelmate)' CONFIG_82C710_MOUSE + tristate ' PC110 digitizer pad support' CONFIG_PC110_PAD + tristate ' MK712 touch screen support' CONFIG_MK712_MOUSE +fi +endmenu + +source drivers/char/joystick/Config.in + +tristate 'QIC-02 tape support' CONFIG_QIC02_TAPE +if [ "$CONFIG_QIC02_TAPE" != "n" ]; then + bool ' Do you want runtime configuration for QIC-02' CONFIG_QIC02_DYNCONF + if [ "$CONFIG_QIC02_DYNCONF" != "y" ]; then + comment ' Edit configuration parameters in ./include/linux/tpqic02.h!' + else + comment ' Setting runtime QIC-02 configuration is done with qic02conf' + comment ' from the tpqic02-support package. 
It is available at' + comment ' metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/' + fi +fi + +tristate 'IPMI top-level message handler' CONFIG_IPMI_HANDLER +dep_mbool ' Generate a panic event to all BMCs on a panic' CONFIG_IPMI_PANIC_EVENT $CONFIG_IPMI_HANDLER +dep_tristate ' Device interface for IPMI' CONFIG_IPMI_DEVICE_INTERFACE $CONFIG_IPMI_HANDLER +dep_tristate ' IPMI KCS handler' CONFIG_IPMI_KCS $CONFIG_IPMI_HANDLER +dep_tristate ' IPMI Watchdog Timer' CONFIG_IPMI_WATCHDOG $CONFIG_IPMI_HANDLER + +mainmenu_option next_comment +comment 'Watchdog Cards' +bool 'Watchdog Timer Support' CONFIG_WATCHDOG +if [ "$CONFIG_WATCHDOG" != "n" ]; then + bool ' Disable watchdog shutdown on close' CONFIG_WATCHDOG_NOWAYOUT + tristate ' Acquire SBC Watchdog Timer' CONFIG_ACQUIRE_WDT + tristate ' Advantech SBC Watchdog Timer' CONFIG_ADVANTECH_WDT + tristate ' ALi M7101 PMU on ALi 1535D+ Watchdog Timer' CONFIG_ALIM1535_WDT + tristate ' ALi M7101 PMU Watchdog Timer' CONFIG_ALIM7101_WDT + tristate ' AMD "Elan" SC520 Watchdog Timer' CONFIG_SC520_WDT + tristate ' Berkshire Products PC Watchdog' CONFIG_PCWATCHDOG + if [ "$CONFIG_FOOTBRIDGE" = "y" ]; then + tristate ' DC21285 watchdog' CONFIG_21285_WATCHDOG + if [ "$CONFIG_ARCH_NETWINDER" = "y" ]; then + tristate ' NetWinder WB83C977 watchdog' CONFIG_977_WATCHDOG + fi + fi + tristate ' Eurotech CPU-1220/1410 Watchdog Timer' CONFIG_EUROTECH_WDT + tristate ' IB700 SBC Watchdog Timer' CONFIG_IB700_WDT + tristate ' ICP ELectronics Wafer 5823 Watchdog' CONFIG_WAFER_WDT + if [ "$CONFIG_SGI_IP22" = "y" ]; then + dep_tristate ' Indy/I2 Hardware Watchdog' CONFIG_INDYDOG $CONFIG_SGI_IP22 + fi + tristate ' Intel i810 TCO timer / Watchdog' CONFIG_I810_TCO + tristate ' Mixcom Watchdog' CONFIG_MIXCOMWD + tristate ' SBC-60XX Watchdog Timer' CONFIG_60XX_WDT + dep_tristate ' SC1200 Watchdog Timer (EXPERIMENTAL)' CONFIG_SC1200_WDT $CONFIG_EXPERIMENTAL + tristate ' NatSemi SCx200 Watchdog' CONFIG_SCx200_WDT + tristate ' Software Watchdog' 
CONFIG_SOFT_WATCHDOG + tristate ' W83877F (EMACS) Watchdog Timer' CONFIG_W83877F_WDT + tristate ' WDT Watchdog timer' CONFIG_WDT + tristate ' WDT PCI Watchdog timer' CONFIG_WDTPCI + if [ "$CONFIG_WDT" != "n" ]; then + bool ' WDT501 features' CONFIG_WDT_501 + if [ "$CONFIG_WDT_501" = "y" ]; then + bool ' Fan Tachometer' CONFIG_WDT_501_FAN + fi + fi + tristate ' ZF MachZ Watchdog' CONFIG_MACHZ_WDT + dep_tristate ' AMD 766/768 TCO Timer/Watchdog' CONFIG_AMD7XX_TCO $CONFIG_EXPERIMENTAL +fi +endmenu + +if [ "$CONFIG_ARCH_NETWINDER" = "y" ]; then + tristate 'NetWinder thermometer support' CONFIG_DS1620 + tristate 'NetWinder Button' CONFIG_NWBUTTON + if [ "$CONFIG_NWBUTTON" != "n" ]; then + bool ' Reboot Using Button' CONFIG_NWBUTTON_REBOOT + fi + tristate 'NetWinder flash support' CONFIG_NWFLASH +fi +dep_tristate 'NatSemi SCx200 GPIO Support' CONFIG_SCx200_GPIO $CONFIG_SCx200 + +if [ "$CONFIG_X86" = "y" -o "$CONFIG_X86_64" = "y" ]; then + dep_tristate 'AMD 768/8111 Random Number Generator support' CONFIG_AMD_RNG $CONFIG_PCI +fi +if [ "$CONFIG_X86" = "y" -o "$CONFIG_IA64" = "y" ]; then + dep_tristate 'Intel i8x0 Random Number Generator support' CONFIG_INTEL_RNG $CONFIG_PCI +fi +dep_tristate 'AMD 76x native power management (Experimental)' CONFIG_AMD_PM768 $CONFIG_PCI +tristate '/dev/nvram support' CONFIG_NVRAM +tristate 'Enhanced Real Time Clock Support' CONFIG_RTC +if [ "$CONFIG_IA64" = "y" ]; then + bool 'EFI Real Time Clock Services' CONFIG_EFI_RTC +fi +if [ "$CONFIG_OBSOLETE" = "y" -a "$CONFIG_ALPHA_BOOK1" = "y" ]; then + bool 'Tadpole ANA H8 Support (OBSOLETE)' CONFIG_H8 +fi + +tristate 'Double Talk PC internal speech card support' CONFIG_DTLK +tristate 'Siemens R3964 line discipline' CONFIG_R3964 +tristate 'Applicom intelligent fieldbus card support' CONFIG_APPLICOM +if [ "$CONFIG_EXPERIMENTAL" = "y" -a "$CONFIG_X86" = "y" -a "$CONFIG_X86_64" != "y" ]; then + dep_tristate 'Sony Vaio Programmable I/O Control Device support (EXPERIMENTAL)' CONFIG_SONYPI $CONFIG_PCI 
+fi + +mainmenu_option next_comment +comment 'Ftape, the floppy tape device driver' +tristate 'Ftape (QIC-80/Travan) support' CONFIG_FTAPE +if [ "$CONFIG_FTAPE" != "n" ]; then + source drivers/char/ftape/Config.in +fi +endmenu + +if [ "$CONFIG_GART_IOMMU" = "y" ]; then + bool '/dev/agpgart (AGP Support)' CONFIG_AGP + define_bool CONFIG_AGP_AMD_8151 y +else + tristate '/dev/agpgart (AGP Support)' CONFIG_AGP +fi +if [ "$CONFIG_AGP" != "n" ]; then + bool ' Intel 440LX/BX/GX and I815/I820/I830M/I830MP/I840/I845/I850/I860 support' CONFIG_AGP_INTEL + bool ' Intel I810/I815/I830M (on-board) support' CONFIG_AGP_I810 + bool ' VIA chipset support' CONFIG_AGP_VIA + bool ' AMD Irongate, 761, and 762 support' CONFIG_AGP_AMD + if [ "$CONFIG_GART_IOMMU" != "y" ]; then + bool ' AMD 8151 support' CONFIG_AGP_AMD_8151 + fi + bool ' Generic SiS support' CONFIG_AGP_SIS + bool ' ALI chipset support' CONFIG_AGP_ALI + bool ' Serverworks LE/HE support' CONFIG_AGP_SWORKS + if [ "$CONFIG_X86" = "y" ]; then + bool ' NVIDIA chipset support' CONFIG_AGP_NVIDIA + fi + if [ "$CONFIG_IA64" = "y" ]; then + bool ' HP ZX1 AGP support' CONFIG_AGP_HP_ZX1 + fi +fi + +bool 'Direct Rendering Manager (XFree86 DRI support)' CONFIG_DRM +if [ "$CONFIG_DRM" = "y" ]; then + bool ' Build drivers for old (XFree 4.0) DRM' CONFIG_DRM_OLD + if [ "$CONFIG_DRM_OLD" = "y" ]; then + comment 'DRM 4.0 drivers' + source drivers/char/drm-4.0/Config.in + else + comment 'DRM 4.1 drivers' + define_bool CONFIG_DRM_NEW y + source drivers/char/drm/Config.in + fi +fi + +if [ "$CONFIG_HOTPLUG" = "y" -a "$CONFIG_PCMCIA" != "n" ]; then + source drivers/char/pcmcia/Config.in +fi +if [ "$CONFIG_MIPS_AU1000" = "y" ]; then + tristate ' Alchemy Au1000 GPIO device support' CONFIG_AU1000_GPIO + tristate ' Au1000/ADS7846 touchscreen support' CONFIG_TS_AU1000_ADS7846 +fi +if [ "$CONFIG_MIPS_ITE8172" = "y" ]; then + tristate ' ITE GPIO' CONFIG_ITE_GPIO +fi + +if [ "$CONFIG_X86" = "y" ]; then + tristate 'ACP Modem (Mwave) support' CONFIG_MWAVE 
+fi + +endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 7ce93bc..8ac4b8d 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -319,6 +319,8 @@ ifeq ($(CONFIG_IPMI_HANDLER),y) obj-y += ipmi/ipmi.o endif +obj-$(CONFIG_DEV_ANON) += anon.o + include $(TOPDIR)/Rules.make fastdep: diff --git a/drivers/char/anon.c b/drivers/char/anon.c new file mode 100644 index 0000000..796f229 --- /dev/null +++ b/drivers/char/anon.c @@ -0,0 +1,36 @@ +#include <linux/fs.h> +#include <linux/mm.h> + +static int mmap_anon(struct file *file, struct vm_area_struct *vma) +{ + loff_t size = vma->vm_end - vma->vm_start; + int err; + + if(file->private_data == NULL){ + err = shmem_zero_setup(vma); + if(err) + return(err); + } + else { + vma->vm_file = file->private_data; +/* vma->vm_ops = &shmem_vm_ops; */ + } + + file->private_data = vma->vm_file; + return 0; +} + +struct file_operations anon_fops = { + .mmap = mmap_anon, +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/drivers/char/mem.c b/drivers/char/mem.c new file mode 100644 index 0000000..169c0bc --- /dev/null +++ b/drivers/char/mem.c @@ -0,0 +1,774 @@ +/* + * linux/drivers/char/mem.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Added devfs support. + * Jan-11-1998, C. 
Scott Ananian <cananian@alumni.princeton.edu> + * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com> + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/tpqic02.h> +#include <linux/ftape.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mman.h> +#include <linux/random.h> +#include <linux/init.h> +#include <linux/raw.h> +#include <linux/tty.h> +#include <linux/capability.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/pgalloc.h> + +#ifdef CONFIG_I2C +extern int i2c_init_all(void); +#endif +#ifdef CONFIG_FB +extern void fbmem_init(void); +#endif +#ifdef CONFIG_PROM_CONSOLE +extern void prom_con_init(void); +#endif +#ifdef CONFIG_MDA_CONSOLE +extern void mda_console_init(void); +#endif +#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR) +extern void tapechar_init(void); +#endif + +static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp, + const char * buf, size_t count, loff_t *ppos) +{ + ssize_t written; + + written = 0; +#if defined(__sparc__) || defined(__mc68000__) + /* we don't have page 0 mapped on sparc and m68k.. */ + if (realp < PAGE_SIZE) { + unsigned long sz = PAGE_SIZE-realp; + if (sz > count) sz = count; + /* Hmm. Do something? */ + buf+=sz; + p+=sz; + count-=sz; + written+=sz; + } +#endif + if (copy_from_user(p, buf, count)) + return -EFAULT; + written += count; + *ppos += written; + return written; +} + + +/* + * This funcion reads the *physical* memory. The f_pos points directly to the + * memory location. + */ +static ssize_t read_mem(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + unsigned long end_mem; + ssize_t read; + + end_mem = __pa(high_memory); + if (p >= end_mem) + return 0; + if (count > end_mem - p) + count = end_mem - p; + read = 0; +#if defined(__sparc__) || defined(__mc68000__) + /* we don't have page 0 mapped on sparc and m68k.. 
*/ + if (p < PAGE_SIZE) { + unsigned long sz = PAGE_SIZE-p; + if (sz > count) + sz = count; + if (sz > 0) { + if (clear_user(buf, sz)) + return -EFAULT; + buf += sz; + p += sz; + count -= sz; + read += sz; + } + } +#endif + if (copy_to_user(buf, __va(p), count)) + return -EFAULT; + read += count; + *ppos += read; + return read; +} + +static ssize_t write_mem(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + unsigned long end_mem; + + end_mem = __pa(high_memory); + if (p >= end_mem) + return 0; + if (count > end_mem - p) + count = end_mem - p; + return do_write_mem(file, __va(p), p, buf, count, ppos); +} + +#ifndef pgprot_noncached + +/* + * This should probably be per-architecture in <asm/pgtable.h> + */ +static inline pgprot_t pgprot_noncached(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + +#if defined(__i386__) || defined(__x86_64__) + /* On PPro and successors, PCD alone doesn't always mean + uncached because of interactions with the MTRRs. PCD | PWT + means definitely uncached. */ + if (boot_cpu_data.x86 > 3) + prot |= _PAGE_PCD | _PAGE_PWT; +#elif defined(__powerpc__) + prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; +#elif defined(__mc68000__) +#ifdef SUN3_PAGE_NOCACHE + if (MMU_IS_SUN3) + prot |= SUN3_PAGE_NOCACHE; + else +#endif + if (MMU_IS_851 || MMU_IS_030) + prot |= _PAGE_NOCACHE030; + /* Use no-cache mode, serialized */ + else if (MMU_IS_040 || MMU_IS_060) + prot = (prot & _CACHEMASK040) | _PAGE_NOCACHE_S; +#endif + + return __pgprot(prot); +} + +#endif /* !pgprot_noncached */ + +/* + * Architectures vary in how they handle caching for addresses + * outside of main memory. + */ +static inline int noncached_address(unsigned long addr) +{ +#if defined(__i386__) + /* + * On the PPro and successors, the MTRRs are used to set + * memory types for physical addresses outside main memory, + * so blindly setting PCD or PWT on those pages is wrong. 
+ * For Pentiums and earlier, the surround logic should disable + * caching for the high addresses through the KEN pin, but + * we maintain the tradition of paranoia in this code. + */ + return !( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) ) + && addr >= __pa(high_memory); +#else + return addr >= __pa(high_memory); +#endif +} + +static int mmap_mem(struct file * file, struct vm_area_struct * vma) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + + /* + * Accessing memory above the top the kernel knows about or + * through a file pointer that was marked O_SYNC will be + * done non-cached. + */ + if (noncached_address(offset) || (file->f_flags & O_SYNC)) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + /* Don't try to swap out physical pages.. */ + vma->vm_flags |= VM_RESERVED; + + /* + * Don't dump addresses that are not real memory to a core file. + */ + if (offset >= __pa(high_memory) || (file->f_flags & O_SYNC)) + vma->vm_flags |= VM_IO; + + if (remap_page_range(vma->vm_start, offset, vma->vm_end-vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + return 0; +} + +/* + * This function reads the *virtual* memory as seen by the kernel. + */ +static ssize_t read_kmem(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + ssize_t read = 0; + ssize_t virtr = 0; + char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ + + if (p < (unsigned long) high_memory) { + read = count; + if (count > (unsigned long) high_memory - p) + read = (unsigned long) high_memory - p; + +#if defined(__sparc__) || defined(__mc68000__) + /* we don't have page 0 mapped on sparc and m68k.. 
*/ + if (p < PAGE_SIZE && read > 0) { + size_t tmp = PAGE_SIZE - p; + if (tmp > read) tmp = read; + if (clear_user(buf, tmp)) + return -EFAULT; + buf += tmp; + p += tmp; + read -= tmp; + count -= tmp; + } +#endif + if (copy_to_user(buf, (char *)p, read)) + return -EFAULT; + p += read; + buf += read; + count -= read; + } + + if (count > 0) { + kbuf = (char *)__get_free_page(GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + while (count > 0) { + int len = count; + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + len = vread(kbuf, (char *)p, len); + if (!len) + break; + if (copy_to_user(buf, kbuf, len)) { + free_page((unsigned long)kbuf); + return -EFAULT; + } + count -= len; + buf += len; + virtr += len; + p += len; + } + free_page((unsigned long)kbuf); + } + *ppos = p; + return virtr + read; +} + +extern long vwrite(char *buf, char *addr, unsigned long count); + +/* + * This function writes to the *virtual* memory as seen by the kernel. + */ +static ssize_t write_kmem(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + ssize_t wrote = 0; + ssize_t virtr = 0; + char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ + + if (p < (unsigned long) high_memory) { + wrote = count; + if (count > (unsigned long) high_memory - p) + wrote = (unsigned long) high_memory - p; + + wrote = do_write_mem(file, (void*)p, p, buf, wrote, ppos); + + p += wrote; + buf += wrote; + count -= wrote; + } + + if (count > 0) { + kbuf = (char *)__get_free_page(GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + while (count > 0) { + int len = count; + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + if (len && copy_from_user(kbuf, buf, len)) { + free_page((unsigned long)kbuf); + return -EFAULT; + } + len = vwrite(kbuf, (char *)p, len); + count -= len; + buf += len; + virtr += len; + p += len; + } + free_page((unsigned long)kbuf); + } + + *ppos = p; + return virtr + wrote; +} + +#if defined(CONFIG_ISA) || !defined(__mc68000__) +static ssize_t 
read_port(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + unsigned long i = *ppos; + char *tmp = buf; + + if (verify_area(VERIFY_WRITE,buf,count)) + return -EFAULT; + while (count-- > 0 && i < 65536) { + if (__put_user(inb(i),tmp) < 0) + return -EFAULT; + i++; + tmp++; + } + *ppos = i; + return tmp-buf; +} + +static ssize_t write_port(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + unsigned long i = *ppos; + const char * tmp = buf; + + if (verify_area(VERIFY_READ,buf,count)) + return -EFAULT; + while (count-- > 0 && i < 65536) { + char c; + if (__get_user(c, tmp)) + return -EFAULT; + outb(c,i); + i++; + tmp++; + } + *ppos = i; + return tmp-buf; +} +#endif + +static ssize_t read_null(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t write_null(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + return count; +} + +/* + * For fun, we are using the MMU for this. + */ +static inline size_t read_zero_pagealigned(char * buf, size_t size) +{ + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long addr=(unsigned long)buf; + + mm = current->mm; + /* Oops, this was forgotten before. -ben */ + down_read(&mm->mmap_sem); + + /* For private mappings, just map in zero pages. */ + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + unsigned long count; + + if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0) + goto out_up; + if (vma->vm_flags & VM_SHARED) + break; + count = vma->vm_end - addr; + if (count > size) + count = size; + + zap_page_range(mm, addr, count); + zeromap_page_range(addr, count, PAGE_COPY); + + size -= count; + buf += count; + addr += count; + if (size == 0) + goto out_up; + } + + up_read(&mm->mmap_sem); + + /* The shared case is hard. Let's do the conventional zeroing. 
*/ + do { + unsigned long unwritten = clear_user(buf, PAGE_SIZE); + if (unwritten) + return size + unwritten - PAGE_SIZE; + if (current->need_resched) + schedule(); + buf += PAGE_SIZE; + size -= PAGE_SIZE; + } while (size); + + return size; +out_up: + up_read(&mm->mmap_sem); + return size; +} + +static ssize_t read_zero(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + unsigned long left, unwritten, written = 0; + + if (!count) + return 0; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + left = count; + + /* do we want to be clever? Arbitrary cut-off */ + if (count >= PAGE_SIZE*4) { + unsigned long partial; + + /* How much left of the page? */ + partial = (PAGE_SIZE-1) & -(unsigned long) buf; + unwritten = clear_user(buf, partial); + written = partial - unwritten; + if (unwritten) + goto out; + left -= partial; + buf += partial; + unwritten = read_zero_pagealigned(buf, left & PAGE_MASK); + written += (left & PAGE_MASK) - unwritten; + if (unwritten) + goto out; + buf += left & PAGE_MASK; + left &= ~PAGE_MASK; + } + unwritten = clear_user(buf, left); + written += left - unwritten; +out: + return written ? written : -EFAULT; +} + +static int mmap_zero(struct file * file, struct vm_area_struct * vma) +{ + if (vma->vm_flags & VM_SHARED) + return shmem_zero_setup(vma); + if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) + return -EAGAIN; + return 0; +} + +static ssize_t write_full(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + return -ENOSPC; +} + +/* + * Special lseek() function for /dev/null and /dev/zero. Most notably, you + * can fopen() both devices with "a" now. This was previously impossible. + * -- SRB. + */ + +static loff_t null_lseek(struct file * file, loff_t offset, int orig) +{ + return file->f_pos = 0; +} + +/* + * The memory devices use the full 32/64 bits of the offset, and so we cannot + * check against negative addresses: they are ok. 
The return value is weird, + * though, in that case (0). + * + * also note that seeking relative to the "end of file" isn't supported: + * it has no meaning, so it returns -EINVAL. + */ +static loff_t memory_lseek(struct file * file, loff_t offset, int orig) +{ + switch (orig) { + case 0: + file->f_pos = offset; + return file->f_pos; + case 1: + file->f_pos += offset; + return file->f_pos; + default: + return -EINVAL; + } +} + +static int open_port(struct inode * inode, struct file * filp) +{ + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long kaddr; + pgd_t *pgd; + pmd_t *pmd; + pte_t *ptep, pte; + struct page *page = NULL; + + /* address is user VA; convert to kernel VA of desired page */ + kaddr = (address - vma->vm_start) + offset; + kaddr = VMALLOC_VMADDR(kaddr); + + spin_lock(&init_mm.page_table_lock); + + /* Lookup page structure for kernel VA */ + pgd = pgd_offset(&init_mm, kaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) + goto out; + pmd = pmd_offset(pgd, kaddr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + goto out; + ptep = pte_offset(pmd, kaddr); + if (!ptep) + goto out; + pte = *ptep; + if (!pte_present(pte)) + goto out; + if (write && !pte_write(pte)) + goto out; + page = pte_page(pte); + if (!VALID_PAGE(page)) { + page = NULL; + goto out; + } + + /* Increment reference count on page */ + get_page(page); + +out: + spin_unlock(&init_mm.page_table_lock); + + return page; +} + +struct vm_operations_struct kmem_vm_ops = { + nopage: kmem_vm_nopage, +}; + +static int mmap_kmem(struct file * file, struct vm_area_struct * vma) +{ + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + + /* + * If the user is not attempting to mmap a high memory address then + * the standard mmap_mem mechanism will work. 
High memory addresses + * need special handling, as remap_page_range expects a physically- + * contiguous range of kernel addresses (such as obtained in kmalloc). + */ + if ((offset + size) < (unsigned long) high_memory) + return mmap_mem(file, vma); + + /* + * Accessing memory above the top the kernel knows about or + * through a file pointer that was marked O_SYNC will be + * done non-cached. + */ + if (noncached_address(offset) || (file->f_flags & O_SYNC)) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + /* Don't do anything here; "nopage" will fill the holes */ + vma->vm_ops = &kmem_vm_ops; + + /* Don't try to swap out physical pages.. */ + vma->vm_flags |= VM_RESERVED; + + /* + * Don't dump addresses that are not real memory to a core file. + */ + vma->vm_flags |= VM_IO; + + return 0; +} + +#define zero_lseek null_lseek +#define full_lseek null_lseek +#define write_zero write_null +#define read_full read_zero +#define open_mem open_port +#define open_kmem open_mem + +static struct file_operations mem_fops = { + llseek: memory_lseek, + read: read_mem, + write: write_mem, + mmap: mmap_mem, + open: open_mem, +}; + +static struct file_operations kmem_fops = { + llseek: memory_lseek, + read: read_kmem, + write: write_kmem, + mmap: mmap_kmem, + open: open_kmem, +}; + +static struct file_operations null_fops = { + llseek: null_lseek, + read: read_null, + write: write_null, +}; + +#if defined(CONFIG_ISA) || !defined(__mc68000__) +static struct file_operations port_fops = { + llseek: memory_lseek, + read: read_port, + write: write_port, + open: open_port, +}; +#endif + +static struct file_operations zero_fops = { + llseek: zero_lseek, + read: read_zero, + write: write_zero, + mmap: mmap_zero, +}; + +static struct file_operations full_fops = { + llseek: full_lseek, + read: read_full, + write: write_full, +}; + +#if defined(CONFIG_DEV_ANON) +extern struct file_operations anon_fops; +#endif + +static int memory_open(struct inode * inode, struct file * filp) 
+{ + switch (MINOR(inode->i_rdev)) { + case 1: + filp->f_op = &mem_fops; + break; + case 2: + filp->f_op = &kmem_fops; + break; + case 3: + filp->f_op = &null_fops; + break; +#if defined(CONFIG_ISA) || !defined(__mc68000__) + case 4: + filp->f_op = &port_fops; + break; +#endif + case 5: + filp->f_op = &zero_fops; + break; + case 7: + filp->f_op = &full_fops; + break; + case 8: + filp->f_op = &random_fops; + break; + case 9: + filp->f_op = &urandom_fops; + break; +#if defined(CONFIG_DEV_ANON) + case 10: + filp->f_op = &anon_fops; + break; +#endif + default: + return -ENXIO; + } + if (filp->f_op && filp->f_op->open) + return filp->f_op->open(inode,filp); + return 0; +} + +void __init memory_devfs_register (void) +{ + /* These are never unregistered */ + static const struct { + unsigned short minor; + char *name; + umode_t mode; + struct file_operations *fops; + } list[] = { /* list of minor devices */ + {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, + {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, + {3, "null", S_IRUGO | S_IWUGO, &null_fops}, +#if defined(CONFIG_ISA) || !defined(__mc68000__) + {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops}, +#endif + {5, "zero", S_IRUGO | S_IWUGO, &zero_fops}, + {7, "full", S_IRUGO | S_IWUGO, &full_fops}, + {8, "random", S_IRUGO | S_IWUSR, &random_fops}, + {9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops}, + {10, "anon", S_IRUGO | S_IWUSR, &anon_fops}, + }; + int i; + + for (i=0; i<(sizeof(list)/sizeof(*list)); i++) + devfs_register (NULL, list[i].name, DEVFS_FL_NONE, + MEM_MAJOR, list[i].minor, + list[i].mode | S_IFCHR, + list[i].fops, NULL); +} + +static struct file_operations memory_fops = { + open: memory_open, /* just a selector for the real open */ +}; + +int __init chr_dev_init(void) +{ + if (devfs_register_chrdev(MEM_MAJOR,"mem",&memory_fops)) + printk("unable to get major %d for memory devs\n", MEM_MAJOR); + memory_devfs_register(); + rand_initialize(); +#ifdef CONFIG_I2C + i2c_init_all(); +#endif +#if 
defined (CONFIG_FB) + fbmem_init(); +#endif +#if defined (CONFIG_PROM_CONSOLE) + prom_con_init(); +#endif +#if defined (CONFIG_MDA_CONSOLE) + mda_console_init(); +#endif + tty_init(); +#ifdef CONFIG_M68K_PRINTER + lp_m68k_init(); +#endif + misc_init(); +#if CONFIG_QIC02_TAPE + qic02_tape_init(); +#endif +#ifdef CONFIG_FTAPE + ftape_init(); +#endif +#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR) + tapechar_init(); +#endif + return 0; +} + +__initcall(chr_dev_init); diff --git a/mm/shmem.c b/mm/shmem.c new file mode 100644 index 0000000..7c14b7b --- /dev/null +++ b/mm/shmem.c @@ -0,0 +1,1701 @@ +/* + * Resizable virtual memory filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * 2000-2001 Christoph Rohland + * 2000-2001 SAP AG + * 2002 Red Hat Inc. + * Copyright (C) 2002-2003 Hugh Dickins. + * Copyright (C) 2002-2003 VERITAS Software Corporation. + * + * This file is released under the GPL. + */ + +/* + * This virtual memory filesystem is heavily based on the ramfs. It + * extends ramfs by the ability to use swap and honor resource limits + * which makes it a completely usable filesystem. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/devfs_fs_kernel.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/string.h> +#include <linux/locks.h> +#include <linux/smp_lock.h> + +#include <asm/uaccess.h> +#include <asm/div64.h> + +/* This magic number is used in glibc for posix shared memory */ +#define TMPFS_MAGIC 0x01021994 + +#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) +#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) +#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) + +#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) +#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) + +#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) + +/* Pretend that each entry is of this size in directory's i_size */ +#define BOGO_DIRENT_SIZE 20 + +#define SHMEM_SB(sb) (&sb->u.shmem_sb) + +/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ +enum sgp_type { + SGP_READ, /* don't exceed i_size, don't allocate page */ + SGP_CACHE, /* don't exceed i_size, may allocate page */ + SGP_WRITE, /* may exceed i_size, may allocate page */ +}; + +static int shmem_getpage(struct inode *inode, unsigned long idx, + struct page **pagep, enum sgp_type sgp); + +static struct super_operations shmem_ops; +static struct address_space_operations shmem_aops; +static struct file_operations shmem_file_operations; +static struct inode_operations shmem_inode_operations; +static struct inode_operations shmem_dir_inode_operations; +static struct vm_operations_struct shmem_vm_ops; + +LIST_HEAD(shmem_inodes); +static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED; + +static void shmem_free_block(struct inode *inode) +{ + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + spin_lock(&sbinfo->stat_lock); + sbinfo->free_blocks++; + 
inode->i_blocks -= BLOCKS_PER_PAGE; + spin_unlock(&sbinfo->stat_lock); +} + +static void shmem_removepage(struct page *page) +{ + if (!PageLaunder(page)) + shmem_free_block(page->mapping->host); +} + +/* + * shmem_swp_entry - find the swap vector position in the info structure + * + * @info: info structure for the inode + * @index: index of the page to find + * @page: optional page to add to the structure. Has to be preset to + * all zeros + * + * If there is no space allocated yet it will return NULL when + * page is 0, else it will use the page for the needed block, + * setting it to 0 on return to indicate that it has been used. + * + * The swap vector is organized the following way: + * + * There are SHMEM_NR_DIRECT entries directly stored in the + * shmem_inode_info structure. So small files do not need an addional + * allocation. + * + * For pages with index > SHMEM_NR_DIRECT there is the pointer + * i_indirect which points to a page which holds in the first half + * doubly indirect blocks, in the second half triple indirect blocks: + * + * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the + * following layout (for SHMEM_NR_DIRECT == 16): + * + * i_indirect -> dir --> 16-19 + * | +-> 20-23 + * | + * +-->dir2 --> 24-27 + * | +-> 28-31 + * | +-> 32-35 + * | +-> 36-39 + * | + * +-->dir3 --> 40-43 + * +-> 44-47 + * +-> 48-51 + * +-> 52-55 + */ +static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page) +{ + unsigned long offset; + void **dir; + + if (index < SHMEM_NR_DIRECT) + return info->i_direct+index; + if (!info->i_indirect) { + if (page) { + info->i_indirect = (void **) *page; + *page = 0; + } + return NULL; /* need another page */ + } + + index -= SHMEM_NR_DIRECT; + offset = index % ENTRIES_PER_PAGE; + index /= ENTRIES_PER_PAGE; + dir = info->i_indirect; + + if (index >= ENTRIES_PER_PAGE/2) { + index -= ENTRIES_PER_PAGE/2; + dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; + index %= 
ENTRIES_PER_PAGE; + if (!*dir) { + if (page) { + *dir = (void *) *page; + *page = 0; + } + return NULL; /* need another page */ + } + dir = (void **) *dir; + } + + dir += index; + if (!*dir) { + if (!page || !*page) + return NULL; /* need a page */ + *dir = (void *) *page; + *page = 0; + } + return (swp_entry_t *) *dir + offset; +} + +/* + * shmem_swp_alloc - get the position of the swap entry for the page. + * If it does not exist allocate the entry. + * + * @info: info structure for the inode + * @index: index of the page to find + * @sgp: check and recheck i_size? skip allocation? + */ +static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp) +{ + struct inode *inode = info->inode; + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + unsigned long page = 0; + swp_entry_t *entry; + static const swp_entry_t unswapped = {0}; + + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) + return ERR_PTR(-EINVAL); + + while (!(entry = shmem_swp_entry(info, index, &page))) { + if (sgp == SGP_READ) + return (swp_entry_t *) &unswapped; + /* + * Test free_blocks against 1 not 0, since we have 1 data + * page (and perhaps indirect index pages) yet to allocate: + * a waste to allocate index if we cannot allocate data. 
+ */ + spin_lock(&sbinfo->stat_lock); + if (sbinfo->free_blocks <= 1) { + spin_unlock(&sbinfo->stat_lock); + return ERR_PTR(-ENOSPC); + } + sbinfo->free_blocks--; + inode->i_blocks += BLOCKS_PER_PAGE; + spin_unlock(&sbinfo->stat_lock); + + spin_unlock(&info->lock); + page = get_zeroed_page(GFP_USER); + spin_lock(&info->lock); + + if (!page) { + shmem_free_block(inode); + return ERR_PTR(-ENOMEM); + } + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) { + entry = ERR_PTR(-EINVAL); + break; + } + if (info->next_index <= index) + info->next_index = index + 1; + } + if (page) { + /* another task gave its page, or truncated the file */ + shmem_free_block(inode); + free_page(page); + } + if (info->next_index <= index && !IS_ERR(entry)) + info->next_index = index + 1; + return entry; +} + +/* + * shmem_free_swp - free some swap entries in a directory + * + * @dir: pointer to the directory + * @edir: pointer after last entry of the directory + */ +static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir) +{ + swp_entry_t *ptr; + int freed = 0; + + for (ptr = dir; ptr < edir; ptr++) { + if (ptr->val) { + free_swap_and_cache(*ptr); + *ptr = (swp_entry_t){0}; + freed++; + } + } + return freed; +} + +/* + * shmem_truncate_direct - free the swap entries of a whole doubly + * indirect block + * + * @info: the info structure of the inode + * @dir: pointer to the pointer to the block + * @start: offset to start from (in pages) + * @len: how many pages are stored in this block + */ +static inline unsigned long +shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len) +{ + swp_entry_t **last, **ptr; + unsigned long off, freed_swp, freed = 0; + + last = *dir + (len + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE; + off = start % ENTRIES_PER_PAGE; + + for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) { + if (!*ptr) + continue; + + if (info->swapped) { + freed_swp = 
shmem_free_swp(*ptr + off, + *ptr + ENTRIES_PER_PAGE); + info->swapped -= freed_swp; + freed += freed_swp; + } + + if (!off) { + freed++; + free_page((unsigned long) *ptr); + *ptr = 0; + } + } + + if (!start) { + freed++; + free_page((unsigned long) *dir); + *dir = 0; + } + return freed; +} + +/* + * shmem_truncate_indirect - truncate an inode + * + * @info: the info structure of the inode + * @index: the index to truncate + * + * This function locates the last doubly indirect block and calls + * then shmem_truncate_direct to do the real work + */ +static inline unsigned long +shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index) +{ + swp_entry_t ***base; + unsigned long baseidx, start; + unsigned long len = info->next_index; + unsigned long freed; + + if (len <= SHMEM_NR_DIRECT) { + info->next_index = index; + if (!info->swapped) + return 0; + freed = shmem_free_swp(info->i_direct + index, + info->i_direct + len); + info->swapped -= freed; + return freed; + } + + if (len <= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT) { + len -= SHMEM_NR_DIRECT; + base = (swp_entry_t ***) &info->i_indirect; + baseidx = SHMEM_NR_DIRECT; + } else { + len -= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT; + BUG_ON(len > ENTRIES_PER_PAGEPAGE*ENTRIES_PER_PAGE/2); + baseidx = len - 1; + baseidx -= baseidx % ENTRIES_PER_PAGEPAGE; + base = (swp_entry_t ***) info->i_indirect + + ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGEPAGE; + len -= baseidx; + baseidx += ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT; + } + + if (index > baseidx) { + info->next_index = index; + start = index - baseidx; + } else { + info->next_index = baseidx; + start = 0; + } + return *base? 
shmem_truncate_direct(info, base, start, len): 0; +} + +static void shmem_truncate(struct inode *inode) +{ + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + unsigned long freed = 0; + unsigned long index; + + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (index >= info->next_index) + return; + + spin_lock(&info->lock); + while (index < info->next_index) + freed += shmem_truncate_indirect(info, index); + BUG_ON(info->swapped > info->next_index); + spin_unlock(&info->lock); + + spin_lock(&sbinfo->stat_lock); + sbinfo->free_blocks += freed; + inode->i_blocks -= freed*BLOCKS_PER_PAGE; + spin_unlock(&sbinfo->stat_lock); +} + +static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct page *page = NULL; + int error; + + if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_size < inode->i_size) { + /* + * If truncating down to a partial page, then + * if that page is already allocated, hold it + * in memory until the truncation is over, so + * truncate_partial_page cannnot miss it were + * it assigned to swap. 
+ */ + if (attr->ia_size & (PAGE_CACHE_SIZE-1)) { + (void) shmem_getpage(inode, + attr->ia_size>>PAGE_CACHE_SHIFT, + &page, SGP_READ); + } + } + } + + error = inode_change_ok(inode, attr); + if (!error) + error = inode_setattr(inode, attr); + if (page) + page_cache_release(page); + return error; +} + +static void shmem_delete_inode(struct inode *inode) +{ + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct shmem_inode_info *info = SHMEM_I(inode); + + if (inode->i_op->truncate == shmem_truncate) { + spin_lock(&shmem_ilock); + list_del(&info->list); + spin_unlock(&shmem_ilock); + inode->i_size = 0; + shmem_truncate(inode); + } + BUG_ON(inode->i_blocks); + spin_lock(&sbinfo->stat_lock); + sbinfo->free_inodes++; + spin_unlock(&sbinfo->stat_lock); + clear_inode(inode); +} + +static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) +{ + swp_entry_t *ptr; + + for (ptr = dir; ptr < edir; ptr++) { + if (ptr->val == entry.val) + return ptr - dir; + } + return -1; +} + +static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) +{ + struct inode *inode; + struct address_space *mapping; + swp_entry_t *ptr; + unsigned long idx; + unsigned long limit; + int offset; + + idx = 0; + ptr = info->i_direct; + spin_lock(&info->lock); + offset = info->next_index; + if (offset > SHMEM_NR_DIRECT) + offset = SHMEM_NR_DIRECT; + offset = shmem_find_swp(entry, ptr, ptr + offset); + if (offset >= 0) + goto found; + + for (idx = SHMEM_NR_DIRECT; idx < info->next_index; + idx += ENTRIES_PER_PAGE) { + ptr = shmem_swp_entry(info, idx, NULL); + if (!ptr) + continue; + offset = info->next_index - idx; + if (offset > ENTRIES_PER_PAGE) + offset = ENTRIES_PER_PAGE; + offset = shmem_find_swp(entry, ptr, ptr + offset); + if (offset >= 0) + goto found; + } + spin_unlock(&info->lock); + return 0; +found: + idx += offset; + inode = info->inode; + mapping = inode->i_mapping; + delete_from_swap_cache(page); + + /* Racing 
against delete or truncate? Must leave out of page cache */ + limit = (inode->i_state & I_FREEING)? 0: + (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + if (idx >= limit || add_to_page_cache_unique(page, + mapping, idx, page_hash(mapping, idx)) == 0) { + ptr[offset].val = 0; + info->swapped--; + } else if (add_to_swap_cache(page, entry) != 0) + BUG(); + spin_unlock(&info->lock); + SetPageUptodate(page); + /* + * Decrement swap count even when the entry is left behind: + * try_to_unuse will skip over mms, then reincrement count. + */ + swap_free(entry); + return 1; +} + +/* + * shmem_unuse() search for an eventually swapped out shmem page. + */ +int shmem_unuse(swp_entry_t entry, struct page *page) +{ + struct list_head *p; + struct shmem_inode_info *info; + int found = 0; + + spin_lock(&shmem_ilock); + list_for_each(p, &shmem_inodes) { + info = list_entry(p, struct shmem_inode_info, list); + + if (info->swapped && shmem_unuse_inode(info, entry, page)) { + /* move head to start search for next from here */ + list_move_tail(&shmem_inodes, &info->list); + found = 1; + break; + } + } + spin_unlock(&shmem_ilock); + return found; +} + +/* + * Move the page from the page cache to the swap cache. 
 */
static int shmem_writepage(struct page *page)
{
	struct shmem_inode_info *info;
	swp_entry_t *entry, swap;
	struct address_space *mapping;
	unsigned long index;
	struct inode *inode;

	/* The VM hands us a locked page; we only accept it on the
	 * launder path — anything else is bounced straight back. */
	BUG_ON(!PageLocked(page));
	if (!PageLaunder(page))
		return fail_writepage(page);

	mapping = page->mapping;
	index = page->index;
	inode = mapping->host;
	info = SHMEM_I(inode);
	/* Locked (mlocked) shmem files are never pushed to swap. */
	if (info->flags & VM_LOCKED)
		return fail_writepage(page);
getswap:
	/* Reserve a swap slot before touching the page cache, so the
	 * transfer below cannot fail halfway through for lack of swap. */
	swap = get_swap_page();
	if (!swap.val)
		return fail_writepage(page);

	spin_lock(&info->lock);
	BUG_ON(index >= info->next_index);
	entry = shmem_swp_entry(info, index, NULL);
	BUG_ON(!entry);
	BUG_ON(entry->val);

	/* Remove it from the page cache */
	remove_inode_page(page);
	page_cache_release(page);

	/* Add it to the swap cache */
	if (add_to_swap_cache(page, swap) != 0) {
		/*
		 * Raced with "speculative" read_swap_cache_async.
		 * Add page back to page cache, unref swap, try again.
		 */
		add_to_page_cache_locked(page, mapping, index);
		spin_unlock(&info->lock);
		swap_free(swap);
		goto getswap;
	}

	/* Record the slot in the inode's swap vector; from now on the
	 * data lives in the swap cache, not the page cache. */
	*entry = swap;
	info->swapped++;
	spin_unlock(&info->lock);
	SetPageUptodate(page);
	set_page_dirty(page);
	UnlockPage(page);
	return 0;
}

/*
 * shmem_getpage - either get the page from swap or allocate a new one
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm.
If we swap it in we mark it dirty since we also free the swap + * entry since a page cannot live in both the swap and page cache + */ +static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp) +{ + struct address_space *mapping = inode->i_mapping; + struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_sb_info *sbinfo; + struct page *filepage = *pagep; + struct page *swappage; + swp_entry_t *entry; + swp_entry_t swap; + int error = 0; + + if (idx >= SHMEM_MAX_INDEX) + return -EFBIG; + /* + * Normally, filepage is NULL on entry, and either found + * uptodate immediately, or allocated and zeroed, or read + * in under swappage, which is then assigned to filepage. + * But shmem_readpage and shmem_prepare_write pass in a locked + * filepage, which may be found not uptodate by other callers + * too, and may need to be copied from the swappage read in. + */ +repeat: + if (!filepage) + filepage = find_lock_page(mapping, idx); + if (filepage && Page_Uptodate(filepage)) + goto done; + + spin_lock(&info->lock); + entry = shmem_swp_alloc(info, idx, sgp); + if (IS_ERR(entry)) { + spin_unlock(&info->lock); + error = PTR_ERR(entry); + goto failed; + } + swap = *entry; + + if (swap.val) { + /* Look it up and read it in.. 
*/ + swappage = lookup_swap_cache(swap); + if (!swappage) { + spin_unlock(&info->lock); + swapin_readahead(swap); + swappage = read_swap_cache_async(swap); + if (!swappage) { + spin_lock(&info->lock); + entry = shmem_swp_alloc(info, idx, sgp); + if (IS_ERR(entry)) + error = PTR_ERR(entry); + else if (entry->val == swap.val) + error = -ENOMEM; + spin_unlock(&info->lock); + if (error) + goto failed; + goto repeat; + } + wait_on_page(swappage); + page_cache_release(swappage); + goto repeat; + } + + /* We have to do this with page locked to prevent races */ + if (TryLockPage(swappage)) { + spin_unlock(&info->lock); + wait_on_page(swappage); + page_cache_release(swappage); + goto repeat; + } + if (!Page_Uptodate(swappage)) { + spin_unlock(&info->lock); + UnlockPage(swappage); + page_cache_release(swappage); + error = -EIO; + goto failed; + } + + delete_from_swap_cache(swappage); + if (filepage) { + entry->val = 0; + info->swapped--; + spin_unlock(&info->lock); + flush_page_to_ram(swappage); + copy_highpage(filepage, swappage); + UnlockPage(swappage); + page_cache_release(swappage); + flush_dcache_page(filepage); + SetPageUptodate(filepage); + SetPageDirty(filepage); + swap_free(swap); + } else if (add_to_page_cache_unique(swappage, + mapping, idx, page_hash(mapping, idx)) == 0) { + entry->val = 0; + info->swapped--; + spin_unlock(&info->lock); + filepage = swappage; + SetPageUptodate(filepage); + SetPageDirty(filepage); + swap_free(swap); + } else { + if (add_to_swap_cache(swappage, swap) != 0) + BUG(); + spin_unlock(&info->lock); + SetPageUptodate(swappage); + SetPageDirty(swappage); + UnlockPage(swappage); + page_cache_release(swappage); + goto repeat; + } + } else if (sgp == SGP_READ && !filepage) { + filepage = find_get_page(mapping, idx); + if (filepage && + (!Page_Uptodate(filepage) || TryLockPage(filepage))) { + spin_unlock(&info->lock); + wait_on_page(filepage); + page_cache_release(filepage); + filepage = NULL; + goto repeat; + } + spin_unlock(&info->lock); + } 
else { + sbinfo = SHMEM_SB(inode->i_sb); + spin_lock(&sbinfo->stat_lock); + if (sbinfo->free_blocks == 0) { + spin_unlock(&sbinfo->stat_lock); + spin_unlock(&info->lock); + error = -ENOSPC; + goto failed; + } + sbinfo->free_blocks--; + inode->i_blocks += BLOCKS_PER_PAGE; + spin_unlock(&sbinfo->stat_lock); + + if (!filepage) { + spin_unlock(&info->lock); + filepage = page_cache_alloc(mapping); + if (!filepage) { + shmem_free_block(inode); + error = -ENOMEM; + goto failed; + } + + spin_lock(&info->lock); + entry = shmem_swp_alloc(info, idx, sgp); + if (IS_ERR(entry)) + error = PTR_ERR(entry); + if (error || entry->val || + add_to_page_cache_unique(filepage, + mapping, idx, page_hash(mapping, idx)) != 0) { + spin_unlock(&info->lock); + page_cache_release(filepage); + shmem_free_block(inode); + filepage = NULL; + if (error) + goto failed; + goto repeat; + } + } + + spin_unlock(&info->lock); + clear_highpage(filepage); + flush_dcache_page(filepage); + SetPageUptodate(filepage); + } +done: + if (!*pagep) { + if (filepage) { + UnlockPage(filepage); + *pagep = filepage; + } else + *pagep = ZERO_PAGE(0); + } + return 0; + +failed: + if (*pagep != filepage) { + UnlockPage(filepage); + page_cache_release(filepage); + } + return error; +} + +struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused) +{ + struct inode *inode = vma->vm_file->f_dentry->d_inode; + struct page *page = NULL; + unsigned long idx; + int error; + + idx = (address - vma->vm_start) >> PAGE_SHIFT; + idx += vma->vm_pgoff; + idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; + + error = shmem_getpage(inode, idx, &page, SGP_CACHE); + if (error) + return (error == -ENOMEM)? 
NOPAGE_OOM: NOPAGE_SIGBUS; + + mark_page_accessed(page); + flush_page_to_ram(page); + return page; +} + +void shmem_lock(struct file *file, int lock) +{ + struct inode *inode = file->f_dentry->d_inode; + struct shmem_inode_info *info = SHMEM_I(inode); + + spin_lock(&info->lock); + if (lock) + info->flags |= VM_LOCKED; + else + info->flags &= ~VM_LOCKED; + spin_unlock(&info->lock); +} + +static int shmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct vm_operations_struct *ops; + struct inode *inode = file->f_dentry->d_inode; + + ops = &shmem_vm_ops; + if (!S_ISREG(inode->i_mode)) + return -EACCES; + UPDATE_ATIME(inode); + vma->vm_ops = ops; + return 0; +} + +static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev) +{ + struct inode *inode; + struct shmem_inode_info *info; + struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + + spin_lock(&sbinfo->stat_lock); + if (!sbinfo->free_inodes) { + spin_unlock(&sbinfo->stat_lock); + return NULL; + } + sbinfo->free_inodes--; + spin_unlock(&sbinfo->stat_lock); + + inode = new_inode(sb); + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_rdev = NODEV; + inode->i_mapping->a_ops = &shmem_aops; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + info = SHMEM_I(inode); + info->inode = inode; + spin_lock_init(&info->lock); + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_op = &shmem_inode_operations; + inode->i_fop = &shmem_file_operations; + spin_lock(&shmem_ilock); + list_add_tail(&info->list, &shmem_inodes); + spin_unlock(&shmem_ilock); + break; + case S_IFDIR: + inode->i_nlink++; + /* Some things misbehave if size == 0 on a directory */ + inode->i_size = 2 * BOGO_DIRENT_SIZE; + inode->i_op = &shmem_dir_inode_operations; + inode->i_fop = &dcache_dir_ops; + break; + case S_IFLNK: + break; + } 
+ } + return inode; +} + +static int shmem_set_size(struct shmem_sb_info *info, + unsigned long max_blocks, unsigned long max_inodes) +{ + int error; + unsigned long blocks, inodes; + + spin_lock(&info->stat_lock); + blocks = info->max_blocks - info->free_blocks; + inodes = info->max_inodes - info->free_inodes; + error = -EINVAL; + if (max_blocks < blocks) + goto out; + if (max_inodes < inodes) + goto out; + error = 0; + info->max_blocks = max_blocks; + info->free_blocks = max_blocks - blocks; + info->max_inodes = max_inodes; + info->free_inodes = max_inodes - inodes; +out: + spin_unlock(&info->stat_lock); + return error; +} + +#ifdef CONFIG_TMPFS + +static struct inode_operations shmem_symlink_inode_operations; +static struct inode_operations shmem_symlink_inline_operations; + +/* + * tmpfs itself makes no use of generic_file_read, generic_file_mmap + * or generic_file_write; but shmem_readpage, shmem_prepare_write and + * shmem_commit_write let a tmpfs file be used below the loop driver, + * and shmem_readpage lets a tmpfs file be used by sendfile. 
 */
/* Read one page into the page cache for sendfile/loop; the caller's
 * locked page is handed to shmem_getpage and unlocked afterwards. */
static int
shmem_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
	UnlockPage(page);
	return error;
}

/* generic_file_write hook: make sure the (locked) page is populated,
 * pulling it in from swap if necessary.  SGP_WRITE allows allocation
 * beyond the current i_size. */
static int
shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
	struct inode *inode = page->mapping->host;
	return shmem_getpage(inode, page->index, &page, SGP_WRITE);
}

/* Commit a write: extend i_size if we wrote past it and mark the page
 * dirty so it is considered for swap-out, never dropped. */
static int
shmem_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
	struct inode *inode = page->mapping->host;
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

	if (pos > inode->i_size)
		inode->i_size = pos;
	SetPageDirty(page);
	return 0;
}

/*
 * write(2) for tmpfs files: copy user data page by page under i_sem,
 * growing the file as needed.  Returns bytes written, or a negative
 * errno (partial copy backs out the uncopied tail and reports -EFAULT
 * only if nothing at all was written before it).
 */
static ssize_t
shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
{
	struct inode	*inode = file->f_dentry->d_inode;
	loff_t		pos;
	unsigned long	written;
	int		err;

	if ((ssize_t) count < 0)
		return -EINVAL;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	down(&inode->i_sem);

	pos = *ppos;
	written = 0;

	err = precheck_file_write(file, inode, &count, &pos);
	if (err || !count)
		goto out;

	/* Writing clears setuid/setgid bits and bumps the timestamps. */
	remove_suid(inode);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	do {
		struct page *page = NULL;
		unsigned long bytes, index, offset;
		char *kaddr;
		int left;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		/*
		 * We don't hold page lock across copy from user -
		 * what would it guard against? - so no deadlock here.
		 */

		err = shmem_getpage(inode, index, &page, SGP_WRITE);
		if (err)
			break;

		kaddr = kmap(page);
		left = __copy_from_user(kaddr + offset, buf, bytes);
		kunmap(page);

		/* Account optimistically, then back out below whatever
		 * __copy_from_user could not fetch. */
		written += bytes;
		count -= bytes;
		pos += bytes;
		buf += bytes;
		if (pos > inode->i_size)
			inode->i_size = pos;

		flush_dcache_page(page);
		SetPageDirty(page);
		SetPageReferenced(page);
		page_cache_release(page);

		if (left) {
			pos -= left;
			written -= left;
			err = -EFAULT;
			break;
		}
	} while (count);

	*ppos = pos;
	if (written)
		err = written;	/* partial success trumps a late error */
out:
	up(&inode->i_sem);
	return err;
}

/*
 * Core of read(2): walk the file page by page, fetching each page via
 * shmem_getpage (SGP_READ: never allocate; holes read as ZERO_PAGE)
 * and pushing it to the caller through file_read_actor.
 */
static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	unsigned long index, offset;

	index = *ppos >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page = NULL;
		unsigned long end_index, nr, ret;

		/* Quick EOF check before the (possibly blocking) getpage. */
		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = inode->i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset)
				break;
		}

		desc->error = shmem_getpage(inode, index, &page, SGP_READ);
		if (desc->error) {
			/* -EINVAL here means "beyond i_size": plain EOF. */
			if (desc->error == -EINVAL)
				desc->error = 0;
			break;
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_sem protection against truncate
		 */
		nr = PAGE_CACHE_SIZE;
		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
		if (index == end_index) {
			nr = inode->i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset) {
				page_cache_release(page);
				break;
			}
		}
		nr -= offset;

		if (page != ZERO_PAGE(0)) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping->i_mmap_shared != NULL)
				flush_dcache_page(page);
			/*
			 * Mark the page accessed if we read the
			 * beginning or we just did an lseek.
			 */
			if (!offset || !filp->f_reada)
				mark_page_accessed(page);
		}

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = file_read_actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		page_cache_release(page);
		if (ret != nr || !desc->count)
			break;
	}

	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	filp->f_reada = 1;
	UPDATE_ATIME(inode);
}

/* read(2) entry point: validate the user buffer, then delegate the
 * actual copying to do_shmem_file_read via a read descriptor. */
static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{
	read_descriptor_t desc;

	if ((ssize_t) count < 0)
		return -EINVAL;
	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;
	if (!count)
		return 0;

	desc.written = 0;
	desc.count = count;
	desc.buf = buf;
	desc.error = 0;

	do_shmem_file_read(filp, ppos, &desc);
	if (desc.written)
		return desc.written;
	return desc.error;
}

/* statfs(2): report block/inode totals from the superblock counters,
 * sampled under stat_lock for a consistent snapshot. */
static int shmem_statfs(struct super_block *sb, struct statfs *buf)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	buf->f_type = TMPFS_MAGIC;
	buf->f_bsize = PAGE_CACHE_SIZE;
	spin_lock(&sbinfo->stat_lock);
	buf->f_blocks = sbinfo->max_blocks;
	buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
	buf->f_files = sbinfo->max_inodes;
	buf->f_ffree = sbinfo->free_inodes;
	spin_unlock(&sbinfo->stat_lock);
	buf->f_namelen = NAME_MAX;
	return 0;
}

/*
 * Lookup the data. This is trivial - if the dentry didn't already
 * exist, we know it is negative.
+ */ +static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry) +{ + d_add(dentry, NULL); + return NULL; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) +{ + struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); + int error = -ENOSPC; + + if (inode) { + dir->i_size += BOGO_DIRENT_SIZE; + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + } + return error; +} + +static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int error; + + if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) + return error; + dir->i_nlink++; + return 0; +} + +static int shmem_create(struct inode *dir, struct dentry *dentry, int mode) +{ + return shmem_mknod(dir, dentry, mode | S_IFREG, 0); +} + +/* + * Link a file.. + */ +static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + dir->i_size += BOGO_DIRENT_SIZE; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + inode->i_nlink++; + atomic_inc(&inode->i_count); /* New dentry reference */ + dget(dentry); /* Extra pinning count for the created dentry */ + d_instantiate(dentry, inode); + return 0; +} + +static inline int shmem_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +/* + * Check that a directory is empty (this works + * for regular files too, they'll just always be + * considered empty..). + * + * Note that an empty directory can still have + * children, they just all have to be negative.. 
 */
static int shmem_empty(struct dentry *dentry)
{
	struct list_head *list;

	/* dcache_lock protects the walk over d_subdirs */
	spin_lock(&dcache_lock);
	list = dentry->d_subdirs.next;

	while (list != &dentry->d_subdirs) {
		struct dentry *de = list_entry(list, struct dentry, d_child);

		if (shmem_positive(de)) {
			spin_unlock(&dcache_lock);
			return 0;
		}
		list = list->next;
	}
	spin_unlock(&dcache_lock);
	return 1;
}

/* Remove a directory entry; the final iput happens when the dentry goes. */
static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;

	dir->i_size -= BOGO_DIRENT_SIZE;
	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	inode->i_nlink--;
	dput(dentry); /* Undo the count from "create" - this does all the work */
	return 0;
}

static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!shmem_empty(dentry))
		return -ENOTEMPTY;

	dir->i_nlink--;	/* drop the ".." reference held by the doomed directory */
	return shmem_unlink(dir, dentry);
}

/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly free's it when it
 * gets overwritten.
 */
static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *inode = old_dentry->d_inode;
	int they_are_dirs = S_ISDIR(inode->i_mode);

	if (!shmem_empty(new_dentry))
		return -ENOTEMPTY;

	if (new_dentry->d_inode) {
		/* Overwriting an existing target: unlink it first. */
		(void) shmem_unlink(new_dir, new_dentry);
		if (they_are_dirs)
			old_dir->i_nlink--;
	} else if (they_are_dirs) {
		/* Moving a directory between parents moves its ".." link. */
		old_dir->i_nlink--;
		new_dir->i_nlink++;
	}

	old_dir->i_size -= BOGO_DIRENT_SIZE;
	new_dir->i_size += BOGO_DIRENT_SIZE;
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	inode->i_ctime = CURRENT_TIME;
	return 0;
}

/*
 * Create a symlink.  Short targets are stored inline, in the
 * shmem_inode_info itself; longer ones get a page in the page cache.
 */
static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
	int error;
	int len;
	struct inode *inode;
	struct page *page = NULL;
	char *kaddr;
	struct shmem_inode_info *info;

	len = strlen(symname) + 1;	/* include the trailing NUL */
	if (len > PAGE_CACHE_SIZE)
		return -ENAMETOOLONG;

	inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
	if (!inode)
		return -ENOSPC;

	info = SHMEM_I(inode);
	inode->i_size = len-1;	/* i_size excludes the NUL */
	if (len <= sizeof(struct shmem_inode_info)) {
		/* do it inline */
		memcpy(info, symname, len);
		inode->i_op = &shmem_symlink_inline_operations;
	} else {
		error = shmem_getpage(inode, 0, &page, SGP_WRITE);
		if (error) {
			iput(inode);
			return error;
		}
		inode->i_op = &shmem_symlink_inode_operations;
		spin_lock(&shmem_ilock);
		list_add_tail(&info->list, &shmem_inodes);
		spin_unlock(&shmem_ilock);
		kaddr = kmap(page);
		memcpy(kaddr, symname, len);
		kunmap(page);
		SetPageDirty(page);
		page_cache_release(page);
	}
	dir->i_size += BOGO_DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
	d_instantiate(dentry, inode);
	dget(dentry);
	return 0;
}

/* Inline variant: the link text lives in the shmem_inode_info itself. */
static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
{
	return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
}

static
int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
{
	return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
}

/* Page-backed variant: kmap the first page of the inode to read the target. */
static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
{
	struct page *page = NULL;
	int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
	if (res)
		return res;
	res = vfs_readlink(dentry, buffer, buflen, kmap(page));
	kunmap(page);
	mark_page_accessed(page);
	page_cache_release(page);
	return res;
}

static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct page *page = NULL;
	int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
	if (res)
		return res;
	res = vfs_follow_link(nd, kmap(page));
	kunmap(page);
	mark_page_accessed(page);
	page_cache_release(page);
	return res;
}

static struct inode_operations shmem_symlink_inline_operations = {
	readlink:	shmem_readlink_inline,
	follow_link:	shmem_follow_link_inline,
};

static struct inode_operations shmem_symlink_inode_operations = {
	truncate:	shmem_truncate,
	readlink:	shmem_readlink,
	follow_link:	shmem_follow_link,
};

/*
 * Parse tmpfs mount options.  Returns 0 on success, 1 on a bad option
 * or value.  A NULL mode/uid/gid pointer means the caller does not
 * want that option parsed (remount only allows the size limits).
 */
static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
{
	char *this_char, *value, *rest;

	while ((this_char = strsep(&options, ",")) != NULL) {
		if (!*this_char)
			continue;
		if ((value = strchr(this_char,'=')) != NULL) {
			*value++ = 0;
		} else {
			printk(KERN_ERR
			    "tmpfs: No value for mount option '%s'\n",
			    this_char);
			return 1;
		}

		if (!strcmp(this_char,"size")) {
			unsigned long long size;
			size = memparse(value,&rest);
			if (*rest == '%') {
				/* "size=N%": percentage of physical RAM */
				struct sysinfo si;
				si_meminfo(&si);
				size <<= PAGE_SHIFT;
				size *= si.totalram;
				do_div(size, 100);
				rest++;
			}
			if (*rest)
				goto bad_val;
			*blocks = size >> PAGE_CACHE_SHIFT;
		} else if (!strcmp(this_char,"nr_blocks")) {
			*blocks = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if
 (!strcmp(this_char,"nr_inodes")) {
			*inodes = memparse(value,&rest);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"mode")) {
			if (!mode)
				continue;
			*mode = simple_strtoul(value,&rest,8);	/* octal */
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"uid")) {
			if (!uid)
				continue;
			*uid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else if (!strcmp(this_char,"gid")) {
			if (!gid)
				continue;
			*gid = simple_strtoul(value,&rest,0);
			if (*rest)
				goto bad_val;
		} else {
			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
			       this_char);
			return 1;
		}
	}
	return 0;

bad_val:
	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
	       value, this_char);
	return 1;
}

/* Handle "mount -o remount": only the block/inode limits may change. */
static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	unsigned long max_blocks = sbinfo->max_blocks;
	unsigned long max_inodes = sbinfo->max_inodes;

	if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
		return -EINVAL;
	return shmem_set_size(sbinfo, max_blocks, max_inodes);
}

/* tmpfs lives entirely in the page cache, so fsync has nothing to write. */
static int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
	return 0;
}
#endif

static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;
	unsigned long blocks, inodes;
	int mode = S_IRWXUGO | S_ISVTX;
	uid_t uid = current->fsuid;
	gid_t gid = current->fsgid;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	struct sysinfo si;

	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance
	 */
	si_meminfo(&si);
	blocks = inodes = si.totalram / 2;

#ifdef CONFIG_TMPFS
	if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes))
		return NULL;
#endif

	spin_lock_init(&sbinfo->stat_lock);
	sbinfo->max_blocks = blocks;
	sbinfo->free_blocks = blocks;
	sbinfo->max_inodes = inodes;
	sbinfo->free_inodes = inodes;
	sb->s_maxbytes = SHMEM_MAX_BYTES;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = TMPFS_MAGIC;
	sb->s_op = &shmem_ops;
	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
	if (!inode)
		return NULL;

	inode->i_uid = uid;
	inode->i_gid = gid;
	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		return NULL;
	}
	sb->s_root = root;
	return sb;
}

static struct address_space_operations shmem_aops = {
	removepage:	shmem_removepage,
	writepage:	shmem_writepage,
#ifdef CONFIG_TMPFS
	readpage:	shmem_readpage,
	prepare_write:	shmem_prepare_write,
	commit_write:	shmem_commit_write,
#endif
};

static struct file_operations shmem_file_operations = {
	mmap:		shmem_mmap,
#ifdef CONFIG_TMPFS
	read:		shmem_file_read,
	write:		shmem_file_write,
	fsync:		shmem_sync_file,
#endif
};

static struct inode_operations shmem_inode_operations = {
	truncate:	shmem_truncate,
	setattr:	shmem_notify_change,
};

static struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	create:		shmem_create,
	lookup:		shmem_lookup,
	link:		shmem_link,
	unlink:		shmem_unlink,
	symlink:	shmem_symlink,
	mkdir:		shmem_mkdir,
	rmdir:		shmem_rmdir,
	mknod:		shmem_mknod,
	rename:		shmem_rename,
#endif
};

static struct super_operations shmem_ops = {
#ifdef CONFIG_TMPFS
	statfs:		shmem_statfs,
	remount_fs:	shmem_remount_fs,
#endif
	delete_inode:	shmem_delete_inode,
	put_inode:	force_delete,
};

static struct vm_operations_struct shmem_vm_ops = {
	nopage:	shmem_nopage,
};

#ifdef CONFIG_TMPFS
/* type "shm" will be tagged obsolete in 2.5 */
static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
#else
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
#endif

/* Internal tmpfs mount for anonymous/shm files; holds ERR_PTR after a failed init. */
static struct vfsmount *shm_mnt;

static int __init init_tmpfs(void)
{
	int error;

	error =
register_filesystem(&tmpfs_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register tmpfs\n");
		goto out3;
	}
#ifdef CONFIG_TMPFS
	error = register_filesystem(&shmem_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register shm fs\n");
		goto out2;
	}
	devfs_mk_dir(NULL, "shm", NULL);
#endif
	shm_mnt = kern_mount(&tmpfs_fs_type);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		printk(KERN_ERR "Could not kern_mount tmpfs\n");
		goto out1;
	}

	/* The internal instance should not do size checking */
	shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
	return 0;

	/* Unwind in reverse order of registration. */
out1:
#ifdef CONFIG_TMPFS
	unregister_filesystem(&shmem_fs_type);
out2:
#endif
	unregister_filesystem(&tmpfs_fs_type);
out3:
	/* Leave an ERR_PTR in shm_mnt so later callers can see the failure. */
	shm_mnt = ERR_PTR(error);
	return error;
}
module_init(init_tmpfs)

/*
 * shmem_file_setup - get an unlinked file living in tmpfs
 *
 * @name: name for dentry (to be seen in /proc/<pid>/maps)
 * @size: size to be set for the file
 *
 */
struct file *shmem_file_setup(char *name, loff_t size)
{
	int error;
	struct file *file;
	struct inode *inode;
	struct dentry *dentry, *root;
	struct qstr this;
	int vm_enough_memory(long pages);	/* local forward declaration */

	if (IS_ERR(shm_mnt))
		return (void *)shm_mnt;	/* propagate the init_tmpfs failure */

	if (size > SHMEM_MAX_BYTES)
		return ERR_PTR(-EINVAL);

	if (!vm_enough_memory(VM_ACCT(size)))
		return ERR_PTR(-ENOMEM);

	this.name = name;
	this.len = strlen(name);
	this.hash = 0; /* will go */
	root = shm_mnt->mnt_root;
	dentry = d_alloc(root, &this);
	if (!dentry)
		return ERR_PTR(-ENOMEM);

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto put_dentry;

	error = -ENOSPC;
	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
	if (!inode)
		goto close_file;

	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;	/* It is unlinked */
	file->f_vfsmnt = mntget(shm_mnt);
	file->f_dentry = dentry;
	file->f_op = &shmem_file_operations;
	file->f_mode = FMODE_WRITE |
FMODE_READ; + return file; + +close_file: + put_filp(file); +put_dentry: + dput(dentry); + return ERR_PTR(error); +} + +/* + * shmem_zero_setup - setup a shared anonymous mapping + * + * @vma: the vma to be mmapped is prepared by do_mmap_pgoff + */ +int shmem_zero_setup(struct vm_area_struct *vma) +{ + struct file *file; + loff_t size = vma->vm_end - vma->vm_start; + + file = shmem_file_setup("dev/zero", size); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (vma->vm_file) + fput(vma->vm_file); + vma->vm_file = file; + vma->vm_ops = &shmem_vm_ops; + return 0; +} + +EXPORT_SYMBOL(shmem_file_setup); + +int shmem_anon_setup(struct file *file, struct vm_area_struct *vma) +{ + struct file *new; + loff_t size = vma->vm_end - vma->vm_start; + + if(file->private_data == NULL){ + new = shmem_file_setup("/dev/anon", size); + if(IS_ERR(new)) + return(PTR_ERR(new)); + file->private_data = new; + } + + + if (vma->vm_file) + fput(vma->vm_file); + vma->vm_file = file->private_data; + vma->vm_ops = &shmem_vm_ops; + return 0; +} + |