diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-01-06 00:29:13 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-01-06 00:29:13 -0800 |
commit | 6c64bf64a199f1a9959fb98d4a35abf7c0ec023d (patch) | |
tree | ac5608dce7d523102d2ac06a3695d2367e2f79d0 /fs | |
parent | 87475e1fe3a2a3f1643f3e60716e1d7bcf4ccd50 (diff) | |
download | history-6c64bf64a199f1a9959fb98d4a35abf7c0ec023d.tar.gz |
Make pipe data structure be a circular list of pages, rather than
a circular list of one page.
This improves pipe throughput, and allows us to (eventually)
use these lists of page buffers for moving data around efficiently.
Diffstat (limited to 'fs')
-rw-r--r-- | fs/pipe.c | 130 |
1 files changed, 87 insertions, 43 deletions
```diff
diff --git a/fs/pipe.c b/fs/pipe.c
index 464a83fc4749ca..f1c5de710cd687 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -14,6 +14,8 @@
 #include <linux/mount.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/uio.h>
+#include <linux/highmem.h>
+
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
@@ -89,6 +91,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 	   unsigned long nr_segs, loff_t *ppos)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
+	struct pipe_inode_info *info;
 	int do_wakeup;
 	ssize_t ret;
 	struct iovec *iov = (struct iovec *)_iov;
@@ -102,32 +105,40 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 	do_wakeup = 0;
 	ret = 0;
 	down(PIPE_SEM(*inode));
+	info = inode->i_pipe;
 	for (;;) {
-		int size = PIPE_LEN(*inode);
-		if (size) {
-			char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
-			ssize_t chars = PIPE_MAX_RCHUNK(*inode);
+		int bufs = info->nrbufs;
+		if (bufs) {
+			int curbuf = info->curbuf;
+			struct pipe_buffer *buf = info->bufs + curbuf;
+			size_t chars = buf->len;
+			int error;
 
 			if (chars > total_len)
 				chars = total_len;
-			if (chars > size)
-				chars = size;
 
-			if (pipe_iov_copy_to_user(iov, pipebuf, chars)) {
+			error = pipe_iov_copy_to_user(iov, kmap(buf->page) + buf->offset, chars);
+			kunmap(buf->page);
+			if (unlikely(error)) {
 				if (!ret) ret = -EFAULT;
 				break;
 			}
 			ret += chars;
-
-			PIPE_START(*inode) += chars;
-			PIPE_START(*inode) &= (PIPE_SIZE - 1);
-			PIPE_LEN(*inode) -= chars;
+			buf->offset += chars;
+			buf->len -= chars;
+			if (!buf->len) {
+				__free_page(buf->page);
+				buf->page = NULL;
+				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
+				info->curbuf = curbuf;
+				info->nrbufs = --bufs;
+				do_wakeup = 1;
+			}
 			total_len -= chars;
-			do_wakeup = 1;
 			if (!total_len)
 				break;	/* common path: read succeeded */
 		}
-		if (PIPE_LEN(*inode)) /* test for cyclic buffers */
+		if (bufs)	/* More to do? */
 			continue;
 		if (!PIPE_WRITERS(*inode))
 			break;
@@ -177,8 +188,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 	    unsigned long nr_segs, loff_t *ppos)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
+	struct pipe_inode_info *info;
 	ssize_t ret;
-	size_t min;
 	int do_wakeup;
 	struct iovec *iov = (struct iovec *)_iov;
 	size_t total_len;
@@ -190,48 +201,58 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 
 	do_wakeup = 0;
 	ret = 0;
-	min = total_len;
-	if (min > PIPE_BUF)
-		min = 1;
 	down(PIPE_SEM(*inode));
+	info = inode->i_pipe;
 	for (;;) {
-		int free;
+		int bufs;
 		if (!PIPE_READERS(*inode)) {
 			send_sig(SIGPIPE, current, 0);
 			if (!ret) ret = -EPIPE;
 			break;
 		}
-		free = PIPE_FREE(*inode);
-		if (free >= min) {
-			/* transfer data */
-			ssize_t chars = PIPE_MAX_WCHUNK(*inode);
-			char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
+		bufs = info->nrbufs;
+		if (bufs < PIPE_BUFFERS) {
+			ssize_t chars;
+			int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
+			struct pipe_buffer *buf = info->bufs + newbuf;
+			struct page *page = alloc_page(GFP_USER);
+			int error;
+
+			if (unlikely(!page)) {
+				ret = ret ? : -ENOMEM;
+				break;
+			}
 			/* Always wakeup, even if the copy fails. Otherwise
 			 * we lock up (O_NONBLOCK-)readers that sleep due to
 			 * syscall merging.
+			 * FIXME! Is this really true?
 			 */
 			do_wakeup = 1;
+			chars = PAGE_SIZE;
 			if (chars > total_len)
 				chars = total_len;
-			if (chars > free)
-				chars = free;
 
-			if (pipe_iov_copy_from_user(pipebuf, iov, chars)) {
+			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
+			kunmap(page);
+			if (unlikely(error)) {
 				if (!ret) ret = -EFAULT;
+				__free_page(page);
 				break;
 			}
 			ret += chars;
 
-			PIPE_LEN(*inode) += chars;
+			/* Insert it into the buffer array */
+			buf->page = page;
+			buf->offset = 0;
+			buf->len = chars;
+			info->nrbufs = ++bufs;
+
 			total_len -= chars;
 			if (!total_len)
 				break;
 		}
-		if (PIPE_FREE(*inode) && ret) {
-			/* handle cyclic data buffers */
-			min = 1;
+		if (bufs < PIPE_BUFFERS)
 			continue;
-		}
 		if (filp->f_flags & O_NONBLOCK) {
 			if (!ret) ret = -EAGAIN;
 			break;
@@ -283,9 +304,23 @@ static int
 pipe_ioctl(struct inode *pino, struct file *filp,
 	   unsigned int cmd, unsigned long arg)
 {
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct pipe_inode_info *info;
+	int count, buf, nrbufs;
+
 	switch (cmd) {
 		case FIONREAD:
-			return put_user(PIPE_LEN(*pino), (int __user *)arg);
+			down(PIPE_SEM(*inode));
+			info = inode->i_pipe;
+			count = 0;
+			buf = info->curbuf;
+			nrbufs = info->nrbufs;
+			while (--nrbufs >= 0) {
+				count += info->bufs[buf].len;
+				buf = (buf+1) & (PIPE_BUFFERS-1);
+			}
+			up(PIPE_SEM(*inode));
+			return put_user(count, (int __user *)arg);
 		default:
 			return -EINVAL;
 	}
@@ -297,13 +332,16 @@ pipe_poll(struct file *filp, poll_table *wait)
 {
 	unsigned int mask;
 	struct inode *inode = filp->f_dentry->d_inode;
+	struct pipe_inode_info *info = inode->i_pipe;
+	int nrbufs;
 
 	poll_wait(filp, PIPE_WAIT(*inode), wait);
 
 	/* Reading only -- no need for acquiring the semaphore. */
-	mask = POLLIN | POLLRDNORM;
-	if (PIPE_EMPTY(*inode))
-		mask = POLLOUT | POLLWRNORM;
+	nrbufs = info->nrbufs;
+	mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
+	mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
+
 	if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
 		mask |= POLLHUP;
 	if (!PIPE_READERS(*inode))
@@ -529,31 +567,37 @@ struct file_operations rdwr_pipe_fops = {
 
 void free_pipe_info(struct inode *inode)
 {
+	int i;
 	struct pipe_inode_info *info = inode->i_pipe;
+
 	inode->i_pipe = NULL;
-	free_page((unsigned long)info->base);
+	for (i = 0; i < PIPE_BUFFERS; i++) {
+		struct page *page = info->bufs[i].page;
+
+		/* We'll make this a data-dependent free some day .. */
+		if (page)
+			__free_page(page);
+	}
 	kfree(info);
 }
 
 struct inode* pipe_new(struct inode* inode)
 {
 	unsigned long page;
+	struct pipe_inode_info *info;
 
 	page = __get_free_page(GFP_USER);
 	if (!page)
 		return NULL;
 
-	inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
-	if (!inode->i_pipe)
+	info = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+	if (!info)
 		goto fail_page;
+	memset(info, 0, sizeof(*info));
+	inode->i_pipe = info;
 
 	init_waitqueue_head(PIPE_WAIT(*inode));
-	PIPE_BASE(*inode) = (char*) page;
-	PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
-	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
-	PIPE_WAITING_WRITERS(*inode) = 0;
 	PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
-	*PIPE_FASYNC_READERS(*inode) = *PIPE_FASYNC_WRITERS(*inode) = NULL;
 
 	return inode;
 fail_page:
```