From: Karim Yaghmour The implementation of the actual locking/lockless mechanisms. Signed-off-by: Karim Yaghmour Signed-off-by: Andrew Morton --- 25-akpm/fs/relayfs/relay_locking.c | 322 ++++++++++ 25-akpm/fs/relayfs/relay_locking.h | 34 + 25-akpm/fs/relayfs/relay_lockless.c | 541 +++++++++++++++++ 25-akpm/fs/relayfs/relay_lockless.h | 34 + 25-akpm/fs/relayfs/resize.c | 1105 ++++++++++++++++++++++++++++++++++++ 25-akpm/fs/relayfs/resize.h | 51 + 6 files changed, 2087 insertions(+) diff -puN /dev/null fs/relayfs/relay_locking.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/fs/relayfs/relay_locking.c 2005-01-13 23:01:09.117811032 -0800 @@ -0,0 +1,322 @@ +/* + * RelayFS locking scheme implementation. + * + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * + * This file is released under the GPL. + */ + +#include +#include "relay_locking.h" +#include "resize.h" + +/** + * switch_buffers - switches between read and write buffers. + * @cur_time: current time. + * @cur_tsc: the TSC associated with current_time, if applicable + * @rchan: the channel + * @finalizing: if true, don't start a new buffer + * @resetting: if true, + * + * This should be called from with interrupts disabled. + */ +static void +switch_buffers(struct timeval cur_time, + u32 cur_tsc, + struct rchan *rchan, + int finalizing, + int resetting, + int finalize_buffer_only) +{ + char *chan_buf_end; + int bytes_written; + + if (!rchan->half_switch) { + bytes_written = rchan->callbacks->buffer_end(rchan->id, + cur_write_pos(rchan), write_buf_end(rchan), + cur_time, cur_tsc, using_tsc(rchan)); + if (bytes_written == 0) + rchan->unused_bytes[rchan->buf_idx % rchan->n_bufs] = + write_buf_end(rchan) - cur_write_pos(rchan); + } + + if (finalize_buffer_only) { + rchan->bufs_produced++; + return; + } + + chan_buf_end = rchan->buf + rchan->n_bufs * rchan->buf_size; + if((write_buf(rchan) + rchan->buf_size >= chan_buf_end) || resetting) + write_buf(rchan) = rchan->buf; + else + write_buf(rchan) += rchan->buf_size; + write_buf_end(rchan) = write_buf(rchan) + rchan->buf_size; + write_limit(rchan) = write_buf_end(rchan) - rchan->end_reserve; + cur_write_pos(rchan) = write_buf(rchan); + + rchan->buf_start_time = cur_time; + rchan->buf_start_tsc = cur_tsc; + + if (resetting) + rchan->buf_idx = 0; + else + rchan->buf_idx++; + rchan->buf_id++; + + if (!packet_delivery(rchan)) + rchan->unused_bytes[rchan->buf_idx % rchan->n_bufs] = 0; + + if (resetting) { + rchan->bufs_produced = rchan->bufs_produced + rchan->n_bufs; + rchan->bufs_produced -= rchan->bufs_produced % rchan->n_bufs; + rchan->bufs_consumed = rchan->bufs_produced; + rchan->bytes_consumed = 0; + update_readers_consumed(rchan, rchan->bufs_consumed, rchan->bytes_consumed); + } else if (!rchan->half_switch) + rchan->bufs_produced++; + + rchan->half_switch = 0; + + if (!finalizing) { + bytes_written = rchan->callbacks->buffer_start(rchan->id, cur_write_pos(rchan), rchan->buf_id, cur_time, cur_tsc, using_tsc(rchan)); + cur_write_pos(rchan) += bytes_written; + } +} + +/** + * locking_reserve - reserve a slot in the buffer for an event. 
+ * @rchan: the channel + * @slot_len: the length of the slot to reserve + * @ts: variable that will receive the time the slot was reserved + * @tsc: the timestamp counter associated with time + * @err: receives the result flags + * @interrupting: if this write is interrupting another, set to non-zero + * + * Returns pointer to the beginning of the reserved slot, NULL if error. + * + * The err value contains the result flags and is an ORed combination + * of the following: + * + * RELAY_BUFFER_SWITCH_NONE - no buffer switch occurred + * RELAY_EVENT_DISCARD_NONE - event should not be discarded + * RELAY_BUFFER_SWITCH - buffer switch occurred + * RELAY_EVENT_DISCARD - event should be discarded (all buffers are full) + * RELAY_EVENT_TOO_LONG - event won't fit into even an empty buffer + */ +inline char * +locking_reserve(struct rchan *rchan, + u32 slot_len, + struct timeval *ts, + u32 *tsc, + int *err, + int *interrupting) +{ + u32 buffers_ready; + int bytes_written; + + *err = RELAY_BUFFER_SWITCH_NONE; + + if (slot_len >= rchan->buf_size) { + *err = RELAY_WRITE_DISCARD | RELAY_WRITE_TOO_LONG; + return NULL; + } + + if (rchan->initialized == 0) { + rchan->initialized = 1; + get_timestamp(&rchan->buf_start_time, + &rchan->buf_start_tsc, rchan); + rchan->unused_bytes[0] = 0; + bytes_written = rchan->callbacks->buffer_start( + rchan->id, cur_write_pos(rchan), + rchan->buf_id, rchan->buf_start_time, + rchan->buf_start_tsc, using_tsc(rchan)); + cur_write_pos(rchan) += bytes_written; + *tsc = get_time_delta(ts, rchan); + return cur_write_pos(rchan); + } + + *tsc = get_time_delta(ts, rchan); + + if (in_progress_event_size(rchan)) { + interrupted_pos(rchan) = cur_write_pos(rchan); + cur_write_pos(rchan) = in_progress_event_pos(rchan) + + in_progress_event_size(rchan) + + interrupting_size(rchan); + *interrupting = 1; + } else { + in_progress_event_pos(rchan) = cur_write_pos(rchan); + in_progress_event_size(rchan) = slot_len; + interrupting_size(rchan) = 0; + } + + if (cur_write_pos(rchan) + slot_len > write_limit(rchan)) { + if (atomic_read(&rchan->suspended) == 1) { + in_progress_event_pos(rchan) = NULL; + in_progress_event_size(rchan) = 0; + interrupting_size(rchan) = 0; + *err = RELAY_WRITE_DISCARD; + return NULL; + } + + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + if (buffers_ready == rchan->n_bufs - 1) { + if (!mode_continuous(rchan)) { + atomic_set(&rchan->suspended, 1); + in_progress_event_pos(rchan) = NULL; + in_progress_event_size(rchan) = 0; + interrupting_size(rchan) = 0; + get_timestamp(ts, tsc, rchan); + switch_buffers(*ts, *tsc, rchan, 0, 0, 1); + recalc_time_delta(ts, tsc, rchan); + rchan->half_switch = 1; + + cur_write_pos(rchan) = write_buf_end(rchan) - 1; + *err = RELAY_BUFFER_SWITCH | RELAY_WRITE_DISCARD; + return NULL; + } + } + + get_timestamp(ts, tsc, rchan); + switch_buffers(*ts, *tsc, rchan, 0, 0, 0); + recalc_time_delta(ts, tsc, rchan); + *err = RELAY_BUFFER_SWITCH; + } + + return cur_write_pos(rchan); +} + +/** + * locking_commit - commit a reserved slot in the buffer + * @rchan: the channel + * @from: commit the length starting here + * @len: length committed + * @deliver: length committed + * @interrupting: not used + * + * Commits len bytes and calls deliver callback if applicable. 
+ */ +inline void +locking_commit(struct rchan *rchan, + char *from, + u32 len, + int deliver, + int interrupting) +{ + cur_write_pos(rchan) += len; + + if (interrupting) { + cur_write_pos(rchan) = interrupted_pos(rchan); + interrupting_size(rchan) += len; + } else { + in_progress_event_size(rchan) = 0; + if (interrupting_size(rchan)) { + cur_write_pos(rchan) += interrupting_size(rchan); + interrupting_size(rchan) = 0; + } + } + + if (deliver) { + if (bulk_delivery(rchan)) { + u32 cur_idx = cur_write_pos(rchan) - rchan->buf; + u32 cur_bufno = cur_idx / rchan->buf_size; + from = rchan->buf + cur_bufno * rchan->buf_size; + len = cur_idx - cur_bufno * rchan->buf_size; + } + rchan->callbacks->deliver(rchan->id, from, len); + expand_check(rchan); + } +} + +/** + * locking_finalize: - finalize last buffer at end of channel use + * @rchan: the channel + */ +inline void +locking_finalize(struct rchan *rchan) +{ + unsigned long int flags; + struct timeval time; + u32 tsc; + + local_irq_save(flags); + get_timestamp(&time, &tsc, rchan); + switch_buffers(time, tsc, rchan, 1, 0, 0); + local_irq_restore(flags); +} + +/** + * locking_get_offset - get current and max 'file' offsets for VFS + * @rchan: the channel + * @max_offset: maximum channel offset + * + * Returns the current and maximum buffer offsets in VFS terms. + */ +u32 +locking_get_offset(struct rchan *rchan, + u32 *max_offset) +{ + if (max_offset) + *max_offset = rchan->buf_size * rchan->n_bufs - 1; + + return cur_write_pos(rchan) - rchan->buf; +} + +/** + * locking_reset - reset the channel + * @rchan: the channel + * @init: 1 if this is a first-time channel initialization + */ +void locking_reset(struct rchan *rchan, int init) +{ + if (init) + channel_lock(rchan) = SPIN_LOCK_UNLOCKED; + write_buf(rchan) = rchan->buf; + write_buf_end(rchan) = write_buf(rchan) + rchan->buf_size; + cur_write_pos(rchan) = write_buf(rchan); + write_limit(rchan) = write_buf_end(rchan) - rchan->end_reserve; + in_progress_event_pos(rchan) = NULL; + in_progress_event_size(rchan) = 0; + interrupted_pos(rchan) = NULL; + interrupting_size(rchan) = 0; +} + +/** + * locking_reset_index - atomically set channel index to the beginning + * @rchan: the channel + * + * If this fails, it means that something else just logged something + * and therefore we probably no longer want to do this. It's up to the + * caller anyway... 
+ * + * Returns 0 if the index was successfully set, negative otherwise + */ +int +locking_reset_index(struct rchan *rchan, u32 old_idx) +{ + unsigned long flags; + struct timeval time; + u32 tsc; + u32 cur_idx; + + relay_lock_channel(rchan, flags); + cur_idx = locking_get_offset(rchan, NULL); + if (cur_idx != old_idx) { + relay_unlock_channel(rchan, flags); + return -1; + } + + get_timestamp(&time, &tsc, rchan); + switch_buffers(time, tsc, rchan, 0, 1, 0); + + relay_unlock_channel(rchan, flags); + + return 0; +} + + + + + + + diff -puN /dev/null fs/relayfs/relay_locking.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/fs/relayfs/relay_locking.h 2005-01-13 23:01:09.118810880 -0800 @@ -0,0 +1,34 @@ +#ifndef _RELAY_LOCKING_H +#define _RELAY_LOCKING_H + +extern char * +locking_reserve(struct rchan *rchan, + u32 slot_len, + struct timeval *time_stamp, + u32 *tsc, + int *err, + int *interrupting); + +extern void +locking_commit(struct rchan *rchan, + char *from, + u32 len, + int deliver, + int interrupting); + +extern void +locking_resume(struct rchan *rchan); + +extern void +locking_finalize(struct rchan *rchan); + +extern u32 +locking_get_offset(struct rchan *rchan, u32 *max_offset); + +extern void +locking_reset(struct rchan *rchan, int init); + +extern int +locking_reset_index(struct rchan *rchan, u32 old_idx); + +#endif /* _RELAY_LOCKING_H */ diff -puN /dev/null fs/relayfs/relay_lockless.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/fs/relayfs/relay_lockless.c 2005-01-13 23:01:09.120810576 -0800 @@ -0,0 +1,541 @@ +/* + * RelayFS lockless scheme implementation. + * + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 2002, 2003 - Bob Wisniewski (bob@watson.ibm.com), IBM Corp + * + * This file is released under the GPL. + */ + +#include +#include "relay_lockless.h" +#include "resize.h" + +/** + * compare_and_store_volatile - self-explicit + * @ptr: ptr to the word that will receive the new value + * @oval: the value we think is currently in *ptr + * @nval: the value *ptr will get if we were right + */ +inline int +compare_and_store_volatile(volatile u32 *ptr, + u32 oval, + u32 nval) +{ + u32 prev; + + barrier(); + prev = cmpxchg(ptr, oval, nval); + barrier(); + + return (prev == oval); +} + +/** + * atomic_set_volatile - atomically set the value in ptr to nval. + * @ptr: ptr to the word that will receive the new value + * @nval: the new value + */ +inline void +atomic_set_volatile(atomic_t *ptr, + u32 nval) +{ + barrier(); + atomic_set(ptr, (int)nval); + barrier(); +} + +/** + * atomic_add_volatile - atomically add val to the value at ptr. + * @ptr: ptr to the word that will receive the addition + * @val: the value to add to *ptr + */ +inline void +atomic_add_volatile(atomic_t *ptr, u32 val) +{ + barrier(); + atomic_add((int)val, ptr); + barrier(); +} + +/** + * atomic_sub_volatile - atomically substract val from the value at ptr. + * @ptr: ptr to the word that will receive the subtraction + * @val: the value to subtract from *ptr + */ +inline void +atomic_sub_volatile(atomic_t *ptr, s32 val) +{ + barrier(); + atomic_sub((int)val, ptr); + barrier(); +} + +/** + * lockless_commit - commit a reserved slot in the buffer + * @rchan: the channel + * @from: commit the length starting here + * @len: length committed + * @deliver: length committed + * @interrupting: not used + * + * Commits len bytes and calls deliver callback if applicable. 
+ */ +inline void +lockless_commit(struct rchan *rchan, + char *from, + u32 len, + int deliver, + int interrupting) +{ + u32 bufno, idx; + + idx = from - rchan->buf; + + if (len > 0) { + bufno = RELAY_BUFNO_GET(idx, offset_bits(rchan)); + atomic_add_volatile(&fill_count(rchan, bufno), len); + } + + if (deliver) { + u32 mask = offset_mask(rchan); + if (bulk_delivery(rchan)) { + from = rchan->buf + RELAY_BUF_OFFSET_CLEAR(idx, mask); + len += RELAY_BUF_OFFSET_GET(idx, mask); + } + rchan->callbacks->deliver(rchan->id, from, len); + expand_check(rchan); + } +} + +/** + * get_buffer_end - get the address of the end of buffer + * @rchan: the channel + * @buf_idx: index into channel corresponding to address + */ +static inline char * +get_buffer_end(struct rchan *rchan, u32 buf_idx) +{ + return rchan->buf + + RELAY_BUF_OFFSET_CLEAR(buf_idx, offset_mask(rchan)) + + RELAY_BUF_SIZE(offset_bits(rchan)); +} + + +/** + * finalize_buffer - utility function consolidating end-of-buffer tasks. + * @rchan: the channel + * @end_idx: index into buffer to write the end-buffer event at + * @size_lost: number of unused bytes at the end of the buffer + * @time_stamp: the time of the end-buffer event + * @tsc: the timestamp counter associated with time + * @resetting: are we resetting the channel? + * + * This function must be called with local irqs disabled. + */ +static inline void +finalize_buffer(struct rchan *rchan, + u32 end_idx, + u32 size_lost, + struct timeval *time_stamp, + u32 *tsc, + int resetting) +{ + char* cur_write_pos; + char* write_buf_end; + u32 bufno; + int bytes_written; + + cur_write_pos = rchan->buf + end_idx; + write_buf_end = get_buffer_end(rchan, end_idx - 1); + + bytes_written = rchan->callbacks->buffer_end(rchan->id, cur_write_pos, + write_buf_end, *time_stamp, *tsc, using_tsc(rchan)); + if (bytes_written == 0) + rchan->unused_bytes[rchan->buf_idx % rchan->n_bufs] = size_lost; + + bufno = RELAY_BUFNO_GET(end_idx, offset_bits(rchan)); + atomic_add_volatile(&fill_count(rchan, bufno), size_lost); + if (resetting) { + rchan->bufs_produced = rchan->bufs_produced + rchan->n_bufs; + rchan->bufs_produced -= rchan->bufs_produced % rchan->n_bufs; + rchan->bufs_consumed = rchan->bufs_produced; + rchan->bytes_consumed = 0; + update_readers_consumed(rchan, rchan->bufs_consumed, rchan->bytes_consumed); + } else + rchan->bufs_produced++; +} + +/** + * lockless_finalize: - finalize last buffer at end of channel use + * @rchan: the channel + */ +inline void +lockless_finalize(struct rchan *rchan) +{ + u32 event_end_idx; + u32 size_lost; + unsigned long int flags; + struct timeval time; + u32 tsc; + + event_end_idx = RELAY_BUF_OFFSET_GET(idx(rchan), offset_mask(rchan)); + size_lost = RELAY_BUF_SIZE(offset_bits(rchan)) - event_end_idx; + + local_irq_save(flags); + get_timestamp(&time, &tsc, rchan); + finalize_buffer(rchan, idx(rchan) & idx_mask(rchan), size_lost, + &time, &tsc, 0); + local_irq_restore(flags); +} + +/** + * discard_check: - determine whether a write should be discarded + * @rchan: the channel + * @old_idx: index into buffer where check for space should begin + * @write_len: the length of the write to check + * @time_stamp: the time of the end-buffer event + * @tsc: the timestamp counter associated with time + * + * The return value contains the result flags and is an ORed combination + * of the following: + * + * RELAY_WRITE_DISCARD_NONE - write should not be discarded + * RELAY_BUFFER_SWITCH - buffer switch occurred + * RELAY_WRITE_DISCARD - write should be discarded (all buffers are full) 
+ * RELAY_WRITE_TOO_LONG - write won't fit into even an empty buffer + */ +static inline int +discard_check(struct rchan *rchan, + u32 old_idx, + u32 write_len, + struct timeval *time_stamp, + u32 *tsc) +{ + u32 buffers_ready; + u32 offset_mask = offset_mask(rchan); + u8 offset_bits = offset_bits(rchan); + u32 idx_mask = idx_mask(rchan); + u32 size_lost; + unsigned long int flags; + + if (write_len > RELAY_BUF_SIZE(offset_bits)) + return RELAY_WRITE_DISCARD | RELAY_WRITE_TOO_LONG; + + if (mode_continuous(rchan)) + return RELAY_WRITE_DISCARD_NONE; + + local_irq_save(flags); + if (atomic_read(&rchan->suspended) == 1) { + local_irq_restore(flags); + return RELAY_WRITE_DISCARD; + } + if (rchan->half_switch) { + local_irq_restore(flags); + return RELAY_WRITE_DISCARD_NONE; + } + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + if (buffers_ready == rchan->n_bufs - 1) { + atomic_set(&rchan->suspended, 1); + size_lost = RELAY_BUF_SIZE(offset_bits) + - RELAY_BUF_OFFSET_GET(old_idx, offset_mask); + finalize_buffer(rchan, old_idx & idx_mask, size_lost, + time_stamp, tsc, 0); + rchan->half_switch = 1; + idx(rchan) = RELAY_BUF_OFFSET_CLEAR((old_idx & idx_mask), offset_mask(rchan)) + RELAY_BUF_SIZE(offset_bits) - 1; + local_irq_restore(flags); + + return RELAY_BUFFER_SWITCH | RELAY_WRITE_DISCARD; + } + local_irq_restore(flags); + + return RELAY_WRITE_DISCARD_NONE; +} + +/** + * switch_buffers - switch over to a new sub-buffer + * @rchan: the channel + * @slot_len: the length of the slot needed for the current write + * @offset: the offset calculated for the new index + * @ts: timestamp + * @tsc: the timestamp counter associated with time + * @old_idx: the value of the buffer control index when we were called + * @old_idx: the new calculated value of the buffer control index + * @resetting: are we resetting the channel? 
+ */ +static inline void +switch_buffers(struct rchan *rchan, + u32 slot_len, + u32 offset, + struct timeval *ts, + u32 *tsc, + u32 new_idx, + u32 old_idx, + int resetting) +{ + u32 size_lost = rchan->end_reserve; + unsigned long int flags; + u32 idx_mask = idx_mask(rchan); + u8 offset_bits = offset_bits(rchan); + char *cur_write_pos; + u32 new_buf_no; + u32 start_reserve = rchan->start_reserve; + + if (resetting) + size_lost = RELAY_BUF_SIZE(offset_bits(rchan)) - old_idx % rchan->buf_size; + + if (offset > 0) + size_lost += slot_len - offset; + else + old_idx += slot_len; + + local_irq_save(flags); + if (!rchan->half_switch) + finalize_buffer(rchan, old_idx & idx_mask, size_lost, + ts, tsc, resetting); + rchan->half_switch = 0; + rchan->buf_start_time = *ts; + rchan->buf_start_tsc = *tsc; + local_irq_restore(flags); + + cur_write_pos = rchan->buf + RELAY_BUF_OFFSET_CLEAR((new_idx + & idx_mask), offset_mask(rchan)); + if (resetting) + rchan->buf_idx = 0; + else + rchan->buf_idx++; + rchan->buf_id++; + + rchan->unused_bytes[rchan->buf_idx % rchan->n_bufs] = 0; + + rchan->callbacks->buffer_start(rchan->id, cur_write_pos, + rchan->buf_id, *ts, *tsc, using_tsc(rchan)); + new_buf_no = RELAY_BUFNO_GET(new_idx & idx_mask, offset_bits); + atomic_sub_volatile(&fill_count(rchan, new_buf_no), + RELAY_BUF_SIZE(offset_bits) - start_reserve); + if (atomic_read(&fill_count(rchan, new_buf_no)) < start_reserve) + atomic_set_volatile(&fill_count(rchan, new_buf_no), + start_reserve); +} + +/** + * lockless_reserve_slow - the slow reserve path in the lockless scheme + * @rchan: the channel + * @slot_len: the length of the slot to reserve + * @ts: variable that will receive the time the slot was reserved + * @tsc: the timestamp counter associated with time + * @old_idx: the value of the buffer control index when we were called + * @err: receives the result flags + * + * Returns pointer to the beginning of the reserved slot, NULL if error. + + * err values same as for lockless_reserve. 
+ */ +static inline char * +lockless_reserve_slow(struct rchan *rchan, + u32 slot_len, + struct timeval *ts, + u32 *tsc, + u32 old_idx, + int *err) +{ + u32 new_idx, offset; + unsigned long int flags; + u32 offset_mask = offset_mask(rchan); + u32 idx_mask = idx_mask(rchan); + u32 start_reserve = rchan->start_reserve; + u32 end_reserve = rchan->end_reserve; + int discard_event; + u32 reserved_idx; + char *cur_write_pos; + int initializing = 0; + + *err = RELAY_BUFFER_SWITCH_NONE; + + discard_event = discard_check(rchan, old_idx, slot_len, ts, tsc); + if (discard_event != RELAY_WRITE_DISCARD_NONE) { + *err = discard_event; + return NULL; + } + + local_irq_save(flags); + if (rchan->initialized == 0) { + rchan->initialized = initializing = 1; + idx(rchan) = rchan->start_reserve + rchan->rchan_start_reserve; + } + local_irq_restore(flags); + + do { + old_idx = idx(rchan); + new_idx = old_idx + slot_len; + + offset = RELAY_BUF_OFFSET_GET(new_idx + end_reserve, + offset_mask); + if ((offset < slot_len) && (offset > 0)) { + reserved_idx = RELAY_BUF_OFFSET_CLEAR(new_idx + + end_reserve, offset_mask) + start_reserve; + new_idx = reserved_idx + slot_len; + } else if (offset < slot_len) { + reserved_idx = old_idx; + new_idx = RELAY_BUF_OFFSET_CLEAR(new_idx + + end_reserve, offset_mask) + start_reserve; + } else + reserved_idx = old_idx; + get_timestamp(ts, tsc, rchan); + } while (!compare_and_store_volatile(&idx(rchan), old_idx, new_idx)); + + reserved_idx &= idx_mask; + + if (initializing == 1) { + cur_write_pos = rchan->buf + + RELAY_BUF_OFFSET_CLEAR((old_idx & idx_mask), + offset_mask(rchan)); + rchan->buf_start_time = *ts; + rchan->buf_start_tsc = *tsc; + rchan->unused_bytes[0] = 0; + + rchan->callbacks->buffer_start(rchan->id, cur_write_pos, + rchan->buf_id, *ts, *tsc, using_tsc(rchan)); + } + + if (offset < slot_len) { + switch_buffers(rchan, slot_len, offset, ts, tsc, new_idx, + old_idx, 0); + *err = RELAY_BUFFER_SWITCH; + } + + /* If not using TSC, need to calc time delta */ + recalc_time_delta(ts, tsc, rchan); + + return rchan->buf + reserved_idx; +} + +/** + * lockless_reserve - reserve a slot in the buffer for an event. + * @rchan: the channel + * @slot_len: the length of the slot to reserve + * @ts: variable that will receive the time the slot was reserved + * @tsc: the timestamp counter associated with time + * @err: receives the result flags + * @interrupting: not used + * + * Returns pointer to the beginning of the reserved slot, NULL if error. 
+ * + * The err value contains the result flags and is an ORed combination + * of the following: + * + * RELAY_BUFFER_SWITCH_NONE - no buffer switch occurred + * RELAY_EVENT_DISCARD_NONE - event should not be discarded + * RELAY_BUFFER_SWITCH - buffer switch occurred + * RELAY_EVENT_DISCARD - event should be discarded (all buffers are full) + * RELAY_EVENT_TOO_LONG - event won't fit into even an empty buffer + */ +inline char * +lockless_reserve(struct rchan *rchan, + u32 slot_len, + struct timeval *ts, + u32 *tsc, + int *err, + int *interrupting) +{ + u32 old_idx, new_idx, offset; + u32 offset_mask = offset_mask(rchan); + + do { + old_idx = idx(rchan); + new_idx = old_idx + slot_len; + + offset = RELAY_BUF_OFFSET_GET(new_idx + rchan->end_reserve, + offset_mask); + if (offset < slot_len) + return lockless_reserve_slow(rchan, slot_len, + ts, tsc, old_idx, err); + get_time_or_tsc(ts, tsc, rchan); + } while (!compare_and_store_volatile(&idx(rchan), old_idx, new_idx)); + + /* If not using TSC, need to calc time delta */ + recalc_time_delta(ts, tsc, rchan); + + *err = RELAY_BUFFER_SWITCH_NONE; + + return rchan->buf + (old_idx & idx_mask(rchan)); +} + +/** + * lockless_get_offset - get current and max channel offsets + * @rchan: the channel + * @max_offset: maximum channel offset + * + * Returns the current and maximum channel offsets. + */ +u32 +lockless_get_offset(struct rchan *rchan, + u32 *max_offset) +{ + if (max_offset) + *max_offset = rchan->buf_size * rchan->n_bufs - 1; + + return rchan->initialized ? idx(rchan) & idx_mask(rchan) : 0; +} + +/** + * lockless_reset - reset the channel + * @rchan: the channel + * @init: 1 if this is a first-time channel initialization + */ +void lockless_reset(struct rchan *rchan, int init) +{ + int i; + + /* Start first buffer at 0 - (end_reserve + 1) so that it + gets initialized via buffer_start callback as well. */ + idx(rchan) = 0UL - (rchan->end_reserve + 1); + idx_mask(rchan) = + (1UL << (bufno_bits(rchan) + offset_bits(rchan))) - 1; + atomic_set(&fill_count(rchan, 0), + (int)rchan->start_reserve + + (int)rchan->rchan_start_reserve); + for (i = 1; i < rchan->n_bufs; i++) + atomic_set(&fill_count(rchan, i), + (int)RELAY_BUF_SIZE(offset_bits(rchan))); +} + +/** + * lockless_reset_index - atomically set channel index to the beginning + * @rchan: the channel + * @old_idx: the current index + * + * If this fails, it means that something else just logged something + * and therefore we probably no longer want to do this. It's up to the + * caller anyway... 
+ * + * Returns 0 if the index was successfully set, negative otherwise + */ +int +lockless_reset_index(struct rchan *rchan, u32 old_idx) +{ + struct timeval ts; + u32 tsc; + u32 new_idx; + + if (compare_and_store_volatile(&idx(rchan), old_idx, 0)) { + new_idx = rchan->start_reserve; + switch_buffers(rchan, 0, 0, &ts, &tsc, new_idx, old_idx, 1); + return 0; + } else + return -1; +} + + + + + + + + + + + + + diff -puN /dev/null fs/relayfs/relay_lockless.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/fs/relayfs/relay_lockless.h 2005-01-13 23:01:09.121810424 -0800 @@ -0,0 +1,34 @@ +#ifndef _RELAY_LOCKLESS_H +#define _RELAY_LOCKLESS_H + +extern char * +lockless_reserve(struct rchan *rchan, + u32 slot_len, + struct timeval *time_stamp, + u32 *tsc, + int * interrupting, + int * errcode); + +extern void +lockless_commit(struct rchan *rchan, + char * from, + u32 len, + int deliver, + int interrupting); + +extern void +lockless_resume(struct rchan *rchan); + +extern void +lockless_finalize(struct rchan *rchan); + +extern u32 +lockless_get_offset(struct rchan *rchan, u32 *max_offset); + +extern void +lockless_reset(struct rchan *rchan, int init); + +extern int +lockless_reset_index(struct rchan *rchan, u32 old_idx); + +#endif/* _RELAY_LOCKLESS_H */ diff -puN /dev/null fs/relayfs/resize.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/fs/relayfs/resize.c 2005-01-13 23:01:09.125809816 -0800 @@ -0,0 +1,1105 @@ +/* + * RelayFS buffer management and resizing code. + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include +#include "resize.h" + +/** + * alloc_page_array - alloc array to hold pages, but not pages + * @size: the total size of the memory represented by the page array + * @page_count: the number of pages the array can hold + * @err: 0 on success, negative otherwise + * + * Returns a pointer to the page array if successful, NULL otherwise. + */ +static struct page ** +alloc_page_array(int size, int *page_count, int *err) +{ + int n_pages; + struct page **page_array; + int page_array_size; + + *err = 0; + + size = PAGE_ALIGN(size); + n_pages = size >> PAGE_SHIFT; + page_array_size = n_pages * sizeof(struct page *); + page_array = kmalloc(page_array_size, GFP_KERNEL); + if (page_array == NULL) { + *err = -ENOMEM; + return NULL; + } + *page_count = n_pages; + memset(page_array, 0, page_array_size); + + return page_array; +} + +/** + * free_page_array - free array to hold pages, but not pages + * @page_array: pointer to the page array + */ +static inline void +free_page_array(struct page **page_array) +{ + kfree(page_array); +} + +/** + * depopulate_page_array - free and unreserve all pages in the array + * @page_array: pointer to the page array + * @page_count: number of pages to free + */ +static void +depopulate_page_array(struct page **page_array, int page_count) +{ + int i; + + for (i = 0; i < page_count; i++) { + ClearPageReserved(page_array[i]); + __free_page(page_array[i]); + } +} + +/** + * populate_page_array - allocate and reserve pages + * @page_array: pointer to the page array + * @page_count: number of pages to allocate + * + * Returns 0 if successful, negative otherwise. 
+ */ +static int +populate_page_array(struct page **page_array, int page_count) +{ + int i; + + for (i = 0; i < page_count; i++) { + page_array[i] = alloc_page(GFP_KERNEL); + if (unlikely(!page_array[i])) { + depopulate_page_array(page_array, i); + return -ENOMEM; + } + SetPageReserved(page_array[i]); + } + return 0; +} + +/** + * alloc_rchan_buf - allocate the initial channel buffer + * @size: total size of the buffer + * @page_array: receives a pointer to the buffer's page array + * @page_count: receives the number of pages allocated + * + * Returns a pointer to the resulting buffer, NULL if unsuccessful + */ +void * +alloc_rchan_buf(unsigned long size, struct page ***page_array, int *page_count) +{ + void *mem; + int err; + + *page_array = alloc_page_array(size, page_count, &err); + if (!*page_array) + return NULL; + + err = populate_page_array(*page_array, *page_count); + if (err) { + free_page_array(*page_array); + *page_array = NULL; + return NULL; + } + + mem = vmap(*page_array, *page_count, GFP_KERNEL, PAGE_KERNEL); + if (!mem) { + depopulate_page_array(*page_array, *page_count); + free_page_array(*page_array); + *page_array = NULL; + return NULL; + } + memset(mem, 0, size); + + return mem; +} + +/** + * expand_check - check whether the channel needs expanding + * @rchan: the channel + * + * If the channel needs expanding, the needs_resize callback is + * called with RELAY_RESIZE_EXPAND. + * + * Returns the suggested number of sub-buffers for the new + * buffer. + */ +void +expand_check(struct rchan *rchan) +{ + u32 active_bufs; + u32 new_n_bufs = 0; + u32 threshold = rchan->n_bufs * RESIZE_THRESHOLD; + + if (rchan->init_buf) + return; + + if (rchan->resize_min == 0) + return; + + if (rchan->resizing || rchan->replace_buffer) + return; + + active_bufs = rchan->bufs_produced - rchan->bufs_consumed + 1; + + if (rchan->resize_max && active_bufs == threshold) { + new_n_bufs = rchan->n_bufs * 2; + } + + if (new_n_bufs && (new_n_bufs * rchan->buf_size <= rchan->resize_max)) + rchan->callbacks->needs_resize(rchan->id, + RELAY_RESIZE_EXPAND, + rchan->buf_size, + new_n_bufs); +} + +/** + * can_shrink - check whether the channel can shrink + * @rchan: the channel + * @cur_idx: the current channel index + * + * Returns the suggested number of sub-buffers for the new + * buffer, 0 if the buffer is not shrinkable. + */ +static inline u32 +can_shrink(struct rchan *rchan, u32 cur_idx) +{ + u32 active_bufs = rchan->bufs_produced - rchan->bufs_consumed + 1; + u32 new_n_bufs = 0; + u32 cur_bufno_bytes = cur_idx % rchan->buf_size; + + if (rchan->resize_min == 0 || + rchan->resize_min >= rchan->n_bufs * rchan->buf_size) + goto out; + + if (active_bufs > 1) + goto out; + + if (cur_bufno_bytes != rchan->bytes_consumed) + goto out; + + new_n_bufs = rchan->resize_min / rchan->buf_size; +out: + return new_n_bufs; +} + +/** + * shrink_check: - timer function checking whether the channel can shrink + * @data: unused + * + * Every SHRINK_TIMER_SECS, check whether the channel is shrinkable. + * If so, we attempt to atomically reset the channel to the beginning. + * The needs_resize callback is then called with RELAY_RESIZE_SHRINK. + * If the reset fails, it means we really shouldn't be shrinking now + * and need to wait until the next time around. 
+ */ +static void +shrink_check(unsigned long data) +{ + struct rchan *rchan = (struct rchan *)data; + u32 shrink_to_nbufs, cur_idx; + + del_timer(&rchan->shrink_timer); + rchan->shrink_timer.expires = jiffies + SHRINK_TIMER_SECS * HZ; + add_timer(&rchan->shrink_timer); + + if (rchan->init_buf) + return; + + if (rchan->resizing || rchan->replace_buffer) + return; + + if (using_lockless(rchan)) + cur_idx = idx(rchan); + else + cur_idx = relay_get_offset(rchan, NULL); + + shrink_to_nbufs = can_shrink(rchan, cur_idx); + if (shrink_to_nbufs != 0 && reset_index(rchan, cur_idx) == 0) { + update_readers_consumed(rchan, rchan->bufs_consumed, 0); + rchan->callbacks->needs_resize(rchan->id, + RELAY_RESIZE_SHRINK, + rchan->buf_size, + shrink_to_nbufs); + } +} + +/** + * init_shrink_timer: - Start timer used to check shrinkability. + * @rchan: the channel + */ +void +init_shrink_timer(struct rchan *rchan) +{ + if (rchan->resize_min) { + init_timer(&rchan->shrink_timer); + rchan->shrink_timer.function = shrink_check; + rchan->shrink_timer.data = (unsigned long)rchan; + rchan->shrink_timer.expires = jiffies + SHRINK_TIMER_SECS * HZ; + add_timer(&rchan->shrink_timer); + } +} + + +/** + * alloc_new_pages - allocate new pages for expanding buffer + * @rchan: the channel + * + * Returns 0 on success, negative otherwise. + */ +static int +alloc_new_pages(struct rchan *rchan) +{ + int new_pages_size, err; + + if (unlikely(rchan->expand_page_array)) BUG(); + + new_pages_size = rchan->resize_alloc_size - rchan->alloc_size; + rchan->expand_page_array = alloc_page_array(new_pages_size, + &rchan->expand_page_count, &err); + if (rchan->expand_page_array == NULL) { + rchan->resize_err = -ENOMEM; + return -ENOMEM; + } + + err = populate_page_array(rchan->expand_page_array, + rchan->expand_page_count); + if (err) { + rchan->resize_err = -ENOMEM; + free_page_array(rchan->expand_page_array); + rchan->expand_page_array = NULL; + } + + return err; +} + +/** + * clear_resize_offset - helper function for buffer resizing + * @rchan: the channel + * + * Clear the saved offset change. + */ +static inline void +clear_resize_offset(struct rchan *rchan) +{ + rchan->resize_offset.ge = 0UL; + rchan->resize_offset.le = 0UL; + rchan->resize_offset.delta = 0; +} + +/** + * save_resize_offset - helper function for buffer resizing + * @rchan: the channel + * @ge: affected region ge this + * @le: affected region le this + * @delta: apply this delta + * + * Save a resize offset. + */ +static inline void +save_resize_offset(struct rchan *rchan, u32 ge, u32 le, int delta) +{ + rchan->resize_offset.ge = ge; + rchan->resize_offset.le = le; + rchan->resize_offset.delta = delta; +} + +/** + * update_file_offset - apply offset change to reader + * @reader: the channel reader + * @change_idx: the offset index into the offsets array + * + * Returns non-zero if the offset was applied. + * + * Apply the offset delta saved in change_idx to the reader's + * current read position. 
+ */ +static inline int +update_file_offset(struct rchan_reader *reader) +{ + int applied = 0; + struct rchan *rchan = reader->rchan; + u32 f_pos; + int delta = reader->rchan->resize_offset.delta; + + if (reader->vfs_reader) + f_pos = (u32)reader->pos.file->f_pos; + else + f_pos = reader->pos.f_pos; + + if (f_pos == relay_get_offset(rchan, NULL)) + return 0; + + if ((f_pos >= rchan->resize_offset.ge - 1) && + (f_pos <= rchan->resize_offset.le)) { + if (reader->vfs_reader) { + if (reader->rchan->read_start == f_pos) + if (!(rchan->flags & RELAY_MODE_START_AT_ZERO)) + reader->rchan->read_start += delta; + reader->pos.file->f_pos += delta; + } else + reader->pos.f_pos += delta; + applied = 1; + } + + return applied; +} + +/** + * update_file_offsets - apply offset change to readers + * @rchan: the channel + * + * Apply the saved offset deltas to all files open on the channel. + */ +static inline void +update_file_offsets(struct rchan *rchan) +{ + struct list_head *p; + struct rchan_reader *reader; + + read_lock(&rchan->open_readers_lock); + list_for_each(p, &rchan->open_readers) { + reader = list_entry(p, struct rchan_reader, list); + if (update_file_offset(reader)) + reader->offset_changed = 1; + } + read_unlock(&rchan->open_readers_lock); +} + +/** + * setup_expand_buf - setup expand buffer for replacement + * @rchan: the channel + * @newsize: the size of the new buffer + * @oldsize: the size of the old buffer + * @old_n_bufs: the number of sub-buffers in the old buffer + * + * Inserts new pages into the old buffer to create a larger + * new channel buffer, splitting them at old_cur_idx, the bottom + * half of the old buffer going to the bottom of the new, likewise + * for the top half. + */ +static void +setup_expand_buf(struct rchan *rchan, int newsize, int oldsize, u32 old_n_bufs) +{ + u32 cur_idx; + int cur_bufno, delta, i, j; + u32 ge, le; + int cur_pageno; + u32 free_bufs, free_pages; + u32 free_pages_in_cur_buf; + u32 free_bufs_to_end; + u32 cur_pages = rchan->alloc_size >> PAGE_SHIFT; + u32 pages_per_buf = cur_pages / rchan->n_bufs; + u32 bufs_ready = rchan->bufs_produced - rchan->bufs_consumed; + + if (!rchan->resize_page_array || !rchan->expand_page_array || + !rchan->buf_page_array) + return; + + if (bufs_ready >= rchan->n_bufs) { + bufs_ready = rchan->n_bufs; + free_bufs = 0; + } else + free_bufs = rchan->n_bufs - bufs_ready - 1; + + cur_idx = relay_get_offset(rchan, NULL); + cur_pageno = cur_idx / PAGE_SIZE; + cur_bufno = cur_idx / rchan->buf_size; + + free_pages_in_cur_buf = (pages_per_buf - 1) - (cur_pageno % pages_per_buf); + free_pages = free_bufs * pages_per_buf + free_pages_in_cur_buf; + free_bufs_to_end = (rchan->n_bufs - 1) - cur_bufno; + if (free_bufs >= free_bufs_to_end) { + free_pages = free_bufs_to_end * pages_per_buf + free_pages_in_cur_buf; + free_bufs = free_bufs_to_end; + } + + for (i = 0, j = 0; i <= cur_pageno + free_pages; i++, j++) + rchan->resize_page_array[j] = rchan->buf_page_array[i]; + for (i = 0; i < rchan->expand_page_count; i++, j++) + rchan->resize_page_array[j] = rchan->expand_page_array[i]; + for (i = cur_pageno + free_pages + 1; i < rchan->buf_page_count; i++, j++) + rchan->resize_page_array[j] = rchan->buf_page_array[i]; + + delta = newsize - oldsize; + ge = (cur_pageno + 1 + free_pages) * PAGE_SIZE; + le = oldsize; + save_resize_offset(rchan, ge, le, delta); + + rchan->expand_buf_id = rchan->buf_id + 1 + free_bufs; +} + +/** + * setup_shrink_buf - setup shrink buffer for replacement + * @rchan: the channel + * + * Removes pages from the old 
buffer to create a smaller + * new channel buffer. + */ +static void +setup_shrink_buf(struct rchan *rchan) +{ + int i; + int copy_end_page; + + if (!rchan->resize_page_array || !rchan->shrink_page_array || + !rchan->buf_page_array) + return; + + copy_end_page = rchan->resize_alloc_size / PAGE_SIZE; + + for (i = 0; i < copy_end_page; i++) + rchan->resize_page_array[i] = rchan->buf_page_array[i]; +} + +/** + * cleanup_failed_alloc - relaybuf_alloc helper + */ +static void +cleanup_failed_alloc(struct rchan *rchan) +{ + if (rchan->expand_page_array) { + depopulate_page_array(rchan->expand_page_array, + rchan->expand_page_count); + free_page_array(rchan->expand_page_array); + rchan->expand_page_array = NULL; + rchan->expand_page_count = 0; + } else if (rchan->shrink_page_array) { + free_page_array(rchan->shrink_page_array); + rchan->shrink_page_array = NULL; + rchan->shrink_page_count = 0; + } + + if (rchan->resize_page_array) { + free_page_array(rchan->resize_page_array); + rchan->resize_page_array = NULL; + rchan->resize_page_count = 0; + } +} + +/** + * relaybuf_alloc - allocate a new resized channel buffer + * @private: pointer to the channel struct + * + * Internal - manages the allocation and remapping of new channel + * buffers. + */ +static void +relaybuf_alloc(void *private) +{ + struct rchan *rchan = (struct rchan *)private; + int i, j, err; + u32 old_cur_idx; + int free_size; + int free_start_page, free_end_page; + u32 newsize, oldsize; + + if (rchan->resize_alloc_size > rchan->alloc_size) { + err = alloc_new_pages(rchan); + if (err) goto cleanup; + } else { + free_size = rchan->alloc_size - rchan->resize_alloc_size; + BUG_ON(free_size <= 0); + rchan->shrink_page_array = alloc_page_array(free_size, + &rchan->shrink_page_count, &err); + if (rchan->shrink_page_array == NULL) + goto cleanup; + free_start_page = rchan->resize_alloc_size / PAGE_SIZE; + free_end_page = rchan->alloc_size / PAGE_SIZE; + for (i = 0, j = free_start_page; j < free_end_page; i++, j++) + rchan->shrink_page_array[i] = rchan->buf_page_array[j]; + } + + rchan->resize_page_array = alloc_page_array(rchan->resize_alloc_size, + &rchan->resize_page_count, &err); + if (rchan->resize_page_array == NULL) + goto cleanup; + + old_cur_idx = relay_get_offset(rchan, NULL); + clear_resize_offset(rchan); + newsize = rchan->resize_alloc_size; + oldsize = rchan->alloc_size; + if (newsize > oldsize) + setup_expand_buf(rchan, newsize, oldsize, rchan->n_bufs); + else + setup_shrink_buf(rchan); + + rchan->resize_buf = vmap(rchan->resize_page_array, rchan->resize_page_count, GFP_KERNEL, PAGE_KERNEL); + + if (rchan->resize_buf == NULL) + goto cleanup; + + rchan->replace_buffer = 1; + rchan->resizing = 0; + + rchan->callbacks->needs_resize(rchan->id, RELAY_RESIZE_REPLACE, 0, 0); + return; + +cleanup: + cleanup_failed_alloc(rchan); + rchan->resize_err = -ENOMEM; + return; +} + +/** + * relaybuf_free - free a resized channel buffer + * @private: pointer to the channel struct + * + * Internal - manages the de-allocation and unmapping of old channel + * buffers. 
+ */ +static void +relaybuf_free(void *private) +{ + struct free_rchan_buf *free_buf = (struct free_rchan_buf *)private; + int i; + + if (free_buf->unmap_buf) + vunmap(free_buf->unmap_buf); + + for (i = 0; i < 3; i++) { + if (!free_buf->page_array[i].array) + continue; + if (free_buf->page_array[i].count) + depopulate_page_array(free_buf->page_array[i].array, + free_buf->page_array[i].count); + free_page_array(free_buf->page_array[i].array); + } + + kfree(free_buf); +} + +/** + * calc_order - determine the power-of-2 order of a resize + * @high: the larger size + * @low: the smaller size + * + * Returns order + */ +static inline int +calc_order(u32 high, u32 low) +{ + int order = 0; + + if (!high || !low || high <= low) + return 0; + + while (high > low) { + order++; + high /= 2; + } + + return order; +} + +/** + * check_size - check the sanity of the requested channel size + * @rchan: the channel + * @nbufs: the new number of sub-buffers + * @err: return code + * + * Returns the non-zero total buffer size if ok, otherwise 0 and + * sets errcode if not. + */ +static inline u32 +check_size(struct rchan *rchan, u32 nbufs, int *err) +{ + u32 new_channel_size = 0; + + *err = 0; + + if (nbufs > rchan->n_bufs) { + rchan->resize_order = calc_order(nbufs, rchan->n_bufs); + if (!rchan->resize_order) { + *err = -EINVAL; + goto out; + } + + new_channel_size = rchan->buf_size * nbufs; + if (new_channel_size > rchan->resize_max) { + *err = -EINVAL; + goto out; + } + } else if (nbufs < rchan->n_bufs) { + if (rchan->n_bufs < 2) { + *err = -EINVAL; + goto out; + } + rchan->resize_order = -calc_order(rchan->n_bufs, nbufs); + if (!rchan->resize_order) { + *err = -EINVAL; + goto out; + } + + new_channel_size = rchan->buf_size * nbufs; + if (new_channel_size < rchan->resize_min) { + *err = -EINVAL; + goto out; + } + } else + *err = -EINVAL; +out: + return new_channel_size; +} + +/** + * __relay_realloc_buffer - allocate a new channel buffer + * @rchan: the channel + * @new_nbufs: the new number of sub-buffers + * @async: do the allocation using a work queue + * + * Internal - see relay_realloc_buffer() for details. + */ +static int +__relay_realloc_buffer(struct rchan *rchan, u32 new_nbufs, int async) +{ + u32 new_channel_size; + int err = 0; + + if (new_nbufs == rchan->n_bufs) + return -EINVAL; + + if (down_trylock(&rchan->resize_sem)) + return -EBUSY; + + if (rchan->init_buf) { + err = -EPERM; + goto out; + } + + if (rchan->replace_buffer) { + err = -EBUSY; + goto out; + } + + if (rchan->resizing) { + err = -EBUSY; + goto out; + } else + rchan->resizing = 1; + + if (rchan->resize_failures > MAX_RESIZE_FAILURES) { + err = -ENOMEM; + goto out; + } + + new_channel_size = check_size(rchan, new_nbufs, &err); + if (err) + goto out; + + rchan->resize_n_bufs = new_nbufs; + rchan->resize_buf_size = rchan->buf_size; + rchan->resize_alloc_size = FIX_SIZE(new_channel_size); + + if (async) { + INIT_WORK(&rchan->work, relaybuf_alloc, rchan); + schedule_delayed_work(&rchan->work, 1); + } else + relaybuf_alloc((void *)rchan); +out: + up(&rchan->resize_sem); + + return err; +} + +/** + * relay_realloc_buffer - allocate a new channel buffer + * @rchan_id: the channel id + * @bufsize: the new sub-buffer size + * @nbufs: the new number of sub-buffers + * + * Allocates a new channel buffer using the specified sub-buffer size + * and count. If async is non-zero, the allocation is done in the + * background using a work queue. 
When the allocation has completed, + * the needs_resize() callback is called with a resize_type of + * RELAY_RESIZE_REPLACE. This function doesn't replace the old buffer + * with the new - see relay_replace_buffer(). See + * Documentation/filesystems/relayfs.txt for more details. + * + * Returns 0 on success, or errcode if the channel is busy or if + * the allocation couldn't happen for some reason. + */ +int +relay_realloc_buffer(int rchan_id, u32 new_nbufs, int async) +{ + int err; + + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + err = __relay_realloc_buffer(rchan, new_nbufs, async); + + rchan_put(rchan); + + return err; +} + +/** + * expand_cancel_check - check whether the current expand needs canceling + * @rchan: the channel + * + * Returns 1 if the expand should be canceled, 0 otherwise. + */ +static int +expand_cancel_check(struct rchan *rchan) +{ + if (rchan->buf_id >= rchan->expand_buf_id) + return 1; + else + return 0; +} + +/** + * shrink_cancel_check - check whether the current shrink needs canceling + * @rchan: the channel + * + * Returns 1 if the shrink should be canceled, 0 otherwise. + */ +static int +shrink_cancel_check(struct rchan *rchan, u32 newsize) +{ + u32 active_bufs = rchan->bufs_produced - rchan->bufs_consumed + 1; + u32 cur_idx = relay_get_offset(rchan, NULL); + + if (cur_idx >= newsize) + return 1; + + if (active_bufs > 1) + return 1; + + return 0; +} + +/** + * switch_rchan_buf - do_replace_buffer helper + */ +static void +switch_rchan_buf(struct rchan *rchan, + int newsize, + int oldsize, + u32 old_nbufs, + u32 cur_idx) +{ + u32 newbufs, cur_bufno; + int i; + + cur_bufno = cur_idx / rchan->buf_size; + + rchan->buf = rchan->resize_buf; + rchan->alloc_size = rchan->resize_alloc_size; + rchan->n_bufs = rchan->resize_n_bufs; + + if (newsize > oldsize) { + u32 ge = rchan->resize_offset.ge; + u32 moved_buf = ge / rchan->buf_size; + + newbufs = (newsize - oldsize) / rchan->buf_size; + for (i = moved_buf; i < old_nbufs; i++) { + if (using_lockless(rchan)) + atomic_set(&fill_count(rchan, i + newbufs), + atomic_read(&fill_count(rchan, i))); + rchan->unused_bytes[i + newbufs] = rchan->unused_bytes[i]; + } + for (i = moved_buf; i < moved_buf + newbufs; i++) { + if (using_lockless(rchan)) + atomic_set(&fill_count(rchan, i), + (int)RELAY_BUF_SIZE(offset_bits(rchan))); + rchan->unused_bytes[i] = 0; + } + } + + rchan->buf_idx = cur_bufno; + + if (!using_lockless(rchan)) { + cur_write_pos(rchan) = rchan->buf + cur_idx; + write_buf(rchan) = rchan->buf + cur_bufno * rchan->buf_size; + write_buf_end(rchan) = write_buf(rchan) + rchan->buf_size; + write_limit(rchan) = write_buf_end(rchan) - rchan->end_reserve; + } else { + idx(rchan) &= idx_mask(rchan); + bufno_bits(rchan) += rchan->resize_order; + idx_mask(rchan) = + (1UL << (bufno_bits(rchan) + offset_bits(rchan))) - 1; + } +} + +/** + * do_replace_buffer - does the work of channel buffer replacement + * @rchan: the channel + * @newsize: new channel buffer size + * @oldsize: old channel buffer size + * @old_n_bufs: old channel sub-buffer count + * + * Returns 0 if replacement happened, 1 if canceled + * + * Does the work of switching buffers and fixing everything up + * so the channel can continue with a new size. 
+ */ +static int +do_replace_buffer(struct rchan *rchan, + int newsize, + int oldsize, + u32 old_nbufs) +{ + u32 cur_idx; + int err = 0; + int canceled; + + cur_idx = relay_get_offset(rchan, NULL); + + if (newsize > oldsize) + canceled = expand_cancel_check(rchan); + else + canceled = shrink_cancel_check(rchan, newsize); + + if (canceled) { + err = -EAGAIN; + goto out; + } + + switch_rchan_buf(rchan, newsize, oldsize, old_nbufs, cur_idx); + + if (rchan->resize_offset.delta) + update_file_offsets(rchan); + + atomic_set(&rchan->suspended, 0); + + rchan->old_buf_page_array = rchan->buf_page_array; + rchan->buf_page_array = rchan->resize_page_array; + rchan->buf_page_count = rchan->resize_page_count; + rchan->resize_page_array = NULL; + rchan->resize_page_count = 0; + rchan->resize_buf = NULL; + rchan->resize_buf_size = 0; + rchan->resize_alloc_size = 0; + rchan->resize_n_bufs = 0; + rchan->resize_err = 0; + rchan->resize_order = 0; +out: + rchan->callbacks->needs_resize(rchan->id, + RELAY_RESIZE_REPLACED, + rchan->buf_size, + rchan->n_bufs); + return err; +} + +/** + * add_free_page_array - add a page_array to be freed + * @free_rchan_buf: the free_rchan_buf struct + * @page_array: the page array to free + * @page_count: the number of pages to free, 0 to free the array only + * + * Internal - Used add page_arrays to be freed asynchronously. + */ +static inline void +add_free_page_array(struct free_rchan_buf *free_rchan_buf, + struct page **page_array, int page_count) +{ + int cur = free_rchan_buf->cur++; + + free_rchan_buf->page_array[cur].array = page_array; + free_rchan_buf->page_array[cur].count = page_count; +} + +/** + * free_rchan_buf - free a channel buffer + * @buf: pointer to the buffer to free + * @page_array: pointer to the buffer's page array + * @page_count: number of pages in page array + */ +int +free_rchan_buf(void *buf, struct page **page_array, int page_count) +{ + struct free_rchan_buf *free_buf; + + free_buf = kmalloc(sizeof(struct free_rchan_buf), GFP_ATOMIC); + if (!free_buf) + return -ENOMEM; + memset(free_buf, 0, sizeof(struct free_rchan_buf)); + + free_buf->unmap_buf = buf; + add_free_page_array(free_buf, page_array, page_count); + + INIT_WORK(&free_buf->work, relaybuf_free, free_buf); + schedule_delayed_work(&free_buf->work, 1); + + return 0; +} + +/** + * free_replaced_buffer - free a channel's old buffer + * @rchan: the channel + * @oldbuf: the old buffer + * @oldsize: old buffer size + * + * Frees a channel buffer via work queue. + */ +static int +free_replaced_buffer(struct rchan *rchan, char *oldbuf, int oldsize) +{ + struct free_rchan_buf *free_buf; + + free_buf = kmalloc(sizeof(struct free_rchan_buf), GFP_ATOMIC); + if (!free_buf) + return -ENOMEM; + memset(free_buf, 0, sizeof(struct free_rchan_buf)); + + free_buf->unmap_buf = oldbuf; + add_free_page_array(free_buf, rchan->old_buf_page_array, 0); + rchan->old_buf_page_array = NULL; + add_free_page_array(free_buf, rchan->expand_page_array, 0); + add_free_page_array(free_buf, rchan->shrink_page_array, rchan->shrink_page_count); + + rchan->expand_page_array = NULL; + rchan->expand_page_count = 0; + rchan->shrink_page_array = NULL; + rchan->shrink_page_count = 0; + + INIT_WORK(&free_buf->work, relaybuf_free, free_buf); + schedule_delayed_work(&free_buf->work, 1); + + return 0; +} + +/** + * free_canceled_resize - free buffers allocated for a canceled resize + * @rchan: the channel + * + * Frees canceled buffers via work queue. 
+ */ +static int +free_canceled_resize(struct rchan *rchan) +{ + struct free_rchan_buf *free_buf; + + free_buf = kmalloc(sizeof(struct free_rchan_buf), GFP_ATOMIC); + if (!free_buf) + return -ENOMEM; + memset(free_buf, 0, sizeof(struct free_rchan_buf)); + + if (rchan->resize_alloc_size > rchan->alloc_size) + add_free_page_array(free_buf, rchan->expand_page_array, rchan->expand_page_count); + else + add_free_page_array(free_buf, rchan->shrink_page_array, 0); + + add_free_page_array(free_buf, rchan->resize_page_array, 0); + free_buf->unmap_buf = rchan->resize_buf; + + rchan->expand_page_array = NULL; + rchan->expand_page_count = 0; + rchan->shrink_page_array = NULL; + rchan->shrink_page_count = 0; + rchan->resize_page_array = NULL; + rchan->resize_page_count = 0; + rchan->resize_buf = NULL; + + INIT_WORK(&free_buf->work, relaybuf_free, free_buf); + schedule_delayed_work(&free_buf->work, 1); + + return 0; +} + +/** + * __relay_replace_buffer - replace channel buffer with new buffer + * @rchan: the channel + * + * Internal - see relay_replace_buffer() for details. + * + * Returns 0 if successful, negative otherwise. + */ +static int +__relay_replace_buffer(struct rchan *rchan) +{ + int oldsize; + int err = 0; + char *oldbuf; + + if (down_trylock(&rchan->resize_sem)) + return -EBUSY; + + if (rchan->init_buf) { + err = -EPERM; + goto out; + } + + if (!rchan->replace_buffer) + goto out; + + if (rchan->resizing) { + err = -EBUSY; + goto out; + } + + if (rchan->resize_buf == NULL) { + err = -EINVAL; + goto out; + } + + oldbuf = rchan->buf; + oldsize = rchan->alloc_size; + + err = do_replace_buffer(rchan, rchan->resize_alloc_size, + oldsize, rchan->n_bufs); + if (err == 0) + err = free_replaced_buffer(rchan, oldbuf, oldsize); + else + err = free_canceled_resize(rchan); +out: + rchan->replace_buffer = 0; + up(&rchan->resize_sem); + + return err; +} + +/** + * relay_replace_buffer - replace channel buffer with new buffer + * @rchan_id: the channel id + * + * Replaces the current channel buffer with the new buffer allocated + * by relay_alloc_buffer and contained in the channel struct. When the + * replacement is complete, the needs_resize() callback is called with + * RELAY_RESIZE_REPLACED. + * + * Returns 0 on success, or errcode if the channel is busy or if + * the replacement or previous allocation didn't happen for some reason. + */ +int +relay_replace_buffer(int rchan_id) +{ + int err; + + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + err = __relay_replace_buffer(rchan); + + rchan_put(rchan); + + return err; +} + +EXPORT_SYMBOL(relay_realloc_buffer); +EXPORT_SYMBOL(relay_replace_buffer); + diff -puN /dev/null fs/relayfs/resize.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/fs/relayfs/resize.h 2005-01-13 23:01:09.126809664 -0800 @@ -0,0 +1,51 @@ +#ifndef _RELAY_RESIZE_H +#define _RELAY_RESIZE_H + +/* + * If the channel usage has been below the low water mark for more than + * this amount of time, we can shrink the buffer if necessary. 
+ */ +#define SHRINK_TIMER_SECS 60 + +/* This inspired by rtai/shmem */ +#define FIX_SIZE(x) (((x) - 1) & PAGE_MASK) + PAGE_SIZE + +/* Don't attempt resizing again after this many failures */ +#define MAX_RESIZE_FAILURES 1 + +/* Trigger resizing if a resizable channel is this full */ +#define RESIZE_THRESHOLD 3 / 4 + +/* + * Used for deferring resized channel free + */ +struct free_rchan_buf +{ + char *unmap_buf; + struct + { + struct page **array; + int count; + } page_array[3]; + + int cur; + struct work_struct work; /* resize de-allocation work struct */ +}; + +extern void * +alloc_rchan_buf(unsigned long size, + struct page ***page_array, + int *page_count); + +extern int +free_rchan_buf(void *buf, + struct page **page_array, + int page_count); + +extern void +expand_check(struct rchan *rchan); + +extern void +init_shrink_timer(struct rchan *rchan); + +#endif/* _RELAY_RESIZE_H */ _
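
A minimal usage sketch (not part of the patch) may help tie the two schemes together. Both expose the same reserve/commit interface, so a write path in the channel code would look roughly like the sketch below. The relay_write_event() name and the direct calls to locking_reserve()/locking_commit() are illustrative assumptions only; the real channel code dispatches through scheme-specific function pointers chosen at channel creation, and the lockless variant needs no channel lock around the reserve.

/*
 * Illustrative sketch, assuming the locking scheme and the helpers used
 * above (relay_lock_channel()/relay_unlock_channel() are used the same
 * way in locking_reset_index()).  Not part of this patch.
 */
static int relay_write_event(struct rchan *rchan, const void *data, u32 len)
{
	struct timeval ts;
	u32 tsc;
	int err, interrupting = 0;
	unsigned long flags;
	char *reserved;

	relay_lock_channel(rchan, flags);

	/* Reserve len bytes in the current sub-buffer, switching
	 * sub-buffers if needed; NULL means the write was discarded. */
	reserved = locking_reserve(rchan, len, &ts, &tsc, &err, &interrupting);
	if (!reserved) {
		relay_unlock_channel(rchan, flags);
		return err;	/* e.g. RELAY_WRITE_DISCARD */
	}

	/* Copy the event payload into the reserved slot, then commit it.
	 * Passing deliver=1 requests packet-style delivery; a bulk-mode
	 * caller would typically deliver only on a buffer switch. */
	memcpy(reserved, data, len);
	locking_commit(rchan, reserved, len, 1, interrupting);

	relay_unlock_channel(rchan, flags);
	return 0;
}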