diff -urNp --exclude CVS --exclude BitKeeper x-ref/include/linux/ipc.h x/include/linux/ipc.h --- x-ref/include/linux/ipc.h 2003-08-31 04:48:24.000000000 +0200 +++ x/include/linux/ipc.h 2003-09-01 19:16:45.000000000 +0200 @@ -56,6 +56,8 @@ struct ipc_perm /* used by in-kernel data structures */ struct kern_ipc_perm { + spinlock_t lock; + int deleted; key_t key; uid_t uid; gid_t gid; diff -urNp --exclude CVS --exclude BitKeeper x-ref/init/main.c x/init/main.c --- x-ref/init/main.c 2003-09-01 19:16:14.000000000 +0200 +++ x/init/main.c 2003-09-01 19:16:45.000000000 +0200 @@ -430,9 +430,6 @@ asmlinkage void __init start_kernel(void #ifdef CONFIG_PROC_FS proc_root_init(); #endif -#if defined(CONFIG_SYSVIPC) - ipc_init(); -#endif check_bugs(); printk("POSIX conformance testing by UNIFIX\n"); @@ -449,6 +446,9 @@ asmlinkage void __init start_kernel(void if (gdb_enter) gdb_hook(); /* right at boot time */ #endif +#if defined(CONFIG_SYSVIPC) + ipc_init(); +#endif /* Do the rest non-__init'ed, we're now alive */ rest_init(); diff -urNp --exclude CVS --exclude BitKeeper x-ref/ipc/msg.c x/ipc/msg.c --- x-ref/ipc/msg.c 2003-06-13 22:07:42.000000000 +0200 +++ x/ipc/msg.c 2003-09-01 19:16:45.000000000 +0200 @@ -92,14 +92,14 @@ static atomic_t msg_hdrs = ATOMIC_INIT(0 static struct ipc_ids msg_ids; #define msg_lock(id) ((struct msg_queue*)ipc_lock(&msg_ids,id)) -#define msg_unlock(id) ipc_unlock(&msg_ids,id) +#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) #define msg_rmid(id) ((struct msg_queue*)ipc_rmid(&msg_ids,id)) #define msg_checkid(msq, msgid) \ ipc_checkid(&msg_ids,&msq->q_perm,msgid) #define msg_buildid(id, seq) \ ipc_buildid(&msg_ids, id, seq) -static void freeque (int id); +static void freeque (struct msg_queue *msq, int id); static int newque (key_t key, int msgflg); #ifdef CONFIG_PROC_FS static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); @@ -119,12 +119,12 @@ static int newque (key_t key, int msgflg int id; 
struct msg_queue *msq; - msq = (struct msg_queue *) kmalloc (sizeof (*msq), GFP_KERNEL); + msq = ipc_rcu_alloc(sizeof(*msq)); if (!msq) return -ENOMEM; id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni); if(id == -1) { - kfree(msq); + ipc_rcu_free(msq, sizeof(*msq)); return -ENOSPC; } msq->q_perm.mode = (msgflg & S_IRWXUGO); @@ -138,7 +138,7 @@ static int newque (key_t key, int msgflg INIT_LIST_HEAD(&msq->q_messages); INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); - msg_unlock(id); + msg_unlock(msq); return msg_buildid(id,msq->q_perm.seq); } @@ -277,17 +277,22 @@ static void expunge_all(struct msg_queue wake_up_process(msr->r_tsk); } } - -static void freeque (int id) +/* + * freeque() wakes up waiters on the sender and receiver waiting queue, + * removes the message queue from message queue ID + * array, and cleans up all the messages associated with this queue. + * + * msg_ids.sem and the spinlock for this message queue is hold + * before freeque() is called. msg_ids.sem remains locked on exit. 
+ */ +static void freeque (struct msg_queue *msq, int id) { - struct msg_queue *msq; struct list_head *tmp; - msq = msg_rmid(id); - expunge_all(msq,-EIDRM); ss_wakeup(&msq->q_senders,1); - msg_unlock(id); + msq = msg_rmid(id); + msg_unlock(msq); tmp = msq->q_messages.next; while(tmp != &msq->q_messages) { @@ -297,7 +302,7 @@ static void freeque (int id) free_msg(msg); } atomic_sub(msq->q_cbytes, &msg_bytes); - kfree(msq); + ipc_rcu_free(msq, sizeof(*msq)); } asmlinkage long sys_msgget (key_t key, int msgflg) @@ -323,7 +328,7 @@ asmlinkage long sys_msgget (key_t key, i ret = -EACCES; else ret = msg_buildid(id, msq->q_perm.seq); - msg_unlock(id); + msg_unlock(msq); } up(&msg_ids.sem); return ret; @@ -503,7 +508,7 @@ asmlinkage long sys_msgctl (int msqid, i tbuf.msg_qbytes = msq->q_qbytes; tbuf.msg_lspid = msq->q_lspid; tbuf.msg_lrpid = msq->q_lrpid; - msg_unlock(msqid); + msg_unlock(msq); if (copy_msqid_to_user(buf, &tbuf, version)) return -EFAULT; return success_return; @@ -556,11 +561,11 @@ asmlinkage long sys_msgctl (int msqid, i * due to a larger queue size. */ ss_wakeup(&msq->q_senders,0); - msg_unlock(msqid); + msg_unlock(msq); break; } case IPC_RMID: - freeque (msqid); + freeque (msq, msqid); break; } err = 0; @@ -568,10 +573,10 @@ out_up: up(&msg_ids.sem); return err; out_unlock_up: - msg_unlock(msqid); + msg_unlock(msq); goto out_up; out_unlock: - msg_unlock(msqid); + msg_unlock(msq); return err; } @@ -666,7 +671,7 @@ retry: goto out_unlock_free; } ss_add(msq, &s); - msg_unlock(msqid); + msg_unlock(msq); schedule(); current->state= TASK_RUNNING; @@ -699,7 +704,7 @@ retry: msg = NULL; out_unlock_free: - msg_unlock(msqid); + msg_unlock(msq); out_free: if(msg!=NULL) free_msg(msg); @@ -781,7 +786,7 @@ retry: atomic_sub(msg->m_ts,&msg_bytes); atomic_dec(&msg_hdrs); ss_wakeup(&msq->q_senders,0); - msg_unlock(msqid); + msg_unlock(msq); out_success: msgsz = (msgsz > msg->m_ts) ? 
msg->m_ts : msgsz; if (put_user (msg->m_type, &msgp->mtype) || @@ -792,7 +797,6 @@ out_success: return msgsz; } else { - struct msg_queue *t; /* no message waiting. Prepare for pipelined * receive. */ @@ -810,7 +814,7 @@ out_success: msr_d.r_maxsize = msgsz; msr_d.r_msg = ERR_PTR(-EAGAIN); current->state = TASK_INTERRUPTIBLE; - msg_unlock(msqid); + msg_unlock(msq); schedule(); current->state = TASK_RUNNING; @@ -821,21 +825,19 @@ out_success: if(!IS_ERR(msg)) goto out_success; */ - t = msg_lock(msqid); - if(t==NULL) - msqid=-1; + msq = msg_lock(msqid); msg = (struct msg_msg*)msr_d.r_msg; if(!IS_ERR(msg)) { /* our message arived while we waited for * the spinlock. Process it. */ - if(msqid!=-1) - msg_unlock(msqid); + if(msq) + msg_unlock(msq); goto out_success; } err = PTR_ERR(msg); if(err == -EAGAIN) { - if(msqid==-1) + if(!msq) BUG(); list_del(&msr_d.r_list); if (signal_pending(current)) @@ -845,8 +847,8 @@ out_success: } } out_unlock: - if(msqid!=-1) - msg_unlock(msqid); + if(msq) + msg_unlock(msq); return err; } @@ -879,7 +881,7 @@ static int sysvipc_msg_read_proc(char *b msq->q_stime, msq->q_rtime, msq->q_ctime); - msg_unlock(i); + msg_unlock(msq); pos += len; if(pos < offset) { diff -urNp --exclude CVS --exclude BitKeeper x-ref/ipc/sem.c x/ipc/sem.c --- x-ref/ipc/sem.c 2003-08-26 00:13:17.000000000 +0200 +++ x/ipc/sem.c 2003-09-01 19:17:31.000000000 +0200 @@ -68,7 +68,7 @@ #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id)) -#define sem_unlock(id) ipc_unlock(&sem_ids,id) +#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) #define sem_rmid(id) ((struct sem_array*)ipc_rmid(&sem_ids,id)) #define sem_checkid(sma, semid) \ ipc_checkid(&sem_ids,&sma->sem_perm,semid) @@ -77,7 +77,7 @@ static struct ipc_ids sem_ids; static int newary (key_t, int, int); -static void freeary (int id); +static void freeary (struct sem_array *sma, int id); #ifdef CONFIG_PROC_FS static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, 
void *data); #endif @@ -124,14 +124,14 @@ static int newary (key_t key, int nsems, return -ENOSPC; size = sizeof (*sma) + nsems * sizeof (struct sem); - sma = (struct sem_array *) ipc_alloc(size); + sma = ipc_rcu_alloc(size); if (!sma) { return -ENOMEM; } memset (sma, 0, size); id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni); if(id == -1) { - ipc_free(sma, size); + ipc_rcu_free(sma, size); return -ENOSPC; } used_sems += nsems; @@ -145,7 +145,7 @@ static int newary (key_t key, int nsems, /* sma->undo = NULL; */ sma->sem_nsems = nsems; sma->sem_ctime = CURRENT_TIME; - sem_unlock(id); + sem_unlock(sma); return sem_buildid(id, sma->sem_perm.seq); } @@ -178,7 +178,7 @@ asmlinkage long sys_semget (key_t key, i err = -EACCES; else err = sem_buildid(id, sma->sem_perm.seq); - sem_unlock(id); + sem_unlock(sma); } up(&sem_ids.sem); @@ -194,12 +194,12 @@ static int sem_revalidate(int semid, str if(smanew==NULL) return -EIDRM; if(smanew != sma || sem_checkid(sma,semid) || sma->sem_nsems != nsems) { - sem_unlock(semid); + sem_unlock(smanew); return -EIDRM; } if (ipcperms(&sma->sem_perm, flg)) { - sem_unlock(semid); + sem_unlock(smanew); return -EACCES; } return 0; @@ -384,16 +384,16 @@ static int count_semzcnt (struct sem_arr return semzcnt; } -/* Free a semaphore set. */ -static void freeary (int id) +/* Free a semaphore set. freeary() is called with sem_ids.sem down and + * the spinlock for this semaphore set hold. sem_ids.sem remains locked + * on exit. + */ +static void freeary (struct sem_array *sma, int id) { - struct sem_array *sma; struct sem_undo *un; struct sem_queue *q; int size; - sma = sem_rmid(id); - /* Invalidate the existing undo structures for this semaphore set. * (They will be freed without any further action in sem_exit() * or during the next semop.) 
@@ -407,11 +407,14 @@ static void freeary (int id) q->prev = NULL; wake_up_process(q->sleeper); /* doesn't sleep */ } - sem_unlock(id); + + /* Remove the semaphore set from the ID array*/ + sma = sem_rmid(id); + sem_unlock(sma); used_sems -= sma->sem_nsems; size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem); - ipc_free(sma, size); + ipc_rcu_free(sma, size); } static unsigned long copy_semid_to_user(void *buf, struct semid64_ds *in, int version) @@ -439,6 +442,7 @@ static unsigned long copy_semid_to_user( static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg) { int err = -EINVAL; + struct sem_array *sma; switch(cmd) { case IPC_INFO: @@ -472,7 +476,6 @@ static int semctl_nolock(int semid, int } case SEM_STAT: { - struct sem_array *sma; struct semid64_ds tbuf; int id; @@ -494,7 +497,7 @@ static int semctl_nolock(int semid, int tbuf.sem_otime = sma->sem_otime; tbuf.sem_ctime = sma->sem_ctime; tbuf.sem_nsems = sma->sem_nsems; - sem_unlock(semid); + sem_unlock(sma); if (copy_semid_to_user (arg.buf, &tbuf, version)) return -EFAULT; return id; @@ -504,7 +507,7 @@ static int semctl_nolock(int semid, int } return err; out_unlock: - sem_unlock(semid); + sem_unlock(sma); return err; } @@ -538,7 +541,7 @@ static int semctl_main(int semid, int se int i; if(nsems > SEMMSL_FAST) { - sem_unlock(semid); + sem_unlock(sma); sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) return -ENOMEM; @@ -549,7 +552,7 @@ static int semctl_main(int semid, int se for (i = 0; i < sma->sem_nsems; i++) sem_io[i] = sma->sem_base[i].semval; - sem_unlock(semid); + sem_unlock(sma); err = 0; if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) err = -EFAULT; @@ -560,7 +563,7 @@ static int semctl_main(int semid, int se int i; struct sem_undo *un; - sem_unlock(semid); + sem_unlock(sma); if(nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); @@ -602,7 +605,7 @@ static int semctl_main(int semid, int se tbuf.sem_otime = sma->sem_otime; 
tbuf.sem_ctime = sma->sem_ctime; tbuf.sem_nsems = sma->sem_nsems; - sem_unlock(semid); + sem_unlock(sma); if (copy_semid_to_user (arg.buf, &tbuf, version)) return -EFAULT; return 0; @@ -648,7 +651,7 @@ static int semctl_main(int semid, int se } } out_unlock: - sem_unlock(semid); + sem_unlock(sma); out_free: if(sem_io != fast_sem_io) ipc_free(sem_io, sizeof(ushort)*nsems); @@ -724,7 +727,7 @@ static int semctl_down(int semid, int se switch(cmd){ case IPC_RMID: - freeary(semid); + freeary(sma, semid); err = 0; break; case IPC_SET: @@ -733,18 +736,18 @@ static int semctl_down(int semid, int se ipcp->mode = (ipcp->mode & ~S_IRWXUGO) | (setbuf.mode & S_IRWXUGO); sma->sem_ctime = CURRENT_TIME; - sem_unlock(semid); + sem_unlock(sma); err = 0; break; default: - sem_unlock(semid); + sem_unlock(sma); err = -EINVAL; break; } return err; out_unlock: - sem_unlock(semid); + sem_unlock(sma); return err; } @@ -810,7 +813,7 @@ static int alloc_undo(struct sem_array * nsems = sma->sem_nsems; size = sizeof(struct sem_undo) + sizeof(short)*nsems; - sem_unlock(semid); + sem_unlock(sma); un = (struct sem_undo *) kmalloc(size, GFP_KERNEL); if (!un) @@ -942,19 +945,18 @@ asmlinkage long sys_semtimedop (int semi current->semsleeping = &queue; for (;;) { - struct sem_array* tmp; queue.status = -EINTR; queue.sleeper = current; current->state = TASK_INTERRUPTIBLE; - sem_unlock(semid); + sem_unlock(sma); if (timeout) jiffies_left = schedule_timeout(jiffies_left); else schedule(); - tmp = sem_lock(semid); - if(tmp==NULL) { + sma = sem_lock(semid); + if(sma==NULL) { if(queue.prev != NULL) BUG(); current->semsleeping = NULL; @@ -991,7 +993,7 @@ update: if (alter) update_queue (sma); out_unlock_free: - sem_unlock(semid); + sem_unlock(sma); out_free: if(sops != fast_sops) kfree(sops); @@ -1031,7 +1033,7 @@ void sem_exit (void) remove_from_queue(q->sma,q); } if(sma!=NULL) - sem_unlock(semid); + sem_unlock(sma); } for (up = ¤t->semundo; (u = *up); *up = u->proc_next, kfree(u)) { @@ -1070,7 +1072,7 @@ 
found: /* maybe some queued-up processes were waiting for this */ update_queue(sma); next_entry: - sem_unlock(semid); + sem_unlock(sma); } current->semundo = NULL; } @@ -1100,7 +1102,7 @@ static int sysvipc_sem_read_proc(char *b sma->sem_perm.cgid, sma->sem_otime, sma->sem_ctime); - sem_unlock(i); + sem_unlock(sma); pos += len; if(pos < offset) { diff -urNp --exclude CVS --exclude BitKeeper x-ref/ipc/shm.c x/ipc/shm.c --- x-ref/ipc/shm.c 2003-09-01 19:16:13.000000000 +0200 +++ x/ipc/shm.c 2003-09-01 19:16:45.000000000 +0200 @@ -48,9 +48,7 @@ static struct vm_operations_struct shm_v static struct ipc_ids shm_ids; #define shm_lock(id) ((struct shmid_kernel*)ipc_lock(&shm_ids,id)) -#define shm_unlock(id) ipc_unlock(&shm_ids,id) -#define shm_lockall() ipc_lockall(&shm_ids) -#define shm_unlockall() ipc_unlockall(&shm_ids) +#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm) #define shm_get(id) ((struct shmid_kernel*)ipc_get(&shm_ids,id)) #define shm_buildid(id, seq) \ ipc_buildid(&shm_ids, id, seq) @@ -103,7 +101,7 @@ static inline void shm_inc (int id) { shp->shm_atim = CURRENT_TIME; shp->shm_lprid = current->pid; shp->shm_nattch++; - shm_unlock(id); + shm_unlock(shp); } /* This is called by fork, once for every shm attach. 
*/ @@ -123,11 +121,11 @@ static void shm_open (struct vm_area_str static void shm_destroy (struct shmid_kernel *shp) { shm_rmid (shp->id); - shm_unlock(shp->id); + shm_unlock(shp); shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shmem_lock(shp->shm_file, 0); fput (shp->shm_file); - kfree (shp); + ipc_rcu_free (shp, sizeof(struct shmid_kernel)); } /* @@ -153,7 +151,7 @@ static void shm_close (struct vm_area_st shp->shm_flags & SHM_DEST) shm_destroy (shp); else - shm_unlock(id); + shm_unlock(shp); up (&shm_ids.sem); } @@ -204,7 +202,7 @@ static int newseg (key_t key, int shmflg if (shm_tot + numpages >= shm_ctlall) return -ENOSPC; - shp = (struct shmid_kernel *) kmalloc (sizeof (*shp), GFP_USER); + shp = ipc_rcu_alloc(sizeof(*shp)); if (!shp) return -ENOMEM; sprintf (name, "SYSV%08x", key); @@ -240,14 +238,14 @@ static int newseg (key_t key, int shmflg file->f_dentry->d_inode->i_ino = shp->id; file->f_op = &shm_file_operations; shm_tot += numpages; - shm_unlock (id); + shm_unlock(shp); return shp->id; no_id: fput(file); no_file: - kfree(shp); + ipc_rcu_free (shp, sizeof(struct shmid_kernel)); return error; } @@ -276,7 +274,7 @@ asmlinkage long sys_shmget (key_t key, s err = -EACCES; else err = shm_buildid(id, shp->shm_perm.seq); - shm_unlock(id); + shm_unlock(shp); } up(&shm_ids.sem); return err; @@ -433,14 +431,12 @@ asmlinkage long sys_shmctl (int shmid, i memset(&shm_info,0,sizeof(shm_info)); down(&shm_ids.sem); - shm_lockall(); shm_info.used_ids = shm_ids.in_use; shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp); shm_info.shm_tot = shm_tot; shm_info.swap_attempts = 0; shm_info.swap_successes = 0; err = shm_ids.max_id; - shm_unlockall(); up(&shm_ids.sem); if(copy_to_user (buf, &shm_info, sizeof(shm_info))) return -EFAULT; @@ -478,7 +474,7 @@ asmlinkage long sys_shmctl (int shmid, i tbuf.shm_cpid = shp->shm_cprid; tbuf.shm_lpid = shp->shm_lprid; tbuf.shm_nattch = shp->shm_nattch; - shm_unlock(shmid); + shm_unlock(shp); if(copy_shmid_to_user (buf, 
&tbuf, version)) return -EFAULT; return result; @@ -505,7 +501,7 @@ asmlinkage long sys_shmctl (int shmid, i shmem_lock(shp->shm_file, 0); shp->shm_flags &= ~SHM_LOCKED; } - shm_unlock(shmid); + shm_unlock(shp); return err; } case IPC_RMID: @@ -538,7 +534,7 @@ asmlinkage long sys_shmctl (int shmid, i shp->shm_flags |= SHM_DEST; /* Do not find it any more */ shp->shm_perm.key = IPC_PRIVATE; - shm_unlock(shmid); + shm_unlock(shp); } else shm_destroy (shp); up(&shm_ids.sem); @@ -578,12 +574,12 @@ asmlinkage long sys_shmctl (int shmid, i err = 0; out_unlock_up: - shm_unlock(shmid); + shm_unlock(shp); out_up: up(&shm_ids.sem); return err; out_unlock: - shm_unlock(shmid); + shm_unlock(shp); return err; } @@ -640,17 +636,17 @@ asmlinkage long sys_shmat (int shmid, ch return -EINVAL; err = shm_checkid(shp,shmid); if (err) { - shm_unlock(shmid); + shm_unlock(shp); return err; } if (ipcperms(&shp->shm_perm, acc_mode)) { - shm_unlock(shmid); + shm_unlock(shp); return -EACCES; } file = shp->shm_file; size = i_size_read(file->f_dentry->d_inode); shp->shm_nattch++; - shm_unlock(shmid); + shm_unlock(shp); down_write(¤t->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { @@ -679,7 +675,7 @@ invalid: shp->shm_flags & SHM_DEST) shm_destroy (shp); else - shm_unlock(shmid); + shm_unlock(shp); up (&shm_ids.sem); *raddr = (unsigned long) user_addr; @@ -751,7 +747,7 @@ static int sysvipc_shm_read_proc(char *b shp->shm_atim, shp->shm_dtim, shp->shm_ctim); - shm_unlock(i); + shm_unlock(shp); pos += len; if(pos < offset) { diff -urNp --exclude CVS --exclude BitKeeper x-ref/ipc/util.c x/ipc/util.c --- x-ref/ipc/util.c 2003-08-26 00:13:17.000000000 +0200 +++ x/ipc/util.c 2003-09-01 19:16:45.000000000 +0200 @@ -8,6 +8,8 @@ * Chris Evans, * Nov 1999 - ipc helper functions, unified SMP locking * Manfred Spraul + * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary(). 
+ * Mingming Cao */ #include @@ -19,6 +21,7 @@ #include #include #include +#include #if defined(CONFIG_SYSVIPC) @@ -68,13 +71,12 @@ void __init ipc_init_ids(struct ipc_ids* ids->seq_max = seq_limit; } - ids->entries = ipc_alloc(sizeof(struct ipc_id)*size); + ids->entries = ipc_rcu_alloc(sizeof(struct ipc_id)*size); if(ids->entries == NULL) { printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n"); ids->size = 0; } - ids->ary = SPIN_LOCK_UNLOCKED; for(i=0;isize;i++) ids->entries[i].p = NULL; } @@ -83,7 +85,8 @@ void __init ipc_init_ids(struct ipc_ids* * ipc_findkey - find a key in an ipc identifier set * @ids: Identifier set * @key: The key to find - * + * + * Requires ipc_ids.sem locked. * Returns the identifier if found or -1 if not. */ @@ -91,8 +94,13 @@ int ipc_findkey(struct ipc_ids* ids, key { int id; struct kern_ipc_perm* p; + int max_id = ids->max_id; - for (id = 0; id <= ids->max_id; id++) { + /* + * read_barrier_depends is not needed here + * since ipc_ids.sem is held + */ + for (id = 0; id <= max_id; id++) { p = ids->entries[id].p; if(p==NULL) continue; @@ -102,6 +110,9 @@ int ipc_findkey(struct ipc_ids* ids, key return -1; } +/* + * Requires ipc_ids.sem locked + */ static int grow_ary(struct ipc_ids* ids, int newsize) { struct ipc_id* new; @@ -113,21 +124,22 @@ static int grow_ary(struct ipc_ids* ids, if(newsize <= ids->size) return newsize; - new = ipc_alloc(sizeof(struct ipc_id)*newsize); + new = ipc_rcu_alloc(sizeof(struct ipc_id)*newsize); if(new == NULL) return ids->size; memcpy(new, ids->entries, sizeof(struct ipc_id)*ids->size); for(i=ids->size;iary); - old = ids->entries; - ids->entries = new; i = ids->size; + + wmb(); + ids->entries = new; + wmb(); ids->size = newsize; - spin_unlock(&ids->ary); - ipc_free(old, sizeof(struct ipc_id)*i); + + ipc_rcu_free(old, sizeof(struct ipc_id)*i); return ids->size; } @@ -141,6 +153,8 @@ static int grow_ary(struct ipc_ids* ids, * initialised and the first free entry is set up and the id assigned * is 
returned. The list is returned in a locked state on success. * On failure the list is not locked and -1 is returned. + * + * Called with ipc_ids.sem held. */ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) @@ -148,6 +162,11 @@ int ipc_addid(struct ipc_ids* ids, struc int id; size = grow_ary(ids,size); + + /* + * read_barrier_depends() is not needed here since + * ipc_ids.sem is held + */ for (id = 0; id < size; id++) { if(ids->entries[id].p == NULL) goto found; @@ -165,7 +184,10 @@ found: if(ids->seq > ids->seq_max) ids->seq = 0; - spin_lock(&ids->ary); + new->lock = SPIN_LOCK_UNLOCKED; + new->deleted = 0; + rcu_read_lock(); + spin_lock(&new->lock); ids->entries[id].p = new; return id; } @@ -179,6 +201,8 @@ found: * fed an invalid identifier. The entry is removed and internal * variables recomputed. The object associated with the identifier * is returned. + * ipc_ids.sem and the spinlock for this ID is hold before this function + * is called, and remain locked on the exit. */ struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id) @@ -187,6 +211,11 @@ struct kern_ipc_perm* ipc_rmid(struct ip int lid = id % SEQ_MULTIPLIER; if(lid >= ids->size) BUG(); + + /* + * do not need a read_barrier_depends() here to force ordering + * on Alpha, since the ipc_ids.sem is held. + */ p = ids->entries[lid].p; ids->entries[lid].p = NULL; if(p==NULL) @@ -201,6 +230,7 @@ struct kern_ipc_perm* ipc_rmid(struct ip } while (ids->entries[lid].p == NULL); ids->max_id = lid; } + p->deleted = 1; return p; } @@ -223,14 +253,14 @@ void* ipc_alloc(int size) } /** - * ipc_free - free ipc space + * ipc_free - free ipc space * @ptr: pointer returned by ipc_alloc * @size: size of block * * Free a block created with ipc_alloc. The caller must know the size * used in the allocation call. 
*/ - + void ipc_free(void* ptr, int size) { if(size > PAGE_SIZE) @@ -239,6 +269,86 @@ void ipc_free(void* ptr, int size) kfree(ptr); } +struct ipc_rcu_kmalloc +{ + struct rcu_head rcu; + /* "void *" makes sure alignment of following data is sane. */ + void *data[0]; +}; + +struct ipc_rcu_vmalloc +{ + struct rcu_head rcu; + struct tq_struct tq; + /* "void *" makes sure alignment of following data is sane. */ + void *data[0]; +}; + +static inline int rcu_use_vmalloc(int size) +{ + /* Too big for a single page? */ + if (sizeof(struct ipc_rcu_kmalloc) + size > PAGE_SIZE) + return 1; + return 0; +} + +/** + * ipc_rcu_alloc - allocate ipc and rcu space + * @size: size desired + * + * Allocate memory for the rcu header structure + the object. + * Returns the pointer to the object. + * NULL is returned if the allocation fails. + */ + +void* ipc_rcu_alloc(int size) +{ + void* out; + /* + * We prepend the allocation with the rcu struct, and + * taskqueue if necessary (for vmalloc). + */ + if (rcu_use_vmalloc(size)) { + out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size); + if (out) out += sizeof(struct ipc_rcu_vmalloc); + } else { + out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL); + if (out) out += sizeof(struct ipc_rcu_kmalloc); + } + + return out; +} + +/** + * ipc_schedule_free - free ipc + rcu space + * + * Since RCU callback function is called in bh, + * we need to defer the vfree to schedule_task + */ +static void ipc_schedule_free(void* arg) +{ + struct ipc_rcu_vmalloc *free = arg; + + free->tq.routine = (void (*)(void *))vfree; + free->tq.data = free; + schedule_task(&free->tq); +} + +void ipc_rcu_free(void* ptr, int size) +{ + if (rcu_use_vmalloc(size)) { + struct ipc_rcu_vmalloc *free; + free = ptr - sizeof(*free); + call_rcu(&free->rcu, ipc_schedule_free, free); + } else { + struct ipc_rcu_kmalloc *free; + free = ptr - sizeof(*free); + /* kfree takes a "const void *" so gcc warns. So we cast. 
*/ + call_rcu(&free->rcu, (void (*)(void *))kfree, free); + } + +} + /** * ipcperms - check IPC permissions * @ipcp: IPC permission set diff -urNp --exclude CVS --exclude BitKeeper x-ref/ipc/util.h x/ipc/util.h --- x-ref/ipc/util.h 2003-03-15 03:25:18.000000000 +0100 +++ x/ipc/util.h 2003-09-01 19:16:45.000000000 +0200 @@ -4,6 +4,7 @@ * * ipc helper functions (c) 1999 Manfred Spraul */ +#include #define USHRT_MAX 0xffff #define SEQ_MULTIPLIER (IPCMNI) @@ -19,7 +20,6 @@ struct ipc_ids { unsigned short seq; unsigned short seq_max; struct semaphore sem; - spinlock_t ary; struct ipc_id* entries; }; @@ -27,7 +27,6 @@ struct ipc_id { struct kern_ipc_perm* p; }; - void __init ipc_init_ids(struct ipc_ids* ids, int size); /* must be called with ids->sem acquired.*/ @@ -44,44 +43,78 @@ int ipcperms (struct kern_ipc_perm *ipcp */ void* ipc_alloc(int size); void ipc_free(void* ptr, int size); +/* for allocations that need to be freed by RCU + * both functions can sleep + */ +void* ipc_rcu_alloc(int size); +void ipc_rcu_free(void* arg, int size); -extern inline void ipc_lockall(struct ipc_ids* ids) -{ - spin_lock(&ids->ary); -} - +/* So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get() + * is called with shm_ids.sem locked. Since grow_ary() is also called with + * shm_ids.sem down (for Shared Memory), there is no need to add read + * barriers here to guarantee the writes in grow_ary() are seen in order + * here (for Alpha). + * + * However ipc_get() itself does not necessarily require ipc_ids.sem down. So + * if in the future ipc_get() is used by other places without ipc_ids.sem + * down, then ipc_get() needs read memory barriers as ipc_lock() does.
+*/ + extern inline struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id) { struct kern_ipc_perm* out; int lid = id % SEQ_MULTIPLIER; if(lid >= ids->size) return NULL; - out = ids->entries[lid].p; return out; } -extern inline void ipc_unlockall(struct ipc_ids* ids) -{ - spin_unlock(&ids->ary); -} extern inline struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id) { struct kern_ipc_perm* out; int lid = id % SEQ_MULTIPLIER; - if(lid >= ids->size) + struct ipc_id* entries; + + rcu_read_lock(); + if(lid >= ids->size) { + rcu_read_unlock(); return NULL; + } - spin_lock(&ids->ary); - out = ids->entries[lid].p; - if(out==NULL) - spin_unlock(&ids->ary); + /* + * Note: The following two read barriers correspond + * to the two write barriers in grow_ary(). They guarantee + * the writes are seen in the same order on the read side. + * smp_rmb() has effect on all CPUs. read_barrier_depends() + * is used if there is a data dependency between two reads, and + * has effect only on Alpha. + */ + smp_rmb(); /* prevent indexing old array with new size */ + entries = ids->entries; + read_barrier_depends(); /* prevent seeing new array uninitialized */ + out = entries[lid].p; + if(out == NULL) { + rcu_read_unlock(); + return NULL; + } + spin_lock(&out->lock); + + /* ipc_rmid() may have already freed the ID while ipc_lock + * was spinning: here verify that the structure is still valid + */ + if (out->deleted) { + spin_unlock(&out->lock); + rcu_read_unlock(); + return NULL; + } return out; } -extern inline void ipc_unlock(struct ipc_ids* ids, int id) +extern inline void ipc_unlock(struct kern_ipc_perm* perm) { - spin_unlock(&ids->ary); + spin_unlock(&perm->lock); + rcu_read_unlock(); } extern inline int ipc_buildid(struct ipc_ids* ids, int id, int seq)