From: "Paul E. McKenney" This patch uses the rcu_assign_pointer() API to eliminate a number of explicit memory barriers from the SysV IPC code that uses RCU. It also restructures the ipc_ids structure so that the array size is stored in the same memory block as the array itself (see the new struct ipc_id_ary). This prevents the race that the earlier code was subject to, where a reader could see a mismatch between the size and the actual array. With the size stored with the array, the possibility of mismatch is eliminated -- with out the need for careful ordering and explicit memory barriers. This has been tested successfully on i386 and ppc64. Signed-off-by: Signed-off-by: Andrew Morton --- 25-akpm/ipc/msg.c | 2 - 25-akpm/ipc/sem.c | 2 - 25-akpm/ipc/util.c | 80 +++++++++++++++++++++++------------------------------ 25-akpm/ipc/util.h | 13 ++++---- 4 files changed, 45 insertions(+), 52 deletions(-) diff -puN ipc/msg.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/msg.c --- 25/ipc/msg.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc 2004-10-24 03:23:49.798841488 -0700 +++ 25-akpm/ipc/msg.c 2004-10-24 03:23:49.819838296 -0700 @@ -381,7 +381,7 @@ asmlinkage long sys_msgctl (int msqid, i int success_return; if (!buf) return -EFAULT; - if(cmd == MSG_STAT && msqid >= msg_ids.size) + if(cmd == MSG_STAT && msqid >= msg_ids.entries->size) return -EINVAL; memset(&tbuf,0,sizeof(tbuf)); diff -puN ipc/sem.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/sem.c --- 25/ipc/sem.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc 2004-10-24 03:23:49.799841336 -0700 +++ 25-akpm/ipc/sem.c 2004-10-24 03:23:49.820838144 -0700 @@ -524,7 +524,7 @@ static int semctl_nolock(int semid, int struct semid64_ds tbuf; int id; - if(semid >= sem_ids.size) + if(semid >= sem_ids.entries->size) return -EINVAL; memset(&tbuf,0,sizeof(tbuf)); diff -puN ipc/util.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/util.c --- 25/ipc/util.c~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc 2004-10-24 03:23:49.801841032 -0700 +++ 25-akpm/ipc/util.c 2004-10-24 03:23:49.821837992 -0700 @@ -62,7 +62,6 @@ void __init ipc_init_ids(struct ipc_ids* if(size > IPCMNI) size = IPCMNI; - ids->size = size; ids->in_use = 0; ids->max_id = -1; ids->seq = 0; @@ -74,14 +73,17 @@ void __init ipc_init_ids(struct ipc_ids* ids->seq_max = seq_limit; } - ids->entries = ipc_rcu_alloc(sizeof(struct ipc_id)*size); + ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size + + sizeof(struct ipc_id_ary)); if(ids->entries == NULL) { printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n"); - ids->size = 0; + size = 0; + ids->entries = &ids->nullentry; } - for(i=0;isize;i++) - ids->entries[i].p = NULL; + ids->entries->size = size; + for(i=0;ientries->p[i] = NULL; } /** @@ -104,7 +106,7 @@ int ipc_findkey(struct ipc_ids* ids, key * since ipc_ids.sem is held */ for (id = 0; id <= max_id; id++) { - p = ids->entries[id].p; + p = ids->entries->p[id]; if(p==NULL) continue; if (key == p->key) @@ -118,36 +120,36 @@ int ipc_findkey(struct ipc_ids* ids, key */ static int grow_ary(struct ipc_ids* ids, int newsize) { - struct ipc_id* new; - struct ipc_id* old; + struct ipc_id_ary* new; + struct ipc_id_ary* old; int i; + int size = ids->entries->size; if(newsize > IPCMNI) newsize = IPCMNI; - if(newsize <= ids->size) + if(newsize <= size) return newsize; - new = ipc_rcu_alloc(sizeof(struct ipc_id)*newsize); + new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize + + sizeof(struct ipc_id_ary)); if(new == NULL) - return ids->size; - memcpy(new, ids->entries, sizeof(struct ipc_id)*ids->size); - for(i=ids->size;isize = newsize; + memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size + + sizeof(struct ipc_id_ary)); + for(i=size;ip[i] = NULL; } old = ids->entries; /* - * before setting the ids->entries to the new array, there must be a - * smp_wmb() to make sure the memcpyed contents of the new array are - * visible before the new array becomes visible. + * Use rcu_assign_pointer() to make sure the memcpyed contents + * of the new array are visible before the new array becomes visible. */ - smp_wmb(); /* prevent seeing new array uninitialized. */ - ids->entries = new; - smp_wmb(); /* prevent indexing into old array based on new size. */ - ids->size = newsize; + rcu_assign_pointer(ids->entries, new); ipc_rcu_putref(old); - return ids->size; + return newsize; } /** @@ -175,7 +177,7 @@ int ipc_addid(struct ipc_ids* ids, struc * ipc_ids.sem is held */ for (id = 0; id < size; id++) { - if(ids->entries[id].p == NULL) + if(ids->entries->p[id] == NULL) goto found; } return -1; @@ -195,7 +197,7 @@ found: new->deleted = 0; rcu_read_lock(); spin_lock(&new->lock); - ids->entries[id].p = new; + ids->entries->p[id] = new; return id; } @@ -216,15 +218,15 @@ struct kern_ipc_perm* ipc_rmid(struct ip { struct kern_ipc_perm* p; int lid = id % SEQ_MULTIPLIER; - if(lid >= ids->size) + if(lid >= ids->entries->size) BUG(); /* * do not need a rcu_dereference()() here to force ordering * on Alpha, since the ipc_ids.sem is held. */ - p = ids->entries[lid].p; - ids->entries[lid].p = NULL; + p = ids->entries->p[lid]; + ids->entries->p[lid] = NULL; if(p==NULL) BUG(); ids->in_use--; @@ -234,7 +236,7 @@ struct kern_ipc_perm* ipc_rmid(struct ip lid--; if(lid == -1) break; - } while (ids->entries[lid].p == NULL); + } while (ids->entries->p[lid] == NULL); ids->max_id = lid; } p->deleted = 1; @@ -493,9 +495,9 @@ struct kern_ipc_perm* ipc_get(struct ipc { struct kern_ipc_perm* out; int lid = id % SEQ_MULTIPLIER; - if(lid >= ids->size) + if(lid >= ids->entries->size) return NULL; - out = ids->entries[lid].p; + out = ids->entries->p[lid]; return out; } @@ -503,25 +505,15 @@ struct kern_ipc_perm* ipc_lock(struct ip { struct kern_ipc_perm* out; int lid = id % SEQ_MULTIPLIER; - struct ipc_id* entries; + struct ipc_id_ary* entries; rcu_read_lock(); - if(lid >= ids->size) { + entries = rcu_dereference(ids->entries); + if(lid >= entries->size) { rcu_read_unlock(); return NULL; } - - /* - * Note: The following two read barriers are corresponding - * to the two write barriers in grow_ary(). They guarantee - * the writes are seen in the same order on the read side. - * smp_rmb() has effect on all CPUs. rcu_dereference() - * is used if there are data dependency between two reads, and - * has effect only on Alpha. - */ - smp_rmb(); /* prevent indexing old array with new size */ - entries = rcu_dereference(ids->entries); - out = entries[lid].p; + out = entries->p[lid]; if(out == NULL) { rcu_read_unlock(); return NULL; diff -puN ipc/util.h~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc ipc/util.h --- 25/ipc/util.h~rcu-eliminating-explicit-memory-barriers-from-sysv-ipc 2004-10-24 03:23:49.811839512 -0700 +++ 25-akpm/ipc/util.h 2004-10-24 03:23:49.822837840 -0700 @@ -15,18 +15,19 @@ void sem_init (void); void msg_init (void); void shm_init (void); -struct ipc_ids { +struct ipc_id_ary { int size; + struct kern_ipc_perm *p[0]; +}; + +struct ipc_ids { int in_use; int max_id; unsigned short seq; unsigned short seq_max; struct semaphore sem; - struct ipc_id* entries; -}; - -struct ipc_id { - struct kern_ipc_perm* p; + struct ipc_id_ary nullentry; + struct ipc_id_ary* entries; }; void __init ipc_init_ids(struct ipc_ids* ids, int size); _