aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: H. Peter Anvin <hpa@smyrno.hos.anvin.org> 2005-12-04 21:52:21 -0800
committer: H. Peter Anvin <hpa@smyrno.hos.anvin.org> 2005-12-04 21:52:21 -0800
commit: 12a407d6ebceeb1fc9b8ad8c7dda5049c75e1da8 (patch)
tree: 0d6d7741f37b24f1e1f4a238bc679fca994600b5
parent: f6d64d8c89c2048cdac0e854bc2208fbe413324d (diff)
download: libucd-12a407d6ebceeb1fc9b8ad8c7dda5049c75e1da8.tar.gz
Add caching of UCS data
-rw-r--r-- Makefile 34
-rw-r--r-- cache.c 132
-rw-r--r-- libucd_int.h 15
-rw-r--r-- ucd.h 12
-rw-r--r-- ucslookup.c 26
5 files changed, 200 insertions, 19 deletions
diff --git a/Makefile b/Makefile
index 72b295c..b4aec4f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
CC = cc
-CFLAGS = -g -O -I. -W -Wall
+CFLAGS = -g -O3 -I. -W -Wall -DHAVE_PTHREAD_H
PICFLAGS = -fPIC
-LDFLAGS =
+LDFLAGS = -lpthread
AR = ar
RANLIB = ranlib
LIB_FILE = libucd.a
@@ -18,6 +18,11 @@ PERL = time perl
#
+# Headers included from libucd_int.h
+#
+HDRS = libucd_int.h ucd.h int24.h compiler.h
+
+#
# These are the files produced by convert_ucd.pl
#
CVT_FILES = gen/jamo.c gen/nameslist.tab gen/nametoucs.keys gen/nametoucs.tab \
@@ -48,7 +53,7 @@ CVT_FILES = gen/jamo.c gen/nameslist.tab gen/nametoucs.keys gen/nametoucs.tab \
LIBSRCS = proparray.c gen/nametoucs_hash.c gen/ucstoname_hash.c \
gen/jamo.c gen/nameslist.c gen/nameslist_dict.c \
gen/ucstoname_tab.c gen/nametoucs_tab.c nametoucs.c \
- ucslookup.c
+ ucslookup.c cache.c
LIBOBJS = $(patsubst %.c,%.o,$(LIBSRCS))
SO_OBJS = $(patsubst %.c,%.lo,$(LIBSRCS))
@@ -126,23 +131,26 @@ endif
# -----------------------------------------------------------------------
-proparray.o: proparray.c ucd.h libucd_int.h gen/proparray.c
-proparray.lo: proparray.c ucd.h libucd_int.h gen/proparray.c
+proparray.o: proparray.c ucd.h $(HDRS) gen/proparray.c
+proparray.lo: proparray.c ucd.h $(HDRS) gen/proparray.c
mk_ucstoname_tab.ho: mk_ucstoname_tab.c gen/ucstoname_hash.h
mk_nametoucs_tab.ho: mk_nametoucs_tab.c gen/nametoucs_hash.h
-gen/ucstoname_tab.o: gen/ucstoname_tab.c libucd_int.h
-gen/ucstoname_tab.lo: gen/ucstoname_tab.c libucd_int.h
+gen/ucstoname_tab.o: gen/ucstoname_tab.c $(HDRS)
+gen/ucstoname_tab.lo: gen/ucstoname_tab.c $(HDRS)
-gen/nametoucs_tab.o: gen/nametoucs_tab.c libucd_int.h
-gen/nametoucs_tab.lo: gen/nametoucs_tab.c libucd_int.h
+gen/nametoucs_tab.o: gen/nametoucs_tab.c $(HDRS)
+gen/nametoucs_tab.lo: gen/nametoucs_tab.c $(HDRS)
gen/nameslist_dict.o: gen/nameslist_dict.c
gen/nameslist_dict.lo: gen/nameslist_dict.c
-nametoucs.o: nametoucs.c libucd_int.h gen/nametoucs_hash.h
-nametoucs.lo: nametoucs.c libucd_int.h gen/nametoucs_hash.h
+nametoucs.o: nametoucs.c $(HDRS) gen/nametoucs_hash.h
+nametoucs.lo: nametoucs.c $(HDRS) gen/nametoucs_hash.h
+
+ucslookup.o: ucslookup.c $(HDRS) gen/ucstoname_hash.h
+ucslookup.lo: ucslookup.c $(HDRS) gen/ucstoname_hash.h
-ucslookup.o: ucslookup.c libucd_int.h gen/ucstoname_hash.h
-ucslookup.lo: ucslookup.c libucd_int.h gen/ucstoname_hash.h
+cache.o: cache.c $(HDRS)
+cache.lo: cache.c $(HDRS)
diff --git a/cache.c b/cache.c
new file mode 100644
index 0000000..8ff7d10
--- /dev/null
+++ b/cache.c
@@ -0,0 +1,132 @@
+#include <stdlib.h>
+#include "libucd_int.h"
+#ifdef HAVE_PTHREAD_H
+# include <pthread.h>
+#endif
+
+#define CACHE_SIZE 512
+static struct unicode_character_data *_libucd_cache[CACHE_SIZE];
+
+#ifdef HAVE_PTHREAD_H
+static pthread_mutex_t _libucd_cache_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Acquire the global mutex that serializes access to _libucd_cache[].
+ * The return value of pthread_mutex_lock() is deliberately ignored.
+ */
+static void lock_cache(void)
+{
+  (void)pthread_mutex_lock(&_libucd_cache_mutex);
+}
+
+/*
+ * Release the global mutex taken by lock_cache().
+ */
+static void unlock_cache(void)
+{
+  (void)pthread_mutex_unlock(&_libucd_cache_mutex);
+}
+
+#else
+
+/* Single-threaded execution only */
+/* Without pthreads there is nothing to lock; both helpers are no-ops. */
+static void lock_cache(void) { /* nothing to acquire */ }
+static void unlock_cache(void) { /* nothing to release */ }
+#endif
+
+#if defined(HAVE_PTHREAD_H) && (defined(__i386__) || defined(__x86_64__))
+
+/* Specially optimized versions for i386 and x86-64 */
+
+/*
+ * Take an additional reference to a character data object.
+ *
+ * The private bookkeeping record sits immediately after the public
+ * structure in the same allocation; its usage counter is bumped with a
+ * single locked increment, so no mutex is required.
+ *
+ * Returns the pointer that was passed in.
+ */
+struct unicode_character_data *
+unicode_character_get(struct unicode_character_data *ucd)
+{
+  struct libucd_private *priv = (struct libucd_private *)&ucd[1];
+
+  __asm__ __volatile__("lock ; incl %0" : "+m" (priv->usage_ctr));
+
+  return ucd;
+}
+
+/*
+ * Drop one reference to a character data object.
+ *
+ * The usage counter in the private record that follows the public
+ * structure is decremented with a locked instruction; when it reaches
+ * zero the whole allocation is released with free().
+ */
+void
+unicode_character_put(struct unicode_character_data *ucd)
+{
+  struct libucd_private *pvt = (struct libucd_private *)(ucd+1);
+  unsigned char zero;
+
+  /*
+   * "=q" rather than "=r": setz needs a byte-addressable register, and
+   * on i386 "r" may select %esi/%edi, which have no 8-bit form.
+   * The "memory" clobber keeps the compiler from moving accesses to the
+   * object across the decrement that may hand it to free().
+   */
+  asm volatile("lock ; decl %0 ; setz %1"
+               : "+m" (pvt->usage_ctr), "=q" (zero)
+               :
+               : "memory", "cc");
+  if ( zero )
+    free(ucd);
+}
+
+#else
+
+# ifdef HAVE_PTHREAD_H
+/*
+ * Acquire the per-object mutex that protects the usage counter.
+ */
+static void lock(struct libucd_private *pvt)
+{
+  (void)pthread_mutex_lock(&pvt->mutex);
+}
+
+/*
+ * Release the per-object mutex taken by lock().
+ */
+static void unlock(struct libucd_private *pvt)
+{
+  (void)pthread_mutex_unlock(&pvt->mutex);
+}
+# else
+/*
+ * Single-threaded build: there is no per-object mutex, so locking is a
+ * no-op.  The parameter is explicitly discarded so the build stays
+ * clean under -W -Wall (unused-parameter warning).
+ */
+static void lock(struct libucd_private *pvt)
+{
+  (void)pvt;
+}
+static void unlock(struct libucd_private *pvt)
+{
+  (void)pvt;
+}
+# endif
+
+/*
+ * Take an additional reference to a character data object.
+ *
+ * Portable variant: the usage counter in the private record that
+ * follows the public structure is incremented between lock() and
+ * unlock().
+ *
+ * Returns the pointer that was passed in.
+ */
+struct unicode_character_data *
+unicode_character_get(struct unicode_character_data *ucd)
+{
+  struct libucd_private *priv = (struct libucd_private *)&ucd[1];
+
+  lock(priv);
+  priv->usage_ctr += 1;
+  unlock(priv);
+
+  return ucd;
+}
+
+/*
+ * Drop one reference to a character data object.
+ *
+ * Portable variant: the usage counter is decremented between lock()
+ * and unlock().  When the last reference goes away the per-object
+ * mutex (if this build has one) is destroyed and the allocation is
+ * released with free().
+ */
+void
+unicode_character_put(struct unicode_character_data *ucd)
+{
+  struct libucd_private *pvt = (struct libucd_private *)(ucd+1);
+  unsigned int cnt;
+
+  lock(pvt);
+  cnt = --pvt->usage_ctr;
+  unlock(pvt);
+
+  if ( !cnt ) {
+#ifdef HAVE_PTHREAD_H
+    /*
+     * This code path is only compiled when the atomic-counter variant
+     * is not in use, so with pthreads the object carries a mutex that
+     * was set up with pthread_mutex_init(); tear it down before the
+     * memory is released.
+     */
+    pthread_mutex_destroy(&pvt->mutex);
+#endif
+    free(ucd);
+  }
+}
+
+#endif
+
+/*
+ * Cached lookup of the character data for a code point.
+ *
+ * ucs: code point to look up; values outside 0..0x10ffff yield NULL.
+ *
+ * The cache is a direct-mapped table of CACHE_SIZE slots indexed by
+ * ucs % CACHE_SIZE.  On a hit the cached object is returned with an
+ * extra reference taken for the caller.  On a miss the object is
+ * built by unicode_character_data_raw(), installed in the slot, and
+ * the entry it displaces loses the cache's reference.
+ *
+ * Returns the object, or NULL if ucs is out of range or the raw
+ * lookup fails.  The caller releases the result with
+ * unicode_character_put().
+ */
+struct unicode_character_data *
+unicode_character_data(int32_t ucs)
+{
+  struct unicode_character_data *ucd, *prev_ucd;
+  unsigned int bucket;
+
+  if ( ucs < 0 || ucs > 0x10ffff )
+    return NULL;
+
+  bucket = (uint32_t)ucs % CACHE_SIZE;
+
+  lock_cache();
+  ucd = _libucd_cache[bucket];
+  if ( ucd && ucd->ucs == ucs ) {
+    /* Take the caller's reference while the slot cannot change */
+    ucd = unicode_character_get(ucd);
+    unlock_cache();
+    return ucd;
+  }
+  unlock_cache();
+
+  /* The raw lookup runs without the cache lock held */
+  ucd = unicode_character_data_raw(ucs);
+  if ( !ucd )
+    return NULL;		/* Failed lookup: leave the cached entry alone */
+
+  lock_cache();
+  prev_ucd = _libucd_cache[bucket];
+  _libucd_cache[bucket] = ucd;
+  unlock_cache();
+
+  /* Drop the cache's reference on whatever was displaced */
+  if ( prev_ucd )
+    unicode_character_put(prev_ucd);
+
+  return ucd;
+}
diff --git a/libucd_int.h b/libucd_int.h
index 955607b..5514ac0 100644
--- a/libucd_int.h
+++ b/libucd_int.h
@@ -62,4 +62,19 @@ extern const struct _libucd_nametoucs_tab _libucd_nametoucs_tab[];
extern const unsigned char _libucd_names_list[];
extern const char * const _libucd_nameslist_dict[256];
+/* This is a private data structure included in each ucd object */
+#if defined(__i386__) || defined(__x86_64__)
+# define HAVE_ATOMIC_CTR
+#endif
+
+struct libucd_private {  /* Stored directly after each struct unicode_character_data */
+#if defined(HAVE_PTHREAD_H) && !defined(HAVE_ATOMIC_CTR)
+ pthread_mutex_t mutex;  /* Protects usage_ctr when no atomic counter is available */
+#endif
+ volatile unsigned int usage_ctr;  /* Reference count; the object is freed when it drops to zero */
+};
+
+struct unicode_character_data *
+unicode_character_data_raw(int32_t ucs);
+
#endif
diff --git a/ucd.h b/ucd.h
index ebbc8f4..c205c47 100644
--- a/ucd.h
+++ b/ucd.h
@@ -36,6 +36,12 @@
#include <stdint.h>
+#define LIBUCD_THREAD_SUPPORT
+
+#ifdef LIBUCD_THREAD_SUPPORT
+# include <pthread.h>
+#endif
+
enum unicode_bidi_class {
UC_BIDI_AL, /* Arabic_Letter */
UC_BIDI_AN, /* Arabic_Number */
@@ -540,13 +546,15 @@ enum unicode_block {
#define UC_FL_BIDI_MIRRORED UC_FLAG(35)
struct unicode_character_data {
+ int32_t ucs; /* Actual codepoint */
+ uint16_t size; /* Size of this structure */
+ uint16_t alloc_size; /* Allocation size */
uint64_t fl; /* Flags */
const char *name;
const char *bidi_mirroring_glyph;
const char *uppercase_mapping;
const char *lowercase_mapping;
const char *titlecase_mapping;
- int32_t ucs; /* Actual codepoint */
int32_t simple_uppercase;
int32_t simple_lowercase;
int32_t simple_titlecase;
@@ -572,5 +580,7 @@ struct unicode_character_data {
};
struct unicode_character_data *unicode_character_data(int32_t);
+struct unicode_character_data *unicode_character_get(struct unicode_character_data *);
+void unicode_character_put(struct unicode_character_data *);
#endif /* UCD_H */
diff --git a/ucslookup.c b/ucslookup.c
index 3192080..3283b0b 100644
--- a/ucslookup.c
+++ b/ucslookup.c
@@ -6,6 +6,9 @@
#include <errno.h>
#include "libucd_int.h"
#include "gen/ucstoname_hash.h"
+#ifdef HAVE_PTHREAD_H
+# include <pthread.h>
+#endif
/*
* This returns the name for a specific UCS in a user-provided buffer,
@@ -98,11 +101,18 @@ alloc_copy_properties(const struct _libucd_property_array *prop,
int32_t ucs, size_t namelen)
{
struct unicode_character_data *ucd;
+ struct libucd_private *pvt;
+ size_t size = sizeof(struct unicode_character_data)+
+ sizeof(struct libucd_private)+namelen+1;
- ucd = malloc(sizeof(struct unicode_character_data)+namelen+1);
+
+ ucd = malloc(size);
if ( !ucd )
return NULL;
- ucd->name = (char *)(ucd+1);
+ pvt = (struct libucd_private *)(ucd+1);
+ ucd->name = (char *)(pvt+1);
+ ucd->size = sizeof(struct unicode_character_data);
+ ucd->alloc_size = size;
ucd->fl = prop->flags_block & UINT64_C(0xffffffffffff);
ucd->bidi_mirroring_glyph = NULL; /* NYS */
@@ -138,6 +148,14 @@ alloc_copy_properties(const struct _libucd_property_array *prop,
ucd->word_break = prop->word_break;
ucd->line_break = prop->line_break;
+#if defined(HAVE_PTHREAD_H) && !defined(HAVE_ATOMIC_CTR)
+ if ( pthread_mutex_init(&pvt->mutex, NULL) ) {
+ free(ucd);
+ return NULL;
+ }
+#endif
+ pvt->usage_ctr = 2; /* cache plus end user */
+
return ucd;
}
@@ -145,7 +163,7 @@ alloc_copy_properties(const struct _libucd_property_array *prop,
* Standard entry point for the user
*/
struct unicode_character_data *
-unicode_character_data(int32_t ucs)
+unicode_character_data_raw(int32_t ucs)
{
uint32_t hash;
const struct _libucd_ucstoname_tab *unt;
@@ -206,5 +224,3 @@ unicode_character_data(int32_t ucs)
return ucd;
}
-
-