diff options
author | H. Peter Anvin <hpa@smyrno.hos.anvin.org> | 2005-12-04 21:52:21 -0800 |
---|---|---|
committer | H. Peter Anvin <hpa@smyrno.hos.anvin.org> | 2005-12-04 21:52:21 -0800 |
commit | 12a407d6ebceeb1fc9b8ad8c7dda5049c75e1da8 (patch) | |
tree | 0d6d7741f37b24f1e1f4a238bc679fca994600b5 | |
parent | f6d64d8c89c2048cdac0e854bc2208fbe413324d (diff) | |
download | libucd-12a407d6ebceeb1fc9b8ad8c7dda5049c75e1da8.tar.gz |
Add caching of UCS data
-rw-r--r-- | Makefile | 34 | ||||
-rw-r--r-- | cache.c | 132 | ||||
-rw-r--r-- | libucd_int.h | 15 | ||||
-rw-r--r-- | ucd.h | 12 | ||||
-rw-r--r-- | ucslookup.c | 26 |
5 files changed, 200 insertions, 19 deletions
@@ -1,7 +1,7 @@ CC = cc -CFLAGS = -g -O -I. -W -Wall +CFLAGS = -g -O3 -I. -W -Wall -DHAVE_PTHREAD_H PICFLAGS = -fPIC -LDFLAGS = +LDFLAGS = -lpthread AR = ar RANLIB = ranlib LIB_FILE = libucd.a @@ -18,6 +18,11 @@ PERL = time perl # +# Headers included from libucd_int.h +# +HDRS = libucd_int.h ucd.h int24.h compiler.h + +# # These are the files produced by convert_ucd.pl # CVT_FILES = gen/jamo.c gen/nameslist.tab gen/nametoucs.keys gen/nametoucs.tab \ @@ -48,7 +53,7 @@ CVT_FILES = gen/jamo.c gen/nameslist.tab gen/nametoucs.keys gen/nametoucs.tab \ LIBSRCS = proparray.c gen/nametoucs_hash.c gen/ucstoname_hash.c \ gen/jamo.c gen/nameslist.c gen/nameslist_dict.c \ gen/ucstoname_tab.c gen/nametoucs_tab.c nametoucs.c \ - ucslookup.c + ucslookup.c cache.c LIBOBJS = $(patsubst %.c,%.o,$(LIBSRCS)) SO_OBJS = $(patsubst %.c,%.lo,$(LIBSRCS)) @@ -126,23 +131,26 @@ endif # ----------------------------------------------------------------------- -proparray.o: proparray.c ucd.h libucd_int.h gen/proparray.c -proparray.lo: proparray.c ucd.h libucd_int.h gen/proparray.c +proparray.o: proparray.c ucd.h $(HDRS) gen/proparray.c +proparray.lo: proparray.c ucd.h $(HDRS) gen/proparray.c mk_ucstoname_tab.ho: mk_ucstoname_tab.c gen/ucstoname_hash.h mk_nametoucs_tab.ho: mk_nametoucs_tab.c gen/nametoucs_hash.h -gen/ucstoname_tab.o: gen/ucstoname_tab.c libucd_int.h -gen/ucstoname_tab.lo: gen/ucstoname_tab.c libucd_int.h +gen/ucstoname_tab.o: gen/ucstoname_tab.c $(HDRS) +gen/ucstoname_tab.lo: gen/ucstoname_tab.c $(HDRS) -gen/nametoucs_tab.o: gen/nametoucs_tab.c libucd_int.h -gen/nametoucs_tab.lo: gen/nametoucs_tab.c libucd_int.h +gen/nametoucs_tab.o: gen/nametoucs_tab.c $(HDRS) +gen/nametoucs_tab.lo: gen/nametoucs_tab.c $(HDRS) gen/nameslist_dict.o: gen/nameslist_dict.c gen/nameslist_dict.lo: gen/nameslist_dict.c -nametoucs.o: nametoucs.c libucd_int.h gen/nametoucs_hash.h -nametoucs.lo: nametoucs.c libucd_int.h gen/nametoucs_hash.h +nametoucs.o: nametoucs.c $(HDRS) gen/nametoucs_hash.h +nametoucs.lo: nametoucs.c $(HDRS) gen/nametoucs_hash.h + +ucslookup.o: ucslookup.c $(HDRS) gen/ucstoname_hash.h +ucslookup.lo: ucslookup.c $(HDRS) gen/ucstoname_hash.h -ucslookup.o: ucslookup.c libucd_int.h gen/ucstoname_hash.h -ucslookup.lo: ucslookup.c libucd_int.h gen/ucstoname_hash.h +cache.o: cache.c $(HDRS) +cache.lo: cache.c $(HDRS) @@ -0,0 +1,132 @@ +#include <stdlib.h> +#include "libucd_int.h" +#ifdef HAVE_PTHREAD_H +# include <pthread.h> +#endif + +#define CACHE_SIZE 512 +static struct unicode_character_data *_libucd_cache[CACHE_SIZE]; + +#ifdef HAVE_PTHREAD_H +static pthread_mutex_t _libucd_cache_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void lock_cache(void) +{ + pthread_mutex_lock(&_libucd_cache_mutex); +} +static void unlock_cache(void) +{ + pthread_mutex_unlock(&_libucd_cache_mutex); +} + +#else + +/* Single-threaded execution only */ +static void lock_cache(void) +{ +} +static void unlock_cache(void) +{ +} +#endif + +#if defined(HAVE_PTHREAD_H) && (defined(__i386__) || defined(__x86_64__)) + +/* Specially optimized versions for i386 and x86-64 */ + +struct unicode_character_data * +unicode_character_get(struct unicode_character_data *ucd) +{ + struct libucd_private *pvt = (struct libucd_private *)(ucd+1); + asm volatile("lock ; incl %0" : "+m" (pvt->usage_ctr)); + return ucd; +} + +void +unicode_character_put(struct unicode_character_data *ucd) +{ + struct libucd_private *pvt = (struct libucd_private *)(ucd+1); + unsigned char zero; + + asm volatile("lock ; decl %0 ; setz %1" + : "+m" (pvt->usage_ctr), "=r" (zero)); + if ( zero ) + free(ucd); +} + +#else + +# ifdef HAVE_PTHREAD_H +static void lock(struct libucd_private *pvt) +{ + pthread_mutex_lock(&pvt->mutex); +} +static void unlock(struct libucd_private *pvt) +{ + pthread_mutex_unlock(&pvt->mutex); +} +# else +static void lock(struct libucd_private *pvt) +{ +} +static void unlock(struct libucd_private *pvt) +{ +} +# endif + +struct unicode_character_data * +unicode_character_get(struct unicode_character_data *ucd) +{ + struct libucd_private *pvt = (struct libucd_private *)(ucd+1); + lock(pvt); + pvt->usage_ctr++; + unlock(pvt); + return ucd; +} + +void +unicode_character_put(struct unicode_character_data *ucd) +{ + struct libucd_private *pvt = (struct libucd_private *)(ucd+1); + unsigned int cnt; + lock(pvt); + cnt = --pvt->usage_ctr; + unlock(pvt); + if ( !cnt ) + free(ucd); +} + +#endif + +struct unicode_character_data * +unicode_character_data(int32_t ucs) +{ + struct unicode_character_data *ucd, *prev_ucd; + unsigned int bucket; + + if ( ucs < 0 || ucs > 0x10ffff ) + return NULL; + + bucket = (uint32_t)ucs % CACHE_SIZE; + + lock_cache(); + ucd = _libucd_cache[bucket]; + if ( ucd && ucd->ucs == ucs ) { + ucd = unicode_character_get(ucd); + unlock_cache(); + return ucd; + } + unlock_cache(); + + ucd = unicode_character_data_raw(ucs); + + lock_cache(); + prev_ucd = _libucd_cache[bucket]; + _libucd_cache[bucket] = ucd; + unlock_cache(); + + if ( prev_ucd ) + unicode_character_put(prev_ucd); + + return ucd; +} diff --git a/libucd_int.h b/libucd_int.h index 955607b..5514ac0 100644 --- a/libucd_int.h +++ b/libucd_int.h @@ -62,4 +62,19 @@ extern const struct _libucd_nametoucs_tab _libucd_nametoucs_tab[]; extern const unsigned char _libucd_names_list[]; extern const char * const _libucd_nameslist_dict[256]; +/* This is a private data structure included in each ucd object */ +#if defined(__i386__) || defined(__x86_64__) +# define HAVE_ATOMIC_CTR +#endif + +struct libucd_private { +#if defined(HAVE_PTHREAD_H) && !defined(HAVE_ATOMIC_CTR) + pthread_mutex_t mutex; +#endif + volatile unsigned int usage_ctr; +}; + +struct unicode_character_data * +unicode_character_data_raw(int32_t ucs); + #endif @@ -36,6 +36,12 @@ #include <stdint.h> +#define LIBUCD_THREAD_SUPPORT + +#ifdef LIBUCD_THREAD_SUPPORT +# include <pthread.h> +#endif + enum unicode_bidi_class { UC_BIDI_AL, /* Arabic_Letter */ UC_BIDI_AN, /* Arabic_Number */ @@ -540,13 +546,15 @@ enum unicode_block { #define UC_FL_BIDI_MIRRORED UC_FLAG(35) struct unicode_character_data { + int32_t ucs; /* Actual codepoint */ + uint16_t size; /* Size of this structure */ + uint16_t alloc_size; /* Allocation size */ uint64_t fl; /* Flags */ const char *name; const char *bidi_mirroring_glyph; const char *uppercase_mapping; const char *lowercase_mapping; const char *titlecase_mapping; - int32_t ucs; /* Actual codepoint */ int32_t simple_uppercase; int32_t simple_lowercase; int32_t simple_titlecase; @@ -572,5 +580,7 @@ struct unicode_character_data { }; struct unicode_character_data *unicode_character_data(int32_t); +struct unicode_character_data *unicode_character_get(struct unicode_character_data *); +void unicode_character_put(struct unicode_character_data *); #endif /* UCD_H */ diff --git a/ucslookup.c b/ucslookup.c index 3192080..3283b0b 100644 --- a/ucslookup.c +++ b/ucslookup.c @@ -6,6 +6,9 @@ #include <errno.h> #include "libucd_int.h" #include "gen/ucstoname_hash.h" +#ifdef HAVE_PTHREAD_H +# include <pthread.h> +#endif /* * This returns the name for a specific UCS in a user-provided buffer, @@ -98,11 +101,18 @@ alloc_copy_properties(const struct _libucd_property_array *prop, int32_t ucs, size_t namelen) { struct unicode_character_data *ucd; + struct libucd_private *pvt; + size_t size = sizeof(struct unicode_character_data)+ + sizeof(struct libucd_private)+namelen+1; - ucd = malloc(sizeof(struct unicode_character_data)+namelen+1); + + ucd = malloc(size); if ( !ucd ) return NULL; - ucd->name = (char *)(ucd+1); + pvt = (struct libucd_private *)(ucd+1); + ucd->name = (char *)(pvt+1); + ucd->size = sizeof(struct unicode_character_data); + ucd->alloc_size = size; ucd->fl = prop->flags_block & UINT64_C(0xffffffffffff); ucd->bidi_mirroring_glyph = NULL; /* NYS */ @@ -138,6 +148,14 @@ alloc_copy_properties(const struct _libucd_property_array *prop, ucd->word_break = prop->word_break; ucd->line_break = prop->line_break; +#if defined(HAVE_PTHREAD_H) && !defined(HAVE_ATOMIC_CTR) + if ( pthread_mutex_init(&pvt->mutex, NULL) ) { + free(ucd); + return NULL; + } +#endif + pvt->usage_ctr = 2; /* cache plus end user */ + return ucd; } @@ -145,7 +163,7 @@ alloc_copy_properties(const struct _libucd_property_array *prop, * Standard entry point for the user */ struct unicode_character_data * -unicode_character_data(int32_t ucs) +unicode_character_data_raw(int32_t ucs) { uint32_t hash; const struct _libucd_ucstoname_tab *unt; @@ -206,5 +224,3 @@ unicode_character_data(int32_t ucs) return ucd; } - - |