/* -*- c -*- ------------------------------------------------------------- * * * Copyright 2005 H. Peter Anvin - All Rights Reserved * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall * be included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * ----------------------------------------------------------------------- */ /* * ucd.h * * Simple interface to the Unicode Character Database */ #ifndef UCD_H #define UCD_H #include #define LIBUCD_THREAD_SUPPORT #ifdef LIBUCD_THREAD_SUPPORT # include #endif ENUMS; #define UC_FLAG(x) (UINT64_C(1) << (x)) #define UC_FL_COMPOSITION_EXCLUSION UC_FLAG(0) #define UC_FL_ALPHABETIC UC_FLAG(1) #define UC_FL_DEFAULT_IGNORABLE_CODE_POINT UC_FLAG(2) #define UC_FL_LOWERCASE UC_FLAG(3) #define UC_FL_GRAPHEME_BASE UC_FLAG(4) #define UC_FL_GRAPHEME_EXTEND UC_FLAG(5) #define UC_FL_ID_START UC_FLAG(6) #define UC_FL_ID_CONTINUE UC_FLAG(7) #define UC_FL_MATH UC_FLAG(8) #define UC_FL_UPPERCASE UC_FLAG(9) #define UC_FL_XID_START UC_FLAG(10) #define UC_FL_XID_CONTINUE UC_FLAG(11) #define UC_FL_HEX_DIGIT UC_FLAG(12) #define UC_FL_BIDI_CONTROL UC_FLAG(13) #define UC_FL_DASH UC_FLAG(14) #define UC_FL_DEPRECATED UC_FLAG(15) #define UC_FL_DIACRITIC UC_FLAG(16) #define UC_FL_EXTENDER UC_FLAG(17) #define UC_FL_GRAPHEME_LINK UC_FLAG(18) #define UC_FL_IDEOGRAPHIC UC_FLAG(19) #define UC_FL_IDS_BINARY_OPERATOR UC_FLAG(20) #define UC_FL_IDS_TRINARY_OPERATOR UC_FLAG(21) #define UC_FL_JOIN_CONTROL UC_FLAG(22) #define UC_FL_LOGICAL_ORDER_EXCEPTION UC_FLAG(23) #define UC_FL_NONCHARACTER_CODE_POINT UC_FLAG(24) #define UC_FL_PATTERN_SYNTAX UC_FLAG(25) #define UC_FL_PATTERN_WHITE_SPACE UC_FLAG(26) #define UC_FL_QUOTATION_MARK UC_FLAG(27) #define UC_FL_RADICAL UC_FLAG(28) #define UC_FL_SOFT_DOTTED UC_FLAG(29) #define UC_FL_STERM UC_FLAG(30) #define UC_FL_TERMINAL_PUNCTUATION UC_FLAG(31) #define UC_FL_UNIFIED_IDEOGRAPH UC_FLAG(32) #define UC_FL_VARIATION_SELECTOR UC_FLAG(33) #define UC_FL_WHITE_SPACE UC_FLAG(34) #define UC_FL_BIDI_MIRRORED UC_FLAG(35) struct unicode_character_data { int32_t ucs; /* Actual codepoint */ uint16_t size; /* Size of this structure */ uint16_t alloc_size; /* Allocation size */ uint64_t fl; /* Flags */ const char *name; const char *bidi_mirroring_glyph; const char *uppercase_mapping; const char *lowercase_mapping; const char *titlecase_mapping; int32_t simple_uppercase; int32_t simple_lowercase; int32_t simple_titlecase; /* Numeric value = num/den * 10^exp */ uint8_t numeric_value_num; uint8_t numeric_value_den; uint8_t numeric_value_exp; uint8_t age_ma, age_mi; enum unicode_general_category general_category; enum unicode_block block; enum unicode_script script; enum unicode_joining_type joining_type; enum unicode_joining_group joining_group; enum unicode_east_asian_width east_asian_width; enum unicode_hangul_syllable_type hangul_syllable_type; enum unicode_numeric_type numeric_type; enum unicode_canonical_combining_class canonical_combining_class; enum unicode_bidi_class bidi_class; enum unicode_grapheme_cluster_break grapheme_cluster_break; enum unicode_sentence_break sentence_break; enum unicode_word_break word_break; enum unicode_line_break line_break; }; const struct unicode_character_data *unicode_character_data(int32_t); const struct unicode_character_data *unicode_character_get(const struct unicode_character_data *); void unicode_character_put(const struct unicode_character_data *); const struct unicode_character_data *unicode_character_lookup(const char *); int unicode_database_version(void); #endif /* UCD_H */