aboutsummaryrefslogtreecommitdiffstats
path: root/ucd.h.in
blob: 3f557b013c15a58237b00675a4a047eecd2f1f14 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/* -*- c -*- ------------------------------------------------------------- *
 *   
 *   Copyright 2005 H. Peter Anvin - All Rights Reserved
 *
 *   Permission is hereby granted, free of charge, to any person
 *   obtaining a copy of this software and associated documentation
 *   files (the "Software"), to deal in the Software without
 *   restriction, including without limitation the rights to use,
 *   copy, modify, merge, publish, distribute, sublicense, and/or
 *   sell copies of the Software, and to permit persons to whom
 *   the Software is furnished to do so, subject to the following
 *   conditions:
 *   
 *   The above copyright notice and this permission notice shall
 *   be included in all copies or substantial portions of the Software.
 *   
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 *   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 *   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 *   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 *   OTHER DEALINGS IN THE SOFTWARE.
 *
 * ----------------------------------------------------------------------- */

/*
 * ucd.h
 *
 * Simple interface to the Unicode Character Database
 */

#ifndef UCD_H
#define UCD_H

#include <stdint.h>

/*
 * LIBUCD_THREAD_SUPPORT is defined unconditionally right here, so the
 * #ifdef below always pulls in <pthread.h> for every includer of this
 * header as written.
 * NOTE(review): presumably this line is meant to be toggled by the build
 * configuration -- confirm before relying on a non-threaded build.
 */
#define LIBUCD_THREAD_SUPPORT

#ifdef LIBUCD_THREAD_SUPPORT
# include <pthread.h>
#endif

/*
 * ENUMS is not defined anywhere in the visible part of this file.
 * NOTE(review): presumably a placeholder that is replaced with the
 * generated "enum unicode_*" definitions (used by struct
 * unicode_character_data below) when the final header is produced from
 * this template -- confirm against the build scripts.
 */
ENUMS;

/*
 * Boolean character properties, one bit per property.
 *
 * UC_FLAG() builds every mask from a 64-bit constant: the bit positions
 * used below run from 0 through 35, so a plain int shift would not be
 * wide enough for the upper ones.
 * NOTE(review): these masks are presumably meant to be tested against the
 * 64-bit "fl" member of struct unicode_character_data -- confirm.
 */
#define UC_FLAG(bit) (UINT64_C(1) << (bit))

#define UC_FL_COMPOSITION_EXCLUSION         UC_FLAG(0)
#define UC_FL_ALPHABETIC                    UC_FLAG(1)
#define UC_FL_DEFAULT_IGNORABLE_CODE_POINT  UC_FLAG(2)
#define UC_FL_LOWERCASE                     UC_FLAG(3)
#define UC_FL_GRAPHEME_BASE                 UC_FLAG(4)
#define UC_FL_GRAPHEME_EXTEND               UC_FLAG(5)
#define UC_FL_ID_START                      UC_FLAG(6)
#define UC_FL_ID_CONTINUE                   UC_FLAG(7)
#define UC_FL_MATH                          UC_FLAG(8)
#define UC_FL_UPPERCASE                     UC_FLAG(9)
#define UC_FL_XID_START                     UC_FLAG(10)
#define UC_FL_XID_CONTINUE                  UC_FLAG(11)
#define UC_FL_HEX_DIGIT                     UC_FLAG(12)
#define UC_FL_BIDI_CONTROL                  UC_FLAG(13)
#define UC_FL_DASH                          UC_FLAG(14)
#define UC_FL_DEPRECATED                    UC_FLAG(15)
#define UC_FL_DIACRITIC                     UC_FLAG(16)
#define UC_FL_EXTENDER                      UC_FLAG(17)
#define UC_FL_GRAPHEME_LINK                 UC_FLAG(18)
#define UC_FL_IDEOGRAPHIC                   UC_FLAG(19)
#define UC_FL_IDS_BINARY_OPERATOR           UC_FLAG(20)
#define UC_FL_IDS_TRINARY_OPERATOR          UC_FLAG(21)
#define UC_FL_JOIN_CONTROL                  UC_FLAG(22)
#define UC_FL_LOGICAL_ORDER_EXCEPTION       UC_FLAG(23)
#define UC_FL_NONCHARACTER_CODE_POINT       UC_FLAG(24)
#define UC_FL_PATTERN_SYNTAX                UC_FLAG(25)
#define UC_FL_PATTERN_WHITE_SPACE           UC_FLAG(26)
#define UC_FL_QUOTATION_MARK                UC_FLAG(27)
#define UC_FL_RADICAL                       UC_FLAG(28)
#define UC_FL_SOFT_DOTTED                   UC_FLAG(29)
#define UC_FL_STERM                         UC_FLAG(30)
#define UC_FL_TERMINAL_PUNCTUATION          UC_FLAG(31)
#define UC_FL_UNIFIED_IDEOGRAPH             UC_FLAG(32)
#define UC_FL_VARIATION_SELECTOR            UC_FLAG(33)
#define UC_FL_WHITE_SPACE                   UC_FLAG(34)
#define UC_FL_BIDI_MIRRORED                 UC_FLAG(35)

/*
 * Per-code-point record.  Every function declared in this header that
 * returns or accepts character data does so through a pointer to this
 * structure.
 */
struct unicode_character_data {
  int32_t ucs;			/* Actual codepoint */
  uint16_t size;		/* Size of this structure */
  uint16_t alloc_size;		/* Allocation size */
  uint64_t fl;			/* Flags; presumably a mask of UC_FL_* bits -- confirm */
  /*
   * String-valued properties.  The encoding of these strings and whether
   * any of them may be NULL is not visible in this header -- check the
   * implementation before dereferencing.
   */
  const char *name;
  const char *bidi_mirroring_glyph;
  const char *uppercase_mapping;
  const char *lowercase_mapping;
  const char *titlecase_mapping;
  /*
   * Simple case mappings, stored with the same type as ucs
   * (presumably code points -- confirm).
   */
  int32_t simple_uppercase;
  int32_t simple_lowercase;
  int32_t simple_titlecase;
  /* Numeric value = num/den * 10^exp */
  uint8_t numeric_value_num;
  uint8_t numeric_value_den;
  uint8_t numeric_value_exp;
  /* NOTE(review): presumably major/minor of the Unicode "Age" version -- confirm */
  uint8_t age_ma, age_mi;
  /*
   * Enumerated properties.  None of these enum types is defined in the
   * visible part of this file; presumably they are emitted in place of the
   * ENUMS placeholder earlier in the header -- confirm.
   */
  enum unicode_general_category         general_category;
  enum unicode_block			block;
  enum unicode_script                   script;
  enum unicode_joining_type      	joining_type;
  enum unicode_joining_group     	joining_group;
  enum unicode_east_asian_width         east_asian_width;
  enum unicode_hangul_syllable_type     hangul_syllable_type;
  enum unicode_numeric_type             numeric_type;
  enum unicode_canonical_combining_class canonical_combining_class;
  enum unicode_bidi_class	    	bidi_class;
  enum unicode_grapheme_cluster_break	grapheme_cluster_break;
  enum unicode_sentence_break		sentence_break;
  enum unicode_word_break		word_break;
  enum unicode_line_break		line_break;
};

/*
 * Public entry points.
 *
 * NOTE(review): only the signatures are visible in this header; the
 * descriptions below are inferred from the names and types and must be
 * confirmed against the implementation.
 */

/* Takes an int32_t, the same type as the "ucs" member (presumably the
 * code point to look up -- confirm). */
const struct unicode_character_data *
unicode_character_data(int32_t ucs);

/* Takes and returns a character-data pointer (presumably acquires an
 * additional reference -- confirm). */
const struct unicode_character_data *
unicode_character_get(const struct unicode_character_data *ucd);

/* Takes a character-data pointer and returns nothing (presumably
 * releases a reference -- confirm). */
void
unicode_character_put(const struct unicode_character_data *ucd);

/* Takes a C string (presumably a character name -- confirm). */
const struct unicode_character_data *
unicode_character_lookup(const char *name);

/* Returns an int; the encoding of the version number is not visible
 * here -- confirm. */
int
unicode_database_version(void);

#endif /* UCD_H */