diff options
author | H. Peter Anvin <hpa@smyrno.hos.anvin.org> | 2005-11-26 12:46:14 -0800 |
---|---|---|
committer | H. Peter Anvin <hpa@smyrno.hos.anvin.org> | 2005-11-26 12:46:14 -0800 |
commit | 48e36b984fcb63f4b8c0e2d68b18311069051efc (patch) | |
tree | d6374b36b00dae0ed1f22a0fe3b6f7bda0809810 | |
parent | 49b561825b4069061868b8808da85bbc8f7692ab (diff) | |
download | libucd-48e36b984fcb63f4b8c0e2d68b18311069051efc.tar.gz |
Beginnings of a properties array; always use UCC names for flags
-rwxr-xr-x | convert_ucd.pl | 57 | ||||
-rw-r--r-- | ucd.h | 4 |
2 files changed, 59 insertions, 2 deletions
diff --git a/convert_ucd.pl b/convert_ucd.pl
index 4d7c4df..e19a79a 100755
--- a/convert_ucd.pl
+++ b/convert_ucd.pl
@@ -377,6 +377,62 @@ sub dump_prop_list()
 }
 
 #
+# Produce the properties array: gen/proparray.c receives one entry per run
+# of consecutive code points with identical flags, and %prop_array_position
+# maps every code point to the number of entries written so far.
+#
+%prop_array_position = ();
+
+sub make_properties_array()
+{
+    # Parenthesized on purpose: "my $fh, $c, ..." declares only $fh.
+    my ($fh, $c, $cp, $head, $flags, $prev);
+    my $cnt = 0;
+
+    # List of boolean properties that translate 1:1 into flags
+    my @boolean_props = ('Composition_Exclusion', 'Alphabetic', 'Default_Ignorable_Code_Point',
+                         'Lowercase', 'Grapheme_Base', 'Grapheme_Extend', 'ID_Start', 'ID_Continue',
+                         'Math', 'Uppercase', 'XID_Start', 'XID_Continue', 'Hex_Digit',
+                         'Bidi_Control', 'Dash', 'Deprecated', 'Diacritic', 'Extender',
+                         'Grapheme_Link', 'Ideographic', 'IDS_Binary_Operator',
+                         'IDS_Trinary_Operator', 'Join_Control', 'Logical_Order_Exception',
+                         'Noncharacter_Code_Point', 'Pattern_Syntax', 'Pattern_White_Space',
+                         'Quotation_Mark', 'Radical', 'Soft_Dotted', 'STerm',
+                         'Terminal_Punctuation', 'Unified_Ideograph', 'Variation_Selector',
+                         'White_Space', 'Bidi_Mirrored');
+
+    open($fh, '>', 'gen/proparray.c') or die "gen/proparray.c: $!\n";
+    binmode $fh, ':utf8';
+
+    for ( $c = 0 ; $c <= 0x10ffff ; $c++ ) {
+        $cp = $ucs_props{$c};
+        # Keep the UCS value apart from the properties so runs are compared on
+        # the properties only; a fixed offset fails once %05x yields 6 digits.
+        $head = sprintf("\t{\n\t\t0x%05x,\n", $c);
+        $flags = '';
+        foreach my $bp ( @boolean_props ) {
+            if ( $$cp{$bp} ) {
+                $flags .= "\t\tUC_FL_\U$bp\E |\n";
+            }
+        }
+        $flags .= "\t\t0,\n";    # Easy way to terminate a bit sequence
+
+        # Additional properties...
+        $flags .= "\t},\n";
+
+        if ( !defined($prev) || $prev ne $flags ) {    # undef before 1st entry
+            print $fh $head . $flags;
+            $cnt++;
+            $prev = $flags;
+        }
+        $prop_array_position{$c} = $cnt;
+    }
+    print $fh "\t/* Total: $cnt ranges */\n";
+
+    close($fh) or die "gen/proparray.c: $!\n";
+}
+
+#
 # Import files
 #
 read_separated_file('ucd/UnicodeData.txt',
@@ -414,4 +470,5 @@ make_jamo_tables();
 make_names_list();
 make_name_keyfile();
 make_named_ucs_keyfile();
+make_properties_array();
 dump_prop_list();
diff --git a/ucd.h b/ucd.h
--- a/ucd.h
+++ b/ucd.h
@@ -287,7 +287,7 @@ enum unicode_general_category {
 
 #define UC_FL_COMPOSITION_EXCLUSION UC_FLAG(0)
 #define UC_FL_ALPHABETIC UC_FLAG(1)
-#define UC_FL_DEFAULT_IGNORABLE UC_FLAG(2)
+#define UC_FL_DEFAULT_IGNORABLE_CODE_POINT UC_FLAG(2)
 #define UC_FL_LOWERCASE UC_FLAG(3)
 #define UC_FL_GRAPHEME_BASE UC_FLAG(4)
 #define UC_FL_GRAPHEME_EXTEND UC_FLAG(5)
@@ -309,7 +309,7 @@ enum unicode_general_category {
 #define UC_FL_IDS_TRINARY_OPERATOR UC_FLAG(21)
 #define UC_FL_JOIN_CONTROL UC_FLAG(22)
 #define UC_FL_LOGICAL_ORDER_EXCEPTION UC_FLAG(23)
-#define UC_FL_NONCHARACTER UC_FLAG(24)
+#define UC_FL_NONCHARACTER_CODE_POINT UC_FLAG(24)
 #define UC_FL_PATTERN_SYNTAX UC_FLAG(25)
 #define UC_FL_PATTERN_WHITE_SPACE UC_FLAG(26)
 #define UC_FL_QUOTATION_MARK UC_FLAG(27)