aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@smyrno.hos.anvin.org> 2005-11-26 12:46:14 -0800
committer H. Peter Anvin <hpa@smyrno.hos.anvin.org> 2005-11-26 12:46:14 -0800
commit 48e36b984fcb63f4b8c0e2d68b18311069051efc (patch)
tree d6374b36b00dae0ed1f22a0fe3b6f7bda0809810
parent 49b561825b4069061868b8808da85bbc8f7692ab (diff)
download libucd-48e36b984fcb63f4b8c0e2d68b18311069051efc.tar.gz
Beginnings of a properties array; always use UCC names for flags
-rwxr-xr-x convert_ucd.pl 57
-rw-r--r-- ucd.h 4
2 files changed, 59 insertions, 2 deletions
diff --git a/convert_ucd.pl b/convert_ucd.pl
index 4d7c4df..e19a79a 100755
--- a/convert_ucd.pl
+++ b/convert_ucd.pl
@@ -377,6 +377,62 @@ sub dump_prop_list()
}
#
+# Produce the properties array
+#
+%prop_array_position = ();
+
+sub make_properties_array()
+{
+    # Write gen/proparray.c, one initializer per run of consecutive code
+    # points with identical flags, and store the running entry count for
+    # each code point in %prop_array_position.  Dies on open/close failure.
+    my ($fh, $prev);
+    my $cnt = 0;
+
+    # List of boolean properties that translate 1:1 into flags
+    my @boolean_props = qw(
+        Composition_Exclusion Alphabetic Default_Ignorable_Code_Point
+        Lowercase Grapheme_Base Grapheme_Extend ID_Start ID_Continue
+        Math Uppercase XID_Start XID_Continue Hex_Digit Bidi_Control Dash
+        Deprecated Diacritic Extender Grapheme_Link Ideographic
+        IDS_Binary_Operator IDS_Trinary_Operator Join_Control
+        Logical_Order_Exception Noncharacter_Code_Point Pattern_Syntax
+        Pattern_White_Space Quotation_Mark Radical Soft_Dotted STerm
+        Terminal_Punctuation Unified_Ideograph Variation_Selector
+        White_Space Bidi_Mirrored
+    );
+
+    open($fh, '>', 'gen/proparray.c') or die "gen/proparray.c: $!\n";
+    binmode $fh, ':utf8';
+
+    for my $c ( 0 .. 0x10ffff ) {
+        my $cp = $ucs_props{$c};
+        # Build the code point header and the property text separately so
+        # runs are detected by comparing properties alone.  A fixed
+        # substr() offset would misalign once the value needs six digits.
+        my $head = sprintf("\t{\n\t\t0x%05x,\n", $c);
+        my $props = '';
+        foreach my $bp ( @boolean_props ) {
+            $props .= "\t\tUC_FL_\U$bp\E |\n" if ( $$cp{$bp} );
+        }
+        $props .= "\t\t0,\n";  # Easy way to terminate a bit sequence
+
+        # Additional properties...
+        $props .= "\t},\n";
+
+        if ( !defined($prev) || $prev ne $props ) {
+            print $fh $head . $props;
+            $cnt++;
+            $prev = $props;
+        }
+        $prop_array_position{$c} = $cnt;
+    }
+    print $fh "\t/* Total: $cnt ranges */\n";
+
+    close($fh) or die "gen/proparray.c: $!\n";
+}
+
+#
# Import files
#
read_separated_file('ucd/UnicodeData.txt',
@@ -414,4 +470,5 @@ make_jamo_tables();
make_names_list();
make_name_keyfile();
make_named_ucs_keyfile();
+make_properties_array();
dump_prop_list();
diff --git a/ucd.h b/ucd.h
index 1eda004..c0c0299 100644
--- a/ucd.h
+++ b/ucd.h
@@ -287,7 +287,7 @@ enum unicode_general_category {
#define UC_FL_COMPOSITION_EXCLUSION UC_FLAG(0)
#define UC_FL_ALPHABETIC UC_FLAG(1)
-#define UC_FL_DEFAULT_IGNORABLE UC_FLAG(2)
+#define UC_FL_DEFAULT_IGNORABLE_CODE_POINT UC_FLAG(2)
#define UC_FL_LOWERCASE UC_FLAG(3)
#define UC_FL_GRAPHEME_BASE UC_FLAG(4)
#define UC_FL_GRAPHEME_EXTEND UC_FLAG(5)
@@ -309,7 +309,7 @@ enum unicode_general_category {
#define UC_FL_IDS_TRINARY_OPERATOR UC_FLAG(21)
#define UC_FL_JOIN_CONTROL UC_FLAG(22)
#define UC_FL_LOGICAL_ORDER_EXCEPTION UC_FLAG(23)
-#define UC_FL_NONCHARACTER UC_FLAG(24)
+#define UC_FL_NONCHARACTER_CODE_POINT UC_FLAG(24)
#define UC_FL_PATTERN_SYNTAX UC_FLAG(25)
#define UC_FL_PATTERN_WHITE_SPACE UC_FLAG(26)
#define UC_FL_QUOTATION_MARK UC_FLAG(27)