mirror of
https://git.freebsd.org/src.git
synced 2026-01-16 23:02:24 +00:00
145 lines
4.2 KiB
Perl
Executable file
145 lines
4.2 KiB
Perl
Executable file
#!/usr/bin/env perl
|
|
use strict;
|
|
|
|
my $USAGE = <<__EOF__;
|
|
usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
|
|
-n = take non-matching types
|
|
-f = zero-based type field (default 2)
|
|
__EOF__
|
|
|
|
use Getopt::Std;
|
|
use vars qw( $opt_f $opt_n );
|
|
|
|
my $type_field = 2;
|
|
|
|
# Override Unicode tables for certain control chars
|
|
# that are expected to be found in normal text files.
|
|
my %force_space = (
|
|
0x08 => 1, # backspace
|
|
0x09 => 1, # tab
|
|
0x0a => 1, # newline
|
|
0x0c => 1, # form feed
|
|
0x0d => 1, # carriage return
|
|
);
|
|
|
|
# Override Unicode tables for certain modifier chars which act differently
|
|
# on different terminals. Treat them as omittable.
|
|
my @force_omit = (
|
|
[0xad, 0xad], # SOFT HYPHEN
|
|
[0x200d, 0x200d], # ZERO WIDTH JOINER
|
|
[0x1f3fb, 0x1f3ff], # EMOJI MODIFIER FITZPATRICK TYPE-[1-6]
|
|
[0x1f9b0, 0x1f9b3], # EMOJI COMPONENT [RED,CURLY,BALD,WHITE] HAIR
|
|
[0xfe00, 0xfe0f], # VARIATION SELECTOR-[1-16]
|
|
[0xe0100, 0xe01ef], # VARIATION SELECTOR-[17-256]
|
|
);
|
|
|
|
# Hangul Jamo medial vowels and final consonants should be zero width.
|
|
my @force_compose = (
|
|
[0x1160, 0x11ff],
|
|
[0xd7b0, 0xd7c6],
|
|
[0xd7cb, 0xd7fb]
|
|
);
|
|
|
|
exit (main() ? 0 : 1);
|
|
|
|
sub main {
|
|
my $args = join ' ', @ARGV;
|
|
die $USAGE if not getopts('f:n');
|
|
$type_field = $opt_f if $opt_f;
|
|
|
|
my %types;
|
|
my $arg;
|
|
while ($arg = shift @ARGV) {
|
|
last if $arg eq '--';
|
|
$types{$arg} = 1;
|
|
}
|
|
my %out = ( 'types' => \%types );
|
|
|
|
my %force_compose;
|
|
foreach my $comp (@force_compose) {
|
|
my ($lo,$hi) = @$comp;
|
|
for (my $ch = $lo; $ch <= $hi; ++$ch) {
|
|
$force_compose{$ch} = 1;
|
|
}
|
|
}
|
|
my %force_omit;
|
|
foreach my $comp (@force_omit) {
|
|
my ($lo,$hi) = @$comp;
|
|
for (my $ch = $lo; $ch <= $hi; ++$ch) {
|
|
$force_omit{$ch} = 1;
|
|
}
|
|
}
|
|
|
|
my ($sec,$min,$hour,$mday,$mon,$year) = gmtime($ENV{SOURCE_DATE_EPOCH} // time());
|
|
my @month = ( "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" );
|
|
printf "/* Generated by \"%s %s\" on %s %2d %2d:%02d:%02d GMT %d */\n",
|
|
$0, $args, $month[$mon], $mday, $hour, $min, $sec, $year+1900;
|
|
|
|
my $last_code = 0;
|
|
my $start_range = 0;
|
|
while (<>) {
|
|
chomp;
|
|
s/#.*//;
|
|
my @fields = split /;/;
|
|
next if not @fields;
|
|
my ($lo_code, $hi_code);
|
|
my $codes = $fields[0];
|
|
if ($codes =~ /(\w+)\.\.(\w+)/) {
|
|
$lo_code = hex $1;
|
|
$hi_code = hex $2;
|
|
} else {
|
|
$lo_code = $hi_code = hex $codes;
|
|
}
|
|
if ($fields[1] =~ /, First>$/) {
|
|
die "invalid Unicode data: First with range" if $hi_code != $lo_code;
|
|
$start_range = $lo_code;
|
|
next;
|
|
}
|
|
if ($fields[1] =~ /, Last>$/) {
|
|
die "invalid Unicode data: Last without First" if not $start_range;
|
|
$lo_code = $start_range;
|
|
$start_range = 0;
|
|
} elsif ($start_range) {
|
|
die "invalid Unicode data: First without Last";
|
|
}
|
|
my $type = $fields[$type_field];
|
|
$type =~ s/\s//g;
|
|
for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) {
|
|
output(\%out, $last_code,
|
|
$force_space{$last_code} ? 'Zs' : $force_compose{$last_code} ? 'Mn' :
|
|
$force_omit{$last_code} ? 'Xx' : $type);
|
|
}
|
|
}
|
|
output(\%out, $last_code);
|
|
return 1;
|
|
}
|
|
|
|
sub output {
|
|
my ($out, $code, $type) = @_;
|
|
my $type_ok = ($type and ${${$out}{types}}{$type});
|
|
$type_ok = not $type_ok if $opt_n;
|
|
my $prev_code = $$out{prev_code};
|
|
|
|
if (not $type_ok) {
|
|
end_run($out, $prev_code);
|
|
} elsif (not $$out{in_run} or $type ne $$out{run_type} or $code != $prev_code+1) {
|
|
end_run($out, $prev_code);
|
|
start_run($out, $code, $type);
|
|
}
|
|
$$out{prev_code} = $code;
|
|
}
|
|
|
|
sub start_run {
|
|
my ($out, $code, $type) = @_;
|
|
$$out{start_code} = $code;
|
|
$$out{prev_code} = $code;
|
|
$$out{run_type} = $type;
|
|
$$out{in_run} = 1;
|
|
}
|
|
|
|
sub end_run {
|
|
my ($out, $code) = @_;
|
|
return if not $$out{in_run};
|
|
printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{run_type};
|
|
$$out{in_run} = 0;
|
|
}
|