125 lines
3.6 KiB
Perl
125 lines
3.6 KiB
Perl
|
#!/usr/bin/env perl
|
||
|
# $XTermId: utf8.pl,v 1.12 2022/07/08 18:32:43 tom Exp $
|
||
|
# -----------------------------------------------------------------------------
|
||
|
# this file is part of xterm
|
||
|
#
|
||
|
# Copyright 2012-2018,2022 by Thomas E. Dickey
|
||
|
#
|
||
|
# All Rights Reserved
|
||
|
#
|
||
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
||
|
# copy of this software and associated documentation files (the
|
||
|
# "Software"), to deal in the Software without restriction, including
|
||
|
# without limitation the rights to use, copy, modify, merge, publish,
|
||
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
||
|
# permit persons to whom the Software is furnished to do so, subject to
|
||
|
# the following conditions:
|
||
|
#
|
||
|
# The above copyright notice and this permission notice shall be included
|
||
|
# in all copies or substantial portions of the Software.
|
||
|
#
|
||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||
|
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||
|
# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
|
||
|
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||
|
#
|
||
|
# Except as contained in this notice, the name(s) of the above copyright
|
||
|
# holders shall not be used in advertising or otherwise to promote the
|
||
|
# sale, use or other dealings in this Software without prior written
|
||
|
# authorization.
|
||
|
# -----------------------------------------------------------------------------
|
||
|
# display the given Unicode characters, given their hex or decimal values.
|
||
|
|
||
|
use warnings FATAL => "overflow";
|
||
|
no warnings "portable";
|
||
|
use strict;
|
||
|
use Encode 'encode_utf8';
|
||
|
use Text::CharWidth qw(mbswidth);
|
||
|
|
||
|
$| = 1;
|
||
|
|
||
|
sub num_bytes($) {
|
||
|
my $char = shift;
|
||
|
my $value = length( Encode::encode_utf8($char) );
|
||
|
my $result =
|
||
|
( $value <= 0
|
||
|
? "no bytes"
|
||
|
: ( $value > 1 ? sprintf( "%d bytes", $value ) : "1 bytes" ) );
|
||
|
return $result;
|
||
|
}
|
||
|
|
||
|
sub num_cells($) {
|
||
|
my $char = shift;
|
||
|
my $value = mbswidth($char);
|
||
|
my $result =
|
||
|
( $value <= 0
|
||
|
? "no cells"
|
||
|
: ( $value > 1 ? sprintf( "%d cells", $value ) : "1 cell" ) );
|
||
|
return $result;
|
||
|
}
|
||
|
|
||
|
sub pad_column($) {
|
||
|
my $char = shift;
|
||
|
my $value = mbswidth($char);
|
||
|
$value = 0 if ( $value < 0);
|
||
|
my $result = sprintf( "%.*s", 3 - $value, " ");
|
||
|
return $result;
|
||
|
}
|
||
|
|
||
|
sub vxt_utf8($) {
|
||
|
my $arg = $_[0];
|
||
|
my $hex = $arg;
|
||
|
my $dec = $arg;
|
||
|
if ( $arg =~ /^u\+[[:xdigit:]]+$/i ) {
|
||
|
$hex =~ s/^../0x/;
|
||
|
$dec = hex($hex);
|
||
|
}
|
||
|
elsif ( $arg =~ /^0x[[:xdigit:]]+$/i ) {
|
||
|
$dec = hex($hex);
|
||
|
}
|
||
|
elsif ( $arg =~ /^[[:xdigit:]]+$/i ) {
|
||
|
$dec = hex($hex);
|
||
|
}
|
||
|
else {
|
||
|
printf STDERR "? not a codepoint: $dec\n";
|
||
|
return;
|
||
|
}
|
||
|
my $chr = chr($dec);
|
||
|
my $type = (
|
||
|
$chr =~ /\p{isPrint}/
|
||
|
? (
|
||
|
$chr =~ /\p{isAlpha}/
|
||
|
? "alpha"
|
||
|
: (
|
||
|
$chr =~ /\p{isPunct}/
|
||
|
? "punct"
|
||
|
: (
|
||
|
$chr =~ /\p{isDigit}/
|
||
|
? "digit"
|
||
|
: "printing"
|
||
|
)
|
||
|
)
|
||
|
)
|
||
|
: (
|
||
|
$chr =~ /\p{isCntrl}/
|
||
|
? "cntrl"
|
||
|
: "nonprinting"
|
||
|
)
|
||
|
);
|
||
|
printf "%d ->%#x ->{%s}%s(%s %s %s)\n", $dec, $dec, $chr,
|
||
|
&pad_column($chr),
|
||
|
&num_bytes($chr),
|
||
|
&num_cells($chr),
|
||
|
$type;
|
||
|
}
|
||
|
|
||
|
binmode( STDOUT, ":utf8" );
|
||
|
while ( $#ARGV >= 0 ) {
|
||
|
vxt_utf8( shift @ARGV );
|
||
|
}
|
||
|
|
||
|
1;
|