You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
49 lines
1.1 KiB
49 lines
1.1 KiB
9 months ago
|
#!/usr/bin/perl -w

# Convert unicode mappings to nginx configuration file format.
#
# Mapping files are read from the files named on the command line (or from
# standard input when none are given); converted entries are printed to
# standard output and unrecognized lines are reported on standard error.

# You may find useful mappings in various places, including
# unicode.org official site:
#
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT

# Needs perl 5.6 or later.

# Written by Maxim Dounin, mdounin@mdounin.ru

###############################################################################

require 5.006;

use strict;
use warnings;

# utf8_hex($code_point)
#
# Return the UTF-8 encoding of the given Unicode code point as a string of
# upper-case hex digits, two per byte (e.g. 0x0402 -> "D082").

sub utf8_hex {
    my ($code_point) = @_;

    # pack("U") builds a one-character string; unpacking it with "U0C*"
    # yields the bytes of the UTF-8 sequence for that character.

    return join('',
        map { sprintf("%02X", $_) }
        unpack("U0C*", pack("U", $code_point))
    );
}

# convert_line($line)
#
# Convert one mapping line of the form
#
#     0x<from-code> 0x<unicode-code> # <unicode-name>
#
# into an nginx charset map entry.  <from-code> must be exactly two hex
# digits and <unicode-code> exactly four; anything else is rejected rather
# than being passed on to hex(), which would silently turn it into a bogus
# entry.
#
# Returns the entry (terminated by a newline), or undef if the line is not
# a well-formed mapping.

sub convert_line {
    my ($line) = @_;

    # Mapping <from-code> <unicode-code> "#" <unicode-name>

    my ($cs_code, $un_code, $un_name) = $line =~ m{
        ^ \s* 0x ([0-9A-Fa-f]{2})
          \s* 0x ([0-9A-Fa-f]{4})
          \s* (\#.*)
    }x or return;

    # Produce UTF-8 sequence from character code

    my $un_utf8 = utf8_hex(hex($un_code));

    return " $cs_code $un_utf8 ; $un_name\n";
}

# main()
#
# Filter the input line by line: comments and empty lines are skipped,
# mappings are converted and printed, everything else produces a warning.

sub main {
    while (my $line = <>) {

        # Skip comments and empty lines

        next if $line =~ /^#/;
        next if $line =~ /^\s*$/;

        # Drop the line ending (LF or CRLF) together with any other
        # trailing whitespace, so that a stray "\r" never ends up in
        # the generated configuration or in the warning below.

        $line =~ s/\s+\z//;

        # Convert mappings

        my $entry = convert_line($line);

        if (defined $entry) {
            print $entry;

        } else {
            warn "Unrecognized line: '$line'";
        }
    }

    return;
}

# Run the filter only when executed as a script, so that the conversion
# routines can also be loaded from another file without consuming input.

main() unless caller;

###############################################################################