#!/usr/local/bin/perl -- # -*- Perl -*- Force Emacs perl-mode
#
# Copyright 1999 Computing Research Labs, New Mexico State University
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# 24 February 1999
# Mark Leisher
#
#
# Define the Naidunia mapping table.
#
use strict;
use warnings;

#
# Usage: this-script [-o outfile] file ...
#
# Every input file is converted and written as big-endian 2-byte units.
# Output goes to standard output until a "-o" argument is seen; "-o" takes
# the next argument as an output filename, writes a UCS2 byte order mark to
# it, and directs the output of all following input files there.
#

#
# The Naidunia mapping table.  The array index is the Naidunia byte value
# and the element is a space-separated list of hexadecimal values that are
# written, in order, for that byte.  Values equal to zero are not written.
#
my @naimap = (
    '0x0000',  # 0x00
    '0x0001',  # 0x01
    '0x0002',  # 0x02
    '0x0003',  # 0x03
    '0x0004',  # 0x04
    '0x0005',  # 0x05
    '0x0006',  # 0x06
    '0x0007',  # 0x07
    '0x0008',  # 0x08
    '0x0009',  # 0x09
    '0x000A',  # 0x0A
    '0x000B',  # 0x0B
    '0x000C',  # 0x0C
    '0x000D',  # 0x0D
    '0x000E',  # 0x0E
    '0x000F',  # 0x0F
    '0x0010',  # 0x10
    '0x0011',  # 0x11
    '0x0012',  # 0x12
    '0x0013',  # 0x13
    '0x0014',  # 0x14
    '0x0015',  # 0x15
    '0x0016',  # 0x16
    '0x0017',  # 0x17
    '0x0018',  # 0x18
    '0x0019',  # 0x19
    '0x001A',  # 0x1A
    '0x001B',  # 0x1B
    '0x001C',  # 0x1C
    '0x001D',  # 0x1D
    '0x001E',  # 0x1E
    '0x001F',  # 0x1F
    '0x0020',  # 0x20
    '0x0021',  # 0x21
    '0x0927',  # 0x22
    '0x0951',  # 0x23
    '0x2013',  # 0x24
    '0x0025',  # 0x25
    '0x0903',  # 0x26
    '0x00A9',  # 0x27
    '0x0028',  # 0x28
    '0x0029',  # 0x29
    '0x002A',  # 0x2A
    '0x002B',  # 0x2B
    '0x002C',  # 0x2C
    '0x002D',  # 0x2D
    '0x002E',  # 0x2E
    '0x002F',  # 0x2F
    '0x0966',  # 0x30
    '0x0967',  # 0x31
    '0x0968',  # 0x32
    '0x0969',  # 0x33
    '0x096A',  # 0x34
    '0x096B',  # 0x35
    '0x096C',  # 0x36
    '0x096D',  # 0x37
    '0x096E',  # 0x38
    '0x096F',  # 0x39
    '0x0925',  # 0x3A
    '0x0924',  # 0x3B
    '0x003B',  # 0x3C
    '0x0926',  # 0x3D
    '0x0964',  # 0x3E
    '0x003F',  # 0x3F
    '0x091E',  # 0x40
    '0x091B',  # 0x41
    '0x0933',  # 0x42
    '0x092D',  # 0x43
    '0x0918',  # 0x44
    '0x0919',  # 0x45
    '0x0916',  # 0x46
    '0x0915 0x094D 0x0937',  # 0x47
    '0x2010',  # 0x48
    '0x2035',  # 0x49
    '0x0935',  # 0x4A
    '0x0923',  # 0x4B
    '0x0930 0x093F',  # 0x4C
    '0x0937',  # 0x4D
    '0x0936',  # 0x4E
    '0x2032',  # 0x4F
    '0x00D7',  # 0x50
    '0x090A',  # 0x51
    '0x0907',  # 0x52
    '0x091D',  # 0x53
    '0x094D',  # 0x54
    '0xFFFD',  # 0x55
    '0x092B',  # 0x56
    '0x0909',  # 0x57
    '0x0920',  # 0x58
    '0x090F',  # 0x59
    '0x0922',  # 0x5A
    '0x0930',  # 0x5B
    '0x093C',  # 0x5C
    '0x0943',  # 0x5D
    '0x00F7',  # 0x5E
    '0x2014',  # 0x5F
    '0x0950',  # 0x60
    '0x091A',  # 0x61
    '0x092E',  # 0x62
    '0x092C',  # 0x63
    '0x0917',  # 0x64
    '0x0940',  # 0x65
    '0x0915',  # 0x66
    '0x092F',  # 0x67
    '0x0930',  # 0x68
    '0x0948',  # 0x69
    '0x0932',  # 0x6A
    '0x0902',  # 0x6B
    '0x0928',  # 0x6C
    '0x0938',  # 0x6D
    '0x0939',  # 0x6E
    '0x0930',  # 0x6F
    '0x0945',  # 0x70
    '0x0942',  # 0x71
    '0x093F',  # 0x72
    '0x091C',  # 0x73
    '0x093E',  # 0x74
    '0x0947',  # 0x75
    '0x092A',  # 0x76
    '0x0941',  # 0x77
    '0x091F',  # 0x78
    '0x0905',  # 0x79
    '0x0921',  # 0x7A
    '0x0930',  # 0x7B
    '0x003D',  # 0x7C
    '0x090B',  # 0x7D
    '0x093D',  # 0x7E
    '0xFFFD',  # 0x7F
    '0x0031',  # 0x80
    '0x0032',  # 0x81
    '0x0902',  # 0x82
    '0x0932 0x094D',  # 0x83
    '0x0935 0x094D',  # 0x84
    '0x0919 0x094D 0x0932',  # 0x85
    '0x0939 0x0943',  # 0x86
    '0x0936 0x094D 0x0930',  # 0x87
    '0x0936 0x094D',  # 0x88
    '0x0952',  # 0x89
    '0x0926 0x094D 0x0930',  # 0x8A
    '0x0938 0x094D 0x0930',  # 0x8B
    '0x092A 0x094D 0x0930',  # 0x8C
    '0x0033',  # 0x8D
    '0x0034',  # 0x8E
    '0x0035',  # 0x8F
    '0xFFFD',  # 0x90
    '0x0926 0x094D 0x092F',  # 0x91
    '0x0926 0x094D 0x0917',  # 0x92
    '0x0926 0x094D 0x0930',  # 0x93
    '0x091A 0x094D 0x091A 0x094D',  # 0x94
    '0x0939 0x094D 0x0935',  # 0x95
    '0x0915 0x094D 0x0935',  # 0x96
    '0x0939 0x094D 0x0928',  # 0x97
    '0x0932',  # 0x98
    '0x091C 0x094D 0x091C',  # 0x99
    '0x2612',  # 0x9A
    '0x0931 0x094D 0x093F',  # 0x9B
    '0x0942',  # 0x9C
    '0x0036',  # 0x9D
    '0x0037',  # 0x9E
    '0x0939 0x094D 0x0928',  # 0x9F
    '0x00A0',  # 0xA0
    '0x0941',  # 0xA1
    '0x0915 0x094D',  # 0xA2
    '0x0939 0x094D 0x0935',  # 0xA3
    '0x091E 0x094D',  # 0xA4
    '0x0924 0x094D 0x0930 0x094D',  # 0xA5
    '0xFFFD',  # 0xA6
    '0x0916 0x094D 0x0930',  # 0xA7
    '0x0932 0x094D 0x0932',  # 0xA8
    '0x092F 0x094D',  # 0xA9
    '0x0928 0x094D 0x0928',  # 0xAA
    '0x0926 0x094D 0x092E',  # 0xAB
    '0x0915 0x094D 0x0915',  # 0xAC
    '0x2013',  # 0xAD
    '0x093F',  # 0xAE
    '0x0938 0x094D 0x0928 0x094D 0x0928',  # 0xAF
    '0x0926 0x094D 0x0927',  # 0xB0
    '0x0927 0x094D',  # 0xB1
    '0x0937 0x094D 0x091F',  # 0xB2
    '0x0937 0x094D 0x0920',  # 0xB3
    '0x0936 0x094D 0x0930 0x094D 0x091A 0x094D',  # 0xB4
    '0x0915 0x094D',  # 0xB5
    '0x0936 0x094D 0x0930',  # 0xB6
    '0x0901',  # 0xB7
    '0x0917 0x094D',  # 0xB8
    '0x0938 0x094D',  # 0xB9
    '0x0924 0x094D 0x0930 0x094D',  # 0xBA
    '0x093C',  # 0xBB
    '0x091E 0x094D 0x091C 0x094D',  # 0xBC
    '0x091E 0x094D 0x091A 0x094D',  # 0xBD
    '0x0933 0x094D',  # 0xBE
    '0x0928 0x094D',  # 0xBF
    '0x0924 0x094D',  # 0xC0
    '0x0938 0x094D 0x0928 0x094D',  # 0xC1
    '0x093F',  # 0xC2
    '0x0926 0x094D 0x092C',  # 0xC3
    '0x0939 0x094D 0x0930',  # 0xC4
    '0x0926 0x094D 0x0935',  # 0xC5
    '0x0927 0x094D',  # 0xC6
    '0x092D 0x094D',  # 0xC7
    '0x0921 0x094D 0x0921',  # 0xC8
    '0x0917 0x094D',  # 0xC9
    '0x0939 0x094D 0x092F',  # 0xCA
    '0x092A 0x094D',  # 0xCB
    '0x0915 0x094D 0x0937 0x094D',  # 0xCC
    '0x091D 0x094D',  # 0xCD
    '0x0918 0x094D',  # 0xCE
    '0x0916 0x094D',  # 0xCF
    '0x091B 0x094D 0x0935 0x094D',  # 0xD0
    '0x0939 0x094D 0x0928',  # 0xD1
    '0x091C 0x094D 0x091E 0x094D',  # 0xD2
    '0x091A 0x094D 0x092F',  # 0xD3
    '0x0935 0x094D',  # 0xD4
    '0x091F 0x094D 0x091F',  # 0xD5
    '0x0932 0x094D',  # 0xD6
    '0x091B 0x094D 0x0930 0x094D',  # 0xD7
    '0x092D 0x094D 0x0926',  # 0xD8
    '0x0920 0x094D 0x0920',  # 0xD9
    '0x0925 0x094D',  # 0xDA
    '0x0921 0x094D 0x0922',  # 0xDB
    '0x092B 0x094D',  # 0xDC
    '0x0936 0x094D 0x0930 0x094D 0x0919 0x094D',  # 0xDD
    '0x0930',  # 0xDE
    '0x091C 0x094D',  # 0xDF
    '0x0928 0x094D',  # 0xE0
    '0x091D 0x094D 0x0930',  # 0xE1
    '0x0901',  # 0xE2
    '0x0923 0x094D',  # 0xE3
    '0x0933 0x094D',  # 0xE4
    '0x091A 0x094D',  # 0xE5
    '0x0926 0x094D 0x0926',  # 0xE6
    '0x092C 0x094D',  # 0xE7
    '0x091F 0x094D 0x0920',  # 0xE8
    '0x0940 0x0902',  # 0xE9
    '0x0922 0x094D 0x0922',  # 0xEA
    '0x0936 0x094D',  # 0xEB
    '0x0926 0x094D 0x0917',  # 0xEC
    '0x0937 0x094D',  # 0xED
    '0x0948 0x0902',  # 0xEE
    '0x0938 0x094D 0x0928 0x094D',  # 0xEF
    '0x0926 0x094D 0x0917',  # 0xF0
    '0x0939 0x094D',  # 0xF1
    '0x0939 0x094D 0x0932',  # 0xF2
    '0x0931 0x094D',  # 0xF3
    '0x0938 0x094D',  # 0xF4
    '0x0926 0x094D 0x0927',  # 0xF5
    '0x092E 0x094D',  # 0xF6
    '0x092E 0x094D',  # 0xF7
    '0x0931',  # 0xF8
    '0x0919 0x094D 0x0916',  # 0xF9
    '0x0924 0x094D 0x0924 0x094D',  # 0xFA
    '0x0919 0x094D 0x0918',  # 0xFB
    '0x0947 0x0902',  # 0xFC
    '0x0939 0x0941',  # 0xFD
    '0x0939 0x0942',  # 0xFE
    '0x0915 0x094D 0x0924',  # 0xFF
);

#
# Load the mapping table.
#
# (Disabled alternative that reads the table from a file instead.)
#
#open(IN, "naidunia.tbl");
#while (<IN>) {
#    ($n, $u) = split(/\t/, $_);
#    $naimap[hex($n)] = $u;
#}
#close(IN);

#
# Pattern whose match switches the converter into Naidunia mode; everything
# up to and including the match is written out as plain 2-byte ASCII.
#
# NOTE(review): as found in this file the pattern matches any run of ']'
# followed by '>'.  That looks like a tag-matching expression that lost its
# "<tag[^>" prefix, but the intended tag name cannot be recovered from this
# file, so the pattern is kept exactly as found.  TODO confirm and restore.
#
my $nai_start_re = qr/\]*\>/i;

#
# Return TEXT with every character written as a big-endian 2-byte unit
# (a zero byte followed by the character's byte for 8-bit input).
#
sub ascii_to_ucs2 {
    my ($text) = @_;

    return join '', map { pack('n', ord $_) } split //, $text;
}

#
# Return the 2-byte big-endian output for a run of Naidunia text.  The text
# is reordered first and then each character is looked up in @naimap.
#
sub naidunia_to_ucs2 {
    my ($text) = @_;

    #
    # Make sure the NUKTA follows the consonant it applies to.
    #
    $text =~ s/(\x5c)(.)/$2$1/g;

    #
    # Do a little special processing to try and get the vowel
    # I positioned correctly before converting the text.
    #
    $text =~ s/([\x72\xae\xc2])(.)([^\x16\x19\x74])/$2$1$3/g;

    #
    # Do some fancy footwork with the REPHA form of the RA to
    # make it appear close to where it is supposed to be.
    # This means swapping it with the previous shape and
    # adding a virama after it ('T' is the byte that @naimap
    # turns into 0x094D).
    #
    $text =~ s/(.)(\x6f)/${2}T${1}/g;

    #
    # Remove what appear to be extraneous 'U' letters from the
    # Naidunia text.
    #
    $text =~ s/U//g;

    my $ucs2 = '';
    for my $char (split //, $text) {
        my $mapping = $naimap[ord $char];

        # Characters outside the table produce no output.
        next unless defined $mapping;

        for my $hex_code (split / /, $mapping) {
            my $code = hex $hex_code;
            $ucs2 .= pack('n', $code) if $code != 0;
        }
    }

    return $ucs2;
}

#
# Convert one input line and return the bytes to write for it.
#
#   $line    - the raw line, including any line terminator.
#   $in_nai  - reference to the mode flag: false while copying plain text,
#              true while inside Naidunia text.  It is updated in place so
#              that the mode carries over from one line to the next.
#
sub convert_line {
    my ($line, $in_nai) = @_;
    my $ucs2 = '';

    # Test the length rather than the truth of the string so that a
    # remainder consisting of just "0" is still converted.
    while (length $line) {
        if (!$$in_nai) {
            my $plain;
            if ($line =~ $nai_start_re) {
                # Everything through the end of the match is plain text.
                $plain    = substr($line, 0, $+[0]);
                $line     = substr($line, $+[0]);
                $$in_nai  = 1;
            }
            else {
                $plain = $line;
                $line  = '';
            }
            $ucs2 .= ascii_to_ucs2($plain);
        }

        # Not an "else": a line that has just entered Naidunia mode
        # continues with its remainder in the same pass.
        if ($$in_nai) {
            my ($nai_text, $end_tag);

            # "</p>" is tried first and wins whenever it occurs anywhere
            # in the line; "</text>" is only considered otherwise.
            if ($line =~ m{</p>}i || $line =~ m{</text>}i) {
                $nai_text = substr($line, 0, $-[0]);
                $end_tag  = substr($line, $-[0], $+[0] - $-[0]);
                $line     = substr($line, $+[0]);
                $$in_nai  = 0;
            }
            else {
                ($nai_text, $end_tag, $line) = ($line, '', '');
            }

            #
            # Convert the Naidunia text first, then make sure the
            # closing tag is written out in 2-byte form.
            #
            $ucs2 .= naidunia_to_ucs2($nai_text) . ascii_to_ucs2($end_tag);
        }
    }

    return $ucs2;
}

#
# Default the output to standard out.
#
my $out           = \*STDOUT;
my $out_is_stdout = 1;
binmode($out);

#
# Scan the command line for an output filename.
#
while (defined(my $arg = shift @ARGV)) {
    if ($arg =~ /^-o/) {
        my $oname = shift @ARGV;
        defined $oname
            or die "$0: $arg requires an output filename\n";

        if (!$out_is_stdout) {
            close($out) or die "$0: error closing output file: $!\n";
        }

        open(my $ofile, '>', $oname)
            or die "$0: cannot open $oname for writing: $!\n";
        binmode($ofile);

        #
        # Print the UCS2 byte order mark (BOM).
        #
        print {$ofile} "\xfe\xff";
        ## print {$ofile} "\xff\xfe";

        $out           = $ofile;
        $out_is_stdout = 0;
    }
    else {
        #
        # Convert the input filename and write it to an output file.
        #
        my $in;
        if (!open($in, '<', $arg)) {
            # Keep going with the remaining files, but say why this one
            # was skipped.
            warn "$0: cannot open $arg: $!\n";
            next;
        }
        binmode($in);

        my $nai = 0;
        while (defined(my $line = <$in>)) {
            print {$out} convert_line($line, \$nai);
        }
        close($in);
    }
}

if (!$out_is_stdout) {
    close($out) or die "$0: error closing output file: $!\n";
}