aboutsummaryrefslogtreecommitdiffstats
path: root/admin/charsets
diff options
context:
space:
mode:
Diffstat (limited to 'admin/charsets')
-rw-r--r--admin/charsets/.arch-inventory4
-rw-r--r--admin/charsets/.gitignore2
-rw-r--r--admin/charsets/Makefile349
-rw-r--r--admin/charsets/big5.awk54
-rw-r--r--admin/charsets/compact.awk125
-rw-r--r--admin/charsets/cp51932.awk59
-rw-r--r--admin/charsets/cp932.awk118
-rw-r--r--admin/charsets/eucjp-ms.awk85
-rw-r--r--admin/charsets/gb180302.awk82
-rw-r--r--admin/charsets/gb180304.awk104
-rw-r--r--admin/charsets/kuten.awk7
-rwxr-xr-xadmin/charsets/mapconv143
-rw-r--r--admin/charsets/mule-charsets.el60
13 files changed, 1192 insertions, 0 deletions
diff --git a/admin/charsets/.arch-inventory b/admin/charsets/.arch-inventory
new file mode 100644
index 0000000000..0924093e90
--- /dev/null
+++ b/admin/charsets/.arch-inventory
@@ -0,0 +1,4 @@
+# Unlike most emacs dirs, admin/charsets has a simple non-autoconf-generated makefile
+source ^(Makefile)$
+
+# arch-tag: ee36cfe3-96f8-4e91-aec4-008c80a85e6b
diff --git a/admin/charsets/.gitignore b/admin/charsets/.gitignore
new file mode 100644
index 0000000000..ea375dc591
--- /dev/null
+++ b/admin/charsets/.gitignore
@@ -0,0 +1,2 @@
+*.map
+*.el
diff --git a/admin/charsets/Makefile b/admin/charsets/Makefile
new file mode 100644
index 0000000000..ceecbce821
--- /dev/null
+++ b/admin/charsets/Makefile
@@ -0,0 +1,349 @@
+# Makefile -- Makefile to generate charset maps in etc/charsets.
+# Copyright (C) 2003
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Commentary
+
+# At first, set these environment variables:
+# GLIBC_CHARMAPS
+# Directory of glibc-VERSION/localedata/charmaps.
+# VERSION must be 2.3 or later.
+# MISC_CHARMAPS
+# Directory containing these charmap files:
+# o bulgarian-mik.txt.gz
+# provided at <http://czyborra.com/charsets/>
+# o PTCP154
+# provided at <http://www.iana.org/assignments/charset-reg/>
+# o stdenc.txt and symbol.txt
+# provided at <http://www.unicode.org/Public/MAPPINGS/>
+# o cp932.txt
+# provided at <http://www.unicode.org/Public/MAPPINGS/VENDORS>
+# o Uni2JIS
+# provided at <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/CJK.html>
+# o 720.htm and 858.htm
+# provided at <http://www.microsoft.com/globaldev/reference/oem/>
+# o eucJP-13th.txt, eucJP-udc.txt, eucJP-ibmext.txt
+# provided at <http://www.opengroup.or.jp/jvc/cde/>
+# o cns2ucsdkw.txt
+# available by:
+# % cvs -d :pserver:[email protected]:\
+# /cvsroot/kanji-database login
+# % cvs -d :pserver:[email protected]:\
+# /cvsroot/kanji-database co kanji-database
+# OLDEMACS
+# emacs of version 21.3.50 or later
+#
+# Then, do this:
+# % make XXX.map (or make all)
+# % make install
+
+# Every charset map this Makefile knows about.  `all' builds them,
+# `install' copies them to ../../etc/charsets and `clean' removes them.
+# The assignment is recursive (`='), so the group variables it names
+# may be defined further down.
+CHARSETS = ${ISO8859} ${IBM} ${CODEPAGE} ${CJK} ${MISC} ${MULE}
+
+# Note: We can not prepend "ISO-" to these map files because of file
+# name limits on DOS.
+ISO8859 = \
+ 8859-2.map 8859-3.map 8859-4.map 8859-5.map 8859-6.map 8859-7.map \
+ 8859-8.map 8859-9.map 8859-10.map 8859-11.map 8859-13.map 8859-14.map \
+ 8859-15.map 8859-16.map
+
+IBM = \
+ IBM037.map IBM038.map \
+ IBM256.map IBM273.map IBM274.map IBM275.map IBM277.map IBM278.map \
+ IBM280.map IBM281.map IBM284.map IBM285.map IBM290.map IBM297.map \
+ IBM420.map IBM423.map IBM424.map IBM437.map IBM500.map IBM850.map \
+ IBM851.map IBM852.map IBM855.map IBM856.map IBM857.map IBM860.map \
+ IBM861.map IBM862.map IBM863.map IBM864.map IBM865.map IBM866.map \
+ IBM868.map IBM869.map IBM870.map IBM871.map IBM874.map IBM875.map \
+ IBM880.map IBM891.map IBM903.map IBM904.map IBM905.map IBM918.map \
+ IBM1004.map IBM1026.map IBM1047.map
+
+CODEPAGE = \
+ CP737.map CP775.map CP1125.map\
+ CP1250.map CP1251.map CP1252.map CP1253.map CP1254.map \
+ CP1255.map CP1256.map CP1257.map CP1258.map \
+ CP10007.map \
+ CP720.map CP858.map
+
+CJK = GB2312.map GBK.map GB180302.map GB180304.map \
+ BIG5.map BIG5-HKSCS.map\
+ CNS-1.map CNS-2.map CNS-3.map CNS-4.map CNS-5.map CNS-6.map CNS-7.map \
+ CNS-F.map \
+ JISX0201.map JISX0208.map JISX0212.map JISX2131.map JISX2132.map \
+ JISC6226.map CP932-2BYTE.map JISX213A.map\
+ KSC5601.map KSC5636.map JOHAB.map
+
+MISC = KOI-8.map KOI8-R.map KOI8-U.map KOI8-T.map ALTERNATIVNYJ.map \
+ MIK.map PTCP154.map \
+ TIS-620.map VISCII.map VSCII.map VSCII-2.map\
+ KA-PS.map KA-ACADEMY.map \
+ HP-ROMAN8.map NEXTSTEP.map MACINTOSH.map EBCDICUK.map EBCDICUS.map \
+ stdenc.map symbol.map \
+ CP949-2BYTE.map \
+ BIG5-1.map BIG5-2.map
+
+# Emacs-mule charsets.
+MULE = MULE-ethiopic.map MULE-ipa.map MULE-is13194.map \
+ MULE-sisheng.map MULE-tibetan.map \
+ MULE-lviscii.map MULE-uviscii.map
+
+# Emacs Lisp translation tables.  They are built by the cp51932.el and
+# eucjp-ms.el rules and copied to ../../lisp/international by `install'.
+TRANS_TABLE = cp51932.el eucjp-ms.el
+
+# Default target: build every charset map and translation table.
+# Declared phony so that a stray file named "all" cannot mask it.
+.PHONY: all
+all: ${CHARSETS} ${TRANS_TABLE}
+
+# awk interpreter for the recipes that invoke $(AWK).
+AWK = gawk
+
+# Rules for each charset
+
+# VSCII.map: built from the glibc TCVN5712-1 charmap.  mapconv is given
+# the source file, a sed address selecting the charmap lines, the name
+# of the source format and the awk script to run.
+VSCII.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[0-9a-f].[ ]/' GLIBC-1 compact.awk > $@
+
+# VSCII-2.map: same source, but only bytes whose first hex digit is
+# 2-7 or a-f are selected.  The trailing sed rewrites the line that
+# starts a 0x20-0x7F range into "0x00-0x7F 0x0000".
+VSCII-2.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[2-7a-f].[ ]/' GLIBC-1 compact.awk \
+ | sed 's/0x20-0x7F.*/0x00-0x7F 0x0000/' > $@
+
+# ALTERNATIVNYJ.map: derived from IBM866.map.  Three header comment
+# lines are written first; sed then copies IBM866.map, replacing
+# everything after the first blank on each line that matches 0xF2
+# through 0xFB with a new Unicode value.
+ALTERNATIVNYJ.map: IBM866.map
+ # Generating $@...
+ @echo "# Modified from ibm866 according to the chart at" > $@
+ @echo "# http://www.cyrillic.com/ref/cyrillic/koi-8alt.html," >> $@
+ @echo "# with guesses for the Unicodes of the glyphs." >> $@
+ @sed -e '/0xF2/ s/ .*/ 0x2019/' \
+ -e '/0xF3/ s/ .*/ 0x2018/' \
+ -e '/0xF4/ s/ .*/ 0x0301/' \
+ -e '/0xF5/ s/ .*/ 0x0300/' \
+ -e '/0xF6/ s/ .*/ 0x203A/' \
+ -e '/0xF7/ s/ .*/ 0x2039/' \
+ -e '/0xF8/ s/ .*/ 0x2191/' \
+ -e '/0xF9/ s/ .*/ 0x2193/' \
+ -e '/0xFA/ s/ .*/ 0x00B1/' \
+ -e '/0xFB/ s/ .*/ 0x00F7/' < $< >> $@
+
+# Maps built from the charmap files kept in ${MISC_CHARMAPS}.  Each rule
+# passes mapconv the format name matching the origin of its source file.
+
+# In the MIK.map recipe `$$' is make's escape for a literal `$', so the
+# sed address seen by mapconv is '1,$' (every line).
+MIK.map: ${MISC_CHARMAPS}/bulgarian-mik.txt.gz mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '1,$$' CZYBORRA compact.awk > $@
+
+PTCP154.map: ${MISC_CHARMAPS}/PTCP154 mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^0x/' IANA compact.awk > $@
+
+stdenc.map: ${MISC_CHARMAPS}/stdenc.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@
+
+symbol.map: ${MISC_CHARMAPS}/symbol.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@
+
+CP720.map: ${MISC_CHARMAPS}/720.htm mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@
+
+CP858.map: ${MISC_CHARMAPS}/858.htm mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@
+
+# Multi-byte Korean and Chinese maps built from glibc charmaps.  The sed
+# address restricts each map to charmap lines whose first byte starts
+# with the listed hex digits.
+CP949-2BYTE.map: ${GLIBC_CHARMAPS}/CP949 mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@
+
+GB2312.map: ${GLIBC_CHARMAPS}/GB2312 mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@
+
+GBK.map: ${GLIBC_CHARMAPS}/GBK mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@
+
+# GB180302.map: only charmap lines with exactly two bytes are selected;
+# gb180302.awk is used in place of compact.awk.
+GB180302.map: ${GLIBC_CHARMAPS}/GB18030 mapconv gb180302.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x..\/x..[ ]/' GLIBC-2 gb180302.awk > $@
+
+# GB180304.map: computed by gb180304.awk from GB180302.map alone; mapconv
+# is not involved.
+GB180304.map: GB180302.map gb180304.awk
+ # Generating $@...
+ @$(AWK) -f gb180304.awk < $< > $@
+
+# JISX0201.map: bytes whose first hex digit is 0-9 come from the glibc
+# charmap; a comment line and the range 0xA1-0xDF (starting at 0xFF61)
+# are then appended by the recipe itself.
+JISX0201.map: ${GLIBC_CHARMAPS}/JIS_X0201 mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[0-9]/' GLIBC-1 compact.awk > $@
+ @echo "# Generated by hand" >> $@
+ @echo "0xA1-0xDF 0xFF61" >> $@
+
+# JISX0208.map: no awk script is passed to mapconv; the sed replaces the
+# first 0x2015 on each line with 0x2014.
+JISX0208.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 \
+ | sed 's/0x2015/0x2014/' > $@
+
+# JISX0212.map: selects the EUC-JP lines whose code starts with /x8f and
+# removes that prefix with the substitution given after the address.
+JISX0212.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 compact.awk > $@
+
+# JISX2131.map / JISX2132.map: same scheme as the two rules above, from
+# the EUC-JISX0213 charmap and without an awk script.
+JISX2131.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 \
+ | sed -e 's/0x2015/0x2014/' -e 's/0x2299/0x29BF/' > $@
+
+JISX2132.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 > $@
+
+# JISX213A.map: ten fixed mappings written directly by the recipe; the
+# rule has no prerequisites, so the file is only created when missing.
+JISX213A.map:
+ # Generating $@
+ @(echo "0x2E21 0x4FF1"; \
+ echo "0x2F7E 0x525D"; \
+ echo "0x4F54 0x20B9F"; \
+ echo "0x4F7E 0x541E"; \
+ echo "0x7427 0x5653"; \
+ echo "0x7E7A 0x59F8"; \
+ echo "0x7E7B 0x5C5B"; \
+ echo "0x7E7C 0x5E77"; \
+ echo "0x7E7D 0x7626"; \
+ echo "0x7E7E 0x7E6B") > $@
+
+# CP932-2BYTE.map: cp932.awk (not compact.awk) post-processes the lines
+# selected from cp932.txt.
+CP932-2BYTE.map: ${MISC_CHARMAPS}/cp932.txt mapconv cp932.awk
+ # Generating $@...
+ @mapconv $< '/^0x[89A-F][0-9A-F][0-9A-F]/' UNICODE2 cp932.awk > $@
+
+# cp51932.el: translation table generated from CP932-2BYTE.map.
+cp51932.el: CP932-2BYTE.map cp51932.awk
+ @$(AWK) -f cp51932.awk < CP932-2BYTE.map > $@
+
+# eucjp-ms.el: the three eucJP-*.txt files are concatenated, in the
+# order listed, inside a subshell that has changed to ${MISC_CHARMAPS},
+# and fed to eucjp-ms.awk.
+eucjp-ms.el: ${MISC_CHARMAPS}/eucJP-13th.txt ${MISC_CHARMAPS}/eucJP-udc.txt \
+ ${MISC_CHARMAPS}/eucJP-ibmext.txt eucjp-ms.awk
+ @(cd ${MISC_CHARMAPS}; \
+ cat eucJP-13th.txt eucJP-udc.txt eucJP-ibmext.txt) \
+ | $(AWK) -f eucjp-ms.awk > $@
+
+# JISC6226.map: kuten.awk converts the selected Uni2JIS lines.
+JISC6226.map : ${MISC_CHARMAPS}/Uni2JIS mapconv kuten.awk
+ # Generating $@...
+ @mapconv $< '/^[^#].*0-/' YASUOKA kuten.awk > $@
+
+KSC5601.map: ${GLIBC_CHARMAPS}/EUC-KR mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@
+
+# NOTE(review): compact.awk is listed as a prerequisite of BIG5.map but
+# is not passed to mapconv.  BIG5-1.map and BIG5-2.map pick line ranges
+# out of this file with sed, so the uncompacted output may be
+# intentional -- confirm before changing.
+BIG5.map: ${GLIBC_CHARMAPS}/BIG5 mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2 > $@
+
+# BIG5-1.map: big5.awk re-encodes the BIG5.map lines from the first one
+# matching 0xa140 through the next one matching 0xc8fe.  The awk
+# interpreter is taken from $(AWK), as in the other awk-running recipes,
+# so that overriding AWK affects this rule too.
+BIG5-1.map: BIG5.map mapconv big5.awk
+	# Generating $@...
+	@echo "Generated from $<" > $@
+	@sed -n -e '/0xa140/,/0xc8fe/p' < $< | $(AWK) -f big5.awk >> $@
+
+# BIG5-2.map: big5.awk re-encodes the BIG5.map lines from the first one
+# matching 0xc940 to the end of the file (`$$' is make's escape for the
+# sed address `$').  The awk interpreter is taken from $(AWK), as in the
+# other awk-running recipes, so that overriding AWK affects this rule too.
+BIG5-2.map: BIG5.map mapconv big5.awk
+	# Generating $@...
+	@echo "Generated from $<" > $@
+	@sed -n -e '/0xc940/,$$ p' < $< | $(AWK) -f big5.awk >> $@
+
+# Further two-byte maps taken from glibc charmaps and compacted by
+# compact.awk.
+BIG5-HKSCS.map: ${GLIBC_CHARMAPS}/BIG5-HKSCS mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[89a-f].\//' GLIBC-2 compact.awk > $@
+
+JOHAB.map: ${GLIBC_CHARMAPS}/JOHAB mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@
+
+# CNS-1.map comes from the glibc EUC-TW charmap, unlike CNS-2 to CNS-7.
+CNS-1.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@
+
+# Disabled alternative that would build CNS-1.map from cns2ucsdkw.txt,
+# in the same way as the CNS-2 to CNS-7 rules below.
+# CNS-1.map: ${MISC_CHARMAPS}/cns2ucsdkw.txt mapconv compact.awk
+# # Generating $@...
+# @mapconv $< '/^C1/' KANJI-DATABASE compact.awk > $@
+
+# CNS-2.map to CNS-7.map: each selects the cns2ucsdkw.txt lines that
+# begin with "C" followed by its own digit.
+CNS-2.map: ${MISC_CHARMAPS}/cns2ucsdkw.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^C2/' KANJI-DATABASE compact.awk > $@
+
+CNS-3.map: ${MISC_CHARMAPS}/cns2ucsdkw.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^C3/' KANJI-DATABASE compact.awk > $@
+
+CNS-4.map: ${MISC_CHARMAPS}/cns2ucsdkw.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^C4/' KANJI-DATABASE compact.awk > $@
+
+CNS-5.map: ${MISC_CHARMAPS}/cns2ucsdkw.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^C5/' KANJI-DATABASE compact.awk > $@
+
+CNS-6.map: ${MISC_CHARMAPS}/cns2ucsdkw.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^C6/' KANJI-DATABASE compact.awk > $@
+
+CNS-7.map: ${MISC_CHARMAPS}/cns2ucsdkw.txt mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^C7/' KANJI-DATABASE compact.awk > $@
+
+# CNS-F.map: selects the EUC-TW lines containing /x8e/xaf and removes
+# that prefix with the substitution given after the address.
+CNS-F.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*\/x8e\/xaf/ s,/x8e/xaf,,' GLIBC-2-7 compact.awk > $@
+
+# General target to produce map files for mule charsets.
+# ${OLDEMACS} is run in batch mode, loads mule-charsets.el and is given
+# the target file name as its remaining argument; the recipe itself does
+# not redirect any output into $@.
+MULE-%.map: mule-charsets.el
+ # Generating $@...
+ @${OLDEMACS} -batch -l ./mule-charsets.el $@
+
+# General target to produce map files for ISO-8859, GEORGIAN, and
+# EBCDIC charsets. We can not use the original file name because of
+# file name limit on DOS. "KA" is ISO 639 language code for Georgian.
+# The three rules differ only in how the stem is mapped onto the name
+# of the glibc charmap.
+
+8859-%.map: ${GLIBC_CHARMAPS}/ISO-8859-% mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@
+
+KA-%.map: ${GLIBC_CHARMAPS}/GEORGIAN-% mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@
+
+EBCDIC%.map: ${GLIBC_CHARMAPS}/EBCDIC-% mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@
+
+# General target to produce map files for single-byte charsets.
+# This is the fallback for every map without a more specific rule: the
+# glibc charmap must have the same name as the map, minus ".map".
+
+%.map: ${GLIBC_CHARMAPS}/% mapconv compact.awk
+ # Generating $@...
+ @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@
+
+# install: copy every generated file that is readable here and differs
+# from (or is missing in) the Emacs tree: charset maps go to
+# ../../etc/charsets, translation tables to ../../lisp/international.
+# Files that have not been generated are silently skipped.
+# Declared phony so that a stray file named "install" cannot mask it.
+.PHONY: install
+install:
+	@for f in ${CHARSETS}; do \
+	  if test -r $$f; then \
+	    if ! cmp -s $$f ../../etc/charsets/$$f; then \
+	      echo updating $$f; \
+	      cp $$f ../../etc/charsets; \
+	    fi; \
+	  fi; \
+	done
+	@for f in ${TRANS_TABLE}; do \
+	  if test -r $$f; then \
+	    if ! cmp -s $$f ../../lisp/international/$$f; then \
+	      echo updating $$f; \
+	      cp $$f ../../lisp/international; \
+	    fi; \
+	  fi; \
+	done
+
+# Clear files that are automatically generated.
+# Declared phony so that a stray file named "clean" cannot mask it.
+.PHONY: clean
+clean:
+	rm -f ${CHARSETS} ${TRANS_TABLE}
+
+# arch-tag: 90b3bf30-1fef-45bf-b30c-665c30c22310
diff --git a/admin/charsets/big5.awk b/admin/charsets/big5.awk
new file mode 100644
index 0000000000..e238f7541c
--- /dev/null
+++ b/admin/charsets/big5.awk
@@ -0,0 +1,54 @@
+# Read "CODE UNICODE" lines and print them again with CODE re-encoded by
+# decode_big5; the second field is copied through unchanged.
+
+BEGIN {
+  # Values of the hexadecimal letter digits, in either case.
+  letters = "ABCDEF";
+  for (k = 1; k <= 6; k++)
+    {
+      tohex[substr(letters, k, 1)] = 9 + k;
+      tohex[tolower(substr(letters, k, 1))] = 9 + k;
+    }
+}
+
+# Return the value of STR read as a hexadecimal number.  A character
+# that is neither a decimal digit nor a key of tohex (such as the "x"
+# of a "0x" prefix) counts as 0.
+function decode_hex(str) {
+  value = 0;
+  pos = 0;
+  while (++pos <= length(str))
+    {
+      digit = substr(str, pos, 1);
+      value = value * 16 + ((digit >= "0" && digit <= "9") ? digit - "0" : tohex[digit]);
+    }
+  return value;
+}
+
+# Turn the two-byte code BIG5 into a linear index and re-encode that
+# index as two bytes, each counting 94 values upwards from 33.
+#   157  = (0xFF - 0xA1 + 0x7F - 0x40), the index step per first byte
+#   98   = (0xA1 - (0x7F - 0x40)), the offset of second bytes >= 0x7F
+#   6280 = (0xC9 - 0xA1) * 157, subtracted for first bytes >= 0xC9
+function decode_big5(big5) {
+  lead = int(big5 / 256);
+  trail = big5 % 256;
+  idx = (lead - 161) * 157 + trail - (trail < 127 ? 64 : 98);
+  if (lead >= 201)
+    idx -= 6280;
+  return (int(idx / 94) + 33) * 256 + (idx % 94) + 33;
+}
+
+{
+  printf "0x%04X %s\n", decode_big5(decode_hex($1)), $2;
+}
+
+
+# arch-tag: 36f08d21-0d24-4b67-852d-a9a51299586d
diff --git a/admin/charsets/compact.awk b/admin/charsets/compact.awk
new file mode 100644
index 0000000000..ba756b1ae5
--- /dev/null
+++ b/admin/charsets/compact.awk
@@ -0,0 +1,125 @@
+# compact.awk -- Make charset map compact.
+# Copyright (C) 2003
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Comment:
+# Make a charset map compact by changing this kind of line sequence:
+# 0x00 0x0000
+# 0x01 0x0001
+# ...
+# 0x7F 0x007F
+# to one line of this format:
+# 0x00-0x7F 0x0000
+
+BEGIN {
+  # tohex holds each hex digit's value plus one, so that the lookup of
+  # any other character (which yields 0) can be told apart from "0".
+  tohex["0"] = 1;
+  tohex["1"] = 2;
+  tohex["2"] = 3;
+  tohex["3"] = 4;
+  tohex["4"] = 5;
+  tohex["5"] = 6;
+  tohex["6"] = 7;
+  tohex["7"] = 8;
+  tohex["8"] = 9;
+  tohex["9"] = 10;
+  tohex["A"] = 11;
+  tohex["B"] = 12;
+  tohex["C"] = 13;
+  tohex["D"] = 14;
+  tohex["E"] = 15;
+  tohex["F"] = 16;
+  tohex["a"] = 11;
+  tohex["b"] = 12;
+  tohex["c"] = 13;
+  tohex["d"] = 14;
+  tohex["e"] = 15;
+  tohex["f"] = 16;
+  # The pending run covers from_code..to_code and starts at from_unicode.
+  # from_code > to_code means that no run has been started yet.
+  from_code = 0;
+  to_code = -1;
+  to_unicode = 0;
+  from_unicode = 0;
+}
+
+# Return the value of the hexadecimal digits of STR that start at
+# position IDX and end before the first non-hexadecimal character.
+function decode_hex(str, idx) {
+  n = 0;
+  len = length(str);
+  for (i = idx; i <= len; i++)
+    {
+      c = tohex[substr (str, i, 1)];
+      if (c == 0)
+        break;
+      n = n * 16 + c - 1;
+    }
+  return n;
+}
+
+# Print the pending run, if there is one.  Codes are printed with two
+# hex digits when the run ends below 0x100, with four otherwise.
+# Nothing is printed while no run has been started; this also keeps END
+# from emitting a bogus line when the input contained no mapping.
+function flush_run() {
+  if (from_code > to_code)
+    return;
+  if (to_code < 256)
+    {
+      if (from_code == to_code)
+        printf "0x%02X 0x%04X\n", from_code, from_unicode;
+      else
+        printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
+    }
+  else
+    {
+      if (from_code == to_code)
+        printf "0x%04X 0x%04X\n", from_code, from_unicode;
+      else
+        printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
+    }
+}
+
+# Comment lines are copied through unchanged.
+/^#/ {
+  print;
+  next;
+}
+
+# Mapping lines: the digits after the "0x" prefix of both fields are
+# decoded.  A line that continues the pending run in both columns
+# extends it; any other line flushes the run and starts a new one.
+{
+  code = decode_hex($1, 3);
+  unicode = decode_hex($2, 3);
+  if ((code == to_code + 1) && (unicode == to_unicode + 1))
+    {
+      to_code++;
+      to_unicode++;
+    }
+  else
+    {
+      flush_run();
+      from_code = to_code = code;
+      from_unicode = to_unicode = unicode;
+    }
+}
+
+END {
+  flush_run();
+}
+
+# arch-tag: 7e6f57c3-8e62-4af3-8916-ca67bca3a0ce
diff --git a/admin/charsets/cp51932.awk b/admin/charsets/cp51932.awk
new file mode 100644
index 0000000000..e30f4e29f1
--- /dev/null
+++ b/admin/charsets/cp51932.awk
@@ -0,0 +1,59 @@
+# cp51932.awk -- Generate a translation table for CP51932.
+# Copyright (C) 2004
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Comment:
+
+# Generate a translation table for CP51932 (EUC-JP of MicroSoft Version).
+# It maps invalid JISX0208 code points used by CP51932 to Unicode.
+# 4th field of the input has these meanings:
+# 0: JISX0208 characters.
+# 1: NEC special characters.
+# 2: IBM extension characters.
+# 3: NEC selection of IBM extension characters.
+# Among them, 1 and 3 are the target characters. 2 should have
+# already been mapped to 1 or 3.
+
+# Emit the file header and open the quoted alist that the pattern rule
+# below fills in.
+BEGIN {
+ print ";;; cp51932.el -- translation table for CP51932. -*- no-byte-compile: t -*-";
+ print ";;; Automatically genrated from CP932-2BYTE.map";
+ print "(let ((map";
+ printf " '(;JISEXT<->UNICODE";
+}
+
+# Lines whose sort key is 1 or 3: emit one (FIELD5 . UNICODE) pair, where
+# UNICODE is the four characters after the "0x" of field 2.
+# Assumes the "CODE UNICODE # KEY JIS" layout written by cp932.awk, so
+# that field 5 is the JIS code -- TODO confirm against mapconv's output.
+/# [13]/ {
+ printf "\n (#x%s . #x%s)", $5 ,substr($2, 3, 4);
+}
+
+# Close the alist and emit the Lisp that registers the table: each car
+# is first passed through decode-char for japanese-jisx0208 and the map
+# defined as cp51932-decode, then car and cdr of every pair are swapped
+# and the same list is defined as cp51932-encode.
+END {
+ print ")))";
+ print " (mapc #'(lambda (x)";
+ print " (setcar x (decode-char 'japanese-jisx0208 (car x))))";
+ print " map)";
+ print " (define-translation-table 'cp51932-decode map)";
+ print " (mapc #'(lambda (x)";
+ print " (let ((tmp (car x)))";
+ print " (setcar x (cdr x)) (setcdr x tmp)))";
+ print " map)";
+ print " (define-translation-table 'cp51932-encode map))";
+}
+
+# arch-tag: bbae996b-2d1c-4e85-bb55-ac30146d7504
diff --git a/admin/charsets/cp932.awk b/admin/charsets/cp932.awk
new file mode 100644
index 0000000000..3c1da2d51b
--- /dev/null
+++ b/admin/charsets/cp932.awk
@@ -0,0 +1,118 @@
+# cp932.awk -- Add sort keys and append user defined area to CP932-2BYTE.map.
+# Copyright (C) 2004
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Comment:
+# Add a sort key 0, 1, 2, or 3 at the tail of each line as a comment
+# to realize the round trip mapping to Unicode works as described in
+# this page:
+# http://support.microsoft.com/default.aspx?scid=kb;EN-US;170559
+# Each sort key means as below:
+# 0: JISX0208 characters.
+# 1: NEC special characters.
+# 2: IBM extension characters.
+# 3: NEC selection of IBM extension characters.
+# 4: user defined area
+
+# Values of the upper-case hexadecimal letter digits; decode_hex handles
+# the decimal digits itself.  Lower-case letters are not in the table.
+BEGIN {
+ tohex["A"] = 10;
+ tohex["B"] = 11;
+ tohex["C"] = 12;
+ tohex["D"] = 13;
+ tohex["E"] = 14;
+ tohex["F"] = 15;
+}
+
+# Return the value of STR read as a hexadecimal number.  A character
+# that is neither a decimal digit nor a key of tohex counts as 0.
+function decode_hex(str) {
+ n = 0;
+ len = length(str);
+ for (i = 1; i <= len; i++)
+ {
+ c = substr(str, i, 1);
+ if (c >= "0" && c <= "9")
+ n = n * 16 + (c - "0");
+ else
+ n = n * 16 + tohex[c];
+ }
+ return n;
+}
+
+# Split the two-byte code CODE into s1 (high byte) and s2 (low byte) and
+# compute the bytes j1 and j2 from them.  Returns j1 - 32.
+# Side effect: j1 and j2 are globals, and the /^0x[89E]/ rule below
+# prints them after calling this function.
+function sjis_to_jis_ku(code)
+{
+ s1 = int(code / 256);
+ s2 = code % 256;
+ if (s2 >= 159) # s2 >= 0x9F
+ {
+ if (s1 >= 224) # s1 >= 0xE0
+ j1 = s1 * 2 - 352; # j1 = s1 * 2 - 0x160
+ else
+ j1 = s1 * 2 - 224; # j1 = s1 * 2 - 0xE0
+ j2 = s2 - 126 # j2 = s2 - #x7E
+ }
+ else
+ {
+ if (s1 >= 224)
+ j1 = s1 * 2 - 353; # j1 = s1 * 2 - 0x161
+ else
+ j1 = s1 * 2 - 225; # j1 = s1 * 2 - 0xE1
+ if (s2 >= 127) # s2 >= #x7F
+ j2 = s2 - 32;
+ else
+ j2 = s2 - 31;
+ }
+ return j1 - 32;
+}
+
+# Codes starting with 0x8, 0x9 or 0xE: append the sort key chosen from
+# the value returned by sjis_to_jis_ku (13 -> 1, 89..92 -> 3, anything
+# else -> 0), followed by j1 and j2 as four hex digits.
+/^0x[89E]/ {
+ sjis=decode_hex(substr($1, 3, 4))
+ ku=sjis_to_jis_ku(sjis);
+ if (ku == 13)
+ printf "%s # 1 %02X%02X\n", $0, j1, j2;
+ else if (ku >= 89 && ku <= 92)
+ printf "%s # 3 %02X%02X\n", $0, j1, j2;
+ else
+ printf "%s # 0 %02X%02X\n", $0, j1, j2;
+ next;
+}
+
+# Codes starting with 0xF get sort key 2 and no converted code.
+/^0xF/ {
+ printf "%s # 2\n", $0;
+ next;
+}
+
+# Every other line is copied through unchanged.
+{
+ print;
+}
+
+# Append the user defined area (sort key 4): for each first byte 240
+# to 249 (0xF0-0xF9), the second bytes 64-126 and 128-252 are mapped to
+# consecutive values starting at 57344 (0xE000).  The third loop has no
+# initializer: it continues from the j = 159 left by the second loop.
+END {
+ code = 57344; # 0xE000
+ for (i = 240; i < 250; i++)
+ {
+ for (j = 64; j <= 126; j++)
+ printf "0x%02X%02X 0x%04X # 4\n", i, j, code++;
+ for (j = 128; j <= 158; j++)
+ printf "0x%02X%02X 0x%04X # 4\n", i, j, code++;
+ for (; j <= 252; j++)
+ printf "0x%02X%02X 0x%04X # 4\n", i, j, code++;
+ }
+}
+
+# arch-tag: 998dc444-759d-43ef-87e3-2ab205011394
diff --git a/admin/charsets/eucjp-ms.awk b/admin/charsets/eucjp-ms.awk
new file mode 100644
index 0000000000..051e388e7f
--- /dev/null
+++ b/admin/charsets/eucjp-ms.awk
@@ -0,0 +1,85 @@
+# eucjp-ms.awk -- Generate a translation table for eucJP-ms.
+# Copyright (C) 2004
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Comment:
+
+# eucJP-ms is one of eucJP-open encoding defined at this page:
+# http://www.opengroup.or.jp/jvc/cde/appendix.html
+
+# Emit the file header, open the quoted list that the pattern rules
+# below fill in, and set up the values of the upper-case hexadecimal
+# letter digits used by decode_hex.
+BEGIN {
+ print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-";
+ print ";;; Automatically genrated from eucJP-13th.txt, eucJP-udc.txt, eucJP-ibmext.txt";
+ print "(let ((map";
+ printf " '(;JISEXT<->UNICODE";
+
+ tohex["A"] = 10;
+ tohex["B"] = 11;
+ tohex["C"] = 12;
+ tohex["D"] = 13;
+ tohex["E"] = 14;
+ tohex["F"] = 15;
+}
+
+# Return the value of STR read as a hexadecimal number.  A character
+# that is neither a decimal digit nor a key of tohex counts as 0.
+function decode_hex(str) {
+ n = 0;
+ len = length(str);
+ for (i = 1; i <= len; i++)
+ {
+ c = substr(str, i, 1);
+ if (c >= "0" && c <= "9")
+ n = n * 16 + (c - "0");
+ else
+ n = n * 16 + tohex[c];
+ }
+ return n;
+}
+
+# Lines containing "0x8F": the four hex digits after a six-character
+# prefix of field 1 are decoded and reduced by 0x8080.  The entry is
+# written as a two-element list (no dot), which is how the Lisp emitted
+# in END tells these entries from the dotted pairs of the next rule.
+# NOTE(review): the pattern is not anchored, so it would also match a
+# line that has "0x8F" in its second field -- confirm the input files
+# never contain such a line.
+/0x8F/ {
+ code = decode_hex(substr($1, 5, 4));
+ code -= 32896; # code -= 0x8080
+ printf "\n (#x%04x #x%s)", code, substr($2, 3, 4);
+ next;
+}
+
+# Remaining lines containing "0x" followed by A-F: the four hex digits
+# after the "0x" of field 1 are decoded, reduced by 0x8080 and written
+# as a dotted pair with the Unicode value.
+/0x[A-F]/ {
+ code = decode_hex(substr($1, 3, 4));
+ code -= 32896; # code -= 0x8080
+ printf "\n (#x%04x . #x%s)", code, substr($2, 3, 4);
+}
+
+# Close the list and emit the Lisp that registers the table.  Entries
+# whose cdr is an integer are decoded as japanese-jisx0208, the others
+# as japanese-jisx0212 with their cdr replaced by its first element.
+# The map is defined as eucjp-ms-decode, then car and cdr of every
+# entry are swapped and the same list is defined as eucjp-ms-encode.
+END {
+ print ")))";
+ print " (mapc #'(lambda (x)";
+ print " (if (integerp (cdr x))";
+ print " (setcar x (decode-char 'japanese-jisx0208 (car x)))";
+ print " (setcar x (decode-char 'japanese-jisx0212 (car x)))";
+ print " (setcdr x (cadr x))))";
+ print " map)";
+ print " (define-translation-table 'eucjp-ms-decode map)";
+ print " (mapc #'(lambda (x)";
+ print " (let ((tmp (car x)))";
+ print " (setcar x (cdr x)) (setcdr x tmp)))";
+ print " map)";
+ print " (define-translation-table 'eucjp-ms-encode map))";
+}
+
+# arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c
diff --git a/admin/charsets/gb180302.awk b/admin/charsets/gb180302.awk
new file mode 100644
index 0000000000..5eaf587763
--- /dev/null
+++ b/admin/charsets/gb180302.awk
@@ -0,0 +1,82 @@
+# Compact a map of two-byte codes.  Input lines are "0xGGGG 0xUUUU";
+# consecutive lines whose code index (see gb_to_index) and Unicode value
+# both grow by one are merged into a single "0xFROM-0xTO 0xUUUU" line.
+# Lines starting with "#" are copied through unchanged.
+
+BEGIN {
+  # Values of the hexadecimal letter digits, in either case.
+  tohex["A"] = 10;
+  tohex["B"] = 11;
+  tohex["C"] = 12;
+  tohex["D"] = 13;
+  tohex["E"] = 14;
+  tohex["F"] = 15;
+  tohex["a"] = 10;
+  tohex["b"] = 11;
+  tohex["c"] = 12;
+  tohex["d"] = 13;
+  tohex["e"] = 14;
+  tohex["f"] = 15;
+  # The pending run covers the indices from_gb..to_gb and starts at
+  # from_unicode.  from_gb > to_gb means that no run has been started.
+  from_gb = 0;
+  to_gb = -1;
+  to_unicode = 0;
+  from_unicode = 0;
+}
+
+# Return the value of STR read as a hexadecimal number.  A character
+# that is neither a decimal digit nor a key of tohex counts as 0.
+function decode_hex(str) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++)
+    {
+      c = substr (str, i, 1);
+      if (c >= "0" && c <= "9")
+        n = n * 16 + (c - "0");
+      else
+        n = n * 16 + tohex[c];
+    }
+  return n;
+}
+
+# Return the linear index of the two-byte code GB: 191 slots per first
+# byte counted from 129 (0x81), second bytes counted from 64 (0x40).
+# No second-byte value is skipped, so 0x7F occupies a slot as well.
+function gb_to_index(gb) {
+  b0 = int(gb / 256);
+  b1 = gb % 256;
+  idx = (((b0 - 129)) * 191 + b1 - 64);
+  return idx
+}
+
+# Inverse of gb_to_index: return the two-byte code for index IDX.
+function index_to_gb(idx) {
+  b0 = int(idx / 191) + 129;
+  b1 = (idx % 191) + 64;
+  return (b0 * 256 + b1);
+}
+
+/^#/ {
+  print;
+  next;
+}
+
+# Mapping lines: a line that continues the pending run in both columns
+# extends it; any other line flushes the run and starts a new one.
+{
+  gb = gb_to_index(decode_hex(substr($1, 3, 4)));
+  unicode = decode_hex(substr($2, 3, 4));
+  if ((gb == to_gb + 1) && (unicode == to_unicode + 1))
+    {
+      to_gb++;
+      to_unicode++;
+    }
+  else
+    {
+      if (from_gb == to_gb)
+        printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode;
+      else if (from_gb < to_gb)
+        printf "0x%04X-0x%04X 0x%04X\n",
+          index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;
+      from_gb = to_gb = gb;
+      from_unicode = to_unicode = unicode;
+    }
+}
+
+# Flush the last run using the same two output forms as above, so that
+# a final run of a single code is not written as a one-element range.
+# Nothing is printed when no mapping line was read.
+END {
+  if (from_gb == to_gb)
+    printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode;
+  else if (from_gb < to_gb)
+    printf "0x%04X-0x%04X 0x%04X\n",
+      index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;
+}
+
+# arch-tag: d7dbad89-a512-41a4-8ee0-ba1a4505b8c1
diff --git a/admin/charsets/gb180304.awk b/admin/charsets/gb180304.awk
new file mode 100644
index 0000000000..f3f50db9a8
--- /dev/null
+++ b/admin/charsets/gb180304.awk
@@ -0,0 +1,104 @@
+# Read a compacted two-byte map (single "0xGGGG 0xUUUU" lines and
+# "0xFROM-0xTO 0xUUUU" ranges), record which Unicode values it covers,
+# and print four-byte codes for the values from 128 to 65535 that it
+# does not cover.  Lines starting with "#" are copied through.
+
+BEGIN {
+  # Values of the hexadecimal letter digits, in either case.
+  tohex["A"] = 10;
+  tohex["B"] = 11;
+  tohex["C"] = 12;
+  tohex["D"] = 13;
+  tohex["E"] = 14;
+  tohex["F"] = 15;
+  tohex["a"] = 10;
+  tohex["b"] = 11;
+  tohex["c"] = 12;
+  tohex["d"] = 13;
+  tohex["e"] = 14;
+  tohex["f"] = 15;
+}
+
+# Return the value of STR read as a hexadecimal number.  A character
+# that is neither a decimal digit nor a key of tohex counts as 0.
+function decode_hex(str) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++)
+    {
+      c = substr (str, i, 1);
+      if (c >= "0" && c <= "9")
+        n = n * 16 + (c - "0");
+      else
+        n = n * 16 + tohex[c];
+    }
+  return n;
+}
+
+# Return the linear index of the two-byte code GB: 191 slots per first
+# byte counted from 129 (0x81), second bytes counted from 64 (0x40).
+# Only used to find out how many codes an input range covers.
+function gb_to_index(gb) {
+  b0 = int(gb / 256);
+  b1 = gb % 256;
+  idx = (((b0 - 129)) * 191 + b1 - 64);
+  return idx
+}
+
+# Return the four-byte code for index IDX as eight hex digits.  From the
+# last byte to the first, the bytes take 10 values from 48 (0x30),
+# 126 values from 129 (0x81), 10 values from 48 and the rest from 129.
+function index_to_gb(idx) {
+  b3 = (idx % 10) + 48;
+  idx = int(idx / 10);
+  b2 = (idx % 126) + 129;
+  idx = int(idx / 126);
+  b1 = (idx % 10) + 48;
+  b0 = int(idx / 10) + 129;
+  return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
+}
+
+/^#/ {
+  print;
+  next;
+}
+
+# Range lines: mark one Unicode value per code in the range, starting at
+# the value in field 2.
+/0x....-0x..../ {
+  gb_from = gb_to_index(decode_hex(substr($1, 3, 4)));
+  gb_to = gb_to_index(decode_hex(substr($1, 10, 4)));
+  unicode = decode_hex(substr($2, 3, 4));
+  while (gb_from <= gb_to)
+    {
+      table[unicode++] = 1;
+      gb_from++;
+    }
+  next;
+}
+
+# Single-code lines: only the Unicode value matters here, so the code in
+# field 1 is not decoded.
+{
+  unicode = decode_hex(substr($2, 3, 4));
+  table[unicode] = 1;
+}
+
+# Walk the Unicode values from 128 upwards.  Every unmarked value outside
+# 55296..57343 (0xD800-0xDFFF) takes the next four-byte index; unmarked
+# values inside that interval take none, but do not end a run either.
+# A run of unmarked values is printed when the next marked value is
+# reached; table[65536] is marked as a sentinel so that the last run is
+# printed too.
+END {
+  from_gb = -1;
+  to_gb = 0;
+  from_i = 0;
+  table[65536] = 1;
+  for (i = 128; i <= 65536; i++)
+    {
+      if (table[i] == 0)
+        {
+          if (i < 55296 || i >= 57344)
+            {
+              if (from_gb < 0)
+                {
+                  from_gb = to_gb;
+                  from_i = i;
+                }
+              to_gb++;
+            }
+        }
+      else if (from_gb >= 0)
+        {
+          if (from_gb + 1 == to_gb)
+            printf "0x%s\t\t0x%04X\n",
+              index_to_gb(from_gb), from_i;
+          else
+            printf "0x%s-0x%s\t0x%04X\n",
+              index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i;
+          from_gb = -1;
+        }
+    }
+}
+
+# arch-tag: 8e5a22ae-610e-411f-ae17-d6e528b30d71
diff --git a/admin/charsets/kuten.awk b/admin/charsets/kuten.awk
new file mode 100644
index 0000000000..9d43f2e0e8
--- /dev/null
+++ b/admin/charsets/kuten.awk
@@ -0,0 +1,7 @@
+# For lines starting with a digit: characters 3-4 and 5-6 of field 1 are
+# read as decimal numbers, 32 is added to each, and the two results are
+# printed as one four-digit hex code followed by field 2.
+/^[0-9]/ {
+  printf "0x%02X%02X %s\n", substr($1, 3, 2) + 32, substr($1, 5, 2) + 32, $2;
+}
+
+# arch-tag: dade6b45-b4c5-42ab-9d49-d6bf23a710b6
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv
new file mode 100755
index 0000000000..641afc037b
--- /dev/null
+++ b/admin/charsets/mapconv
@@ -0,0 +1,143 @@
+#!/bin/sh
+#
+# Copyright (C) 2003
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Comment:
+# Convert charset maps of various formats into this form:
+#   0xXX 0xYYYY
+# where,
+#   XX is a code point of the charset in hexadecimal,
+#   YYYY is the corresponding Unicode character code in hexadecimal.
+# Arguments are:
+#   $1: source map file
+#   $2: address pattern for sed (optionally with substitution command)
+#   $3: format of source map file
+#       GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE UNICODE2 YASUOKA
+#       MICROSOFT KANJI-DATABASE
+#   $4: awk script (optional)
+
+# Base name of the source map file; it is only used to build the
+# provenance string below.  "$1" is quoted so that a path containing
+# blanks is passed to basename as a single argument.
+BASE=`basename "$1"`
+
+# Choose the provenance string shown in the "# Generated from" header
+# according to the format name given in $3.
+case "$3" in
+    GLIBC*)
+        SOURCE="glibc-2.3.2/localedata/charmaps/${BASE}";;
+    CZYBORRA)
+        SOURCE="http://czyborra.com/charsets/${BASE}";;
+    IANA)
+        SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";;
+    UNICODE | UNICODE2)
+        SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";;
+    YASUOKA)
+        SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${BASE}";;
+    MICROSOFT)
+        SOURCE="http://www.microsoft.com/globaldev/reference/oem/${BASE}";;
+    KANJI-DATABASE)
+        SOURCE="data at http://sourceforge.net/cvs/?group_id=26261";;
+    *)
+        # Diagnostics go to stderr: stdout carries the generated map.
+        echo "Unknown file type: $3" >&2
+        exit 1;;
+esac
+
+# First line of the generated map: where the data came from.
+echo "# Generated from $SOURCE"
+
+# Final filter of every pipeline below: the awk script named by $4 when
+# one is given, otherwise a plain pass-through.
+AWKPROG=cat
+if [ -n "$4" ] ; then
+    if [ ! -f "$4" ] ; then
+        # Diagnostics go to stderr: stdout carries the generated map.
+        echo "Awk program does not exist: $4" >&2
+        exit 1
+    fi
+    AWKPROG="gawk -f $4"
+fi
+
+if [ "$3" == "GLIBC-1" ] ; then
+ # Source format is:
+ # <UYYYY> /xXX
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \
+ | sort | ${AWKPROG}
+elif [ "$3" == "GLIBC-2" ] ; then
+ # Source format is:
+ # <UYYYY> /xXX/xZZ
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
+ | sort | ${AWKPROG}
+elif [ "$3" == "GLIBC-2-7" ] ; then
+ # Source format is:
+ # <UYYYY> /xXX/xZZ
+ # We must drop MSBs of XX and ZZ
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \
+ -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \
+ -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
+ | tee temp \
+ | sort | ${AWKPROG}
+elif [ "$3" == "CZYBORRA" ] ; then
+ # Source format is:
+ # =XX U+YYYY
+ zcat $1 | sed -n -e "$2 p" \
+ | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \
+ | sort | ${AWKPROG}
+elif [ "$3" == "IANA" ] ; then
+ # Source format is:
+ # 0xXX 0xYYYY
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \
+ | sort | ${AWKPROG}
+elif [ "$3" == "UNICODE" ] ; then
+ # Source format is:
+ # YYYY XX
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \
+ | sort | ${AWKPROG}
+elif [ "$3" == "UNICODE2" ] ; then
+ # Source format is:
+ # 0xXXXX 0xYYYY # ...
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \
+ | ${AWKPROG} | sort -n -k 4,4
+elif [ "$3" == "YASUOKA" ] ; then
+ # Source format is:
+ # YYYY 0-XXXX (XXXX is a Kuten code)
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \
+ | sort | ${AWKPROG}
+elif [ "$3" == "MICROSOFT" ] ; then
+ # Source format is:
+ # XX = U+YYYY
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \
+ | sort | ${AWKPROG}
+elif [ "$3" == "KANJI-DATABASE" ] ; then
+ # Source format is:
+ # C?-XXXX U+YYYYY .....
+ sed -n -e "$2 p" < $1 \
+ | sed -e 's/...\(....\) U+\([0-9A-F]*\).*/0x\1 0x\2/' \
+ | sort | ${AWKPROG}
+else
+ echo "Invalid arguments"
+ exit 1
+fi
+
+# arch-tag: c33acb47-7eb6-4872-b871-15e1447e8f0e
diff --git a/admin/charsets/mule-charsets.el b/admin/charsets/mule-charsets.el
new file mode 100644
index 0000000000..158121a2cf
--- /dev/null
+++ b/admin/charsets/mule-charsets.el
@@ -0,0 +1,60 @@
+;; mule-charsets.el -- Generate Mule-original charset maps.
+;; Copyright (C) 2003
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H13PRO009
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING. If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;; Refuse to run on an Emacs older than 21.3.50.  The minor version is
+;; only examined when the major version is exactly 21; otherwise any
+;; later major release with a small minor number (e.g. 22.1) would be
+;; rejected as too old.
+(if (or (< emacs-major-version 21)
+        (and (= emacs-major-version 21)
+             (or (< emacs-minor-version 3)
+                 (and (= emacs-minor-version 3)
+                      (string< emacs-version "21.3.50")))))
+    (error "Use Emacs of version 21.3.50 or later"))
+
+(defun func (start end)
+  "Insert one map line for each character from START to END inclusive.
+A character for which `encode-char' yields no `ucs' code is skipped.
+The line is \"0xXXYY 0xUUUU\" when `split-char' returns two code
+values for the character and \"0xXX 0xUUUU\" when it returns one."
+  (let ((ch start)
+        codes ucs)
+    (while (<= ch end)
+      (setq codes (split-char ch)
+            ucs (encode-char ch 'ucs))
+      (cond ((null ucs))                ; no Unicode mapping: emit nothing
+            ((nth 2 codes)
+             (insert (format "0x%02X%02X 0x%04X\n"
+                             (nth 1 codes) (nth 2 codes) ucs)))
+            (t
+             (insert (format "0x%02X 0x%04X\n" (nth 1 codes) ucs))))
+      (setq ch (1+ ch)))))
+
+(defconst charset-alist
+  (list (cons "MULE-ethiopic.map" 'ethiopic)
+        (cons "MULE-ipa.map" 'ipa)
+        (cons "MULE-is13194.map" 'indian-is13194)
+        (cons "MULE-sisheng.map" 'chinese-sisheng)
+        (cons "MULE-tibetan.map" 'tibetan)
+        (cons "MULE-lviscii.map" 'vietnamese-viscii-lower)
+        (cons "MULE-uviscii.map" 'vietnamese-viscii-upper))
+  "Alist of (MAP-FILE-NAME . CHARSET) pairs.
+MAP-FILE-NAME is the name of an output map file and CHARSET is the
+symbol of the charset whose characters are written to that file.")
+
+;; Driver: the first remaining command-line argument names the map file
+;; to generate.  Look up its charset in `charset-alist', collect the
+;; lines produced by `func' in a temporary buffer, and write that
+;; buffer to the file.  FILE and CHARSET are bound with `let*' rather
+;; than assigned with `setq', so no undeclared global variables are
+;; left behind.
+(let* ((file (car command-line-args-left))
+       (charset (and (stringp file)
+                     (cdr (assoc file charset-alist)))))
+  (or (stringp file)
+      (error "Invalid file name: %s" file))
+  (or charset
+      (error "Invalid charset: %s" file))
+  (with-temp-buffer
+    (map-charset-chars 'func charset)
+    (write-file file)))
+
+;;; arch-tag: 515989d7-2e2d-41cc-9163-05ad472fede4