From 3ccf95cb1a0138e00140490b72678c0d69c00ab4 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Wed, 27 Oct 2004 12:50:22 +0000 Subject: (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more Unicode ranges. --- lisp/ChangeLog | 2 ++ lisp/international/utf-8.el | 57 ++++++++++++++++++++++++++++----------------- 2 files changed, 37 insertions(+), 22 deletions(-) (limited to 'lisp') diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 8c5a2e47ae..3da657862a 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -15,6 +15,8 @@ (utf-translate-cjk-unicode-range): Make it customizable. (utf-8-post-read-conversion): Use utf-translate-cjk-unicode-range-string. + (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more + Unicode ranges. 2004-10-26 Daniel Pfeiffer diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el index 02d7733e2d..d5da20d111 100644 --- a/lisp/international/utf-8.el +++ b/lisp/international/utf-8.el @@ -258,8 +258,11 @@ The value nil means that the tables are not yet loaded.") (funcall decode-char-no-trans (car x)) (funcall decode-char-no-trans (cdr x)))) ranges ""))) - ;; This forces loading tables for utf-translate-cjk-mode. - (setq utf-translate-cjk-lang-env nil)) + ;; These forces loading and settting tables for + ;; utf-translate-cjk-mode. + (setq utf-translate-cjk-lang-env nil + ucs-mule-cjk-to-unicode (make-hash-table :test 'eq) + ucs-unicode-to-mule-cjk (make-hash-table :test 'eq))) (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3) (#xff00 . #xffef)) @@ -492,25 +495,32 @@ by the above coding systems, you can customize the user option ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx ((r1 &= #x3F) (r1 |= ((r0 & #x1F) << 6)) - ;; Now r2 holds scalar value. We don't have to check + ;; Now r1 holds scalar value. We don't have to check ;; `overlong sequence' because r0 >= 0xC2. (if (r1 >= 256) ;; mule-unicode-0100-24ff (< 0800) - ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) - (r1 -= #x0100) - (r2 = (((r1 / 96) + 32) << 7)) - (r1 %= 96) - (r1 += (r2 + 32)) - (translate-character - utf-translation-table-for-decode r0 r1) + ((r0 = r1) + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) + (r1 -= #x0100) + (r2 = (((r1 / 96) + 32) << 7)) + (r1 %= 96) + (r1 += (r2 + 32)) + (translate-character + utf-translation-table-for-decode r0 r1))) (write-multibyte-character r0 r1) (read r0) (repeat)) (if (r1 >= 160) ;; latin-iso8859-1 - ((r1 -= 128) - (write-multibyte-character r6 r1) + ((r0 = r1) + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r1 -= 128) + (write-multibyte-character r6 r1)) + ((write-multibyte-character r0 r1))) (read r0) (repeat)) ;; eight-bit-control @@ -549,13 +559,16 @@ by the above coding systems, you can customize the user option (if (r3 < #x2500) ;; mule-unicode-0100-24ff (>= 0800) - ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) - (r3 -= #x0100) - (r3 //= 96) - (r1 = (r7 + 32)) - (r1 += ((r3 + 32) << 7)) - (translate-character - utf-translation-table-for-decode r0 r1) + ((r0 = r3) + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) + (r3 -= #x0100) + (r3 //= 96) + (r1 = (r7 + 32)) + (r1 += ((r3 + 32) << 7)) + (translate-character + utf-translation-table-for-decode r0 r1))) (write-multibyte-character r0 r1) (read r0) (repeat))) @@ -949,9 +962,9 @@ Also compose particular scripts if `utf-8-compose-scripts' is non-nil." (unless (eobp) (utf-translate-cjk-load-tables) (setq range - (concat range utf-translate-cjk-unicode-range-string)))) - (setq hash-table (get 'utf-subst-table-for-decode - 'translation-hash-table))) + (concat range utf-translate-cjk-unicode-range-string))) + (setq hash-table (get 'utf-subst-table-for-decode + 'translation-hash-table)))) (while (and (skip-chars-forward range) (not (eobp))) (setq ch (following-char)) -- cgit v1.2.3