aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa <[email protected]>2002-07-29 05:05:19 +0000
committerKenichi Handa <[email protected]>2002-07-29 05:05:19 +0000
commit5c88a01e1e8a7f7fefda2ee3c1e16e0782fa02e5 (patch)
tree1c60f16b2c0a35241cfa96f2e1cc2c7ce8acfd98
parent930ca8e8456912a10ab89c400bdfecb32be64a22 (diff)
(ctext-post-read-conversion): Add support for emboded utf-8 encodng
(ESC % G ... ESC % @).
-rw-r--r--lisp/international/mule.el124
1 files changed, 72 insertions, 52 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 346133053c..fc5b10bcb9 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -429,7 +429,8 @@ code-point in CCS. Currently not supported and just ignored."
"Return the coding type of CODING-SYSTEM.
A coding type is an integer value indicating the encoding method
of CODING-SYSTEM. See the function `make-coding-system' for more detail."
- (aref (coding-system-spec coding-system) coding-spec-type-idx))
+ (let ((spec (coding-system-spec coding-system)))
+ (if spec (aref spec coding-spec-type-idx))))
(defun coding-system-mnemonic (coding-system)
"Return the mnemonic character of CODING-SYSTEM.
@@ -440,18 +441,21 @@ to indicate the coding system. If the arg is nil, return ?-."
(defun coding-system-doc-string (coding-system)
"Return the documentation string for CODING-SYSTEM."
- (aref (coding-system-spec coding-system) coding-spec-doc-string-idx))
+ (let ((spec (coding-system-spec coding-system)))
+ (if spec (aref spec coding-spec-doc-string-idx))))
(defun coding-system-plist (coding-system)
"Return the property list of CODING-SYSTEM."
- (aref (coding-system-spec coding-system) coding-spec-plist-idx))
+ (let ((spec (coding-system-spec coding-system)))
+ (if spec (aref spec coding-spec-plist-idx))))
(defun coding-system-flags (coding-system)
"Return `flags' of CODING-SYSTEM.
A `flags' of a coding system is a vector of length 32 indicating detailed
information of a coding system. See the function `make-coding-system'
for more detail."
- (aref (coding-system-spec coding-system) coding-spec-flags-idx))
+ (let ((spec (coding-system-spec coding-system)))
+ (if spec (aref spec coding-spec-flags-idx))))
(defun coding-system-get (coding-system prop)
"Extract a value from CODING-SYSTEM's property list for property PROP."
@@ -462,8 +466,8 @@ for more detail."
(let ((plist (coding-system-plist coding-system)))
(if plist
(plist-put plist prop val)
- (aset (coding-system-spec coding-system) coding-spec-plist-idx
- (list prop val)))))
+ (let ((spec (coding-system-spec coding-system)))
+ (if spec (aset spec coding-spec-plist-idx (list prop val)))))))
(defun coding-system-category (coding-system)
"Return the coding category of CODING-SYSTEM.
@@ -1307,10 +1311,15 @@ ARG is a list of coding categories ordered by priority."
charsets or coding systems.")
;; Functions to support "Non-Standard Character Set Encodings" defined
-;; by the ICCCM spec. We support that by converting the leading
-;; sequence of the ``extended segment'' to the corresponding ISO-2022
-;; sequences (if the leading sequence names an Emacs charset), or decode
-;; the segment (if it names a coding system). Encoding does the reverse.
+;; by the COMPOUND-TEXT spec.
+;; We support that by converting the leading sequence of the
+;; ``extended segment'' to the corresponding ISO-2022 sequences (if
+;; the leading sequence names an Emacs charset), or decode the segment
+;; (if it names a coding system). Encoding does the reverse.
+;; This function also supports "The UTF-8 encoding" described in the
+;; section 7 of the documentation fo COMPOUND-TEXT distributed with
+;; XFree86.
+
(defun ctext-post-read-conversion (len)
"Decode LEN characters encoded as Compound Text with Extended Segments."
(buffer-disable-undo) ; minimize consing due to insertions and deletions
@@ -1324,54 +1333,65 @@ charsets or coding systems.")
last-coding-system-used
encoding textlen chset)
(while (re-search-forward
- "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
+ "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\|\e%G[^\e]+\e%@"
nil 'move)
(set-marker newpt (point))
(set-marker pt (match-beginning 0))
- (setq encoding (match-string 3))
- (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
- (- (aref (match-string 2) 1) 128))
- (1+ (length encoding))))
- (setq
- chset (cdr (assoc-ignore-case encoding
- non-standard-icccm-encodings-alist)))
- (cond ((null chset)
- ;; This charset is not supported--leave this extended
- ;; segment unaltered and skip over it.
- (goto-char (+ (point) textlen)))
- ((charsetp chset)
- ;; If it's a charset, replace the leading escape sequence
- ;; with a standard ISO-2022 sequence. We will decode all
- ;; such segments later, in one go, when we exit the loop
- ;; or find an extended segment that names a coding
- ;; system, not a charset.
- (replace-match
- (concat "\\1"
- (if (= 0 (charset-iso-graphic-plane chset))
- ;; GL charsets
- (if (= 1 (charset-dimension chset)) "(" "$(")
- ;; GR charsets
- (if (= 96 (charset-chars chset))
- "-"
- (if (= 1 (charset-dimension chset)) ")" "$)")))
- (string (charset-iso-final-char chset)))
- t)
- (goto-char (+ (point) textlen)))
- ((coding-system-p chset)
- ;; If it's a coding system, we need to decode the segment
- ;; right away. But first, decode what we've skipped
- ;; across until now.
- (when (> pt oldpt)
- (decode-coding-region oldpt pt 'ctext-no-compositions))
- (delete-region pt newpt)
- (set-marker newpt (+ newpt textlen))
- (decode-coding-region pt newpt chset)
- (goto-char newpt)
- (set-marker oldpt newpt))))
+ (if (= (preceding-char) ?@)
+ ;; We found embedded utf-8 sequence.
+ (progn
+ (delete-char -3) ; delete ESC % @ at the tail
+ (goto-char pt)
+ (delete-char 3) ; delete ESC % G at the head
+ (if (> pt oldpt)
+ (decode-coding-region oldpt pt 'ctext-no-compositions))
+ (decode-coding-region pt newpt 'mule-utf-8)
+ (goto-char newpt)
+ (set-marker oldpt newpt))
+ (setq encoding (match-string 3))
+ (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
+ (- (aref (match-string 2) 1) 128))
+ (1+ (length encoding))))
+ (setq
+ chset (cdr (assoc-ignore-case encoding
+ non-standard-icccm-encodings-alist)))
+ (cond ((null chset)
+ ;; This charset is not supported--leave this extended
+ ;; segment unaltered and skip over it.
+ (goto-char (+ (point) textlen)))
+ ((charsetp chset)
+ ;; If it's a charset, replace the leading escape sequence
+ ;; with a standard ISO-2022 sequence. We will decode all
+ ;; such segments later, in one go, when we exit the loop
+ ;; or find an extended segment that names a coding
+ ;; system, not a charset.
+ (replace-match
+ (concat "\\1"
+ (if (= 0 (charset-iso-graphic-plane chset))
+ ;; GL charsets
+ (if (= 1 (charset-dimension chset)) "(" "$(")
+ ;; GR charsets
+ (if (= 96 (charset-chars chset))
+ "-"
+ (if (= 1 (charset-dimension chset)) ")" "$)")))
+ (string (charset-iso-final-char chset)))
+ t)
+ (goto-char (+ (point) textlen)))
+ ((coding-system-p chset)
+ ;; If it's a coding system, we need to decode the segment
+ ;; right away. But first, decode what we've skipped
+ ;; across until now.
+ (when (> pt oldpt)
+ (decode-coding-region oldpt pt 'ctext-no-compositions))
+ (delete-region pt newpt)
+ (set-marker newpt (+ newpt textlen))
+ (decode-coding-region pt newpt chset)
+ (goto-char newpt)
+ (set-marker oldpt newpt)))))
;; Decode what's left.
(when (> (point) oldpt)
(decode-coding-region oldpt (point) 'ctext-no-compositions))
- ;; This buffer started as unibyte, because the string we get from
+ ;; This buffer started as unibyte, because the string we get from
;; the X selection is a unibyte string. We must now make it
;; multibyte, so that the decoded text is inserted as multibyte
;; into its buffer.