aboutsummaryrefslogtreecommitdiffstats
path: root/lisp/international
diff options
context:
space:
mode:
authorEli Zaretskii <[email protected]>2002-02-22 13:44:21 +0000
committerEli Zaretskii <[email protected]>2002-02-22 13:44:21 +0000
commit835cbadb3fa1792c6fb9716b502f7502d018f57b (patch)
tree978743f3f71b1c1b1acef71763478cd4f1f691da /lisp/international
parent23e16093ecd087847f494aaebbf58816415eed06 (diff)
(non-standard-icccm-encodings-alist, non-standard-designations-alist): New
variables. (ctext-post-read-conversion, ctext-pre-write-conversion): New functions.
Diffstat (limited to 'lisp/international')
-rw-r--r--lisp/international/mule.el155
1 files changed, 155 insertions, 0 deletions
diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 124761c0d2..9bb9c4bf5c 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -1284,6 +1284,161 @@ ARG is a list of coding categories ordered by priority."
(setq coding-category-list (append arg current-list))
(set-coding-priority-internal)))
+;;; X selections
+
+(defvar non-standard-icccm-encodings-alist
+ '(("ISO8859-15" . latin-iso8859-15)
+ ("ISO8859-14" . latin-iso8859-14)
+ ("KOI8-R" . koi8-r)
+ ("BIG5-0" . big5))
+ "Alist of font charset names defined by XLFD, and the corresponding Emacs
+charsets or coding systems.")
+
+;; Functions to support "Non-Standard Character Set Encodings" defined
+;; by the ICCCM spec. We support that by converting the leading
+;; sequence of the ``extended segment'' to the corresponding ISO-2022
+;; sequences (if the leading sequence names an Emacs charset), or decode
+;; the segment (if it names a coding system). Encoding does the reverse.
+(defun ctext-post-read-conversion (len)
+ "Decode LEN characters encoded as Compound Text with Extended Segments."
+ (buffer-disable-undo) ; minimize consing due to insertions and deletions
+ (narrow-to-region (point) (+ (point) len))
+ (save-match-data
+ (let ((pt (point-marker))
+ (oldpt (point-marker))
+ (newpt (make-marker))
+ (modified-p (buffer-modified-p))
+ (case-fold-search nil)
+ last-coding-system-used
+ encoding textlen chset)
+ (while (re-search-forward
+ "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
+ nil 'move)
+ (set-marker newpt (point))
+ (set-marker pt (match-beginning 0))
+ (setq encoding (match-string 3))
+ (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
+ (- (aref (match-string 2) 1) 128))
+ (1+ (length encoding))))
+ (setq
+ chset (cdr (assoc-ignore-case encoding
+ non-standard-icccm-encodings-alist)))
+ (cond ((null chset)
+ ;; This charset is not supported--leave this extended
+ ;; segment unaltered and skip over it.
+ (goto-char (+ (point) textlen)))
+ ((charsetp chset)
+ ;; If it's a charset, replace the leading escape sequence
+ ;; with a standard ISO-2022 sequence. We will decode all
+ ;; such segments later, in one go, when we exit the loop
+ ;; or find an extended segment that names a coding
+ ;; system, not a charset.
+ (replace-match
+ (concat "\\1"
+ (if (= 0 (charset-iso-graphic-plane chset))
+ ;; GL charsets
+ (if (= 1 (charset-dimension chset)) "(" "$(")
+ ;; GR charsets
+ (if (= 96 (charset-chars chset))
+ "-"
+ (if (= 1 (charset-dimension chset)) ")" "$)")))
+ (string (charset-iso-final-char chset)))
+ t)
+ (goto-char (+ (point) textlen)))
+ ((coding-system-p chset)
+ ;; If it's a coding system, we need to decode the segment
+ ;; right away. But first, decode what we've skipped
+ ;; across until now.
+ (when (> pt oldpt)
+ (decode-coding-region oldpt pt 'ctext-no-compositions))
+ (delete-region pt newpt)
+ (set-marker newpt (+ newpt textlen))
+ (decode-coding-region pt newpt chset)
+ (goto-char newpt)
+ (set-marker oldpt newpt))))
+ ;; Decode what's left.
+ (when (> (point) oldpt)
+ (decode-coding-region oldpt (point) 'ctext-no-compositions))
+ ;; This buffer started as unibyte, because the string we get from
+ ;; the X selection is a unibyte string. We must now make it
+ ;; multibyte, so that the decoded text is inserted as multibyte
+ ;; into its buffer.
+ (set-buffer-multibyte t)
+ (set-buffer-modified-p modified-p)
+ (- (point-max) (point-min)))))
+
+(defvar non-standard-designations-alist
+ '(("$(0" . (big5 "big5-0" 2))
+ ("$(1" . (big5 "big5-0" 2))
+ ("-V" . (t "iso8859-10" 1))
+ ("-Y" . (t "iso8859-13" 1))
+ ("-_" . (t "iso8859-14" 1))
+ ("-b" . (t "iso8859-15" 1))
+ ("-f" . (t "iso8859-16" 1)))
+ "Alist of ctext control sequences that introduce character sets which
+are not in the list of approved ICCCM encodings, and the corresponding
+coding system, identifier string, and number of octets per encoded
+character.
+
+Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ
+is the control sequence (sans the leading ESC) that introduces the character
+set in the text encoded by compound-text. ENCODING is a coding system
+symbol; if it is t, it means that the ctext coding system already encodes
+the text correctly, and only the leading control sequence needs to be altered.
+If ENCODING is a coding system, we need to re-encode the text with that
+coding system. CHARSET is the ICCCM name of the charset we need to put into
+the leading control sequence. NOCTETS is the number of octets (bytes) that
+encode each character in this charset. NOCTETS can be 0 (meaning the number
+of octets per character is variable), 1, 2, 3, or 4.")
+
+(defun ctext-pre-write-conversion (from to)
+ "Encode characters between FROM and TO as Compound Text w/Extended Segments."
+ (buffer-disable-undo) ; minimize consing due to insertions and deletions
+ (narrow-to-region from to)
+ (encode-coding-region from to 'ctext-no-compositions)
+ ;; Replace ISO-2022 charset designations with extended segments, for
+ ;; those charsets that are not part of the official X registry.
+ (save-match-data
+ (goto-char (point-min))
+ (let ((newpt (make-marker))
+ (case-fold-search nil)
+ pt desig encode-info encoding chset noctets textlen)
+ (set-buffer-multibyte nil)
+ (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
+ (setq desig (match-string 1)
+ pt (point-marker)
+ encode-info (cdr (assoc desig non-standard-designations-alist))
+ encoding (car encode-info)
+ chset (cadr encode-info)
+ noctets (car (cddr encode-info)))
+ (skip-chars-forward "^\e")
+ (set-marker newpt (point))
+ (cond
+ ((eq encoding t) ; only the leading sequence needs to be changed
+ (setq textlen (+ (- newpt pt) (length chset) 1))
+ (replace-match (format "\e%%/%d%c%c%s"
+ noctets
+ (+ (/ textlen 128) 128)
+ (+ (% textlen 128) 128)
+ chset)
+ t t))
+ ((coding-system-p encoding) ; need to recode the entire segment...
+ (set-marker pt (match-beginning 0))
+ (decode-coding-region pt newpt 'ctext-no-compositions)
+ (set-buffer-multibyte t)
+ (encode-coding-region pt newpt encoding)
+ (set-buffer-multibyte nil)
+ (setq textlen (+ (- newpt pt) (length chset) 1))
+ (goto-char pt)
+ (insert (format "\e%%/%d%c%c%s"
+ noctets
+ (+ (/ textlen 128) 128)
+ (+ (% textlen 128) 128)
+ chset))))
+ (goto-char newpt))))
+ (set-buffer-multibyte t)
+ nil)
+
;;; FILE I/O
(defcustom auto-coding-alist