aboutsummaryrefslogtreecommitdiffstats
path: root/lisp/international/mule-util.el
diff options
context:
space:
mode:
authorKarl Heuer <[email protected]>1997-02-20 07:02:49 +0000
committerKarl Heuer <[email protected]>1997-02-20 07:02:49 +0000
commit4ed4686978bd18292e2bb7b87a7b0e0407ecb3b1 (patch)
tree860ad83f81c8c630fe7051e3c5379ca8a9658f69 /lisp/international/mule-util.el
parentadb572fb93ddfee88f9c5e9681434517fd241232 (diff)
Initial revision
Diffstat (limited to 'lisp/international/mule-util.el')
-rw-r--r--lisp/international/mule-util.el419
1 files changed, 419 insertions, 0 deletions
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
new file mode 100644
index 0000000000..27c64aa3a2
--- /dev/null
+++ b/lisp/international/mule-util.el
@@ -0,0 +1,419 @@
+;;; mule-util.el --- Utility functions for mulitilingual environment (mule)
+
+;; Copyright (C) 1995 Free Software Foundation, Inc.
+;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+
+;; Keywords: mule, multilingual
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING. If not, write to
+;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+;;; Code:
+
+;;; String manipulations while paying attention to multibyte
+;;; characters.
+
+;;;###autoload
+(defun string-to-sequence (string type)
+ "Convert STRING to a sequence of TYPE which contains characters in STRING.
+TYPE should be `list' or `vector'.
+Multibyte characters are conserned."
+ (or (eq type 'list) (eq type 'vector)
+ (error "Invalid type: %s" type))
+ (let* ((len (length string))
+ (i 0)
+ l ch)
+ (while (< i len)
+ (setq ch (sref string i))
+ (setq l (cons ch l))
+ (setq i (+ i (char-bytes ch))))
+ (setq l (nreverse l))
+ (if (eq type 'list)
+ l
+ (vconcat l))))
+
+;;;###autoload
+(defsubst string-to-list (string)
+ "Return a list of characters in STRING."
+ (string-to-sequence string 'list))
+
+;;;###autoload
+(defsubst string-to-vector (string)
+ "Return a vector of characters in STRING."
+ (string-to-sequence string 'vector))
+
+;;;###autoload
+(defun store-substring (string idx obj)
+ "Embed OBJ (string or character) at index IDX of STRING."
+ (let* ((str (cond ((stringp obj) obj)
+ ((integerp obj) (char-to-string obj))
+ (t (error
+ "Invalid argument (should be string or character): %s"
+ obj))))
+ (string-len (length string))
+ (len (length str))
+ (i 0))
+ (while (and (< i len) (< idx string-len))
+ (aset string idx (aref str i))
+ (setq idx (1+ idx) i (1+ i)))
+ string))
+
+;;;###autoload
+(defun truncate-string-to-width (str width &optional start-column padding)
+ "Truncate string STR to fit in WIDTH columns.
+Optional 1st arg START-COLUMN if non-nil specifies the starting column.
+Optional 2nd arg PADDING if non-nil, space characters are padded at
+the head and tail of the resulting string to fit in WIDTH if necessary.
+If PADDING is nil, the resulting string may be narrower than WIDTH."
+ (or start-column
+ (setq start-column 0))
+ (let ((len (length str))
+ (idx 0)
+ (column 0)
+ (head-padding "") (tail-padding "")
+ ch last-column last-idx from-idx)
+ (condition-case nil
+ (while (< column start-column)
+ (setq ch (sref str idx)
+ column (+ column (char-width ch))
+ idx (+ idx (char-bytes ch))))
+ (args-out-of-range (setq idx len)))
+ (if (< column start-column)
+ (if padding (make-string width ?\ ) "")
+ (if (and padding (> column start-column))
+ (setq head-padding (make-string (- column start-column) ?\ )))
+ (setq from-idx idx)
+ (condition-case nil
+ (while (< column width)
+ (setq last-column column
+ last-idx idx
+ ch (sref str idx)
+ column (+ column (char-width ch))
+ idx (+ idx (char-bytes ch))))
+ (args-out-of-range (setq idx len)))
+ (if (> column width)
+ (setq column last-column idx last-idx))
+ (if (and padding (< column width))
+ (setq tail-padding (make-string (- width column) ?\ )))
+ (setq str (substring str from-idx idx))
+ (if padding
+ (concat head-padding str tail-padding)
+ str))))
+
+;;; For backward compatiblity ...
+;;;###autoload
+(defalias 'truncate-string 'truncate-string-to-width)
+(make-obsolete 'truncate-string 'truncate-string-to-width)
+
+;;; Nested alist handler. Nested alist is alist whose elements are
+;;; also nested alist.
+
+;;;###autoload
+(defsubst nested-alist-p (obj)
+ "Return t if OBJ is a nesetd alist.
+
+Nested alist is a list of the form (ENTRY . BRANCHES), where ENTRY is
+any Lisp object, and BRANCHES is a list of cons cells of the form
+(KEY-ELEMENT . NESTED-ALIST).
+
+You can use a nested alist to store any Lisp object (ENTRY) for a key
+sequence KEYSEQ, where KEYSEQ is a sequence of KEY-ELEMENT. KEYSEQ
+can be a string, a vector, or a list."
+ (and obj (listp obj) (listp (cdr obj))))
+
+;;;###autoload
+(defun set-nested-alist (keyseq entry alist &optional len branches)
+ "Set ENTRY for KEYSEQ in a nested alist ALIST.
+Optional 4th arg LEN non-nil means the firlst LEN elements in KEYSEQ
+ is considered.
+Optional argument BRANCHES if non-nil is branches for a keyseq
+longer than KEYSEQ.
+See the documentation of `nested-alist-p' for more detail."
+ (or (nested-alist-p alist)
+ (error "Invalid arguement %s" alist))
+ (let ((islist (listp keyseq))
+ (len (or len (length keyseq)))
+ (i 0)
+ key-elt slot)
+ (while (< i len)
+ (if (null (nested-alist-p alist))
+ (error "Keyseq %s is too long for this nested alist" keyseq))
+ (setq key-elt (if islist (nth i keyseq) (aref keyseq i)))
+ (setq slot (assoc key-elt (cdr alist)))
+ (if (null slot)
+ (progn
+ (setq slot (cons key-elt (list t)))
+ (setcdr alist (cons slot (cdr alist)))))
+ (setq alist (cdr slot))
+ (setq i (1+ i)))
+ (setcar alist entry)
+ (if branches
+ (if (cdr alist)
+ (error "Can't set branches for keyseq %s" keyseq)
+ (setcdr alist branches)))))
+
+;;;###autoload
+(defun lookup-nested-alist (keyseq alist &optional len start nil-for-too-long)
+ "Look up key sequence KEYSEQ in nested alist ALIST. Return the definition.
+Optional 1st argument LEN specifies the length of KEYSEQ.
+Optional 2nd argument START specifies index of the starting key.
+The returned value is normally a nested alist of which
+car part is the entry for KEYSEQ.
+If ALIST is not deep enough for KEYSEQ, return number which is
+ how many key elements at the front of KEYSEQ it takes
+ to reach a leaf in ALIST.
+Optional 3rd argument NIL-FOR-TOO-LONG non-nil means return nil
+ even if ALIST is not deep enough."
+ (or (nested-alist-p alist)
+ (error "invalid arguement %s" alist))
+ (or len
+ (setq len (length keyseq)))
+ (let ((i (or start 0)))
+ (if (catch 'lookup-nested-alist-tag
+ (if (listp keyseq)
+ (while (< i len)
+ (if (setq alist (cdr (assoc (nth i keyseq) (cdr alist))))
+ (setq i (1+ i))
+ (throw 'lookup-nested-alist-tag t))))
+ (while (< i len)
+ (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist))))
+ (setq i (1+ i))
+ (throw 'lookup-nested-alist-tag t))))
+ ;; KEYSEQ is too long.
+ (if nil-for-too-long nil i)
+ alist)))
+
+;; Coding system related functions.
+
+;;;###autoload
+(defun set-coding-system-alist (target-type regexp coding-system
+ &optional operation)
+ "Update `coding-system-alist' according to the arguments.
+TARGET-TYPE specifies a type of the target: `file', `process', or `network'.
+ TARGET-TYPE tells which slots of coding-system-alist should be affected.
+ If `file', it affects slots for insert-file-contents and write-region.
+ If `process', it affects slots for call-process, call-process-region, and
+ start-process.
+ If `network', it affects a slot for open-network-process.
+REGEXP is a regular expression matching a target of I/O operation.
+CODING-SYSTEM is a coding system to perform code conversion
+ on the I/O operation, or a cons of coding systems for decoding and
+ encoding respectively, or a function symbol which returns the cons.
+Optional arg OPERATION if non-nil specifies directly one of slots above.
+ The valid value is: insert-file-contents, write-region,
+ call-process, call-process-region, start-process, or open-network-stream.
+If OPERATION is specified, TARGET-TYPE is ignored.
+See the documentation of `coding-system-alist' for more detail."
+ (or (stringp regexp)
+ (error "Invalid regular expression: %s" regexp))
+ (or (memq target-type '(file process network))
+ (error "Invalid target type: %s" target-type))
+ (if (symbolp coding-system)
+ (if (not (fboundp coding-system))
+ (progn
+ (check-coding-system coding-system)
+ (setq coding-system (cons coding-system coding-system))))
+ (check-coding-system (car coding-system))
+ (check-coding-system (cdr coding-system)))
+ (let ((op-list (if operation (list operation)
+ (cond ((eq target-type 'file)
+ '(insert-file-contents write-region))
+ ((eq target-type 'process)
+ '(call-process call-process-region start-process))
+ (t ; i.e. (eq target-type network)
+ '(open-network-stream)))))
+ slot)
+ (while op-list
+ (setq slot (assq (car op-list) coding-system-alist))
+ (if slot
+ (let ((chain (cdr slot)))
+ (if (catch 'tag
+ (while chain
+ (if (string= regexp (car (car chain)))
+ (progn
+ (setcdr (car chain) coding-system)
+ (throw 'tag nil)))
+ (setq chain (cdr chain)))
+ t)
+ (setcdr slot (cons (cons regexp coding-system) (cdr slot)))))
+ (setq coding-system-alist
+ (cons (cons (car op-list) (list (cons regexp coding-system)))
+ coding-system-alist)))
+ (setq op-list (cdr op-list)))))
+
+;;;###autoload
+(defun coding-system-list ()
+ "Return a list of all existing coding systems."
+ (let (l)
+ (mapatoms (lambda (x) (if (get x 'coding-system) (setq l (cons x l)))))
+ l))
+
+
+;;; Composite charcater manipulations.
+
+;;;###autoload
+(defun compose-region (start end)
+ "Compose all characters in the current region into one composite character.
+When called from a program, expects two arguments,
+positions (integers or markers) specifying the region."
+ (interactive "r")
+ (save-excursion
+ (let ((str (buffer-substring start end)))
+ (goto-char start)
+ (delete-region start end)
+ (insert (compose-string str)))))
+
+;;;###autoload
+(defun decompose-region (start end)
+ "Decompose all composite characters in the current region.
+Composite characters are broken up into individual components.
+When called from a program, expects two arguments,
+positions (integers or markers) specifying the region."
+ (interactive "r")
+ (save-restriction
+ (narrow-to-region start end)
+ (goto-char (point-min))
+ (let ((enable-multibyte-characters nil)
+ ;; This matches the whole bytes of single composite character.
+ (re-cmpchar "\200[\240-\377]+")
+ p ch str)
+ (while (re-search-forward re-cmpchar nil t)
+ (setq str (buffer-substring (match-beginning 0) (match-end 0)))
+ (delete-region (match-beginning 0) (match-end 0))
+ (insert (decompose-composite-char (string-to-char str)))))))
+
+;;;###autoload
+(defconst reference-point-alist
+ '((tl . 0) (tc . 1) (tr . 2)
+ (ml . 3) (mc . 4) (mr . 5)
+ (bl . 6) (bc . 7) (br . 8)
+ (top-left . 0) (top-center . 1) (top-right . 2)
+ (mid-left . 3) (mid-center . 4) (mid-right . 5)
+ (bottom-left . 6) (bottom-center . 7) (bottom-right . 8)
+ (0 . 0) (1 . 1) (2 . 2)
+ (3 . 3) (4 . 4) (5 . 5)
+ (6 . 6) (7 . 7) (8 . 8))
+ "Alist of reference point symbols vs reference point codes.
+Meanings of reference point codes are as follows:
+
+ 0----1----2 <-- ascent 0:tl or top-left
+ | | 1:tc or top-center
+ | | 2:tr or top-right
+ | | 3:ml or mid-left
+ | 4 <--+---- center 4:mc or mid-center
+ | | 5:mr or mid-right
+--- 3 5 <-- baseline 6:bl or bottom-left
+ | | 7:bc or bottom-center
+ 6----7----8 <-- descent 8:br or bottom-right
+
+Reference point symbols are to be used to specify composition rule of
+the form \(GLOBAL-REF-POINT . NEW-REF-POINT), where GLOBAL-REF-POINT
+is a reference point in the overall glyphs already composed, and
+NEW-REF-POINT is a reference point in the new glyph to be added.
+
+For instance, if GLOBAL-REF-POINT is 8 and NEW-REF-POINT is 1, the
+overall glyph is updated as follows:
+
+ +-------+--+ <--- new ascent
+ | | |
+ | global| |
+ | glyph | |
+--- | | | <--- baseline (doesn't change)
+ +----+--+--+
+ | | new |
+ | |glyph|
+ +----+-----+ <--- new descent
+")
+
+;; Return a string for char CH to be embedded in multibyte form of
+;; composite character.
+(defun compose-chars-component (ch)
+ (if (< ch 128)
+ (format "\240%c" (+ ch 128))
+ (let ((str (char-to-string ch)))
+ (if (cmpcharp ch)
+ (if (/= (aref str 1) ?\xFF)
+ (error "Char %c can't be composed" ch)
+ (substring str 2))
+ (aset str 0 (+ (aref str 0) ?\x20))
+ str))))
+
+;; Return a string for composition rule RULE to be embedded in
+;; multibyte form of composite character.
+(defsubst compose-chars-rule (rule)
+ (char-to-string (+ ?\xA0
+ (* (cdr (assq (car rule) reference-point-alist)) 9)
+ (cdr (assq (cdr rule) reference-point-alist)))))
+
+;;;###autoload
+(defun compose-chars (first-component &rest args)
+ "Return one char string composed from the arguments.
+Each argument is a character (including a composite chararacter)
+or a composition rule.
+A composition rule has the form \(GLOBAL-REF-POINT . NEW-REF-POINT).
+See the documentation of `reference-point-alist' for more detail."
+ (if (= (length args) 0)
+ (char-to-string first-component)
+ (let* ((with-rule (consp (car args)))
+ (str (if with-rule (concat (vector leading-code-composition ?\xFF))
+ (char-to-string leading-code-composition))))
+ (setq str (concat str (compose-chars-component first-component)))
+ (while args
+ (if with-rule
+ (progn
+ (if (not (consp (car args)))
+ (error "Invalid composition rule: %s" (car args)))
+ (setq str (concat str (compose-chars-rule (car args))
+ (compose-chars-component (car (cdr args))))
+ args (cdr (cdr args))))
+ (setq str (concat str (compose-chars-component (car args)))
+ args (cdr args))))
+ str)))
+
+;;;###autoload
+(defun decompose-composite-char (char &optional type with-composition-rule)
+ "Convert composite character CHAR to a string containing components of CHAR.
+Optional 1st arg TYPE specifies the type of sequence returned.
+It should be `string' (default), `list', or `vector'.
+Optional 2nd arg WITH-COMPOSITION-RULE non-nil means the returned
+sequence contains embedded composition rules if any. In this case, the
+order of elements in the sequence is the same as arguments for
+`compose-chars' to create CHAR.
+If TYPE is omitted or is `string', composition rules are omitted
+even if WITH-COMPOSITION-RULE is t."
+ (or type
+ (setq type 'string))
+ (let* ((len (composite-char-component-count char))
+ (i (1- len))
+ l)
+ (setq with-composition-rule (and with-composition-rule
+ (not (eq type 'string))
+ (composite-char-composition-rule-p char)))
+ (while (> i 0)
+ (setq l (cons (composite-char-component char i) l))
+ (if with-composition-rule
+ (let ((rule (- (composite-char-composition-rule char i) ?\xA0)))
+ (setq l (cons (cons (/ rule 9) (% rule 9)) l))))
+ (setq i (1- i)))
+ (setq l (cons (composite-char-component char 0) l))
+ (cond ((eq type 'string)
+ (apply 'concat-chars l))
+ ((eq type 'list)
+ l)
+ (t ; i.e. TYPE is vector
+ (vconcat l)))))
+
+;;; mule-util.el ends here