aboutsummaryrefslogtreecommitdiffstats
path: root/lisp/language/tibet-util.el
blob: e26f3779382b96521b4917dd3f68b7a4f81d976f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
;;; tibet-util.el --- Support for inputting Tibetan characters

;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
;; Licensed to the Free Software Foundation.

;; Keywords: multilingual, Tibetan

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;; Author: Toru TOMABECHI, <[email protected]>

;; Created: Feb. 17. 1997

;; History:
;; 1997.03.13 Modification in treatment of text properties;
;;            Support for some special signs and punctuations.

;;; Code:

;;;###autoload
(defun setup-tibetan-environment ()
  (interactive)
  (set-language-environment "Tibetan"))

;;; This function makes a transcription string for
;;; re-composing a character.

;;;###autoload
(defun tibetan-tibetan-to-transcription (ch)
  "Return a transcription string of Tibetan character CH"
  (let ((char ch)
	(l (append tibetan-consonant-transcription-alist
		   tibetan-vowel-transcription-alist
		   tibetan-precomposed-transcription-alist
		   tibetan-subjoined-transcription-alist)) 
	decomp-l t-char trans str result)
    (if (eq (char-charset char) 'composition)
	(setq decomp-l (decompose-composite-char char 'list nil))
      (setq decomp-l (cons char nil)))
    (setq str "")
    (while decomp-l
      (setq t-char (char-to-string (car decomp-l)))
      (setq trans (car (rassoc t-char l)))
      (setq str (concat str trans))
      (setq decomp-l (cdr decomp-l)))
    (setq result str)))

;;; This function translates transcription string into a string of
;;; Tibetan characters.

;;;###autoload
(defun tibetan-transcription-to-tibetan (transcription)
  "Translate Roman transcription into a sequence of Tibetan components."
  (let ((trans transcription)
	(lp tibetan-precomposed-transcription-alist)
	(l (append tibetan-consonant-transcription-alist
		   tibetan-vowel-transcription-alist
		   tibetan-subjoined-transcription-alist))
	(case-fold-search nil)
	substr t-char p-str t-str result)
    (setq substr "")
    (setq p-str "")
    (setq t-str "")
    (cond ((string-match tibetan-precomposed-regexp trans)
	   (setq substr (substring trans (match-beginning 0) (match-end 0)))
	   (setq trans (substring trans (match-end 0)))
	   (setq t-char (cdr (assoc substr lp)))
	   (setq p-str t-char)))
    (while (string-match tibetan-regexp trans)
      (setq substr (substring trans (match-beginning 0) (match-end 0)))
      (setq trans (substring trans 0 (match-beginning 0)))
      (setq t-char
	    (cdr (assoc substr l)))
      (setq t-str (concat t-char t-str)))
    (setq result (concat p-str t-str))))


;;;
;;; Functions for composing Tibetan character.
;;;
;;; A Tibetan syllable is typically structured as follows:
;;;
;;;      [Prefix] C [C+] V [M] [Suffix [Post suffix]]
;;;
;;; where C's are all vertically stacked, V appears below or above
;;; consonant cluster and M is always put above the C[C+]V combination.
;;; (Sanskrit visarga, though it is a vowel modifier, is considered
;;;  to be a punctuation.)
;;;
;;; Here are examples of the words "bsgrubs" and "h'uM"
;;;
;;;            $(7"7(B2$(7%q`"U(B1$(7"7"G(B         2$(7"H`#A`"U0"_(B1        
;;;
;;;                             M
;;;             b s b s         h
;;;               g             '
;;;               r             u
;;;               u
;;;
;;; Consonants ''', 'w', 'y', 'r' take special forms when they are used
;;; as subjoined consonant. Consonant 'r' takes another special form
;;; when used as superjoined as in "rka", and so on, while it does not
;;; change its form when conjoined with subjoined ''', 'w' or 'y'
;;; as in "rwa", "rya".
;;;
;;;
;;; As a Tibetan input method should avoid using conversion key,
;;; we use a "Tibetan glyph -> transcription -> Tibetan glyph"
;;; translation at each key input.
;;;
;;; 1st stage -  Check the preceding char.
;;;              If the preceding char is Tibetan and composable, then
;;;
;;; 2nd stage -  Translate the preceding char into transcription
;;;
;;; 3rd stage -  Concatenate the transcription of preceding char
;;;              and the current input key.
;;;
;;; 4th stage -  Re-translate the concatenated transcription into
;;;              a sequence of Tibetan letters.
;;;
;;; 5th stage -  Convert leading consonants into one single precomposed char
;;;              if possible.
;;;
;;; 6th stage -  Compose the consonants into one composite glyph.
;;;
;;; (If the current input is a vowel sign or a vowel modifier,
;;;  then it is composed with preceding char without checking
;;;  except when the preceding char is a punctuation or a digit.)
;;;
;;;

;;; This function is used to avoid composition
;;;      between Tibetan and non-Tibetan chars.

;;;###autoload
(defun tibetan-char-examin (ch)
  "Check if char CH is Tibetan character.
Returns non-nil if CH is Tibetan. Otherwise, returns nil."
  (let ((chr ch))
    (if (eq (char-charset chr) 'composition)
	(string-match "\\cq+" (decompose-composite-char chr))
      (string-match "\\cq" (char-to-string chr)))))

;;; This is used to avoid composition between digits, signs, punctuations
;;; and word constituents.

;;;###autoload
(defun tibetan-composable-examin (ch)
  "Check if Tibetan char CH is composable.
Returns t if CH is a composable char \(i.e. neither punctuation nor digit)."
  (let ((chr ch)
	chstr)
    (if (eq (char-charset chr) 'composition)
	(setq chstr (decompose-composite-char chr))
      (setq chstr (char-to-string chr)))
    (not (string-match "[$(7!1(B-$(7!o"f$(8!;!=!?!@!A!D"`(B]" chstr))))


;;; This checks if a character to be composed contains already
;;; one or more vowels / vowel modifiers. If the character contains
;;; them, then no more consonant should be added.

;;;###autoload
(defun tibetan-complete-char-examin (ch)
  "Check if composite char CH contains one or more vowel/vowel modifiers.
Returns non-nil, if CH contains vowel/vowel modifiers."
  (let ((chr ch)
	chstr)
    (if (eq (char-charset chr) 'composition)
	(setq chstr (decompose-composite-char chr))
      (setq chstr (char-to-string chr)))
    (string-match "[$(7!g!e"Q(B-$(7"^"_(B-$(7"l(B]" chstr)))

;;; This function makes a composite character consisting of two characters
;;; vertically stacked.

;;;###autoload
(defun tibetan-vertical-stacking (first second upward)
  "Return a vertically stacked composite char consisting of FIRST and SECOND.
If UPWARD is non-nil, then SECOND is put above FIRST."
  (if upward
      (compose-chars first '(tc . bc) second)
    (compose-chars first '(bc . tc) second)))

;;; This function makes a composite char from a string.
;;; Note that this function returns a string, not a char.

;;;###autoload
(defun tibetan-compose-string (str)
  "Compose a sequence of Tibetan character components into a composite character.
Returns a string containing a composite character."
  (let ((t-str str)
	f-str s-str f-ch s-ch rest composed result)
    ;;Make sure no redundant vowel sign is present.  
    (if (string-match
	 "^\\(.+\\)\\($(7"Q(B\\)\\([$(7!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]\\)" t-str)
	(setq t-str (concat
		     (match-string 1 t-str)
		     (match-string 3 t-str))))
    (if (string-match
	 "^\\(.+\\)\\([$(7!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]\\)\\($(7"Q(B\\)" t-str)
	(setq t-str (concat
		     (match-string 1 t-str)
		     (match-string 2 t-str))))
    ;;Start conversion.
    (setq result "")
    ;; Consecutive base/precomposed consonants are reduced to the last one.
    (while (string-match "^\\([$(7"!(B-$(7"J$!(B-$(7%u(B]\\)\\([$(7"!(B-$(7"@"B(B-$(7"J$!(B-$(7%u(B].*\\)" t-str)
      (setq result (concat result (match-string 1 t-str)))
      (setq t-str (match-string 2 t-str)))
    ;; Vowel/vowel modifier, subjoined consonants are added one by one
    ;; to the preceding element.
    (while
	(string-match "^\\(.\\)\\([$(7"A#!(B-$(7#J!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]\\)\\(.*\\)" t-str)
      (setq f-str (match-string 1 t-str))
      (setq f-ch (string-to-char f-str))
      (setq s-str (match-string 2 t-str))
      ;;Special treatment for 'a chung.
      ;;If 'a follows a consonant, then turned into its subjoined form.
      (if (and (string-match "$(7"A(B" s-str)
	       (not (tibetan-complete-char-examin f-ch)))
	  (setq s-str "$(7#A(B"))
      (setq s-ch (string-to-char s-str))
      (setq rest (match-string 3 t-str))
      (cond ((string-match "\\c2" s-str);; upper vowel sign
	     (setq composed
		   (tibetan-vertical-stacking f-ch s-ch t)))
	    ((string-match "\\c3" s-str);; lower vowel sign
	     (setq composed
		   (tibetan-vertical-stacking f-ch s-ch nil)))
	    ;;Automatic conversion of ra-mgo (superscribed r).
	    ;;'r' is converted if followed by a subjoined consonant
	    ;;other than w, ', y, r.
	    ((and (string-match "$(7"C(B" f-str)
		  (not (string-match "[$(7#>#A#B#C(B]" s-str)))
	     (setq f-ch ?$(7#P(B)
	     (setq composed
		   (tibetan-vertical-stacking f-ch s-ch nil)))
	    ((not (tibetan-complete-char-examin f-ch))
	     ;;Initial base consonant is tranformed, if followed by
	     ;;a subjoined consonant, except when it is followed
	     ;;by a subscribed 'a.
	     (if (and (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J(B]" f-str)
		      (not (string-match "$(7#A(B" s-str)))
		 (setq f-ch
		       (string-to-char
			(cdr (assoc f-str tibetan-base-to-subjoined-alist)))))
	     (setq composed
		   (tibetan-vertical-stacking f-ch s-ch nil)))
	    (t
	     (setq composed s-str)
	     (setq result (concat result f-str))))
      (setq t-str (concat composed rest)))
    (setq result (concat result t-str))))

;;; quail <-> conversion interface.

;;;###autoload
(defun tibetan-composition (pc key)
  "Interface to quail input method.
Takes two arguments: char PC and string KEY, where PC is the preceding
character to be composed with current input KEY.
Returns a string which is the result of composition."
  (let (trans cur-ch t-str result)
    ;; Make a tibetan character corresponding to current input key.
    (setq cur-ch (tibetan-transcription-to-tibetan key))
    ;; Check if the preceding character is Tibetan and composable.
    (cond ((and (tibetan-char-examin pc)
		(tibetan-composable-examin pc))
	   ;;If Tibetan char corresponding to the current input key exists,
	   (cond (cur-ch
		  ;; Then,
		  ;; Convert the preceding character into transcription,
		  ;; and concatenate it with the current input key,
		  (setq trans (tibetan-tibetan-to-transcription pc))
		  (setq trans (concat trans key))
		  ;; Concatenated transcription is converted to
		  ;; a sequence of Tibetan characters,
		  (setq t-str (tibetan-transcription-to-tibetan trans))
		  ;; And it is composed into a composite character.
		  (setq result (tibetan-compose-string t-str)))
		 ;; Else,
		 (t
		  ;; Simply concatenate the preceding character and
		  ;; the current input key.
		  (setq result (char-to-string pc))
		  (setq result (concat result key)))))
	  ;; If the preceding char is not Tibetan or not composable,
	  (t
	   ;; pc = 0 means the point is at the beginning of buffer.
	   (if (not (eq pc 0))
	       (setq result (char-to-string pc)))
	   (if cur-ch
	       (setq result (concat result cur-ch))
	     (setq result (concat result key))))
	  )))


;;;###autoload
(defun tibetan-decompose-region (beg end)
  "Decompose Tibetan characters in the region BEG END into their components.
Components are: base and subjoined consonants, vowel signs, vowel modifiers.
One column punctuations are converted to their 2 column equivalents."
  (interactive "r")
  (let (ch-str ch-beg ch-end)
    (save-excursion
      (save-restriction
	(narrow-to-region beg end)
	(goto-char (point-min))
	;; \\cq = Tibetan character
	(while (re-search-forward "\\cq" nil t)
	  (setq ch-str (buffer-substring-no-properties
			(match-beginning 0) (match-end 0)))
	  ;; Save the points. Maybe, using save-match-data is preferable.
	  ;; But in order not to lose the trace(because the body is too long),
	  ;; we save the points in variables.
	  (setq ch-beg (match-beginning 0))
	  (setq ch-end (match-end 0))
	  ;; Here starts the decomposition.
	  (cond
	   ;; 1 column punctuations -> 2 column equivalent
	   ((string-match "[$(8!D!;!=!?!@!A"`(B]" ch-str)
	    (setq ch-str
		  (car (rassoc ch-str tibetan-precomposition-rule-alist))))
	   ;; Decomposition of composite character.
	   ((eq (char-charset (string-to-char ch-str)) 'composition)
	    ;; Make a string which consists of a sequence of
	    ;; components.
	    (setq ch-str (decompose-composite-char (string-to-char ch-str)))
	    ;; Converts nyi zla into base elements.
	    (cond ((string= ch-str "$(7#R#S#S#S(B")
		   (setq ch-str "$(7!4!5!5(B"))
		  ((string= ch-str "$(7#R#S#S(B")
		   (setq ch-str "$(7!4!5(B"))
		  ((string= ch-str "$(7#R#S!I(B")
		   (setq ch-str "$(7!6(B"))
		  ((string= ch-str "$(7#R#S(B")
		   (setq ch-str "$(7!4(B")))))
	  ;; If the sequence of components starts with a subjoined consonants,
	  (if (string-match "^\\([$(7#!(B-$(7#J(B]\\)\\(.*\\)$" ch-str)
	      ;; then the first components is converted to its base form.
	      (setq ch-str
		    (concat (car (rassoc (match-string 1 ch-str)
					 tibetan-base-to-subjoined-alist))
			    (match-string 2 ch-str))))
	  ;; If the sequence of components starts with a precomposed character,
	  (if (string-match "^\\([$(7$!(B-$(7%u(B]\\)\\(.*\\)$" ch-str)
	      ;; then it is converted into a sequence of components.
	      (setq ch-str
		    (concat (car (rassoc (match-string 1 ch-str)
					 tibetan-precomposition-rule-alist))
			    (match-string 2 ch-str))))
	  ;; Special treatment for superscribed r.
	  (if (string-match "^$(7#P(B\\(.*\\)$" ch-str)
	      (setq ch-str (concat "$(7"C(B" (match-string 1 ch-str))))
	  ;; Finally, the result of decomposition is inserted, and
	  ;; the composite character is deleted.
	  (insert-and-inherit ch-str)
	  (delete-region ch-beg ch-end))))))

;;;###autoload
(defun tibetan-compose-region (beg end)
  "Make composite chars from Tibetan character components in the region BEG END.
Two column punctuations are converted to their 1 column equivalents."
  (interactive "r")
  (let (str result)
    (save-excursion
      (save-restriction
	(narrow-to-region beg end)
	(goto-char (point-min))
	;; First, sequence of components which has a precomposed equivalent
	;; is converted.
	(while (re-search-forward
		tibetan-precomposition-rule-regexp nil t)
	  (setq str (buffer-substring-no-properties
		     (match-beginning 0) (match-end 0)))
	  (save-match-data
	    (insert-and-inherit
	     (cdr (assoc str tibetan-precomposition-rule-alist))))
	  (delete-region (match-beginning 0) (match-end 0)))
	(goto-char (point-min))
	;; Then, composable elements are put into a composite character. 
	(while (re-search-forward
		"[$(7"!(B-$(7"J$!(B-$(7%u(B]+[$(7#!(B-$(7#J!I!g!e"Q(B-$(7"^"_(B-$(7"l(B]+"
		nil t)
	  (setq str (buffer-substring-no-properties
		     (match-beginning 0) (match-end 0)))
	  (save-match-data
	    (setq result (tibetan-compose-string str))
	    (insert-and-inherit result))
	  (delete-region (match-beginning 0) (match-end 0)))))))

;;;
;;; This variable is used to avoid repeated decomposition.
;;;
(setq-default tibetan-decomposed nil)

;;;###autoload
(defun tibetan-decompose-buffer ()
  "Decomposes Tibetan characters in the buffer into their components.
See also docstring of the function tibetan-decompose-region."
  (interactive)
  (make-local-variable 'tibetan-decomposed)
  (cond ((not tibetan-decomposed)
	 (tibetan-decompose-region (point-min) (point-max))
	 (setq tibetan-decomposed t))))

;;;###autoload
(defun tibetan-compose-buffer ()
  "Composes Tibetan character components in the buffer.
See also docstring of the function tibetan-compose-region."
  (interactive)
  (make-local-variable 'tibetan-decomposed)
  (tibetan-compose-region (point-min) (point-max))
  (setq tibetan-decomposed nil))

;;;###autoload
(defun tibetan-post-read-conversion (len)
  (save-excursion
    (save-restriction
      (let ((buffer-modified-p (buffer-modified-p)))
	(narrow-to-region (point) (+ (point) len))
	(tibetan-compose-region (point-min) (point-max))
	(set-buffer-modified-p buffer-modified-p)
	(make-local-variable 'tibetan-decomposed)
	(setq tibetan-decomposed nil)
	(- (point-max) (point-min))))))


;;;###autoload
(defun tibetan-pre-write-conversion (from to)
  (setq tibetan-decomposed-temp tibetan-decomposed)
  (let ((old-buf (current-buffer)))
    (set-buffer (generate-new-buffer " *temp*"))
    (if (stringp from)
	(insert from)
      (insert-buffer-substring old-buf from to))
    (if (not tibetan-decomposed-temp)
	(tibetan-decompose-region (point-min) (point-max)))
    ;; Should return nil as annotations.
    nil))

(provide 'tibet-util)

;;; language/tibet-util.el ends here.