aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorSimon Marshall <[email protected]>1997-06-06 07:10:24 +0000
committerSimon Marshall <[email protected]>1997-06-06 07:10:24 +0000
commit25544ce1bd4992b990365f8a09070f963aefb67d (patch)
tree60ee4e61f351617d86afedcfa0003da85151c279 /lisp
parent14aa11f4dae9557d582f4f66b233ae84fdf925e1 (diff)
emit charsets after strings so that the final regexp finds the longest match.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/emacs-lisp/regexp-opt.el82
1 files changed, 45 insertions, 37 deletions
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el
index adf8e9e8da..140fd8005d 100644
--- a/lisp/emacs-lisp/regexp-opt.el
+++ b/lisp/emacs-lisp/regexp-opt.el
@@ -4,7 +4,7 @@
;; Author: Simon Marshall <[email protected]>
;; Keywords: strings, regexps
-;; Version: 1.04.01
+;; Version: 1.05
;; This file is part of GNU Emacs.
@@ -27,10 +27,14 @@
;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)".
;;
-;; This package generates a regexp from a given list of strings (that matches
-;; one of those strings) that is equivalent to but more efficient than:
+;; This package generates a regexp from a given list of strings (which matches
+;; one of those strings) so that the regexp generated by:
;;
-;; (mapconcat 'identity (mapcar 'regexp-quote strings) "\\|")
+;; (regexp-opt strings)
+;;
+;; is equivalent to, but more efficient than, the regexp generated by:
+;;
+;; (mapconcat 'regexp-quote strings "\\|")
;;
;; For example:
;;
@@ -42,10 +46,9 @@
;; (concat "(" (regexp-opt strings t) "\\>"))
;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>"
;;
-;; Searching using the above example `regexp-opt' regexp is significantly
-;; faster than searching using the equivalent `mapconcat' regexp, taking
-;; approximately two-thirds of the time.
-;;
+;; Searching using the above example `regexp-opt' regexp takes approximately
+;; two-thirds of the time taken using the equivalent `mapconcat' regexp.
+
;; Since this package was written to produce efficient regexps, not regexps
;; efficiently, it is probably not a good idea to in-line too many calls in
;; your code, unless you use the following trick with `eval-when-compile':
@@ -62,7 +65,15 @@
;; (defvar definition-regexp
;; "^(\\(def\\(alias\\|const\\|macro\\|subst\\|un\\|var\\)\\)\\>")
;;
-;; Originally written for font-lock.el, from an idea from Stig's hl319.el.
+;; Note that if you use this trick for all instances of `regexp-opt' and
+;; `regexp-opt-depth' in your code, regexp-opt.el would only have to be loaded
+;; at compile time. But note also that using this trick means that should
+;; regexp-opt.el be changed, perhaps to fix a bug or to add a feature to
+;; improve the efficiency of `regexp-opt' regexps, you would have to recompile
+;; your code for such changes to have effect in your code.
+
+;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with
+;; thanks for ideas also to Michael Ernst, Bob Glickstein and Dan Nicolaescu.
;; Please don't tell me that it doesn't produce optimal regexps; I know that
;; already. For example, the above explanation for the meaning of "opt" would
;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex
@@ -73,15 +84,16 @@
;;;###autoload
(defun regexp-opt (strings &optional paren)
"Return a regexp to match a string in STRINGS.
+Each string should be unique in STRINGS and should not contain any regexps.
If optional PAREN non-nil, ensure that the returned regexp is enclosed by at
least one regexp grouping construct.
-Each string in STRINGS should be unique and should not contain any regexps.
The returned regexp is typically more efficient than the equivalent regexp:
- (mapconcat 'identity (mapcar 'regexp-quote STRINGS) \"\\\\|\")
+ (let ((open-paren (if PAREN \"\\\\(\" \"\")) (close-paren (if PAREN \"\\\\)\" \"\")))
+ (concat open-paren (mapconcat 'regexp-quote STRINGS \"\\\\|\") close-paren))
-but typically contains regexp grouping constructs. Use `regexp-opt-depth' to
-count them."
+but typically contains more regexp grouping constructs.
+Use `regexp-opt-depth' to count them."
(save-match-data
;; Recurse on the sorted list.
(let ((max-lisp-eval-depth (* 1024 1024))
@@ -165,12 +177,12 @@ in REGEXP."
close-group)))
;;
;; If there are several one-character strings, remove them and recurse
- ;; on the rest.
+ ;; on the rest (first so the final regexp finds the longest match).
((> (length letters) 1)
(let ((rest (let ((completion-regexp-list '("^..+$")))
(all-completions "" (mapcar 'list strings)))))
(concat open-group
- (regexp-opt-charset letters) "\\|" (regexp-opt-group rest)
+ (regexp-opt-group rest) "\\|" (regexp-opt-charset letters)
close-group)))
;;
;; Otherwise, divide the list into those that start with a particular
@@ -196,30 +208,26 @@ in REGEXP."
(bracket "") (dash "") (caret ""))
;;
;; Make a character map but extract character set meta characters.
- (let (char)
- (while chars
- (setq char (string-to-char (pop chars)))
- (cond ((eq char ?\])
- (setq bracket "]"))
- ((eq char ?^)
- (setq caret "^"))
- ((eq char ?-)
- (setq dash "-"))
- (t
- (aset charmap char t)))))
+ (dolist (char (mapcar 'string-to-char chars))
+ (case char
+ (?\]
+ (setq bracket "]"))
+ (?^
+ (setq caret "^"))
+ (?-
+ (setq dash "-"))
+ (otherwise
+ (aset charmap char t))))
;;
;; Make a character set from the map using ranges where applicable.
- (let ((elt 0) start)
- (while (< elt charwidth)
- (when (aref charmap elt)
- (setq start (1+ elt))
- (while (and (< start charwidth) (aref charmap start))
- (incf start))
- (if (< (- start elt) 4)
- (setq charset (format "%s%c" charset elt))
- (setq charset (format "%s%c-%c" charset elt (1- start))
- elt start)))
- (incf elt)))
+ (dotimes (elt charwidth)
+ (when (aref charmap elt)
+ (let ((start elt))
+ (while (and (< elt charwidth) (aref charmap elt))
+ (incf elt))
+ (if (> (- elt start) 3)
+ (setq charset (format "%s%c-%c" charset start (1- elt)))
+ (setq charset (format "%s%c" charset (setq elt start)))))))
;;
;; Make sure a caret is not first and a dash is first or last.
(if (and (string-equal charset "") (string-equal bracket ""))