diff options
author | Miles Bader <[email protected]> | 2004-09-04 13:13:48 +0000 |
---|---|---|
committer | Miles Bader <[email protected]> | 2004-09-04 13:13:48 +0000 |
commit | 23f87bede063c31c164f97278caabdc5cf5e6980 (patch) | |
tree | 12913439eae89014aa2d810da4861f933d3348ec /lisp/gnus/deuglify.el | |
parent | 2a223f35db1bb47fb00f43191e7450b45bbd7fc4 (diff) |
Revision: [email protected]/emacs--cvs-trunk--0--patch-523
Merge from emacs--gnus--5.10, gnus--rel--5.10
Patches applied:
* [email protected]/emacs--gnus--5.10--base-0
tag of [email protected]/emacs--cvs-trunk--0--patch-464
* [email protected]/emacs--gnus--5.10--patch-1
Import from CVS branch gnus-5_10-branch
* [email protected]/emacs--gnus--5.10--patch-2
Merge from [email protected]/emacs--multi-tty--0, emacs--cvs-trunk--0
* [email protected]/emacs--gnus--5.10--patch-3
Merge from gnus--rel--5.10
* [email protected]/emacs--gnus--5.10--patch-4
Merge from gnus--rel--5.10
* [email protected]/gnus--rel--5.10--patch-18
Update from CVS
* [email protected]/gnus--rel--5.10--patch-19
Remove autoconf-generated files from archive
* [email protected]/gnus--rel--5.10--patch-20
Update from CVS
Diffstat (limited to 'lisp/gnus/deuglify.el')
-rw-r--r-- | lisp/gnus/deuglify.el | 472 |
1 files changed, 472 insertions, 0 deletions
diff --git a/lisp/gnus/deuglify.el b/lisp/gnus/deuglify.el new file mode 100644 index 0000000000..85d45cd351 --- /dev/null +++ b/lisp/gnus/deuglify.el @@ -0,0 +1,472 @@ +;;; deuglify.el --- deuglify broken Outlook (Express) articles + +;; Copyright (C) 2002, 2003 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2002 Raymond Scholz + +;; Author: Raymond Scholz <[email protected]> +;; Thomas Steffen (unwrapping algorithm, +;; based on an idea of Stefan Monnier) +;; Keywords: mail, news + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; This file enables Gnus to repair broken citations produced by +;; common user agents like MS Outlook (Express). It may repair +;; articles of other user agents too. +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; +;; Outlook sometimes wraps cited lines before sending a message as +;; seen in this example: +;; +;; Example #1 +;; ---------- +;; +;; John Doe wrote: +;; +;; > This sentence no verb. This sentence no verb. This sentence +;; no +;; > verb. This sentence no verb. This sentence no verb. This +;; > sentence no verb. +;; +;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those +;; erroneously wrapped lines and will unwrap them. I.e. putting the +;; wrapped parts ("no" in this example) back where they belong (at the +;; end of the cited line above). +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Note that some people not only use broken user agents but also +;; practice a bad citation style by omitting blank lines between the +;; cited text and their own text. +;: +;; Example #2 +;; ---------- +;; +;; John Doe wrote: +;; +;; > This sentence no verb. This sentence no verb. This sentence no +;; You forgot in all your sentences. +;; > verb. This sentence no verb. This sentence no verb. This +;; > sentence no verb. +;; +;; Unwrapping "You forgot in all your sentences." would be illegal as +;; this part wasn't intended to be cited text. +;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting +;; citation line will be of a certain maximum length. You can control +;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also +;; unwrapping will only be done if the line above the (possibly) +;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'. +;; +;; Furthermore no unwrapping will be undertaken if the last character +;; is one of the chars specified in +;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!" +;; inhibits unwrapping if the cited line ends with a full stop, +;; question mark or exclamation mark. Note that this variable +;; defaults to `nil', triggering a few false positives but generally +;; giving you better results. +;; +;; Unwrapping works on every level of citation. Thus you will be able +;; repair broken citations of broken user agents citing broken +;; citations of broken user agents citing broken citations... +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Citations are commonly introduced with an attribution line +;; indicating who wrote the cited text. Outlook adds superfluous +;; information that can be found in the header of the message to this +;; line and often wraps it. +;; +;; If that weren't enough, lots of people write their own text above +;; the cited text and cite the complete original article below. +;; +;; Example #3 +;; ---------- +;; +;; Hey, John. There's no in all your sentences! +;; +;; John Doe <[email protected]> wrote in message +;; news:[email protected]... +;; > This sentence no verb. This sentence no verb. This sentence +;; no +;; > verb. This sentence no verb. This sentence no verb. This +;; > sentence no verb. +;; > +;; > Bye, John +;; +;; Repairing the attribution line will be done by function +;; `gnus-article-outlook-repair-attribution which calls other function that +;; try to recognize and repair broken attribution lines. See variable +;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be +;; cut off from the beginning of an attribution line and variable +;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are +;; required to be found in an attribution line. These function return +;; the point where the repaired attribution line starts. +;; +;; Rearranging the article so that the cited text appears above the +;; new text will be done by function +;; `gnus-article-outlook-rearrange-citation'. This function calls +;; `gnus-article-outlook-repair-attribution to find and repair an attribution +;; line. +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Well, and that's what the message will look like after applying +;; deuglification: +;; +;; Example #3 (deuglified) +;; ----------------------- +;; +;; John Doe <[email protected]> wrote: +;; +;; > This sentence no verb. This sentence no verb. This sentence no +;; > verb. This sentence no verb. This sentence no verb. This +;; > sentence no verb. +;; > +;; > Bye, John +;; +;; Hey, John. There's no in all your sentences! +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Usage +;; ----- +;; +;; Press `W k' in the Summary Buffer. +;; +;; Non recommended usage :-) +;; --------------------- +;; +;; To automatically invoke deuglification on every article you read, +;; put something like that in your .gnus: +;; +;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines) +;; +;; or _one_ of the following lines: +;; +;; ;; repair broken attribution lines +;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution) +;; +;; ;; repair broken attribution lines and citations +;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation) +;; +;; Note that there always may be some false positives, so I suggest +;; using the manual invocation. After deuglification you may want to +;; refill the whole article using `W w'. +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Limitations +;; ----------- +;; +;; As I said before there may (or will) be a few false positives on +;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'. +;; +;; `gnus-article-outlook-repair-attribution will only fix the first +;; attribution line found in the article. Furthermore it fixed to +;; certain kinds of attributions. And there may be horribly many +;; false positives, vanishing lines and so on -- so don't trust your +;; eyes. Again I recommend manual invocation. +;; +;; `gnus-article-outlook-rearrange-citation' carries all the limitations of +;; `gnus-article-outlook-repair-attribution. +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; See ChangeLog for other changes. +;; +;; Revision 1.5 2002/01/27 14:39:17 rscholz +;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit +;; unwrapping if one these chars is first in the possibly wrapped line. +;; * Improved rearranging of the article. +;; * New function `gnus-outlook-repair-attribution-block' for repairing +;; those big "Original Message (following some headers)" attributions. +;; +;; Revision 1.4 2002/01/03 14:05:00 rscholz +;; Renamed `gnus-outlook-deuglify-article' to +;; `gnus-article-outlook-deuglify-article'. +;; Made it easier to deuglify the article while being in Gnus' Article +;; Edit Mode. (suggested by Phil Nitschke) +;; +;; +;; Revision 1.3 2002/01/02 23:35:54 rscholz +;; Fix a bug that caused succeeding long attribution lines to be +;; unwrapped. Minor doc fixes and regular expression tuning. +;; +;; Revision 1.2 2001/12/30 20:14:34 rscholz +;; Clean up source. +;; +;; Revision 1.1 2001/12/30 20:13:32 rscholz +;; Initial revision +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Code: + +(require 'gnus-art) +(require 'gnus-sum) + +(defconst gnus-outlook-deuglify-version "1.5 Gnus version" + "Version of gnus-outlook-deuglify.") + +;;; User Customizable Variables: + +(defgroup gnus-outlook-deuglify nil + "Deuglify articles generated by broken user agents like MS Outlook (Express).") + +;;;###autoload +(defcustom gnus-outlook-deuglify-unwrap-min 45 + "Minimum length of the cited line above the (possibly) wrapped line." + :type 'integer + :group 'gnus-outlook-deuglify) + +;;;###autoload +(defcustom gnus-outlook-deuglify-unwrap-max 95 + "Maximum length of the cited line after unwrapping." + :type 'integer + :group 'gnus-outlook-deuglify) + +(defcustom gnus-outlook-deuglify-cite-marks ">|#%" + "Characters that indicate cited lines." + :type 'string + :group 'gnus-outlook-deuglify) + +(defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil + "Characters that inhibit unwrapping if they are the last one on the cited line above the possible wrapped line." + :type '(radio (const :format "None " nil) + (string :size 0 :value ".?!")) + :group 'gnus-outlook-deuglify) + +(defcustom gnus-outlook-deuglify-no-wrap-chars "`" + "Characters that inhibit unwrapping if they are the first one in the possibly wrapped line." + :type 'string + :group 'gnus-outlook-deuglify) + +(defcustom gnus-outlook-deuglify-attrib-cut-regexp + "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, " + "Regular expression matching the beginning of an attribution line that should be cut off." + :type 'string + :group 'gnus-outlook-deuglify) + +(defcustom gnus-outlook-deuglify-attrib-verb-regexp + "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a �crit\\|schreef\\|escribi�" + "Regular expression matching the verb used in an attribution line." + :type 'string + :group 'gnus-outlook-deuglify) + +(defcustom gnus-outlook-deuglify-attrib-end-regexp + ": *\\|\\.\\.\\." + "Regular expression matching the end of an attribution line." + :type 'string + :group 'gnus-outlook-deuglify) + +;;;###autoload +(defcustom gnus-outlook-display-hook nil + "A hook called after an deuglified article has been prepared. +It is run after `gnus-article-prepare-hook'." + :type 'hook + :group 'gnus-outlook-deuglify) + +;; Functions + +(defun gnus-outlook-display-article-buffer () + "Redisplay current buffer or article buffer." + (with-current-buffer (or gnus-article-buffer (current-buffer)) + ;; "Emulate" `gnus-article-prepare-display' without calling + ;; it. Calling `gnus-article-prepare-display' on an already + ;; prepared article removes all MIME parts. I'm unsure whether + ;; this is a bug or not. + (gnus-article-highlight t) + (gnus-treat-article nil) + (gnus-run-hooks 'gnus-article-prepare-hook + 'gnus-outlook-display-hook))) + +;;;###autoload +(defun gnus-article-outlook-unwrap-lines (&optional nodisplay) + "Unwrap lines that appear to be wrapped citation lines. +You can control what lines will be unwrapped by frobbing +`gnus-outlook-deuglify-unwrap-min' and `gnus-outlook-deuglify-unwrap-max', +indicating the minimum and maximum length of an unwrapped citation line. If +NODISPLAY is non-nil, don't redisplay the article buffer." + (interactive "P") + (save-excursion + (let ((case-fold-search nil) + (inhibit-read-only t) + (cite-marks gnus-outlook-deuglify-cite-marks) + (no-wrap gnus-outlook-deuglify-no-wrap-chars) + (stop-chars gnus-outlook-deuglify-unwrap-stop-chars)) + (gnus-with-article-buffer + (article-goto-body) + (while (re-search-forward + (concat + "^\\([ \t" cite-marks "]*\\)" + "\\([" cite-marks "].*[^\n " stop-chars "]\\)[ \t]?\n" + "\\1\\([^\n " cite-marks no-wrap "]+.*\\)$") + nil t) + (let ((len12 (- (match-end 2) (match-beginning 1))) + (len3 (- (match-end 3) (match-beginning 3)))) + (if (and (> len12 gnus-outlook-deuglify-unwrap-min) + (< (+ len12 len3) gnus-outlook-deuglify-unwrap-max)) + (progn + (replace-match "\\1\\2 \\3") + (goto-char (match-beginning 0))))))))) + (unless nodisplay (gnus-outlook-display-article-buffer))) + +(defun gnus-outlook-rearrange-article (attr-start) + "Put the text from ATTR-START to the end of buffer at the top of the article buffer." + (save-excursion + (let ((inhibit-read-only t) + (cite-marks gnus-outlook-deuglify-cite-marks)) + (gnus-with-article-buffer + (article-goto-body) + ;; article does not start with attribution + (unless (= (point) attr-start) + (gnus-kill-all-overlays) + (let ((cur (point)) + ;; before signature or end of buffer + (to (if (gnus-article-search-signature) + (point) + (point-max)))) + ;; handle the case where the full quote is below the + ;; signature + (if (< to attr-start) + (setq to (point-max))) + (transpose-regions cur attr-start attr-start to))))))) + +;; John Doe <[email protected]> wrote in message +;; news:[email protected]... + +(defun gnus-outlook-repair-attribution-outlook () + "Repair a broken attribution line (Outlook)." + (save-excursion + (let ((case-fold-search nil) + (inhibit-read-only t) + (cite-marks gnus-outlook-deuglify-cite-marks)) + (gnus-with-article-buffer + (article-goto-body) + (if (re-search-forward + (concat "^\\([^" cite-marks "].+\\)" + "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\)" + "\\(.*\n?[^\n" cite-marks "].*\\)?" + "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$") + nil t) + (progn + (gnus-kill-all-overlays) + (replace-match "\\1\\2\\4") + (match-beginning 0))))))) + + +;; ----- Original Message ----- +;; From: "John Doe" <[email protected]> +;; To: "Doe Foundation" <[email protected]> +;; Sent: Monday, November 19, 2001 12:13 PM +;; Subject: More Doenuts + +(defun gnus-outlook-repair-attribution-block () + "Repair a big broken attribution block." + (save-excursion + (let ((case-fold-search nil) + (inhibit-read-only t) + (cite-marks gnus-outlook-deuglify-cite-marks)) + (gnus-with-article-buffer + (article-goto-body) + (if (re-search-forward + (concat "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n" + "[^\n:]+:[ \t]*\\([^\n]+\\)\n" + "\\([^\n:]+:[ \t]*[^\n]+\n\\)+") + nil t) + (progn + (gnus-kill-all-overlays) + (replace-match "\\1 wrote:\n") + (match-beginning 0))))))) + +;; On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <[email protected]> wrote: + +(defun gnus-outlook-repair-attribution-other () + "Repair a broken attribution line (other user agents than Outlook)." + (save-excursion + (let ((case-fold-search nil) + (inhibit-read-only t) + (cite-marks gnus-outlook-deuglify-cite-marks)) + (gnus-with-article-buffer + (article-goto-body) + (if (re-search-forward + (concat "^\\("gnus-outlook-deuglify-attrib-cut-regexp"\\)?" + "\\([^" cite-marks "].+\\)\n\\([^\n" cite-marks "].*\\)?" + "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\).*" + "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$") + nil t) + (progn + (gnus-kill-all-overlays) + (replace-match "\\4 \\5\\6\\7") + (match-beginning 0))))))) + +;;;###autoload +(defun gnus-article-outlook-repair-attribution (&optional nodisplay) + "Repair a broken attribution line. +If NODISPLAY is non-nil, don't redisplay the article buffer." + (interactive "P") + (let ((attrib-start + (or + (gnus-outlook-repair-attribution-other) + (gnus-outlook-repair-attribution-block) + (gnus-outlook-repair-attribution-outlook)))) + (unless nodisplay (gnus-outlook-display-article-buffer)) + attrib-start)) + +(defun gnus-article-outlook-rearrange-citation (&optional nodisplay) + "Repair broken citations. +If NODISPLAY is non-nil, don't redisplay the article buffer." + (interactive "P") + (let ((attrib-start (gnus-article-outlook-repair-attribution 'nodisplay))) + ;; rearrange citations if an attribution line has been recognized + (if attrib-start + (gnus-outlook-rearrange-article attrib-start))) + (unless nodisplay (gnus-outlook-display-article-buffer))) + +;;;###autoload +(defun gnus-outlook-deuglify-article (&optional nodisplay) + "Full deuglify of broken Outlook (Express) articles. +Treat dumbquotes, unwrap lines, repair attribution and rearrange citation. If +NODISPLAY is non-nil, don't redisplay the article buffer." + (interactive "P") + ;; apply treatment of dumb quotes + (gnus-article-treat-dumbquotes) + ;; repair wrapped cited lines + (gnus-article-outlook-unwrap-lines 'nodisplay) + ;; repair attribution line and rearrange citation. + (gnus-article-outlook-rearrange-citation 'nodisplay) + (unless nodisplay (gnus-outlook-display-article-buffer))) + +;;;###autoload +(defun gnus-article-outlook-deuglify-article () + "Deuglify broken Outlook (Express) articles and redisplay." + (interactive) + (gnus-outlook-deuglify-article nil)) + +(provide 'deuglify) + +;; Local Variables: +;; coding: iso-8859-1 +;; End: + +;;; arch-tag: 5f895cc9-51a9-487c-b42e-28844d79eb73 +;;; deuglify.el ends here |