From e2044312b95f8b427ddc662cd1abf10bf4d87b2d Mon Sep 17 00:00:00 2001 From: Aaron Ecay Date: Sun, 15 Dec 2013 21:30:27 -0500 Subject: [PATCH] org-element: use brackets to disambiguate subscript/underline * lisp/org-element.el (org-element-sub/superscript-successor): use brackets to disambiguate subscript/underline * lisp/org.el (org-do-emphasis-faces): incorporate the above disambiguation * doc/org.texi: reflect these changes in the manual In an org-syntax string like 1 or 2 below, both subscript and underline are possible interpretations. This patch uses the presence of brackets to disambiguate these cases, that is, 1 is interpreted as an underlined "foo" whereas 2 is subscript "foo" followed by plain-text "_" 1: '_foo_ 2: '_{foo}_ The in-buffer highlighting is updated to match. --- doc/org.texi | 14 ++++++++++++++ lisp/org-element.el | 22 +++++++++++++++++++--- lisp/org.el | 36 ++++++++++++++++++++++-------------- 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/doc/org.texi b/doc/org.texi index b4c4078..3eefe9a 100644 --- a/doc/org.texi +++ b/doc/org.texi @@ -9739,6 +9739,17 @@ can tweak @code{org-emphasis-regexp-components}. Beware that changing one of the above variables will no take effect until you reload Org, for which you may need to restart Emacs. +When it follows an alphanumeric character, the underscore is always +interpreted as a subscript (@pxref{Subscripts and superscripts}), and when it +follows whitespace it is always the start of an underline (assuming a +matching underscore is found in a proper position further along). However, +after a punctuation character (for example the apostrophe), the underscore +character can be ambiguous between these two interpretations. Org uses a +simple heuristic for these cases: if the character following the underscore +is an opening brace @samp{@{} or if no matching underscore is seen in the +following text, the underscore is considered to be the start of a subscript. 
+Otherwise, it is the start of underlining. + @node Horizontal rules @subheading Horizontal rules @cindex horizontal rules, markup rules @@ -10123,6 +10134,9 @@ In addition to showing entities as UTF-8 characters, this command will also format sub- and superscripts in a WYSIWYM way. @end table +For discussion of the resolution of ambiguities between the underscore as the +introducer of a subscript vs.@ underline, see @ref{Emphasis and monospace}. + @node @LaTeX{} fragments @subsection @LaTeX{} fragments @cindex @LaTeX{} fragments diff --git a/lisp/org-element.el b/lisp/org-element.el index 089ecfb..faa1e44 100644 --- a/lisp/org-element.el +++ b/lisp/org-element.el @@ -3408,9 +3408,25 @@ Return value is a cons cell whose CAR is either `subscript' or `superscript' and CDR is beginning position." (save-excursion (unless (bolp) (backward-char)) - (when (re-search-forward org-match-substring-regexp nil t) - (cons (if (string= (match-string 2) "_") 'subscript 'superscript) - (match-beginning 2))))) + (let (res) + (while (and (not res) + (re-search-forward org-match-substring-regexp nil t)) + (goto-char (match-beginning 0)) + (when (or + ;; this subscript uses brackets -> handle as subscript + ;; unconditionally + (eq (aref (match-string 3) 0) ?{) + ;; it is not ambiguous with an underline -> handle as + ;; subscript + (not (looking-at-p org-emph-re))) + (setq res (cons (if (string= (match-string 2) "_") + 'subscript + 'superscript) + (match-beginning 2)))) + ;; otherwise -> keep going, and let the underline + ;; parser have it + (goto-char (match-end 0))) + res))) ;;;; Superscript diff --git a/lisp/org.el b/lisp/org.el index eeb0c33..dbddc0b 100644 --- a/lisp/org.el +++ b/lisp/org.el @@ -5721,21 +5721,29 @@ This should be called after the variable `org-link-types' has changed." 
(if (not (= (char-after (match-beginning 3)) (char-after (match-beginning 4)))) (progn - (setq rtn t) (setq a (assoc (match-string 3) org-emphasis-alist)) - (font-lock-prepend-text-property (match-beginning 2) (match-end 2) - 'face - (nth 1 a)) - (and (nth 2 a) - (org-remove-flyspell-overlays-in - (match-beginning 0) (match-end 0))) - (add-text-properties (match-beginning 2) (match-end 2) - '(font-lock-multiline t org-emphasis t)) - (when org-hide-emphasis-markers - (add-text-properties (match-end 4) (match-beginning 5) - '(invisible org-link)) - (add-text-properties (match-beginning 3) (match-end 3) - '(invisible org-link))))) + ;; Don't do any highlighting if this is an underscore + ;; which is part of a subscript. + (unless (and (string= (car a) "_") + (save-excursion + (goto-char (match-beginning 2)) + (save-match-data + (eq (org-element-type (org-element-context)) + 'subscript)))) + (setq rtn t) + (font-lock-prepend-text-property (match-beginning 2) (match-end 2) + 'face + (nth 1 a)) + (and (nth 2 a) + (org-remove-flyspell-overlays-in + (match-beginning 0) (match-end 0))) + (add-text-properties (match-beginning 2) (match-end 2) + '(font-lock-multiline t org-emphasis t)) + (when org-hide-emphasis-markers + (add-text-properties (match-end 4) (match-beginning 5) + '(invisible org-link)) + (add-text-properties (match-beginning 3) (match-end 3) + '(invisible org-link)))))) (backward-char 1)) rtn)) -- 1.8.5.1