From 08efc990a578c925d42315c45e0b9b76536b92af Mon Sep 17 00:00:00 2001 From: Ihor Radchenko Date: Wed, 24 Mar 2021 21:27:24 +0800 Subject: [PATCH] Improve org-link-plain-re (org-link-plain-re): Update docstring. Now, the docstring explicitly mentions that the regexp must contain groups for the link type and the path. * lisp/ol.el (org-link-make-regexps): Allow URLs with up to two levels of nested brackets. Now, URLs like [1] can be matched. The new regexp is based on [2]. * testing/lisp/test-ol.el: Add tests for the plain link regexp [1] https://doi.org/10.1016/0160-791x(79)90023-x [2] https://daringfireball.net/2010/07/improved_regex_for_matching_urls --- lisp/ol.el | 41 +++++++++++---- testing/lisp/test-ol.el | 110 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 10 deletions(-) diff --git a/lisp/ol.el b/lisp/ol.el index b8bd7d234..550c0cff6 100644 --- a/lisp/ol.el +++ b/lisp/ol.el @@ -519,7 +519,10 @@ links more efficient." "Matches link with angular brackets, spaces are allowed.") (defvar org-link-plain-re nil - "Matches plain link, without spaces.") + "Matches plain link, without spaces. +Group 1 must contain the link type (i.e. https). +Group 2 must contain the link path (i.e. //example.com). +Used by `org-element-link-parser'.") (defvar org-link-bracket-re nil "Matches a link in double brackets.") @@ -807,15 +810,33 @@ This should be called after the variable `org-link-parameters' has changed." (format "<%s:\\([^>\n]*\\(?:\n[ \t]*[^> \t\n][^>\n]*\\)*\\)>" types-re) org-link-plain-re - (concat - "\\<" types-re ":" - "\\([^][ \t\n()<>]+\\(?:([[:word:]0-9_]+)\\|\\([^[:punct:] \t\n]\\|/\\)\\)\\)") - ;; "\\([^]\t\n\r<>() ]+[^]\t\n\r<>,.;() ]\\)") - org-link-bracket-re - (rx (seq "[[" - ;; URI part: match group 1. 
- (group - (one-or-more + (let* ((non-space-bracket "[^][ \t\n()<>]") + (parenthesis + `(seq "(" + (0+ (or (regex ,non-space-bracket) + (seq "(" + (0+ (regex ,non-space-bracket)) + ")"))) + ")"))) + ;; Heuristics for an URL link inspired by + ;; https://daringfireball.net/2010/07/improved_regex_for_matching_urls + (rx-to-string + `(seq word-start + ;; Link type: match group 1. + (regexp ,types-re) + ":" + ;; Link path: match group 2. + (group + (1+ (or (regex ,non-space-bracket) + ,parenthesis)) + (or (regexp "[^[:punct:] \t\n]") + ?/ + ,parenthesis))))) + org-link-bracket-re + (rx (seq "[[" + ;; URI part: match group 1. + (group + (one-or-more (or (not (any "[]\\")) (and "\\" (zero-or-more "\\\\") (any "[]")) (and (one-or-more "\\") (not (any "[]")))))) diff --git a/testing/lisp/test-ol.el b/testing/lisp/test-ol.el index 5b7dc513b..ddcc570b3 100644 --- a/testing/lisp/test-ol.el +++ b/testing/lisp/test-ol.el @@ -491,5 +491,115 @@ (org-previous-link)) (buffer-substring (point) (line-end-position)))))) + +;;; Link regexps + + +(defmacro test-ol-parse-link-in-text (text) + "Return list of :type and :path of link parsed in TEXT. +\"<point>\" string must be at the beginning of the link to be parsed." + (declare (indent 1)) + `(org-test-with-temp-text ,text + (list (org-element-property :type (org-element-link-parser)) + (org-element-property :path (org-element-link-parser))))) + +(ert-deftest test-ol/plain-link-re () + "Test `org-link-plain-re'." 
+ (should + (equal + '("https" "//example.com") + (test-ol-parse-link-in-text + "(<point>https://example.com)"))) + (should + (equal + '("https" "//example.com/qwe()") + (test-ol-parse-link-in-text + "(Some text <point>https://example.com/qwe())"))) + (should + (equal + '("https" "//doi.org/10.1016/0160-791x(79)90023-x") + (test-ol-parse-link-in-text + "<point>https://doi.org/10.1016/0160-791x(79)90023-x"))) + (should + (equal + '("file" "aa") + (test-ol-parse-link-in-text + "The <point>file:aa link"))) + (should + (equal + '("file" "a(b)c") + (test-ol-parse-link-in-text + "The <point>file:a(b)c link"))) + (should + (equal + '("file" "a()") + (test-ol-parse-link-in-text + "The <point>file:a() link"))) + (should + (equal + '("file" "aa((a))") + (test-ol-parse-link-in-text + "The <point>file:aa((a)) link"))) + (should + (equal + '("file" "aa(())") + (test-ol-parse-link-in-text + "The <point>file:aa(()) link"))) + (should + (equal + '("file" "/a") + (test-ol-parse-link-in-text + "The <point>file:/a link"))) + (should + (equal + '("file" "/a/") + (test-ol-parse-link-in-text + "The <point>file:/a/ link"))) + (should + (equal + '("http" "//") + (test-ol-parse-link-in-text + "The <point>http:// link"))) + (should + (equal + '("file" "ab") + (test-ol-parse-link-in-text + "The (some <point>file:ab) link"))) + (should + (equal + '("file" "aa") + (test-ol-parse-link-in-text + "The <point>file:aa) link"))) + (should + (equal + '("file" "aa") + (test-ol-parse-link-in-text + "The <point>file:aa( link"))) + (should + (equal + '("http" "//foo.com/more_(than)_one_(parens)") + (test-ol-parse-link-in-text + "The <point>http://foo.com/more_(than)_one_(parens) link"))) + (should + (equal + '("http" "//foo.com/blah_(wikipedia)#cite-1") + (test-ol-parse-link-in-text + "The <point>http://foo.com/blah_(wikipedia)#cite-1 link"))) + (should + (equal + '("http" "//foo.com/blah_(wikipedia)_blah#cite-1") + (test-ol-parse-link-in-text + "The <point>http://foo.com/blah_(wikipedia)_blah#cite-1 link"))) + (should + (equal + '("http" "//foo.com/unicode_(✪)_in_parens") + (test-ol-parse-link-in-text + "The 
<point>http://foo.com/unicode_(✪)_in_parens link"))) + (should + (equal + '("http" "//foo.com/(something)?after=parens") + (test-ol-parse-link-in-text + "The <point>http://foo.com/(something)?after=parens link")))) + (provide 'test-ol) ;;; test-ol.el ends here -- 2.26.2