From 6eb2208a67745e3150024e7b72509115b97fcfa3 Mon Sep 17 00:00:00 2001
From: Ihor Radchenko
Date: Tue, 16 Mar 2021 20:20:32 +0800
Subject: [PATCH] Improve org-link-plain-re

(org-link-plain-re): Update docstring.  Now, the docstring explicitly
mentions that the regexp must contain groups for the link type and the
path.

* lisp/ol.el (org-link-make-regexps): Allow URLs with up to two levels
of nested brackets.  Now, URLs like [1] can be matched.  The new regexp
is based on [2].

[1] https://doi.org/10.1016/0160-791x(79)90023-x
[2] https://daringfireball.net/2010/07/improved_regex_for_matching_urls

* testing/lisp/test-ol.el: Add tests for plain links.
---
 lisp/ol.el              |  61 +++++++++++++------
 testing/lisp/test-ol.el | 134 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+), 20 deletions(-)

diff --git a/lisp/ol.el b/lisp/ol.el
index b8bd7d234..d3a07a3ed 100644
--- a/lisp/ol.el
+++ b/lisp/ol.el
@@ -519,7 +519,10 @@ links more efficient."
   "Matches link with angular brackets, spaces are allowed.")
 
 (defvar org-link-plain-re nil
-  "Matches plain link, without spaces.")
+  "Matches plain link, without spaces.
+Group 1 must contain the link type (e.g. https).
+Group 2 must contain the link path (e.g. //example.com).
+Used by `org-element-link-parser'.")
 
 (defvar org-link-bracket-re nil
   "Matches a link in double brackets.")
@@ -807,26 +810,44 @@ This should be called after the variable `org-link-parameters' has changed."
 	  (format "<%s:\\([^>\n]*\\(?:\n[ \t]*[^> \t\n][^>\n]*\\)*\\)>"
 		  types-re)
 	  org-link-plain-re
-	  (concat
-	   "\\<" types-re ":"
-	   "\\([^][ \t\n()<>]+\\(?:([[:word:]0-9_]+)\\|\\([^[:punct:] \t\n]\\|/\\)\\)\\)")
-	  ;;	 "\\([^]\t\n\r<>() ]+[^]\t\n\r<>,.;() ]\\)")
-	  org-link-bracket-re
-	  (rx (seq "[["
-		   ;; URI part: match group 1.
-		   (group
-		    (one-or-more
+          (let* ((non-space-bracket "[^][ \t\n()<>]")
+                 (parenthesis
+                  `(seq "("
+                        (0+ (or (regex ,non-space-bracket)
+                                (seq "("
+                                     (0+ (regex ,non-space-bracket))
+                                     ")")))
+                        ")")))
+            ;; Heuristics for a URL link inspired by
+            ;; https://daringfireball.net/2010/07/improved_regex_for_matching_urls
+            (rx-to-string
+             `(seq word-start
+                   ;; Link type: match group 1.
+                   (regexp ,types-re)
+                   ":"
+                   ;; Link path: match group 2.
+                   (group
+                    (1+ (or (regex ,non-space-bracket)
+                            ,parenthesis))
+                    (or (regexp "[^[:punct:] \t\n]")
+                        ?/
+                        ,parenthesis)))))
+          org-link-bracket-re
+          (rx (seq "[["
+                   ;; URI part: match group 1.
+                   (group
+                    (one-or-more
 		     (or (not (any "[]\\"))
-			 (and "\\" (zero-or-more "\\\\") (any "[]"))
-			 (and (one-or-more "\\") (not (any "[]"))))))
-		   "]"
-		   ;; Description (optional): match group 2.
-		   (opt "[" (group (+? anything)) "]")
-		   "]"))
-	  org-link-any-re
-	  (concat "\\(" org-link-bracket-re "\\)\\|\\("
-		  org-link-angle-re "\\)\\|\\("
-		  org-link-plain-re "\\)"))))
+                         (and "\\" (zero-or-more "\\\\") (any "[]"))
+                         (and (one-or-more "\\") (not (any "[]"))))))
+                   "]"
+                   ;; Description (optional): match group 2.
+                   (opt "[" (group (+? anything)) "]")
+                   "]"))
+          org-link-any-re
+          (concat "\\(" org-link-bracket-re "\\)\\|\\("
+                  org-link-angle-re "\\)\\|\\("
+                  org-link-plain-re "\\)"))))
 
 (defun org-link-complete-file (&optional arg)
   "Create a file link using completion."
diff --git a/testing/lisp/test-ol.el b/testing/lisp/test-ol.el
index 5b7dc513b..e6208cd38 100644
--- a/testing/lisp/test-ol.el
+++ b/testing/lisp/test-ol.el
@@ -491,5 +491,139 @@
 	      (org-previous-link))
 	    (buffer-substring (point) (line-end-position))))))
+
+;;; Link regexps
+
+(ert-deftest test-ol/plain-link-re ()
+  "Test `org-link-plain-re'."
+  ;; `org-element-link-parser' is called at point, so every sample text
+  ;; marks the beginning of the link under test with "<point>".
+  (should
+   (equal
+    '("https" "//example.com/qwe()")
+    (org-test-with-temp-text
+        "(Some text in parenthesis followed by link with brackets <point>https://example.com/qwe())"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("https" "//doi.org/10.1016/0160-791x(79)90023-x")
+    (org-test-with-temp-text
+        "<point>https://doi.org/10.1016/0160-791x(79)90023-x"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa")
+    (org-test-with-temp-text
+        "The <point>file:aa link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "a(b)c")
+    (org-test-with-temp-text
+        "The <point>file:a(b)c link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "a()")
+    (org-test-with-temp-text
+        "The <point>file:a() link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa((a))")
+    (org-test-with-temp-text
+        "The <point>file:aa((a)) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa(())")
+    (org-test-with-temp-text
+        "The <point>file:aa(()) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "/a")
+    (org-test-with-temp-text
+        "The <point>file:/a link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "/a/")
+    (org-test-with-temp-text
+        "The <point>file:/a/ link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//")
+    (org-test-with-temp-text
+        "The <point>http:// link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "ab")
+    (org-test-with-temp-text
+        "The (some <point>file:ab) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa")
+    (org-test-with-temp-text
+        "The <point>file:aa) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("file" "aa")
+    (org-test-with-temp-text
+        "The <point>file:aa( link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/more_(than)_one_(parens)")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/more_(than)_one_(parens) link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/blah_(wikipedia)#cite-1")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/blah_(wikipedia)#cite-1 link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/blah_(wikipedia)_blah#cite-1")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/blah_(wikipedia)_blah#cite-1 link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/unicode_(✪)_in_parens")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/unicode_(✪)_in_parens link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser))))))
+  (should
+   (equal
+    '("http" "//foo.com/(something)?after=parens")
+    (org-test-with-temp-text
+        "The <point>http://foo.com/(something)?after=parens link"
+      (list (org-element-property :type (org-element-link-parser))
+            (org-element-property :path (org-element-link-parser)))))))
+
 
 (provide 'test-ol)
 ;;; test-ol.el ends here
-- 
2.26.2