emacs-orgmode@gnu.org archives
 help / color / mirror / code / Atom feed
From: Matt Lundin <mdl@imapmail.org>
To: Org Mode <emacs-orgmode@gnu.org>
Subject: Re: [PATCH] Re: Problems with org publish cache checking
Date: Thu, 26 Nov 2015 19:30:40 -0600	[thread overview]
Message-ID: <871tbci5z3.fsf@fastmail.fm> (raw)
In-Reply-To: <8737vtjhf2.fsf@nicolasgoaziou.fr> (Nicolas Goaziou's message of "Thu, 26 Nov 2015 09:25:53 +0100")

[-- Attachment #1: Type: text/plain, Size: 879 bytes --]

Nicolas Goaziou <mail@nicolasgoaziou.fr> writes:

> Hello,
>
> Matt Lundin <mdl@imapmail.org> writes:
>
>> OK, I've worked up a patch that solves several of these issues. The
>> basic idea is to check when publishing an org file whether it includes
>> other org files and then to store that data in the cache. That way,
>> org-publish-cache-file-needs-publishing does not need to open each
>> buffer but rather can compare the stored timestamp data against the
>> actual modified times of the included files.
>
> This is much better, indeed. Thank you.
>
> One suggestion: wouldn't it make sense to also apply the check to SETUPFILE
> keywords?

Yes, that's a great idea. I've added it to the patch.

One caveat: this patch does not implement recursive checking of included
files (i.e., included files that include other files), but this could be
added in the future.

Thanks,
Matt


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Speed-up-publishing-by-caching-included-file-data.patch --]
[-- Type: text/x-diff, Size: 4773 bytes --]

From cc66884f2836bee1203d06618828c0339ea2e4e2 Mon Sep 17 00:00:00 2001
From: Matt Lundin <mdl@imapmail.org>
Date: Thu, 26 Nov 2015 19:22:00 -0600
Subject: [PATCH] Speed up publishing by caching included file data

* lisp/ox-publish.el: (org-publish-cache-get-included-files): New function
  (org-publish-org-to): Use new function
  (org-publish-cache-file-needs-publishing): Use cache instead of
  visiting every file in a project.

Org-publish can now quickly determine a) whether an org source includes
other files (either via #+INCLUDE or #+SETUPFILE) and b) whether those
files have changed. This speeds up the publishing process and makes
tracking of changes in included files more reliable.
---
 lisp/ox-publish.el | 70 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 31 deletions(-)

diff --git a/lisp/ox-publish.el b/lisp/ox-publish.el
index 90f307c..02eb06d 100644
--- a/lisp/ox-publish.el
+++ b/lisp/ox-publish.el
@@ -574,6 +574,7 @@ Return output file name."
 	     (let ((output-file
 		    (org-export-output-file-name extension nil pub-dir))
 		   (body-p (plist-get plist :body-only)))
+	       (when org-publish-cache (org-publish-cache-get-included-files))
 	       (org-export-to-file backend output-file
 		 nil nil nil body-p
 		 ;; Add `org-publish--collect-references' and
@@ -1221,42 +1222,49 @@ If FREE-CACHE, empty the cache."
 (defun org-publish-cache-file-needs-publishing
     (filename &optional pub-dir pub-func _base-dir)
   "Check the timestamp of the last publishing of FILENAME.
-Return non-nil if the file needs publishing.  Also check if
-any included files have been more recently published, so that
-the file including them will be republished as well."
+Return non-nil if the file needs publishing.  Also use the cache
+to check if any included files have changed, so that the file
+including them will be republished."
   (unless org-publish-cache
     (error
      "`org-publish-cache-file-needs-publishing' called, but no cache present"))
-  (let* ((case-fold-search t)
-	 (key (org-publish-timestamp-filename filename pub-dir pub-func))
+  (let* ((key (org-publish-timestamp-filename filename pub-dir pub-func))
 	 (pstamp (org-publish-cache-get key))
-	 (org-inhibit-startup t)
-	 (visiting (find-buffer-visiting filename))
-	 included-files-ctime buf)
-    (when (equal (file-name-extension filename) "org")
-      (setq buf (find-file (expand-file-name filename)))
-      (with-current-buffer buf
-	(goto-char (point-min))
-	(while (re-search-forward "^[ \t]*#\\+INCLUDE:" nil t)
-	  (let* ((element (org-element-at-point))
-		 (included-file
-		  (and (eq (org-element-type element) 'keyword)
-		       (let ((value (org-element-property :value element)))
-			 (and value
-			      (string-match "^\\(\".+?\"\\|\\S-+\\)" value)
-			      ;; Ignore search suffix.
-			      (car (split-string
-				    (org-remove-double-quotes
-				     (match-string 1 value)))))))))
-	    (when included-file
-	      (push (org-publish-cache-ctime-of-src
-		     (expand-file-name included-file))
-		    included-files-ctime)))))
-      (unless visiting (kill-buffer buf)))
+	 (ctime (when pstamp (org-publish-cache-ctime-of-src filename))))
     (or (null pstamp)
-	(let ((ctime (org-publish-cache-ctime-of-src filename)))
-	  (or (< pstamp ctime)
-	      (cl-some (lambda (ct) (< ctime ct)) included-files-ctime))))))
+	(< pstamp ctime)
+	(cl-some (lambda (incl)
+		   ;; See if cached time is before modification time.
+		   (< (cdr incl)
+		      (org-publish-cache-ctime-of-src (car incl))))
+	  (org-publish-cache-get-file-property filename :includes)))))
+
+(defun org-publish-cache-get-included-files ()
+  "Get data about included files in current buffer.
+Store file names and modification times in cache. Also store data
+about setupfiles."
+  (let ((case-fold-search t)
+	included)
+    (save-excursion
+      (goto-char (point-min))
+      (while (re-search-forward "^[ \t]*#\\+\\(INCLUDE\\|SETUPFILE\\):" nil t)
+	(let* ((element (org-element-at-point))
+	       (included-file
+		(and (eq (org-element-type element) 'keyword)
+		     (let ((value (org-element-property :value element)))
+		       (and value
+			    (string-match "^\\(\".+?\"\\|\\S-+\\)" value)
+			    ;; Ignore search suffix.
+			    (car (split-string
+				  (org-remove-double-quotes
+				   (match-string 1 value)))))))))
+	  (when included-file
+	    (let ((iname (expand-file-name included-file)))
+	      (push (cons iname (org-publish-cache-ctime-of-src
+				 (expand-file-name iname)))
+		    included))))))
+    (org-publish-cache-set-file-property (buffer-file-name)
+					 :includes included)))
 
 (defun org-publish-cache-set-file-property
   (filename property value &optional project-name)
-- 
2.6.2


  reply	other threads:[~2015-11-27  1:30 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-24 15:14 Problems with org publish cache checking Matt Lundin
2015-11-25 16:56 ` Matt Lundin
2015-11-26  2:30 ` [PATCH] " Matt Lundin
2015-11-26  8:25   ` Nicolas Goaziou
2015-11-27  1:30     ` Matt Lundin [this message]
2015-11-29 16:18       ` Nicolas Goaziou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.orgmode.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=871tbci5z3.fsf@fastmail.fm \
    --to=mdl@imapmail.org \
    --cc=emacs-orgmode@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).