emacs-orgmode@gnu.org archives
 help / color / mirror / code / Atom feed
From: "Andreas Röhler" <andreas.roehler@easy-emacs.de>
To: emacs-orgmode <emacs-orgmode@gnu.org>
Subject: limitations of hard-coded field separator removed
Date: Thu, 23 Feb 2012 14:25:53 +0100	[thread overview]
Message-ID: <4F463E61.1060309@easy-emacs.de> (raw)

[-- Attachment #1: Type: text/plain, Size: 465 bytes --]


Attached is a patch that removes the limitations of the hard-coded
separator char(s).
`org-guess-separator' accepts and detects all chars as field separators.
It works on the assumption that the separator char appears the same
number of times in each row.

In addition, a default value,
`org-table-import-default-separator', should make
guessing faster in some cases.

Best regards,



[-- Attachment #2: org-table-import.patch --]
[-- Type: text/x-patch, Size: 7190 bytes --]

diff --git a/lisp/org-table.el b/lisp/org-table.el
index 39cddab..6ad572b 100644
--- a/lisp/org-table.el
+++ b/lisp/org-table.el
@@ -334,6 +334,11 @@ available parameters."
   :group 'org-table-import-export
   :type 'string)
+(defcustom org-table-import-default-separator "\t"
+  "`org-table-import' may specify that value, avoid guessing."
+  :group 'org-table-import-export
+  :type 'string)
 (defconst org-table-auto-recalculate-regexp "^[ \t]*| *# *\\(|\\|$\\)"
   "Detects a table line marked for automatic recalculation.")
 (defconst org-table-recalculate-regexp "^[ \t]*| *[#*] *\\(|\\|$\\)"
@@ -474,71 +479,72 @@ SIZE is a string Columns x Rows like for example \"3x2\"."
 	  (goto-char pos)))
+(defun org-guess-separator ()
+  "Guess the separator char of a given table.
+Works based on the assumption, that char looked for appears in equal numbers at each row. "
+  (interactive)
+  (save-excursion
+    (let ((orig (point))
+          char erg matches done pos first second)
+      (beginning-of-line)
+      ;; look first for `org-table-import-default-separator'
+      (when (re-search-forward org-table-import-default-separator nil t 1)
+        (setq erg (org-guess-separator-intern)))
+      (unless erg
+        (goto-char orig)
+        (when (re-search-forward "[[:punct:][:blank:]]" nil t 1)
+          (setq erg (org-guess-separator-intern))))
+      ;; maybe neither default nor of character-class punct
+      (unless erg
+        (goto-char orig)
+        (setq erg (org-guess-separator-intern)))
+      (when (interactive-p) (if (string= "\t" erg)
+                                (message "%s" "\\t")
+                              (message "%s" erg)))
+      erg)))
+(defun org-guess-separator-intern ()
+  (let (erg)
+    (while (and (not (eolp)) (not done))
+      (setq pos (point))
+      (setq char (progn (or (looking-back ".")(looking-at ".")) (match-string-no-properties 0)))
+      (setq matches (count-matches char (line-beginning-position) (line-end-position)))
+      (forward-line 1)
+      (if (eq matches (count-matches char (line-beginning-position) (line-end-position)))
+          (progn
+            (setq done t)
+            (setq erg char))
+        (goto-char pos)
+        (forward-char 1)))
+    erg))
 (defun org-table-convert-region (beg0 end0 &optional separator)
   "Convert region to a table.
-The region goes from BEG0 to END0, but these borders will be moved
-slightly, to make sure a beginning of line in the first line is included.
-SEPARATOR specifies the field separator in the lines.  It can have the
-following values:
-'(4)     Use the comma as a field separator
-'(16)    Use a TAB as field separator
-integer  When a number, use that many spaces as field separator
-nil      When nil, the command tries to be smart and figure out the
-         separator in the following way:
-         - when each line contains a TAB, assume TAB-separated material
-         - when each line contains a comma, assume CSV material
-         - else, assume one or more SPACE characters as separator."
-  (interactive "rP")
+Optional arg SEPARATOR prompts user to specify the separator char. "
+  (interactive "r\nP")
   (let* ((beg (min beg0 end0))
-	 (end (max beg0 end0))
-	 re)
+	 (end (copy-marker (max beg0 end0)))
+	 (separator (cond ((and separator (stringp separator))
+			   separator)
+			  ((eq 4 (prefix-numeric-value separator))
+			   (read-from-minibuffer "Separator char: ")))))
     (goto-char beg)
-    (beginning-of-line 1)
-    (setq beg (move-marker (make-marker) (point)))
-    (goto-char end)
-    (if (bolp) (backward-char 1) (end-of-line 1))
-    (setq end (move-marker (make-marker) (point)))
-    ;; Get the right field separator
-    (unless separator
-      (goto-char beg)
-      (setq separator
-	    (cond
-	     ((not (re-search-forward "^[^\n\t]+$" end t)) '(16))
-	     ((not (re-search-forward "^[^\n,]+$" end t)) '(4))
-	     (t 1))))
+    (unless separator (setq separator (org-guess-separator)))
     (goto-char beg)
-    (if (equal separator '(4))
-	(while (< (point) end)
-	  ;; parse the csv stuff
-	  (cond
-	   ((looking-at "^") (insert "| "))
-	   ((looking-at "[ \t]*$") (replace-match " |") (beginning-of-line 2))
-	   ((looking-at "[ \t]*\"\\([^\"\n]*\\)\"")
-	    (replace-match "\\1")
-	    (if (looking-at "\"") (insert "\"")))
-	   ((looking-at "[^,\n]+") (goto-char (match-end 0)))
-	   ((looking-at "[ \t]*,") (replace-match " | "))
-	   (t (beginning-of-line 2))))
-      (setq re (cond
-		((equal separator '(4)) "^\\|\"?[ \t]*,[ \t]*\"?")
-		((equal separator '(16)) "^\\|\t")
-		((integerp separator)
-		 (if (< separator 1)
-		     (error "Number of spaces in separator must be >= 1")
-		   (format "^ *\\| *\t *\\| \\{%d,\\}" separator)))
-		(t (error "This should not happen"))))
-      (while (re-search-forward re end t)
-	(replace-match "| " t t)))
+    (while (re-search-forward separator end t)
+      (replace-match " | " t t))
     (goto-char beg)
-(defun org-table-import (file arg)
+(defun org-table-import (file &optional arg)
   "Import FILE as a table.
-The file is assumed to be tab-separated.  Such files can be produced by most
-spreadsheet and database applications.  If no tabs (at least one per line)
-are found, lines will be split on whitespace into fields."
+Field separator is guessed by `org-guess-separator', value of `org-table-import-default-separator' is tried first.
+With optional \\[universal-argument] a prompt takes the separator, avoids guessing. "
   (interactive "f\nP")
   (or (bolp) (newline))
   (let ((beg (point))
@@ -546,7 +552,6 @@ are found, lines will be split on whitespace into fields."
     (insert-file-contents file)
     (org-table-convert-region beg (+ (point) (- (point-max) pm)) arg)))
 (defvar org-table-last-alignment)
 (defvar org-table-last-column-widths)
 (defun org-table-export (&optional file format)
@@ -1609,7 +1614,6 @@ should be done in reverse order."
     (org-table-goto-column thiscol)
     (message "%d lines sorted, based on column %d" (length lns) column)))
 (defun org-table-cut-region (beg end)
   "Copy region in table to the clipboard and blank all relevant fields.
 If there is no active region, use just the field at point."
@@ -3813,7 +3817,6 @@ Use COMMAND to do the motion, repeat if necessary to end up in a data line."
 ;; active, this binding is ignored inside tables and replaced with a
 ;; modified self-insert.
 (defvar orgtbl-mode-map (make-keymap)
   "Keymap for `orgtbl-mode'.")
@@ -3979,7 +3982,6 @@ to execute outside of tables."
       (orgtbl-make-binding 'org-table-previous-field 104
 			   [(shift tab)] [(tab)] "\C-i"))
     (unless (featurep 'xemacs)
       (org-defkey orgtbl-mode-map [S-iso-lefttab]
          (orgtbl-make-binding 'org-table-previous-field 107
@@ -4462,7 +4464,6 @@ Valid parameters are
 :sep        Separator between two fields
 :remove-nil-lines Do not include lines that evaluate to nil.
 Each in the following group may be either a string or a function
 of no arguments returning a string:
 :tstart     String to start the table.  Ignored when :splice is t.

             reply	other threads:[~2012-02-23 13:26 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-23 13:25 Andreas Röhler [this message]
2012-03-27 21:44 ` limitations of hard-coded field separator removed Bastien

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:

  List information: https://www.orgmode.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F463E61.1060309@easy-emacs.de \
    --to=andreas.roehler@easy-emacs.de \
    --cc=emacs-orgmode@gnu.org \
    /path/to/YOUR_REPLY


* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).