From mboxrd@z Thu Jan 1 00:00:00 1970 From: David Maus Subject: [PATCH 07/16] Unescape functions moved and renamed from org-protocol.el Date: Sun, 13 Feb 2011 13:01:09 +0100 Message-ID: <1297598478-9925-8-git-send-email-dmaus@ictsoc.de> References: <87d3mwvqwq.fsf@gnu.org> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Return-path: Received: from [140.186.70.92] (port=57289 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PoaeU-0003u0-N0 for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:02:08 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PoaeN-00074R-Sp for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:02:01 -0500 Received: from mail.app1.xlhost.de ([213.202.242.114]:48917 helo=mysql1.xlhost.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PoaeN-00073s-GM for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:01:59 -0500 In-Reply-To: <87d3mwvqwq.fsf@gnu.org> List-Id: "General discussions about Org-mode." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: emacs-orgmode-bounces+geo-emacs-orgmode=m.gmane.org@gnu.org Errors-To: emacs-orgmode-bounces+geo-emacs-orgmode=m.gmane.org@gnu.org To: emacs-orgmode@gnu.org, bastien.guerry@wikimedia.fr Cc: David Maus * org.el (org-link-unescape, org-link-unescape-compound) (org-link-unescape-single-byte-sequence): Functions moved and renamed from org-protocol.el. --- lisp/org.el | 90 ++++++++++++++++++++++++++++++++++++++++++++++++-----= ----- 1 files changed, 74 insertions(+), 16 deletions(-) diff --git a/lisp/org.el b/lisp/org.el index a29d429..602462d 100644 --- a/lisp/org.el +++ b/lisp/org.el @@ -8579,22 +8579,80 @@ If optional argument MERGE is set, merge TABLE in= to (encode-coding-char char 'utf-8) "") (char-to-string char))) text ""))) =20 -(defun org-link-unescape (text &optional table) - "Reverse the action of `org-link-escape'." - (if (and org-url-encoding-use-url-hexify (not table)) - (url-unhex-string text) - (setq table (or table org-link-escape-chars)) - (when text - (let ((case-fold-search t) - (re (mapconcat (lambda (x) (regexp-quote (downcase (cdr x)))) - table "\\|"))) - (while (string-match re text) - (setq text - (replace-match - (char-to-string (car (rassoc (upcase (match-string 0 text)) - table))) - t t text))) - text)))) +(defun org-link-unescape (str) + "Unhex hexified unicode strings as returned from the JavaScript functi= on +encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `=C3=B6'." + (setq str (or str "")) + (let ((tmp "") + (case-fold-search t)) + (while (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str) + (let* ((start (match-beginning 0)) + (end (match-end 0)) + (hex (match-string 0 str)) + (replacement (org-link-unescape-compound (upcase hex)))) + (setq tmp (concat tmp (substring str 0 start) replacement)) + (setq str (substring str end)))) + (setq tmp (concat tmp str)) + tmp)) + +(defun org-link-unescape-compound (hex) + "Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `=C3=B6= '. +Note: this function also decodes single byte encodings like +`%E1' (\"=C3=A1\") if not followed by another `%[A-F0-9]{2}' group." + (let* ((bytes (remove "" (split-string hex "%"))) + (ret "") + (eat 0) + (sum 0)) + (while bytes + (let* ((b (pop bytes)) + (a (elt b 0)) + (b (elt b 1)) + (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0))) + (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0))) + (val (+ (lsh c1 4) c2)) + (shift + (if (=3D 0 eat) ;; new byte + (if (>=3D val 252) 6 + (if (>=3D val 248) 5 + (if (>=3D val 240) 4 + (if (>=3D val 224) 3 + (if (>=3D val 192) 2 0))))) + 6)) + (xor + (if (=3D 0 eat) ;; new byte + (if (>=3D val 252) 252 + (if (>=3D val 248) 248 + (if (>=3D val 240) 240 + (if (>=3D val 224) 224 + (if (>=3D val 192) 192 0))))) + 128))) + (if (>=3D val 192) (setq eat shift)) + (setq val (logxor val xor)) + (setq sum (+ (lsh sum shift) val)) + (if (> eat 0) (setq eat (- eat 1))) + (cond + ((=3D 0 eat) ;multi byte + (setq ret (concat ret (org-char-to-string sum))) + (setq sum 0)) + ((not bytes) ; single byte(s) + (setq ret (org-link-unescape-single-byte-sequence hex)))) + )) ;; end (while bytes + ret )) + +(defun org-link-unescape-single-byte-sequence (hex) + "Unhexify hex-encoded single byte character sequences." + (let ((bytes (remove "" (split-string hex "%"))) + (ret "")) + (while bytes + (let* ((b (pop bytes)) + (a (elt b 0)) + (b (elt b 1)) + (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0))) + (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0)))) + (setq ret + (concat ret (char-to-string + (+ (lsh c1 4) c2)))))) + ret)) =20 (defun org-xor (a b) "Exclusive or." --=20 1.7.2.3