From mboxrd@z Thu Jan 1 00:00:00 1970 From: David Maus Subject: [PATCH 13/16] Refactor unescaping functions Date: Sun, 13 Feb 2011 13:01:15 +0100 Message-ID: <1297598478-9925-14-git-send-email-dmaus@ictsoc.de> References: <87d3mwvqwq.fsf@gnu.org> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Return-path: Received: from [140.186.70.92] (port=57579 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Poaef-00042W-Sa for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:02:21 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Poaee-0007FI-8t for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:02:18 -0500 Received: from mail.app1.xlhost.de ([213.202.242.114]:48951 helo=mysql1.xlhost.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Poaed-0007Ep-RN for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:02:16 -0500 In-Reply-To: <87d3mwvqwq.fsf@gnu.org> List-Id: "General discussions about Org-mode." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: emacs-orgmode-bounces+geo-emacs-orgmode=m.gmane.org@gnu.org Errors-To: emacs-orgmode-bounces+geo-emacs-orgmode=m.gmane.org@gnu.org To: emacs-orgmode@gnu.org, bastien.guerry@wikimedia.fr Cc: David Maus * org.el (org-link-unescape): Simpler algorithm for replacing percent escapes. (org-link-unescape-compound): Use cond statements instead of nested if, convert hex string with string-to-number, save match data. (org-link-unescape-single-byte-sequence): Use mapconcat and string-to-number for unescaping single byte sequence. 
---
Reviewer note: the guard in org-link-unescape is written with `or', so
a nil or empty STR is returned untouched instead of being handed to
string-match (the earlier `and' form could never be true).  Leading
whitespace in the hunk below was reconstructed with spaces, and the
blob ids on the index line predate this correction; use
"git apply --ignore-whitespace" if the hunk does not apply cleanly.

 lisp/org.el |  102 ++++++++++++++++++++++-------------------------------=
-----
 1 files changed, 39 insertions(+), 63 deletions(-)

diff --git a/lisp/org.el b/lisp/org.el
index fcd421f..f35f898 100644
--- a/lisp/org.el
+++ b/lisp/org.el
@@ -8584,77 +8584,53 @@ If optional argument MERGE is set, merge TABLE in=
to
 (defun org-link-unescape (str)
   "Unhex hexified unicode strings as returned from the JavaScript functi=
on
 encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `=C3=B6'."
-  (setq str (or str ""))
-  (let ((tmp "")
-        (case-fold-search t))
-    (while (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str)
-      (let* ((start (match-beginning 0))
-             (end (match-end 0))
-             (hex (match-string 0 str))
-             (replacement (org-link-unescape-compound (upcase hex))))
-        (setq tmp (concat tmp (substring str 0 start) replacement))
-        (setq str (substring str end))))
-    (setq tmp (concat tmp str))
-    tmp))
+  (unless (or (null str) (string=3D "" str))
+    (let ((pos 0) (case-fold-search t) unhexed)
+      (while (setq pos (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str pos)=
)
+        (setq unhexed (org-link-unescape-compound (match-string 0 str)))
+        (setq str (replace-match unhexed t t str))
+        (setq pos (+ pos (length unhexed))))))
+  str)
=20
 (defun org-link-unescape-compound (hex)
   "Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `=C3=B6=
'.
 Note: this function also decodes single byte encodings like
 `%E1' (\"=C3=A1\") if not followed by another `%[A-F0-9]{2}' group."
-  (let* ((bytes (remove "" (split-string hex "%")))
-         (ret "")
-         (eat 0)
-         (sum 0))
-    (while bytes
-      (let* ((b (pop bytes))
-             (a (elt b 0))
-             (b (elt b 1))
-             (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
-             (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0)))
-             (val (+ (lsh c1 4) c2))
-             (shift
-              (if (=3D 0 eat) ;; new byte
-                  (if (>=3D val 252) 6
-                    (if (>=3D val 248) 5
-                      (if (>=3D val 240) 4
-                        (if (>=3D val 224) 3
-                          (if (>=3D val 192) 2 0)))))
-                6))
-             (xor
-              (if (=3D 0 eat) ;; new byte
-                  (if (>=3D val 252) 252
-                    (if (>=3D val 248) 248
-                      (if (>=3D val 240) 240
-                        (if (>=3D val 224) 224
-                          (if (>=3D val 192) 192 0)))))
-                128)))
-        (if (>=3D val 192) (setq eat shift))
-        (setq val (logxor val xor))
-        (setq sum (+ (lsh sum shift) val))
-        (if (> eat 0) (setq eat (- eat 1)))
-        (cond
-         ((=3D 0 eat) ;multi byte
-          (setq ret (concat ret (org-char-to-string sum)))
-          (setq sum 0))
-         ((not bytes) ; single byte(s)
-          (setq ret (org-link-unescape-single-byte-sequence hex))))
-        )) ;; end (while bytes
-    ret ))
+  (save-match-data
+    (let* ((bytes (cdr (split-string hex "%")))
+           (ret "")
+           (eat 0)
+           (sum 0))
+      (while bytes
+        (let* ((val (string-to-number (pop bytes) 16))
+               (shift-xor
+                (if (=3D 0 eat)
+                    (cond
+                     ((>=3D val 252) (cons 6 252))
+                     ((>=3D val 248) (cons 5 248))
+                     ((>=3D val 240) (cons 4 240))
+                     ((>=3D val 224) (cons 3 224))
+                     ((>=3D val 192) (cons 2 192))
+                     (t (cons 0 0)))
+                  (cons 6 128))))
+          (if (>=3D val 192) (setq eat (car shift-xor)))
+          (setq val (logxor val (cdr shift-xor)))
+          (setq sum (+ (lsh sum (car shift-xor)) val))
+          (if (> eat 0) (setq eat (- eat 1)))
+          (cond
+           ((=3D 0 eat) ;multi byte
+            (setq ret (concat ret (org-char-to-string sum)))
+            (setq sum 0))
+           ((not bytes) ; single byte(s)
+            (setq ret (org-link-unescape-single-byte-sequence hex))))
+          )) ;; end (while bytes
+      ret )))
=20
 (defun org-link-unescape-single-byte-sequence (hex)
   "Unhexify hex-encoded single byte character sequences."
-  (let ((bytes (remove "" (split-string hex "%")))
-        (ret ""))
-    (while bytes
-      (let* ((b (pop bytes))
-             (a (elt b 0))
-             (b (elt b 1))
-             (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
-             (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0))))
-        (setq ret
-              (concat ret (char-to-string
-                           (+ (lsh c1 4) c2))))))
-    ret))
+  (mapconcat (lambda (byte)
+               (char-to-string (string-to-number byte 16)))
+             (cdr (split-string hex "%")) ""))
=20
 (defun org-xor (a b)
   "Exclusive or."
--=20
1.7.2.3