From mboxrd@z Thu Jan 1 00:00:00 1970 From: David Maus Subject: [PATCH 01/16] Decode single byte sequence if decoding unicode failed. Date: Sun, 13 Feb 2011 13:01:03 +0100 Message-ID: <1297598478-9925-2-git-send-email-dmaus@ictsoc.de> References: <87d3mwvqwq.fsf@gnu.org> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Return-path: Received: from [140.186.70.92] (port=43621 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Poae8-0003ew-GG for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:01:45 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Poae7-0006sb-GG for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:01:44 -0500 Received: from mail.app1.xlhost.de ([213.202.242.114]:48882 helo=mysql1.xlhost.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Poae7-0006sM-8Q for emacs-orgmode@gnu.org; Sun, 13 Feb 2011 07:01:43 -0500 In-Reply-To: <87d3mwvqwq.fsf@gnu.org> List-Id: "General discussions about Org-mode." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: emacs-orgmode-bounces+geo-emacs-orgmode=m.gmane.org@gnu.org Errors-To: emacs-orgmode-bounces+geo-emacs-orgmode=m.gmane.org@gnu.org To: emacs-orgmode@gnu.org, bastien.guerry@wikimedia.fr From: Sebastian Rose * org-protocol.el (org-protocol-unhex-single-byte-sequence): New function. Decode hex-encoded single byte sequences. (org-protocol-unhex-compound): Use new function if decoding sequence as unicode character failed. --- lisp/org-protocol.el | 26 +++++++++++++++++++++++--- 1 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lisp/org-protocol.el b/lisp/org-protocol.el index 1c501f3..33878a8 100644 --- a/lisp/org-protocol.el +++ b/lisp/org-protocol.el @@ -305,7 +305,7 @@ part." =20 (defun org-protocol-unhex-string(str) "Unhex hexified unicode strings as returned from the JavaScript functi= on -encodeURIComponent. E.g. 
`%C3%B6' is the german Umlaut `=C3=BC'." +encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `=C3=B6'." (setq str (or str "")) (let ((tmp "") (case-fold-search t)) @@ -321,7 +321,9 @@ encodeURIComponent. E.g. `%C3%B6' is the german Umlau= t `=C3=BC'." =20 =20 (defun org-protocol-unhex-compound (hex) - "Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `=C3=BC= '." + "Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `=C3=B6= '. +Note: this function also decodes single byte encodings like +`%E1' (\"=C3=A1\") if not followed by another `%[A-F0-9]{2}' group." (let* ((bytes (remove "" (split-string hex "%"))) (ret "") (eat 0) @@ -353,12 +355,30 @@ encodeURIComponent. E.g. `%C3%B6' is the german Uml= aut `=C3=BC'." (setq val (logxor val xor)) (setq sum (+ (lsh sum shift) val)) (if (> eat 0) (setq eat (- eat 1))) - (when (=3D 0 eat) + (cond + ((=3D 0 eat) ;multi byte (setq ret (concat ret (org-protocol-char-to-string sum))) (setq sum 0)) + ((not bytes) ; single byte(s) + (setq ret (org-protocol-unhex-single-byte-sequence hex)))) )) ;; end (while bytes ret )) =20 +(defun org-protocol-unhex-single-byte-sequence(hex) + "Unhexify hex-encoded single byte character sequences." + (let ((bytes (remove "" (split-string hex "%"))) + (ret "")) + (while bytes + (let* ((b (pop bytes)) + (a (elt b 0)) + (b (elt b 1)) + (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0))) + (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0)))) + (setq ret + (concat ret (char-to-string + (+ (lsh c1 4) c2)))))) + ret)) + (defun org-protocol-flatten-greedy (param-list &optional strip-path repl= acement) "Greedy handlers might receive a list like this from emacsclient: '( (\"/dir/org-protocol:/greedy:/~/path1\" (23 . 12)) (\"/dir/param\") --=20 1.7.2.3