summaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorGravatar Andy Wingo2010-12-11 18:48:57 +0100
committerGravatar Andy Wingo2010-12-11 19:18:13 +0100
commitbed9fff79d408d2e64add14fb26347315b7b6034 (patch)
tree15182723f83f4b837aecff83f13324b1249dd6fb
parent98cac185284dea42a31ab412f15017939d7a48eb (diff)
downloadtekuti-bed9fff79d408d2e64add14fb26347315b7b6034.tar.gz
tekuti-bed9fff79d408d2e64add14fb26347315b7b6034.zip
request-form-data smarter about character encodings
* tekuti/request.scm (request-form-data): Allow the body to be a bytevector. Update to new uri interface, and allow alternate encodings.
-rw-r--r--tekuti/request.scm41
1 files changed, 29 insertions, 12 deletions
diff --git a/tekuti/request.scm b/tekuti/request.scm
index d3d040e..736111b 100644
--- a/tekuti/request.scm
+++ b/tekuti/request.scm
@@ -31,6 +31,8 @@
#:use-module (web uri)
#:use-module (web request)
#:use-module (rnrs bytevectors)
+ #:use-module (rnrs io ports)
+ #:use-module (ice-9 rdelim)
#:use-module (tekuti config)
#:use-module (tekuti base64)
#:export (request-relative-path
@@ -40,14 +42,14 @@
request-authenticated?
request-form-data))
-(define (parse-www-form-urlencoded str)
+(define* (parse-www-form-urlencoded str #:optional (charset "utf-8"))
(map
(lambda (piece)
(let ((equals (string-index piece #\=)))
(if equals
- (cons (uri-decode (substring piece 0 equals))
- (uri-decode (substring piece (1+ equals))))
- (cons (uri-decode piece) ""))))
+ (cons (uri-decode (substring piece 0 equals) #:charset charset)
+ (uri-decode (substring piece (1+ equals)) #:charset charset))
+ (cons (uri-decode piece #:charset charset) ""))))
(string-split str #\&)))
(define (request-relative-path r)
@@ -66,15 +68,30 @@
((and q (assoc param (parse-www-form-urlencoded q))) => cdr)
(else default))))
+(define (decode-string bv charset)
+ (if (string-ci=? charset "utf-8")
+ (utf8->string bv)
+ (let ((p (open-bytevector-input-port bv)))
+ (set-port-encoding! p charset)
+ (read-delimited "" p))))
+
(define (request-form-data request body)
- (if (or (not body) (string-null? body))
- '()
- (let ((content-type (request-content-type request)))
- (cond
- ((equal? content-type '("application/x-www-form-urlencoded"))
- (parse-www-form-urlencoded body))
- (else
- (error "bad content-type" content-type))))))
+ (if (bytevector? body)
+ ;; Since valid application/x-www-form-urlencoded content only has
+ ;; ascii characters, treat the incoming data as ascii (well,
+ ;; latin-1), then use the charset when percent-decoding the
+ ;; content.
+ (request-form-data request (decode-string body "iso-8859-1"))
+ (if (or (not body) (string-null? body))
+ '()
+ (let* ((content-type (request-content-type request))
+ (charset (or (assoc-ref (cdr content-type) "charset")
+ "utf-8")))
+ (cond
+ ((equal? (car content-type) "application/x-www-form-urlencoded")
+ (parse-www-form-urlencoded body charset))
+ (else
+ (error "bad content-type" content-type)))))))
;; danger here, regarding the optional alternate clauses...
(define (request-authenticated? request)