1
0
Fork 0

request-form-data smarter about character encodings

* tekuti/request.scm (request-form-data): Allow the body to be a
  bytevector. Update to new uri interface, and allow alternate
  encodings.
This commit is contained in:
Andy Wingo 2010-12-11 18:48:57 +01:00
parent 98cac18528
commit bed9fff79d

View file

@ -31,6 +31,8 @@
#:use-module (web uri) #:use-module (web uri)
#:use-module (web request) #:use-module (web request)
#:use-module (rnrs bytevectors) #:use-module (rnrs bytevectors)
#:use-module (rnrs io ports)
#:use-module (ice-9 rdelim)
#:use-module (tekuti config) #:use-module (tekuti config)
#:use-module (tekuti base64) #:use-module (tekuti base64)
#:export (request-relative-path #:export (request-relative-path
@ -40,14 +42,14 @@
request-authenticated? request-authenticated?
request-form-data)) request-form-data))
(define (parse-www-form-urlencoded str) (define* (parse-www-form-urlencoded str #:optional (charset "utf-8"))
(map (map
(lambda (piece) (lambda (piece)
(let ((equals (string-index piece #\=))) (let ((equals (string-index piece #\=)))
(if equals (if equals
(cons (uri-decode (substring piece 0 equals)) (cons (uri-decode (substring piece 0 equals) #:charset charset)
(uri-decode (substring piece (1+ equals)))) (uri-decode (substring piece (1+ equals)) #:charset charset))
(cons (uri-decode piece) "")))) (cons (uri-decode piece #:charset charset) ""))))
(string-split str #\&))) (string-split str #\&)))
(define (request-relative-path r) (define (request-relative-path r)
@ -66,15 +68,30 @@
((and q (assoc param (parse-www-form-urlencoded q))) => cdr) ((and q (assoc param (parse-www-form-urlencoded q))) => cdr)
(else default)))) (else default))))
(define (decode-string bv charset)
(if (string-ci=? charset "utf-8")
(utf8->string bv)
(let ((p (open-bytevector-input-port bv)))
(set-port-encoding! p charset)
(read-delimited "" p))))
(define (request-form-data request body) (define (request-form-data request body)
(if (or (not body) (string-null? body)) (if (bytevector? body)
'() ;; Since valid application/x-www-form-urlencoded content only has
(let ((content-type (request-content-type request))) ;; ascii characters, treat the incoming data as ascii (well,
(cond ;; latin-1), then use the charset when percent-decoding the
((equal? content-type '("application/x-www-form-urlencoded")) ;; content.
(parse-www-form-urlencoded body)) (request-form-data request (decode-string body "iso-8859-1"))
(else (if (or (not body) (string-null? body))
(error "bad content-type" content-type)))))) '()
(let* ((content-type (request-content-type request))
(charset (or (assoc-ref (cdr content-type) "charset")
"utf-8")))
(cond
((equal? (car content-type) "application/x-www-form-urlencoded")
(parse-www-form-urlencoded body charset))
(else
(error "bad content-type" content-type)))))))
;; danger here, regarding the optional alternate clauses... ;; danger here, regarding the optional alternate clauses...
(define (request-authenticated? request) (define (request-authenticated? request)