;*=====================================================================*/
;*    serrano/prgm/project/bigloo/api/web/src/Llib/url.scm             */
;*    -------------------------------------------------------------    */
;*    Author      :  Manuel Serrano                                    */
;*    Creation    :  Sat May 28 13:32:00 2005                          */
;*    Last change :  Tue Jun 20 17:01:25 2006 (serrano)                */
;*    Copyright   :  2005-06 Manuel Serrano                            */
;*    -------------------------------------------------------------    */
;*    URL parsing                                                      */
;*=====================================================================*/

;*---------------------------------------------------------------------*/
;*    The module                                                       */
;*---------------------------------------------------------------------*/
(module __web_url

   (export (http-url-parse ::obj)
	   (url-parse ::obj)
	   (url-encode ::bstring)))

;*---------------------------------------------------------------------*/
;*    parse-error ...                                                  */
;*---------------------------------------------------------------------*/
(define (parse-error port msg obj)
   (raise (instantiate::&io-parse-error
	     (obj obj)
	     (proc 'url-parse)
	     (msg msg))))

;*---------------------------------------------------------------------*/
;*    uri-grammar ...                                                  */
;*---------------------------------------------------------------------*/
(define uri-grammar
   (regular-grammar ((CRLF "\r\n"))
      ("*"
       (values "*" #f #f #f #f))
      ((: (out #\/) (* (out #\:)) "://")
       (read/rp absolute-uri-grammar (the-port) (the-substring 0 -3) #f))
      ((: "/" (* (out " \r\n")))
       (values "*" #f #f #f (the-string)))
      (else
       (rgc-buffer-unget-char (the-port) (the-byte))
       (read/rp absolute-uri-grammar (the-port) "http" #f))))

;*---------------------------------------------------------------------*/
;*    absolute-uri-grammar ...                                         */
;*---------------------------------------------------------------------*/
(define absolute-uri-grammar
   (regular-grammar ((CRLF "\r\n")
		     (unreserved (or alpha digit #\- #\. #\_ #\~))
		     (pct-encoded (: #\% xdigit xdigit))
		     (sub-delims (in "!$&'()*+,;= "))
		     protocol
		     userinfo)
      ((: (* (or unreserved pct-encoded sub-delims #\:)) #\@)
       (set! userinfo (the-substring 0 -1))
       (ignore))
      ((: (+ (out "@:/")) ":")
       (let ((host (the-substring 0 (-fx (the-length) 1))))
	  (let* ((port (read/rp http-port-grammar (the-port)))
		 (abspath (read/rp abspath-grammar (the-port))))
	     (values protocol userinfo host port abspath))))
      ((: (+ (out "@:/")))
       (let* ((host (the-substring 0 (the-length)))
	      (port 80)
	      (abspath (read/rp abspath-grammar (the-port))))
	  (values protocol userinfo host port abspath)))
      ((: "/" (* (out ":\r\n")))
       (values protocol #f #f #f (the-string)))
      (CRLF
       #f)
      (else
       (parse-error (the-port) "Illegal character" (the-failure)))))

;*---------------------------------------------------------------------*/
;*    abspath-grammar ...                                              */
;*---------------------------------------------------------------------*/
(define abspath-grammar
   (regular-grammar ()
      ((: "/" (* (out "\r\n")))
       (the-string))
      (else
       (let ((c (the-failure)))
	  (if (eof-object? c)
	      "/"
	      (parse-error (the-port) "Illegal character" (the-failure)))))))
      
;*---------------------------------------------------------------------*/
;*    http-uri-grammar ...                                             */
;*---------------------------------------------------------------------*/
(define http-uri-grammar
   (regular-grammar ((CRLF "\r\n"))
      ("*"
       (values "*" #f #f #f #f))
      ((: (out #\/) (* (out #\:)) "://")
       (read/rp absolute-http-uri-grammar (the-port) (the-substring 0 -3) #f))
      ((: "/" (* (out " \r\n")))
       (values "*" #f #f #f (the-string)))
      (else
       (rgc-buffer-unget-char (the-port) (the-byte))
       (read/rp absolute-http-uri-grammar (the-port) "http" #f))))

;*---------------------------------------------------------------------*/
;*    absolute-http-uri-grammar ...                                    */
;*---------------------------------------------------------------------*/
(define absolute-http-uri-grammar
   (regular-grammar ((CRLF "\r\n")
		     (unreserved (or alpha digit #\- #\. #\_ #\~))
		     (pct-encoded (: #\% xdigit xdigit))
		     (sub-delims (in "!$&'()*+,;="))
		     protocol
		     userinfo)
      ((: (* (or unreserved pct-encoded sub-delims #\:)) #\@)
       (set! userinfo (the-substring 0 -1))
       (ignore))
      ((: (+ (out "@:/")) ":")
       (let ((host (the-substring 0 (-fx (the-length) 1))))
	  (let* ((port (read/rp http-port-grammar (the-port)))
		 (abspath (read/rp http-abspath-grammar (the-port))))
	     (values protocol userinfo host port abspath))))
      ((: (+ (out "@:/")))
       (let* ((host (the-substring 0 (the-length)))
	      (port 80)
	      (abspath (read/rp http-abspath-grammar (the-port))))
	  (values protocol userinfo host port abspath)))
      ((: "/" (* (out ": \r\n")))
       (values protocol #f #f #f (the-string)))
      (CRLF
       (values protocol #f #f #f (the-string)))
      (else
       (parse-error (the-port) "Illegal character" (the-failure)))))

;*---------------------------------------------------------------------*/
;*    http-abspath-grammar ...                                         */
;*---------------------------------------------------------------------*/
(define http-abspath-grammar
   (regular-grammar ()
      ((: "/" (* (out " \r\n")))
       (the-string))
      (else
       (parse-error (the-port) "Illegal character" (the-failure)))))
      
;*---------------------------------------------------------------------*/
;*    http-port-grammar ...                                            */
;*---------------------------------------------------------------------*/
(define http-port-grammar
   (regular-grammar ()
      ((+ digit)
       (the-fixnum))
      (else
       (parse-error (the-port) "Illegal character" (the-failure)))))
      
;*---------------------------------------------------------------------*/
;*    url-parse ...                                                    */
;*---------------------------------------------------------------------*/
(define (url-parse url)
   (cond
      ((input-port? url)
       (read/rp uri-grammar url))
      ((string? url)
       (let ((p (open-input-string url)))
	  (unwind-protect
	     (read/rp uri-grammar p)
	     (close-input-port p))))))

;*---------------------------------------------------------------------*/
;*    http-url-parse ...                                               */
;*---------------------------------------------------------------------*/
(define (http-url-parse url)
   (cond
      ((input-port? url)
       (read/rp http-uri-grammar url))
      ((string? url)
       (let ((p (open-input-string url)))
	  (unwind-protect
	     (read/rp http-uri-grammar p)
	     (close-input-port p))))))

;*---------------------------------------------------------------------*/
;*    url-encode ...                                                   */
;*---------------------------------------------------------------------*/
(define (url-encode str)
   (define (count str ol)
      (let loop ((i 0)
		 (n 0))
	 (if (=fx i ol)
	     n
	     (let ((c (string-ref str i)))
		(case c
		   ((#\# #\Space #\" #\' #\+ #\& #\= #\% #\? #\:)
		    (loop (+fx i 1) (+fx n 3)))
		   (else
		    (if (or (char>=? c #a128) (char<? c #a032))
			(loop (+fx i 1) (+fx n 3))
			(loop (+fx i 1) (+fx n 1)))))))))
   (define (int->char c)
      (cond
	 ((<fx c 10)
	  (integer->char (+fx c (char->integer #\0))))
	 ((<fx c 16)
	  (integer->char (+fx (-fx c 10) (char->integer #\A))))))
   (define (encode-char res j c)
      (let ((n (char->integer c)))
	 (string-set! res j #\%)
	 (cond
	    ((<fx n 16)
	     (string-set! res (+fx j 1) #\0)
	     (string-set! res (+fx j 2) (int->char n)))
	    (else
	     (let ((n1 (/fx n 16))
		   (n2 (remainder n 16)))
		(string-set! res (+fx j 1) (int->char n1))
		(string-set! res (+fx j 2) (int->char n2)))))))
   (define (encode str ol nl)
      (if (=fx nl ol)
	  str
	  (let ((res (make-string nl)))
	     (let loop ((i 0)
			(j 0))
		(if (=fx j nl)
		    res
		    (let ((c (string-ref str i)))
		       (case c
			  ((#\# #\Space #\" #\' #\+ #\& #\= #\% #\? #\:)
			   (encode-char res j c)
			   (loop (+fx i 1) (+fx j 3)))
			  (else
			   (if (or (char>=? c #a128) (char<? c #a032))
			       (begin
				  (encode-char res j c)
				  (loop (+fx i 1) (+fx j 3)))
			       (begin
				  (string-set! res j c)
				  (loop (+fx i 1) (+fx j 1))))))))))))
   (let ((ol (string-length str)))
      (encode str ol (count str ol))))
