;*=====================================================================*/
;*    serrano/prgm/project/bigloo/runtime/Read/reader.scm              */
;*    -------------------------------------------------------------    */
;*    Author      :  Manuel Serrano                                    */
;*    Creation    :  Tue Dec 27 11:16:00 1994                          */
;*    Last change :  Fri Apr 29 18:19:27 2005 (serrano)                */
;*    -------------------------------------------------------------    */
;*    Bigloo's reader                                                  */
;*=====================================================================*/

;*---------------------------------------------------------------------*/
;*    The module                                                       */
;*---------------------------------------------------------------------*/
(module __reader
   
   (import  __error
	    __rgc
	    __param)
   
   (use     __type
	    __bigloo
	    __param
	    __structure
	    __tvector
	    __dsssl
	    __ucs2
	    __unicode
	    __bexit
	    __binary
	    __object
	    
	    __r4_numbers_6_5_fixnum
	    __r4_numbers_6_5_flonum
	    __r4_numbers_6_5
	    __r4_characters_6_6
	    __r4_equivalence_6_2
	    __r4_booleans_6_1
	    __r4_symbols_6_4
	    __r4_strings_6_7
	    __r4_pairs_and_lists_6_3
	    __r4_control_features_6_9
	    __r4_vectors_6_8
	    __r4_ports_6_10_1
	    __r4_input_6_10_2
	    __r4_output_6_10_3
	    
	    __evenv)
   
   (extern (macro unspec::obj "BUNSPEC")
	   (macro boptional::obj "BOPTIONAL")
	   (macro brest::obj "BREST")
	   (macro bkey::obj "BKEY")
	   (macro make-cnst::obj (::long) "BCNST"))
   
   (java   (class foreign
	      (field static unspec::obj "BUNSPEC")
	      (field static boptional::obj "BOPTIONAL")
	      (field static brest::obj "BREST")
	      (field static bkey::obj "BKEY")
	      (method static make-cnst::obj (::long) "BCNST")))
	   
   (export  *bigloo-interpreter*
	    (bigloo-case-sensitivity::symbol)
	    (bigloo-case-sensitivity-set! ::symbol)
	    (read . port)
	    (bigloo-regular-grammar)
	    (read/case case . port)
	    (read-case-sensitive . port)
	    (read-case-insensitive . port)
	    (reader-reset!)))

;*---------------------------------------------------------------------*/
;*    bigloo-case-sensitivity ...                                      */
;*---------------------------------------------------------------------*/
(define (bigloo-case-sensitivity)
   (bigloo-case-sensitive))

;*---------------------------------------------------------------------*/
;*    bigloo-case-sensitivity-set! ...                                 */
;*---------------------------------------------------------------------*/
(define (bigloo-case-sensitivity-set! val)
   (bigloo-case-sensitive-set! val))

;*---------------------------------------------------------------------*/
;*    Control marks ...                                                */
;*---------------------------------------------------------------------*/
(define *end-of-list* (cons 0 0))
(define *dotted-mark* (cons 1 1))

;*---------------------------------------------------------------------*/
;*    *bigloo-interpreter* ...                                         */
;*---------------------------------------------------------------------*/
(define *bigloo-interpreter* #f)

;*---------------------------------------------------------------------*/
;*    reader-reset! ...                                                */
;*---------------------------------------------------------------------*/
(define (reader-reset!)
   #f)

;*---------------------------------------------------------------------*/
;*    read-error ...                                                   */
;*---------------------------------------------------------------------*/
(define (read-error msg obj port)
   (let (fname loc)
      (if (epair? obj)
	  (match-case (cer obj)
	     ((at ?fn ?pos)
	      (set! fname fn)
	      (set! loc pos))
	     (else
	      (set! fname (input-port-name port))
	      (set! loc (input-port-position port))))
	  (begin
	     (set! fname (input-port-name port))
	     (set! loc (input-port-position port))))
      (raise (instantiate::&io-read-error
		(proc 'read)
		(msg msg)
		(obj obj)
		(fname fname)
		(location loc)))))

;*---------------------------------------------------------------------*/
;*    read-error/loc ...                                               */
;*---------------------------------------------------------------------*/
(define (read-error/loc loc msg obj port)
   (let ((fname (if (epair? obj)
		    (match-case (cer obj)
		       ((at ?fname ?pos)
			fname)
		       (else
			(input-port-name port)))
		    (input-port-name port))))
      (raise (instantiate::&io-read-error
		(proc 'read)
		(msg msg)
		(obj obj)
		(fname fname)
		(location loc)))))

;*---------------------------------------------------------------------*/
;*    unreference! ...                                                 */
;*---------------------------------------------------------------------*/
(define (unreference! obj port cycles)
   (let loop ((obj obj))
      (cond
	 ((procedure? obj)
	  (let* ((no   (obj))
		 (cell (assq no cycles)))
	     (if (not (pair? cell))
		 (read-error "no target for graph reference" no port)
		 (cdr cell))))
	 ((pair? obj)
	  (set-car! obj (loop (car obj)))
	  (set-cdr! obj (loop (cdr obj)))
	  obj)
	 ((vector? obj)
	  (let ((len (vector-length obj)))
	     (let laap ((i 0))
		(if (<fx i len)
		    (begin
		       (vector-set! obj i (loop (vector-ref obj i)))
		       (laap (+fx i 1)))
		    obj))))
	 ((struct? obj)
	  (let ((len (vector-length obj)))
	     (let laap ((i 0))
		(if (<fx i len)
		    (begin
		       (struct-set! obj i (loop (struct-ref obj i)))
		       (laap (+fx i 1)))
		    obj))))
	 (else
	  obj))))
   
;*---------------------------------------------------------------------*/
;*    make-list! ...                                                   */
;*---------------------------------------------------------------------*/
(define (make-list! l port)
   (define (reverse-proper-list! l)
      (let nr ((l l)
	       (r '()))
	 (cond
	    ((eq? (car l) *dotted-mark*)
	     (read-error "Illegal pair" r port))
	    ((null? (cdr l))
	     (set-cdr! l r)
	     l)
	    (else
	     (let ((cdrl (cdr l)))
		(nr cdrl
		    (begin (set-cdr! l r)
			   l)))))))
   (define (reverse-improper-list! l)
      (let nr ((l (cddr l))
	       (r (car l)))
	 (cond
	    ((eq? (car l) *dotted-mark*)
	     (read-error "Illegal pair" r port))
	    ((null? (cdr l))
	     (set-cdr! l r)
	     l)
	    (else
	     (let ((cdrl (cdr l)))
		(nr cdrl
		    (begin (set-cdr! l r)
			   l)))))))
   (cond
      ((null? l)
       l)
      ((and (pair? l) (pair? (cdr l)) (eq? (cadr l) *dotted-mark*))
       (if (null? (cddr l))
	   (car l)
	   (reverse-improper-list! l)))
      (else
       (reverse-proper-list! l)))) 
	   
;*---------------------------------------------------------------------*/
;*    collect-up-to ...                                                */
;*---------------------------------------------------------------------*/
(define-inline (collect-up-to ignore kind port posp)
   ;; move one character backward for the open-parenthesis
   (let* ((name (input-port-name port))
	  (po (-fx (input-port-position port) 1))
	  (item (ignore)))
      (if (eq? item *end-of-list*)
	  '()
	  (let loop ((acc (if posp
			      (econs item '() (list 'at name po))
			      (cons item '()))))
	     (let ((item (ignore)))
		(if (eq? item *end-of-list*)
		    acc
		    (loop (if posp
			      (let ((po (input-port-last-token-position port)))
				 (econs item acc (list 'at name po)))
			      (cons item acc)))))))))

;*---------------------------------------------------------------------*/
;*    read-quote ...                                                   */
;*---------------------------------------------------------------------*/
(define (read-quote kwote port ignore posp)
   (if posp
       (let* ((pos (input-port-position port))
	      (obj (ignore)))
	  (if (or (eof-object? obj) (eq? obj *end-of-list*))
	      (read-error/loc pos "Illegal quotation" kwote port)
	      (econs kwote
		     (cons obj '())
		     (list 'at (input-port-name port) pos))))
       (let ((obj (ignore)))
	  (if (or (eof-object? obj) (eq? obj *end-of-list*))
	      (read-error "Illegal quotation" kwote port)
	      (cons kwote (cons obj '()))))))

;*---------------------------------------------------------------------*/
;*    read-multi-line-comment ...                                      */
;*---------------------------------------------------------------------*/
(define (read-multi-line-comment port)
   (let ((g (regular-grammar ()
	       ("#|"
		(read-multi-line-comment (the-port))
		(ignore))
	       ((+ (or (out #\# #\|) (: #\# (out #\|)) (: #\| (out #\#))))
		(ignore))
	       ("|#"
		#unspecified)
	       (else
		(let ((c (the-failure)))
		   (if (eof-object? c)
		       (read-error "EOF inside block comment -- #| missing a closing |#"
				   c
				   (the-port))))))))
      (read/rp g port)))

;*---------------------------------------------------------------------*/
;*    *sharp-grammar* ...                                              */
;*---------------------------------------------------------------------*/
(define *sharp-grammar*
   (regular-grammar ()
      
      ((: "a" (= 3 digit))
       (let ((string (the-string)))
	  (if (not (=fx (the-length) 4))
	      (read-error "Illegal ascii character" string (the-port))
	      (integer->char (string->integer (the-substring 1 4))))))
      
      ;; ucs-2 characters
      ((: "u" (= 4 xdigit))
       (integer->ucs2 (string->integer (the-substring 1 5) 16)))
      
      ;; foreign strings of char
      ((: "\"" (* (or (out #a000 #\\ #\") (: #\\ all))) "\"")
       (let ((str (the-substring 0 (-fx (the-length) 1))))
	  (escape-C-string str)))
      
      ;; ucs2 strings
      ((: "u\"" (* (or (out #a000 #\\ #\") (: #\\ all))) "\"")
       (let ((str (the-substring 2 (-fx (the-length) 1))))
  	  (utf8-string->ucs2-string str)))
      
      ;; fixnums
      ((: "o" (? (in "-+")) (+ (in ("07"))))
       (string->integer (the-substring 1 (the-length)) 8))
      ((: "d" (? (in "-+")) (+ (in ("09"))))
       (string->integer (the-substring 1 (the-length)) 10))
      ((: "e" (? (in "-+")) (+ digit))
       (string->elong (the-substring 1 (the-length)) 10))
      ((: "ex" (+ xdigit))
       (string->elong (the-substring 2 (the-length)) 16))
      ((: "l" (? (in "-+")) (+ digit))
       (string->llong (the-substring 1 (the-length)) 10))
      ((: "lx" (+ xdigit))
       (string->llong (the-substring 2 (the-length)) 16))
      
      ;; unspecified and eof-object
      ((: (in "ue") (+ (in "nspecified-objt")))
       (let ((symbol (string->symbol (string-upcase! (the-string)))))
	  (cond
	     ((eq? symbol 'UNSPECIFIED)
	      unspec)
	     ((eq? symbol 'EOF-OBJECT)
	      beof)
	     (else
	      (read-error "Illegal identifier" symbol (the-port))))))
      
      ;; constants
      ((: "<" (+ (or digit (uncase (in "afAF")))) ">")
       (if (not (=fx (the-length) 6))
	   (read-error "Illegal constant" (the-string) (the-port))
	   (make-cnst (string->integer (the-substring 1 5) 16))))
      
      (else
       (let ((c (the-failure)))
	  (if (char? c)
	      (read-error "Illegal token" (string #\# c) (the-port))
	      (read-error "Illegal char" c (the-port)))))))

;*---------------------------------------------------------------------*/
;*    *bigloo-grammar* ...                                             */
;*---------------------------------------------------------------------*/
(define *bigloo-grammar*
   (regular-grammar ((float    (or (: (* digit) "." (+ digit))
				   (: (+ digit) "." (* digit))))
		     (letter   (in ("azAZ") (#a128 #a255)))
		     (special  (in "!@~$%^&*></-_+\\=?.:"))
		     (kspecial (in "!@~$%^&*></-_+\\=?."))
		     (quote    (in "\",'`"))
		     (paren    (in "()[]{}"))
		     (id       (: (* digit)
				  (or letter special)
				  (* (or letter special digit (in ",'`")))))
		     (kid      (: (* digit)
				  (or letter kspecial)
				  (* (or letter kspecial digit (in ",'`")))))
		     (blank    (in #\Space #\Tab #a012 #a013))
		     
		     posp cycles par-open bra-open par-poses bra-poses)
      
      ;; newlines
      ((+ #\Newline)
       (ignore))
      
      ;; blank lines
      ((+ blank)
       (ignore))
      
      ;; comments
      ((: ";" (* all))
       (ignore))
      
      ;; multi-line comment (SRFI-30)
      ("#|"
       (read-multi-line-comment (the-port))
       (ignore))
      
      ;; #; expression comments
      ("#;"
	 (begin
	    (ignore)
	    (ignore)))
      
      ;; srfi-22 support
      ((bol (: "#!" #\space (in digit letter special "|,'`") (* all)))
       (ignore))
      
      ;; the interpreter header or the dsssl named constants
      ((: "#!" (+ (in digit letter special "|,'`")))
       (let* ((str (the-string)))
	  (cond
	     ((string=? str "#!optional")
	      boptional)
	     ((string=? str "#!rest")
	      brest)
	     ((string=? str "#!key")
	      bkey)
	     (else
	      (set! *bigloo-interpreter* #t)
	      (ignore)))))
      
      ;; characters
      ((: "#\\" (or letter digit special (in "|#; " quote paren)))
       (string-ref (the-string) 2))
      ((: "#\\" (>= 2 letter))
       (let ((char-name (string->symbol
			 (string-upcase!
			  (the-substring 2 (the-length))))))
	  (cond
	     ((eq? char-name 'NEWLINE)
	      #\Newline)
	     ((eq? char-name 'TAB)
	      #\tab)
	     ((eq? char-name 'SPACE)
	      #\space)
	     ((eq? char-name 'RETURN)
	      (integer->char 13))
	     (else
	      (read-error "Illegal character" (the-string) (the-port))))))
      
      ;; strings with newline in them in addition to compute
      ;; the string, we have to count the number of newline
      ;; in order to increment the line-num variable strings
      ((: "\"" (* (or (out #a000 #\\ #\") (: #\\ all))) "\"")
       (let ((str (the-substring 1 (-fx (the-length) 1))))
	  (if (bigloo-strict-r5rs-strings)
	      (let ((str (the-substring 1 (-fx (the-length) 1))))
		 (escape-scheme-string str))
	      (let ((str (the-substring 0 (-fx (the-length) 1))))
		 (escape-C-string str)))))
      
      ;; fixnums
      ((: (? "+") (+ digit))
       (the-integer))
      ((: "-" (+ digit))
       (the-integer))
      ((: "#x" (? (in "-+")) (+ (in (uncase (in ("09af"))))))
       (string->integer (the-substring 2 (the-length)) 16))
      
      ;; flonum
      ((: (? (in "-+"))
	  (or float
	      (: (or float (+ digit)) (in "eE") (? (in "+-")) (+ digit))))
       (the-flonum))
      
      ;; doted pairs
      ("."
       (if (<=fx par-open 0)
	   (read-error "Illegal token" #\. (the-port))
	   *dotted-mark*))
      
      ;; booleans
      ((: "#" (uncase #\t))
       #t)
      ((: "#" (uncase #\f))
       #f)
      
      ;; keywords
      ((or (: ":" kid) (: kid ":"))
       ;; since the keyword expression is also matched by the id
       ;; rule, keyword rule has to be placed before the id rule.
       (the-keyword))
      
      ;; identifiers
      (id
       ;; this rule has to be placed after the rule matching the `.' char
       (the-symbol))
      ((: "|" (* (or (out #a000 #\\ #\|) (: #\\ all))) "|")
       (if (=fx (the-length) 2)
	   (string->symbol "")
	   (let ((str (the-substring 0 (-fx (the-length) 1))))
	      (string->symbol (escape-C-string str)))))
      
      ;; quotations 
      ("'"
       (read-quote 'quote (the-port) ignore posp))
      ("`"
       (read-quote 'quasiquote (the-port) ignore posp))
      (","
       (read-quote 'unquote (the-port) ignore posp))
      (",@"
       (read-quote 'unquote-splicing (the-port) ignore posp))
      
      ;; lists
      ((in "([")
       ;; we increment the number of open parenthesis
       (set! par-open (+fx 1 par-open))
       (set! par-poses (cons (-fx (input-port-position (the-port)) 1)
			     par-poses))
       ;; and then, we compute the result list...
       (make-list! (collect-up-to ignore "list" (the-port) posp) (the-port)))
      ((in ")]")
       ;; we decrement the number of open parenthesis
       (set! par-open (-fx par-open 1))
       (if (<fx par-open 0)
	   (begin
	      (warning/location (input-port-name (the-port))
				(input-port-last-token-position (the-port))
				'read
				"Superfluous closing parenthesis `"
				(the-string)
				"'")
	      (set! par-open 0)
	      (ignore))
	   (begin
	      (set! par-poses (cdr par-poses))
	      *end-of-list*)))
      
      ;; vectors
      ("#("
       ;; we increment the number of open parenthesis
       (set! par-open (+fx 1 par-open))
       (set! par-poses (cons (-fx (input-port-position (the-port)) 1)
			     par-poses))
       (list->vector (reverse! (collect-up-to ignore "vector" (the-port) posp))))

      ;; tvectors
      ((: "#" (: letter (* id)) "(")
       ;; we increment the number of open parenthesis
       (set! par-open (+fx 1 par-open))
       (set! par-poses (cons (-fx (input-port-position (the-port)) 1)
			     par-poses))
       (let* ((id  (let ((str (the-substring 1 (-fx (the-length) 1))))
		      (string->symbol
		       (case (bigloo-case-sensitivity)
			  ((upcase)
			   (string-upcase! str))
			  ((downcase)
			   (string-downcase! str))
			  ((sensitive)
			   str)
			  (else
			   (string-upcase! str))))))
	      (l   (reverse! (collect-up-to ignore "vector" (the-port) posp))))
	  (list->tvector id l)))
      
      ;; structures
      ("#{"
       ;; then, we compute the structure
       ;; we increment the number of open parenthesis
       (set! bra-open (+fx 1 bra-open))
       (set! bra-poses (cons (-fx (input-port-position (the-port)) 1)
			     bra-poses))
       (let ((l (reverse! (collect-up-to ignore "structure" (the-port) posp))))
	  (cons '_structure_ l)))
      ("}"
       (set! bra-open (-fx bra-open 1))
       (if (<fx bra-open 0)
	   (begin
	      (set! bra-open 0)
	      (ignore))
	   (begin
	      (set! bra-poses (cdr bra-poses))
	      *end-of-list*)))
      
      ;; cyclic target mark
      ((: "#" (+ digit) "=")
       (let* ((no (string->integer (the-substring 1 (-fx (the-length) 1))))
	      (pos (input-port-position (the-port)))
	      (the-object (ignore)))
	  (if (eof-object? the-object)
	      (read-error/loc pos "Illegal cyclic reference" no (the-port)))
	  (set! cycles (cons (cons no the-object) cycles))
	  (unreference! the-object (the-port) cycles)))
      
      ;; cyclic target reference
      ((: "#" (+ digit) "#")
       (let* ((no (string->integer (the-substring 1 (-fx (the-length) 1))))
	      (cell (assq no cycles)))
	  (if (not (pair? cell))
	      (lambda () no)
	      (cdr cell))))

      ;; special tokens
      ("#"
	(read/rp *sharp-grammar* (the-port)))
      
      ;; error or eof
      (else
       (let ((char (the-failure)))
	  (if (eof-object? char)
	      (cond
		 ((>fx par-open 0)
		  (read-error/loc (car par-poses)
				  "Unexpected end-of-file"
				  "Unclosed list"
				  (the-port)))
		 ((>fx bra-open 0)
		  (read-error/loc (car par-poses)
				  "Unexpected end-of-file"
				  "Unclosed vector or structure"
				  (the-port)))
		 (else
		  (reset-eof (the-port))
		  char))
	      (read-error "Illegal char"
			  (illegal-char-rep char)
			  (the-port)))))))

;*---------------------------------------------------------------------*/
;*    bigloo-regular-grammar ...                                       */
;*---------------------------------------------------------------------*/
(define (bigloo-regular-grammar)
   *bigloo-grammar*)

;*---------------------------------------------------------------------*/
;*    read ...                                                         */
;*---------------------------------------------------------------------*/
(define (read . input-port)
   ;; read except an undocumented argument used by the compiler to
   ;; get line number associated with expressions.
   (cond
      ((null? input-port)
       (read/rp (bigloo-regular-grammar)
		(current-input-port)
		#f
		'() 0 0 '() '()))
      ((not (input-port? (car input-port)))
       (error 'read "type `input-port' expected" (car input-port)))
      (else
       (let ((port (car input-port)))
	  (if (closed-input-port? port)
	      ;; The reader is always compiled in unsafe mode then, the
	      ;; expansion of the *BIGLOO-GRAMMAR* never checks if the
	      ;; input port is not already closed. In consequence, we
	      ;; have to explicitly test the closeness before reading.
	      (error 'read "Illegal closed input port" port)
	      (read/rp (bigloo-regular-grammar)
		       port
		       (not (null? (cdr input-port)))
		       '() 0 0 '() '()))))))

;*---------------------------------------------------------------------*/
;*    read/case ...                                                    */
;*---------------------------------------------------------------------*/
(define (read/case case . input-port)
   (let ((old (bigloo-case-sensitivity)))
      (bigloo-case-sensitivity-set! case)
      (unwind-protect
	 (apply read input-port)
	 (bigloo-case-sensitivity-set! old))))
   
;*---------------------------------------------------------------------*/
;*    read-case-sensitive ...                                          */
;*    -------------------------------------------------------------    */
;*    Case sensitive read.                                             */
;*---------------------------------------------------------------------*/
(define (read-case-sensitive . input-port)
   (apply read/case 'sensitive input-port))

;*---------------------------------------------------------------------*/
;*    read-case-insensitive ...                                        */
;*    -------------------------------------------------------------    */
;*    Case unsensitive read.                                           */
;*---------------------------------------------------------------------*/
(define (read-case-insensitive . input-port)
   (apply read/case 'downcase input-port))
