r1/packrat.scm
author Tony Garnock-Jones <tonygarnockjones@gmail.com>
Wed, 16 Jan 2019 17:15:58 +0000
changeset 438 1fe179d53161
parent 3 d6dba34d7b01
permissions -rw-r--r--
Add missing primitive implementation for the plain interpreter.

;; Packrat Parser Library

(require 'srfi-1)

(define-record-type parse-result
  (make-parse-result successful? semantic-value next error)
  parse-result?
  (successful? parse-result-successful?)
  (semantic-value parse-result-semantic-value)
  (next parse-result-next) ;; #f, if eof or error; otherwise a parse-results
  (error parse-result-error)
  ;; ^^ #f if none, but usually a parse-error structure
  )

(define-record-type parse-results
  (make-parse-results position base next map)
  parse-results?
  (position parse-results-position) ;; a parse-position or #f if unknown
  (base parse-results-base) ;; a value, #f indicating 'none' or 'eof'
  (next parse-results-next* set-parse-results-next!)
  ;; ^^ a parse-results, or a nullary function delivering same, or #f for nothing next (eof)
  (map parse-results-map set-parse-results-map!)
  ;; ^^ an alist mapping a nonterminal to a parse-result
  )

(define-record-type parse-error
  (make-parse-error position expected-strings messages)
  parse-error?
  (position parse-error-position) ;; a parse-position or #f if unknown
  (expected-strings parse-error-expected-strings) ;; set of strings (lset)
  (messages parse-error-messages) ;; list of strings
  )

(define-record-type parse-position
  (make-parse-position file line column)
  parse-position?
  (file parse-position-file)
  (line parse-position-line)
  (column parse-position-column))

(define (top-parse-position filename)
  (make-parse-position filename 1 0))

(define (update-parse-position pos ch)
  (if (not pos)
      #f
      (let ((file (parse-position-file pos))
	    (line (parse-position-line pos))
	    (column (parse-position-column pos)))
	(cond
	 ((eq? ch #\return) (make-parse-position file line 0))
	 ((eq? ch #\newline) (make-parse-position file (+ line 1) 0))
	 ((eq? ch #\tab) (make-parse-position file line (* (quotient (+ column 8) 8) 8)))
	 (else (make-parse-position file line (+ column 1)))))))

(define (parse-position->string pos)
  (if (not pos)
      "<??>"
      (string-append (parse-position-file pos) ":"
		     (number->string (parse-position-line pos)) ":"
		     (number->string (parse-position-column pos)))))

(define (empty-results pos)
  (make-parse-results pos #f #f '()))

(define (make-results pos base next-generator)
  (make-parse-results pos base next-generator '()))

(define (make-error-expected pos str)
  (make-parse-error pos (list str) '()))

(define (make-error-message pos msg)
  (make-parse-error pos '() (list msg)))

(define (make-result semantic-value next)
  (make-parse-result #t semantic-value next #f))

(define (make-expected-result pos str)
  (make-parse-result #f #f #f (make-error-expected pos str)))

(define (make-message-result pos msg)
  (make-parse-result #f #f #f (make-error-message pos msg)))

(define (prepend-base pos base next)
  (make-parse-results pos base next '()))

(define (prepend-semantic-value pos key result next)
  (make-parse-results pos #f #f
		      (list (cons key (make-result result next)))))

(define (base-generator->results generator)
  ;; Note: applies first next-generator, to get first result
  (define (results-generator)
    (let-values (((pos base) (generator)))
      (if (not base)
	  (empty-results pos)
	  (make-results pos base results-generator))))
  (results-generator))

(define (parse-results-next results)
  (let ((next (parse-results-next* results)))
    (if (procedure? next)
	(let ((next-value (next)))
	  (set-parse-results-next! results next-value)
	  next-value)
	next)))

(define (results->result results key fn)
  (let ((results-map (parse-results-map results)))
    (cond
     ((assq key results-map) => cdr)
     (else (let ((result (fn)))
	     (set-parse-results-map! results (cons (cons key result) results-map))
	     result)))))

(define (parse-position>? a b)
  (cond
   ((not a) #f)
   ((not b) #t)
   (else (let ((la (parse-position-line a)) (lb (parse-position-line b)))
	   (or (> la lb)
	       (and (= la lb)
		    (> (parse-position-column a) (parse-position-column b))))))))

(define (parse-error-empty? e)
  (and (null? (parse-error-expected-strings e))
       (null? (parse-error-messages e))))

(define (merge-parse-errors e1 e2)
  (cond
   ((not e1) e2)
   ((not e2) e1)
   (else
    (let ((p1 (parse-error-position e1))
	  (p2 (parse-error-position e2)))
      (cond
       ((or (parse-position>? p1 p2) (parse-error-empty? e2)) e1)
       ((or (parse-position>? p2 p1) (parse-error-empty? e1)) e2)
       (else (make-parse-error p1
			       (lset-union string=?
					   (parse-error-expected-strings e1)
					   (parse-error-expected-strings e2))
			       (append (parse-error-messages e1) (parse-error-messages e2)))))))))

(define (merge-result-errors result errs)
  (make-parse-result (parse-result-successful? result)
		     (parse-result-semantic-value result)
		     (parse-result-next result)
		     (merge-parse-errors (parse-result-error result) errs)))

;---------------------------------------------------------------------------

(define (parse-results-token-kind results)
  (let ((base (parse-results-base results)))
    (and base (car base))))

(define (parse-results-token-value results)
  (let ((base (parse-results-base results)))
    (and base (cdr base))))

(define (packrat-check-base token-kind k)
  (lambda (results)
    (let ((base (parse-results-base results)))
      (if (eq? (and base (car base)) token-kind)
	  ((k (and base (cdr base))) (parse-results-next results))
	  (make-expected-result (parse-results-position results)
				(if (not token-kind)
				    "end-of-file"
				    (symbol->string token-kind)))))))

(define (packrat-check parser k)
  (lambda (results)
    (let ((result (parser results)))
      (if (parse-result-successful? result)
	  (merge-result-errors ((k (parse-result-semantic-value result))
				(parse-result-next result))
			       (parse-result-error result))
	  result))))

(define (packrat-or p1 p2)
  (lambda (results)
    (let ((result (p1 results)))
      (if (parse-result-successful? result)
	  result
	  (merge-result-errors (p2 results)
			       (parse-result-error result))))))

(define (packrat-unless explanation p1 p2)
  (lambda (results)
    (let ((result (p1 results)))
      (if (parse-result-successful? result)
	  (make-message-result (parse-results-position results)
			       explanation)
	  (p2 results)))))

;---------------------------------------------------------------------------

(define (object->external-representation o)
  (let ((s (open-output-string)))
    (write o s)
    (get-output-string s)))

(define-syntax packrat-parser
  (syntax-rules (<- quote ! @ /)
    ((_ start (nonterminal (alternative body0 body ...) ...) ...)
     (let ()
       (define nonterminal
	 (lambda (results)
	   (results->result results 'nonterminal
			    (lambda ()
			      ((packrat-parser #f "alts" nonterminal
					       ((begin body0 body ...) alternative) ...)
			       results)))))
       ...
       start))

    ((_ #f "alts" nt (body alternative))
     (packrat-parser #f "altD" nt body alternative))

    ((_ #f "alts" nt (body alternative) rest0 rest ...)
     (packrat-or (packrat-parser #f "altD" nt body alternative)
		 (packrat-parser #f "alts" nt rest0 rest ...)))

    ((_ #f "altD" nt body alternative)
     (lambda (results)
       ;;(write (list (parse-position->string (parse-results-position results))
       ;;'trying- 'nt 'alternative))
       ;;(newline)
       (let ((result ((packrat-parser #f "alt" nt body alternative) results)))
	 ;;(write (list (parse-position->string
	 ;;(parse-results-position results))
	 ;;(if (parse-result-successful? result)
	 ;;'success
	 ;;'failing)
	 ;;'nt 'alternative))
	 ;;(newline)
	 result)))

    ((_ #f "alt" nt body ())
     (lambda (results) (make-result body results)))

    ((_ #f "alt" nt body ((! fails ...) rest ...))
     (packrat-unless (string-append "Nonterminal " (symbol->string 'nt)
				    " expected to fail "
				    (object->external-representation '(fails ...)))
		     (packrat-parser #f "alt" nt #t (fails ...))
		     (packrat-parser #f "alt" nt body (rest ...))))

    ((_ #f "alt" nt body ((/ alternative ...) rest ...))
     (packrat-check (packrat-parser #f "alts" nt (#t alternative) ...)
		    (lambda (result) (packrat-parser #f "alt" nt body (rest ...)))))

    ((_ #f "alt" nt body (var <- 'val rest ...))
     (packrat-check-base 'val
			 (lambda (var)
			   (packrat-parser #f "alt" nt body (rest ...)))))

    ((_ #f "alt" nt body (var <- @ rest ...))
     (lambda (results)
       (let ((var (parse-results-position results)))
	 ((packrat-parser #f "alt" nt body (rest ...)) results))))

    ((_ #f "alt" nt body (var <- val rest ...))
     (packrat-check val
		    (lambda (var)
		      (packrat-parser #f "alt" nt body (rest ...)))))

    ((_ #f "alt" nt body ('val rest ...))
     (packrat-check-base 'val
			 (lambda (dummy)
			   (packrat-parser #f "alt" nt body (rest ...)))))

    ((_ #f "alt" nt body (val rest ...))
     (packrat-check val
		    (lambda (dummy)
		      (packrat-parser #f "alt" nt body (rest ...)))))))

'(define (x)
  (sc-expand
   '(packrat-parser expr
		    (expr ((a <- 'num '+ b <- 'num)
			   (+ a b))
			  ((a <- mulexp) a))
		    (mulexp ((a <- 'num '* b <- 'num)
			     (* a b))))))