smalltalk-tng
view r1/parsetng.scm @ 321:c4a0718c2d3c
Sketch of dependencies
| author | Tony Garnock-Jones <tonygarnockjones@gmail.com> |
|---|---|
| date | Sat Oct 08 15:36:03 2011 -0400 (7 months ago) |
| parents | |
| children |
line source
1 (require 'srfi-1) ; list
2 (require 'srfi-13) ; string
3 (require 'srfi-14) ; charset
5 (eval-when (compile) (load "packrat.scm"))
6 (require 'util)
7 (require 'packrat)
9 ;---------------------------------------------------------------------------
10 ;; utilities
12 (define (transform-grammar grammar)
13 (map (lambda (clause)
14 (let ((v (last clause))
15 (front (butlast clause)))
16 (if (procedure? v)
17 clause
18 (append front (list (lambda args
19 (debug 3 'reducing clause args)
20 (let walk ((formal v))
21 (cond
22 ((null? formal) '())
23 ((pair? formal) (cons (walk (car formal))
24 (walk (cdr formal))))
25 ((procedure? formal) (apply formal args))
26 ((number? formal) (list-ref args formal))
27 (else formal)))))))))
28 grammar))
30 ;---------------------------------------------------------------------------
31 ;; lex0: categorising characters
33 (define (lex0-ThiNG char-provider-thunk)
34 (let ((char (char-provider-thunk)))
35 (and (not (eof-object? char))
36 (cons (cond
37 ((char-set-contains? char-set:letter char) 'letter)
38 ((char-set-contains? char-set:digit char) 'digit)
39 ((or (char-whitespace? char)
40 (char-set-contains? char-set:blank char))
41 'whitespace)
42 (else (case char
43 ((#\() 'oparen)
44 ((#\)) 'cparen)
45 ((#\[) 'obrack)
46 ((#\]) 'cbrack)
47 ((#\{) 'obrace)
48 ((#\}) 'cbrace)
49 ((#\+) 'plus)
50 ((#\-) 'minus)
51 ((#\=) 'equal)
52 ((#\") 'doublequote)
53 ((#\') 'quote)
54 ((#\.) 'dot)
55 ((#\:) 'colon)
56 ((#\|) 'pipe)
57 ((#\@) 'at)
58 ((#\#) 'hash)
59 ((#\\) 'backslash)
60 ((#\_) 'underscore)
61 ((#\*) 'star)
62 (else 'misc))))
63 char))))
65 (define (unfold-lex0-ThiNG port)
66 (unfold (lambda (dummy) (eof-object? (peek-char port)))
67 lex0-ThiNG
68 (lambda (token) token)
69 (lambda () (read-char port))))
71 ;---------------------------------------------------------------------------
72 ;; lex1: building tokens from categorised character stream
73 ;;
74 ;; compound (pseudo-)token kinds:
75 ;; identifier [a-zA-Z][a-zA-Z0-9]*:?
76 ;; symbol [^ ]+
77 ;; integer [-+]?[0-9]+/[^.]
78 ;; comment "([^"\\]|\\"|\\\\])*"
79 ;; string '([^'\\]|\\'|\\\\])*'
80 ;; :=
82 (define (make-lex1-ThiNG filename char-provider-thunk)
83 (let* ((pushback* '())
84 (position (top-parse-position filename))
85 (prev-position position))
86 (define (next!)
87 (if (null? pushback*)
88 (let* ((newval (lex0-ThiNG char-provider-thunk)))
89 (if newval
90 (begin
91 (set! prev-position position)
92 (set! position (update-parse-position position (cdr newval)))))
93 newval)
94 (let ((v (car pushback*)))
95 (set! pushback* (cdr pushback*))
96 (set! prev-position position)
97 (set! position (cdr v))
98 (car v))))
100 (define (pushback! x)
101 (set! pushback* (cons (cons x position) pushback*))
102 (set! position prev-position))
104 (define (go fn . data)
105 (dispatch* (next!) fn data))
107 (define (dispatch token fn . data)
108 (dispatch* token fn data))
110 (define emit-k 'emit-k)
112 (define (dispatch* token fn data)
113 (if token
114 (apply fn token (car token) (cdr token) data)
115 (emit-k #f)))
117 (define (emit kind sv)
118 (emit-k (cons prev-position (cons kind sv))))
120 (define (lex token kind sv)
121 (case kind
122 ((whitespace) (go lex))
123 ((minus plus) (go lex-sign token))
124 ((digit) (pushback! token) (go lex-number #f 0))
125 ((letter) (go lex-identifier (list sv)))
126 ((colon) (go lex-colon token))
127 ((doublequote) (go lex-string token '() (lambda (result) (go lex))))
128 ((quote) (go lex-string token '() (lambda (result) (emit 'string result))))
129 ((hash) (go lex-symbol '()))
130 ((misc equal star) (go lex-punct (list sv)))
131 (else (emit kind sv))))
133 (define (lex-sign token kind sv sign-token)
134 (pushback! token)
135 (if (eq? kind 'digit)
136 (go lex-number (car sign-token) 0)
137 (go lex-punct (list (cdr sign-token)))))
139 (define (lex-punct token kind sv acc)
140 (case kind
141 ((misc equal star plus minus) (go lex-punct (cons sv acc)))
142 (else
143 (pushback! token)
144 (emit 'punct (list->string (reverse acc))))))
146 (define (lex-number token kind sv sign acc)
147 (case kind
148 ((digit) (go lex-number sign (+ (* acc 10)
149 (- (char->integer sv)
150 (char->integer #\0)))))
151 ((dot) (go lex-decimal sign acc token))
152 (else
153 (pushback! token)
154 (finish-integer sign acc))))
156 (define (lex-decimal token kind sv sign acc dot-token)
157 (case kind
158 ((digit) (error "Illegal syntax - floating-point literals not supported"))
159 (else
160 (pushback! token)
161 (pushback! dot-token)
162 (finish-integer sign acc))))
164 (define (finish-integer sign acc)
165 (emit 'integer (* (if (eq? sign 'minus) -1 1) acc)))
167 (define (lex-identifier token kind sv acc)
168 (case kind
169 ((letter digit) (go lex-identifier (cons sv acc)))
170 ((colon) (go lex-selector-identifier token acc))
171 (else
172 (pushback! token)
173 (finish-identifier 'identifier acc))))
175 (define (lex-selector-identifier token kind sv colon-token acc)
176 (pushback! token)
177 (if (memq kind '(equal star))
178 (begin
179 (pushback! colon-token)
180 (finish-identifier 'identifier acc))
181 (finish-identifier 'selector (cons #\: acc))))
183 (define (lex-symbol token kind sv acc)
184 (case kind
185 ((letter digit misc equal star plus minus) (go lex-symbol (cons sv acc)))
186 (else
187 (pushback! token)
188 (finish-identifier 'symbol acc))))
190 (define (finish-identifier kind acc)
191 (let ((idstr (list->string (reverse acc))))
192 (if (and (eq? kind 'identifier)
193 (string=? idstr "resend"))
194 (emit 'resend 'resend)
195 (emit kind idstr))))
197 (define (lex-colon token kind sv colon-token)
198 (case kind
199 ((equal) (emit 'colonequal #f))
200 ((star) (emit 'colonstar #f))
201 (else
202 (pushback! token)
203 (emit 'colon (cdr colon-token)))))
205 (define (lex-string token kind sv terminator acc k)
206 (if (eq? kind (car terminator))
207 (k (list->string (reverse acc)))
208 (case kind
209 ((backslash) (go (lambda (token2 kind2 sv2)
210 (go lex-string
211 terminator
212 (cons (case sv2
213 ((#\n) #\newline)
214 ((#\t) #\tab)
215 (else sv2))
216 acc)
217 k))))
218 (else (go lex-string terminator (cons sv acc) k)))))
220 (lambda ()
221 (call-with-current-continuation
222 (lambda (k)
223 (set! emit-k k)
224 (error "Value returned without emit from lexer" (go lex)))))))
226 (define (unfold-lex1-ThiNG port)
227 (let ((lexer (make-lex1-ThiNG (lambda () (read-char port))))
228 (done #f))
229 (unfold (lambda (dummy) done)
230 (lambda (dummy)
231 (let ((result (lexer)))
232 (if (not result)
233 (set! done #t))
234 result))
235 (lambda (token) token)
236 'dummy1)))
238 ;---------------------------------------------------------------------------
239 ;; parsing
241 (define (fixup-nary first-val args)
242 (let* ((selectors (map car args))
243 (vals (map cadr args))
244 (selector (string-concatenate selectors)))
245 `(send ,selector ,(cons first-val vals))))
247 (define-values (ThiNG-parser ThiNG-topexpr-parser)
248 (packrat-parser
249 (values toplevel topexpr)
251 (toplevel ((a <- topexpr 'dot b <- toplevel) (cons a b))
252 ((a <- topexpr 'dot '#f) (list a))
253 ((a <- topexpr '#f) (list a)))
255 (topexpr ((a <- method-definition) a)
256 ((a <- expr) a))
258 (expr ((a <- nary) a))
260 (nary ((a <- binary args <- nary-args) (fixup-nary a args))
261 ((a <- binary) a))
263 (nary-args ((sel <- selector b <- binary rest <- nary-args) (cons (list sel b) rest))
264 ((sel <- selector b <- binary) (list (list sel b))))
266 (binary ((u1 <- unary k <- binaryk) (k u1)))
267 (binaryk ((op <- binaryop u2 <- unary k <- binaryk)
268 (lambda (u1) (k `(send ,op (,u1 ,u2)))))
269 (()
270 (lambda (u1) u1)))
272 (binaryop ((p <- 'punct) p))
274 (unary ((v <- value k <- unaryk) (k v)))
275 (unaryk ((i <- id (! (/ ('colonequal) ('colonstar))) k <- unaryk)
276 (lambda (v) (k `(send ,i (,v)))))
277 (()
278 (lambda (v) v)))
280 (value ((i <- id 'oparen s <- stmt-seq 'cparen) `(scope ,i ,s))
281 ((i <- id) `(ref ,i))
282 ((b <- block) `(block . ,b))
283 ((s <- 'string) `(string ,s))
284 ((s <- 'symbol) `(symbol ,(string->symbol s)))
285 ((i <- 'integer) `(number ,i))
286 (('resend) `(resend))
287 (('oparen e <- expr u <- updates+ 'cparen) `(update ,e ,u))
288 (('oparen u <- updates 'cparen) `(update (ref "Root") ,u))
289 (('oparen e <- expr 'cparen) e)
290 (('oparen s <- stmt-seq 'cparen) `(scope ,*nil* ,s))
291 (('obrace ee <- expr-seq 'cbrace) `(tuple ,ee)))
293 (updates+ ((u <- update uu <- updates) (cons u uu)))
294 (updates ((u <- update uu <- updates) (cons u uu))
295 (() '()))
297 (update ((i <- id 'colonequal e <- expr) (list *false* i e))
298 ((i <- id 'colonstar e <- expr) (list *true* i e)))
300 (block (('obrack b <- binders s <- stmt-seq 'cbrack) (list b s)))
302 (expr-seq ((e <- expr 'dot ee <- expr-seq) (cons e ee))
303 ((e <- expr) (list e))
304 (() '()))
306 (stmt-seq ((e <- stmt 'dot s <- stmt-seq) (cons e s))
307 ((e <- stmt) (list e))
308 (() '()))
310 (stmt ((i <- id 'colonequal e <- expr) `(let ,i ,e))
311 ((e <- expr) e))
313 (binders ((b <- binders+ 'pipe) b)
314 (() '()))
315 (binders+ ((b <- binder bb <- binders+) (cons b bb))
316 ((b <- binder) (list b)))
317 (binder (('colon i <- id) i))
319 (method-definition ((p <- method-params 'obrack ee <- stmt-seq 'cbrack) `(method ,p ,ee)))
321 (method-params ((p1 <- method-param op <- binaryop p2 <- method-param) `(send ,op (,p1 ,p2)))
322 ((p1 <- method-param n <- method-nary) (fixup-nary p1 n))
323 ((p <- method-param i <- id) `(send ,i (,p))))
325 (method-param (('underscore 'at v <- value) (list *false* v))
326 (('underscore) (list *false* *false*))
327 ((i <- id 'at v <- value) (list i v))
328 ((i <- id) (list i *false*)))
330 (method-nary ((s <- selector p <- method-param r <- method-nary) (cons (list s p) r))
331 ((s <- selector p <- method-param) (list (list s p))))
333 (selector ((s <- 'selector) s))
334 (id ((i <- 'identifier) i))))
336 (define (parse-ThiNG filename parser char-provider-thunk)
337 (let* ((lexer (make-lex1-ThiNG filename char-provider-thunk))
338 (result (parser (base-generator->results
339 (lambda ()
340 (let ((r (lexer)))
341 (if r
342 (values (car r) (cdr r))
343 (values #f #f))))))))
344 (if (parse-result-successful? result)
345 (values #t (parse-result-semantic-value result))
346 (let ((e (parse-result-error result)))
347 (values #f (list 'parse-error
348 (parse-position->string (parse-error-position e))
349 (parse-error-expected-strings e)
350 (parse-error-messages e)))))))
