|
1 (require 'srfi-1) ; list |
|
2 (require 'srfi-13) ; string |
|
3 (require 'srfi-14) ; charset |
|
4 |
|
5 (eval-when (compile) (load "packrat.scm")) |
|
6 (require 'util) |
|
7 (require 'packrat) |
|
8 |
|
9 ;--------------------------------------------------------------------------- |
|
10 ;; utilities |
|
11 |
|
12 (define (transform-grammar grammar) |
|
13 (map (lambda (clause) |
|
14 (let ((v (last clause)) |
|
15 (front (butlast clause))) |
|
16 (if (procedure? v) |
|
17 clause |
|
18 (append front (list (lambda args |
|
19 (debug 3 'reducing clause args) |
|
20 (let walk ((formal v)) |
|
21 (cond |
|
22 ((null? formal) '()) |
|
23 ((pair? formal) (cons (walk (car formal)) |
|
24 (walk (cdr formal)))) |
|
25 ((procedure? formal) (apply formal args)) |
|
26 ((number? formal) (list-ref args formal)) |
|
27 (else formal))))))))) |
|
28 grammar)) |
|
29 |
|
30 ;--------------------------------------------------------------------------- |
|
31 ;; lex0: categorising characters |
|
32 |
|
33 (define (lex0-ThiNG char-provider-thunk) |
|
34 (let ((char (char-provider-thunk))) |
|
35 (and (not (eof-object? char)) |
|
36 (cons (cond |
|
37 ((char-set-contains? char-set:letter char) 'letter) |
|
38 ((char-set-contains? char-set:digit char) 'digit) |
|
39 ((or (char-whitespace? char) |
|
40 (char-set-contains? char-set:blank char)) |
|
41 'whitespace) |
|
42 (else (case char |
|
43 ((#\() 'oparen) |
|
44 ((#\)) 'cparen) |
|
45 ((#\[) 'obrack) |
|
46 ((#\]) 'cbrack) |
|
47 ((#\{) 'obrace) |
|
48 ((#\}) 'cbrace) |
|
49 ((#\+) 'plus) |
|
50 ((#\-) 'minus) |
|
51 ((#\=) 'equal) |
|
52 ((#\") 'doublequote) |
|
53 ((#\') 'quote) |
|
54 ((#\.) 'dot) |
|
55 ((#\:) 'colon) |
|
56 ((#\|) 'pipe) |
|
57 ((#\@) 'at) |
|
58 ((#\#) 'hash) |
|
59 ((#\\) 'backslash) |
|
60 ((#\_) 'underscore) |
|
61 ((#\*) 'star) |
|
62 (else 'misc)))) |
|
63 char)))) |
|
64 |
|
65 (define (unfold-lex0-ThiNG port) |
|
66 (unfold (lambda (dummy) (eof-object? (peek-char port))) |
|
67 lex0-ThiNG |
|
68 (lambda (token) token) |
|
69 (lambda () (read-char port)))) |
|
70 |
|
71 ;--------------------------------------------------------------------------- |
|
72 ;; lex1: building tokens from categorised character stream |
|
73 ;; |
|
74 ;; compound (pseudo-)token kinds: |
|
75 ;; identifier [a-zA-Z][a-zA-Z0-9]*:? |
|
76 ;; symbol [^ ]+ |
|
77 ;; integer [-+]?[0-9]+/[^.] |
|
78 ;; comment "([^"\\]|\\"|\\\\])*" |
|
79 ;; string '([^'\\]|\\'|\\\\])*' |
|
80 ;; := |
|
81 |
|
82 (define (make-lex1-ThiNG filename char-provider-thunk) |
|
83 (let* ((pushback* '()) |
|
84 (position (top-parse-position filename)) |
|
85 (prev-position position)) |
|
86 (define (next!) |
|
87 (if (null? pushback*) |
|
88 (let* ((newval (lex0-ThiNG char-provider-thunk))) |
|
89 (if newval |
|
90 (begin |
|
91 (set! prev-position position) |
|
92 (set! position (update-parse-position position (cdr newval))))) |
|
93 newval) |
|
94 (let ((v (car pushback*))) |
|
95 (set! pushback* (cdr pushback*)) |
|
96 (set! prev-position position) |
|
97 (set! position (cdr v)) |
|
98 (car v)))) |
|
99 |
|
100 (define (pushback! x) |
|
101 (set! pushback* (cons (cons x position) pushback*)) |
|
102 (set! position prev-position)) |
|
103 |
|
104 (define (go fn . data) |
|
105 (dispatch* (next!) fn data)) |
|
106 |
|
107 (define (dispatch token fn . data) |
|
108 (dispatch* token fn data)) |
|
109 |
|
110 (define emit-k 'emit-k) |
|
111 |
|
112 (define (dispatch* token fn data) |
|
113 (if token |
|
114 (apply fn token (car token) (cdr token) data) |
|
115 (emit-k #f))) |
|
116 |
|
117 (define (emit kind sv) |
|
118 (emit-k (cons prev-position (cons kind sv)))) |
|
119 |
|
120 (define (lex token kind sv) |
|
121 (case kind |
|
122 ((whitespace) (go lex)) |
|
123 ((minus plus) (go lex-sign token)) |
|
124 ((digit) (pushback! token) (go lex-number #f 0)) |
|
125 ((letter) (go lex-identifier (list sv))) |
|
126 ((colon) (go lex-colon token)) |
|
127 ((doublequote) (go lex-string token '() (lambda (result) (go lex)))) |
|
128 ((quote) (go lex-string token '() (lambda (result) (emit 'string result)))) |
|
129 ((hash) (go lex-symbol '())) |
|
130 ((misc equal star) (go lex-punct (list sv))) |
|
131 (else (emit kind sv)))) |
|
132 |
|
133 (define (lex-sign token kind sv sign-token) |
|
134 (pushback! token) |
|
135 (if (eq? kind 'digit) |
|
136 (go lex-number (car sign-token) 0) |
|
137 (go lex-punct (list (cdr sign-token))))) |
|
138 |
|
139 (define (lex-punct token kind sv acc) |
|
140 (case kind |
|
141 ((misc equal star plus minus) (go lex-punct (cons sv acc))) |
|
142 (else |
|
143 (pushback! token) |
|
144 (emit 'punct (list->string (reverse acc)))))) |
|
145 |
|
146 (define (lex-number token kind sv sign acc) |
|
147 (case kind |
|
148 ((digit) (go lex-number sign (+ (* acc 10) |
|
149 (- (char->integer sv) |
|
150 (char->integer #\0))))) |
|
151 ((dot) (go lex-decimal sign acc token)) |
|
152 (else |
|
153 (pushback! token) |
|
154 (finish-integer sign acc)))) |
|
155 |
|
156 (define (lex-decimal token kind sv sign acc dot-token) |
|
157 (case kind |
|
158 ((digit) (error "Illegal syntax - floating-point literals not supported")) |
|
159 (else |
|
160 (pushback! token) |
|
161 (pushback! dot-token) |
|
162 (finish-integer sign acc)))) |
|
163 |
|
164 (define (finish-integer sign acc) |
|
165 (emit 'integer (* (if (eq? sign 'minus) -1 1) acc))) |
|
166 |
|
167 (define (lex-identifier token kind sv acc) |
|
168 (case kind |
|
169 ((letter digit) (go lex-identifier (cons sv acc))) |
|
170 ((colon) (go lex-selector-identifier token acc)) |
|
171 (else |
|
172 (pushback! token) |
|
173 (finish-identifier 'identifier acc)))) |
|
174 |
|
175 (define (lex-selector-identifier token kind sv colon-token acc) |
|
176 (pushback! token) |
|
177 (if (memq kind '(equal star)) |
|
178 (begin |
|
179 (pushback! colon-token) |
|
180 (finish-identifier 'identifier acc)) |
|
181 (finish-identifier 'selector (cons #\: acc)))) |
|
182 |
|
183 (define (lex-symbol token kind sv acc) |
|
184 (case kind |
|
185 ((letter digit misc equal star plus minus) (go lex-symbol (cons sv acc))) |
|
186 (else |
|
187 (pushback! token) |
|
188 (finish-identifier 'symbol acc)))) |
|
189 |
|
190 (define (finish-identifier kind acc) |
|
191 (let ((idstr (list->string (reverse acc)))) |
|
192 (if (and (eq? kind 'identifier) |
|
193 (string=? idstr "resend")) |
|
194 (emit 'resend 'resend) |
|
195 (emit kind idstr)))) |
|
196 |
|
197 (define (lex-colon token kind sv colon-token) |
|
198 (case kind |
|
199 ((equal) (emit 'colonequal #f)) |
|
200 ((star) (emit 'colonstar #f)) |
|
201 (else |
|
202 (pushback! token) |
|
203 (emit 'colon (cdr colon-token))))) |
|
204 |
|
205 (define (lex-string token kind sv terminator acc k) |
|
206 (if (eq? kind (car terminator)) |
|
207 (k (list->string (reverse acc))) |
|
208 (case kind |
|
209 ((backslash) (go (lambda (token2 kind2 sv2) |
|
210 (go lex-string |
|
211 terminator |
|
212 (cons (case sv2 |
|
213 ((#\n) #\newline) |
|
214 ((#\t) #\tab) |
|
215 (else sv2)) |
|
216 acc) |
|
217 k)))) |
|
218 (else (go lex-string terminator (cons sv acc) k))))) |
|
219 |
|
220 (lambda () |
|
221 (call-with-current-continuation |
|
222 (lambda (k) |
|
223 (set! emit-k k) |
|
224 (error "Value returned without emit from lexer" (go lex))))))) |
|
225 |
|
226 (define (unfold-lex1-ThiNG port) |
|
227 (let ((lexer (make-lex1-ThiNG (lambda () (read-char port)))) |
|
228 (done #f)) |
|
229 (unfold (lambda (dummy) done) |
|
230 (lambda (dummy) |
|
231 (let ((result (lexer))) |
|
232 (if (not result) |
|
233 (set! done #t)) |
|
234 result)) |
|
235 (lambda (token) token) |
|
236 'dummy1))) |
|
237 |
|
238 ;--------------------------------------------------------------------------- |
|
239 ;; parsing |
|
240 |
|
241 (define (fixup-nary first-val args) |
|
242 (let* ((selectors (map car args)) |
|
243 (vals (map cadr args)) |
|
244 (selector (string-concatenate selectors))) |
|
245 `(send ,selector ,(cons first-val vals)))) |
|
246 |
|
247 (define-values (ThiNG-parser ThiNG-topexpr-parser) |
|
248 (packrat-parser |
|
249 (values toplevel topexpr) |
|
250 |
|
251 (toplevel ((a <- topexpr 'dot b <- toplevel) (cons a b)) |
|
252 ((a <- topexpr 'dot '#f) (list a)) |
|
253 ((a <- topexpr '#f) (list a))) |
|
254 |
|
255 (topexpr ((a <- method-definition) a) |
|
256 ((a <- expr) a)) |
|
257 |
|
258 (expr ((a <- nary) a)) |
|
259 |
|
260 (nary ((a <- binary args <- nary-args) (fixup-nary a args)) |
|
261 ((a <- binary) a)) |
|
262 |
|
263 (nary-args ((sel <- selector b <- binary rest <- nary-args) (cons (list sel b) rest)) |
|
264 ((sel <- selector b <- binary) (list (list sel b)))) |
|
265 |
|
266 (binary ((u1 <- unary k <- binaryk) (k u1))) |
|
267 (binaryk ((op <- binaryop u2 <- unary k <- binaryk) |
|
268 (lambda (u1) (k `(send ,op (,u1 ,u2))))) |
|
269 (() |
|
270 (lambda (u1) u1))) |
|
271 |
|
272 (binaryop ((p <- 'punct) p)) |
|
273 |
|
274 (unary ((v <- value k <- unaryk) (k v))) |
|
275 (unaryk ((i <- id (! (/ ('colonequal) ('colonstar))) k <- unaryk) |
|
276 (lambda (v) (k `(send ,i (,v))))) |
|
277 (() |
|
278 (lambda (v) v))) |
|
279 |
|
280 (value ((i <- id 'oparen s <- stmt-seq 'cparen) `(scope ,i ,s)) |
|
281 ((i <- id) `(ref ,i)) |
|
282 ((b <- block) `(block . ,b)) |
|
283 ((s <- 'string) `(string ,s)) |
|
284 ((s <- 'symbol) `(symbol ,(string->symbol s))) |
|
285 ((i <- 'integer) `(number ,i)) |
|
286 (('resend) `(resend)) |
|
287 (('oparen e <- expr u <- updates+ 'cparen) `(update ,e ,u)) |
|
288 (('oparen u <- updates 'cparen) `(update (ref "Root") ,u)) |
|
289 (('oparen e <- expr 'cparen) e) |
|
290 (('oparen s <- stmt-seq 'cparen) `(scope ,*nil* ,s)) |
|
291 (('obrace ee <- expr-seq 'cbrace) `(tuple ,ee))) |
|
292 |
|
293 (updates+ ((u <- update uu <- updates) (cons u uu))) |
|
294 (updates ((u <- update uu <- updates) (cons u uu)) |
|
295 (() '())) |
|
296 |
|
297 (update ((i <- id 'colonequal e <- expr) (list *false* i e)) |
|
298 ((i <- id 'colonstar e <- expr) (list *true* i e))) |
|
299 |
|
300 (block (('obrack b <- binders s <- stmt-seq 'cbrack) (list b s))) |
|
301 |
|
302 (expr-seq ((e <- expr 'dot ee <- expr-seq) (cons e ee)) |
|
303 ((e <- expr) (list e)) |
|
304 (() '())) |
|
305 |
|
306 (stmt-seq ((e <- stmt 'dot s <- stmt-seq) (cons e s)) |
|
307 ((e <- stmt) (list e)) |
|
308 (() '())) |
|
309 |
|
310 (stmt ((i <- id 'colonequal e <- expr) `(let ,i ,e)) |
|
311 ((e <- expr) e)) |
|
312 |
|
313 (binders ((b <- binders+ 'pipe) b) |
|
314 (() '())) |
|
315 (binders+ ((b <- binder bb <- binders+) (cons b bb)) |
|
316 ((b <- binder) (list b))) |
|
317 (binder (('colon i <- id) i)) |
|
318 |
|
319 (method-definition ((p <- method-params 'obrack ee <- stmt-seq 'cbrack) `(method ,p ,ee))) |
|
320 |
|
321 (method-params ((p1 <- method-param op <- binaryop p2 <- method-param) `(send ,op (,p1 ,p2))) |
|
322 ((p1 <- method-param n <- method-nary) (fixup-nary p1 n)) |
|
323 ((p <- method-param i <- id) `(send ,i (,p)))) |
|
324 |
|
325 (method-param (('underscore 'at v <- value) (list *false* v)) |
|
326 (('underscore) (list *false* *false*)) |
|
327 ((i <- id 'at v <- value) (list i v)) |
|
328 ((i <- id) (list i *false*))) |
|
329 |
|
330 (method-nary ((s <- selector p <- method-param r <- method-nary) (cons (list s p) r)) |
|
331 ((s <- selector p <- method-param) (list (list s p)))) |
|
332 |
|
333 (selector ((s <- 'selector) s)) |
|
334 (id ((i <- 'identifier) i)))) |
|
335 |
|
336 (define (parse-ThiNG filename parser char-provider-thunk) |
|
337 (let* ((lexer (make-lex1-ThiNG filename char-provider-thunk)) |
|
338 (result (parser (base-generator->results |
|
339 (lambda () |
|
340 (let ((r (lexer))) |
|
341 (if r |
|
342 (values (car r) (cdr r)) |
|
343 (values #f #f)))))))) |
|
344 (if (parse-result-successful? result) |
|
345 (values #t (parse-result-semantic-value result)) |
|
346 (let ((e (parse-result-error result))) |
|
347 (values #f (list 'parse-error |
|
348 (parse-position->string (parse-error-position e)) |
|
349 (parse-error-expected-strings e) |
|
350 (parse-error-messages e))))))) |