match.scm (19964B)
;;;; matchable.scm -- portable hygienic pattern matcher
;;
;; This code is written by Alex Shinn and placed in the
;; Public Domain.  All warranties are disclaimed.

;; Written in fully portable SYNTAX-RULES, with a few non-portable
;; bits at the end of the file conditioned out with COND-EXPAND.

;; This is a simple generative pattern matcher - each pattern is
;; expanded into the required tests, calling a failure continuation if
;; the tests fail.  This makes the logic easy to follow and extend,
;; but produces sub-optimal code in cases where you have many similar
;; clauses due to repeating the same tests.  Nonetheless a smart
;; compiler should be able to remove the redundant tests.  For
;; MATCH-LET and DESTRUCTURING-BIND type uses there is no performance
;; hit.

;; 2008/03/20 - fixing bug where (a ...) matched non-lists
;; 2008/03/15 - removing redundant check in vector patterns
;; 2007/09/04 - fixing quasiquote patterns
;; 2007/07/21 - allowing ellipse patterns in non-final list positions
;; 2007/04/10 - fixing potential hygiene issue in match-check-ellipse
;;              (thanks to Taylor Campbell)
;; 2007/04/08 - clean up, commenting
;; 2006/12/24 - bugfixes
;; 2006/12/01 - non-linear patterns, shared variables in OR, get!/set!

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; This is always passed a message, yet won't match the message, and
;; thus always results in a compile-time error.
;;
;; The only clause accepts a use with no operands, so a use of the form
;; (match-syntax-error "some message") has no matching clause and the
;; expander rejects it.  A use with no operands expands into a use with
;; a message, which is then rejected the same way.

(define-syntax match-syntax-error
  (syntax-rules ()
    ((_)
     (match-syntax-error "invalid match-syntax-error usage"))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The basic interface.  MATCH just performs some basic syntax
;; validation, binds the match expression to a temporary variable, and
;; passes it on to MATCH-NEXT.

;; (match expr (pattern . body) ...)
;;
;; Entry point.  Rejects uses with no expression or no clauses, then
;; hands MATCH-NEXT four things: the variable being matched, a getter
;; form, a setter prefix, and the clauses.  Compound expressions and
;; vector literals are first bound to a temporary; anything else is
;; passed through as its own variable, getter and setter target.

(define-syntax match
  (syntax-rules ()
    ((_)
     (match-syntax-error "missing match expression"))
    ((_ expr)
     (match-syntax-error "missing match clause"))
    ;; a compound expression: evaluate once into a temporary
    ((_ (form ...) (pattern . rhs) ...)
     (let ((subject (form ...)))
       (match-next subject (form ...) (set! (form ...)) (pattern . rhs) ...)))
    ;; a vector literal: likewise bound to a temporary
    ((_ #(elt ...) (pattern . rhs) ...)
     (let ((subject #(elt ...)))
       (match-next subject subject (set! subject) (pattern . rhs) ...)))
    ;; anything else is used directly
    ((_ expr (pattern . rhs) ...)
     (match-next expr expr (set! expr) (pattern . rhs) ...))))

;; (match-next var getter setter clause ...)
;;
;; Tries the clauses in order.  Each clause is given to MATCH-ONE
;; together with a failure thunk whose body is MATCH-NEXT applied to
;; the clauses that remain.

(define-syntax match-next
  (syntax-rules (=>)
    ;; every clause has been tried without success
    ((_ subj getter setter)
     (%check (%error "no matching pattern")))
    ;; the clause names its failure continuation with (=> name)
    ((_ subj getter setter (pattern (=> escape) . rhs) . more)
     (let ((escape (lambda () (match-next subj getter setter . more))))
       ;; MATCH-ONE takes the pattern apart from here on
       (match-one subj pattern getter setter
                  (match-drop-ids (begin . rhs))
                  (escape)
                  ())))
    ;; no name given: re-dispatch with a placeholder name
    ((_ subj getter setter (pattern . rhs) . more)
     (match-next subj getter setter (pattern (=> try-next) . rhs) . more))))

;; (match-one var pattern getter setter success-k fail-k ids)
;;
;; Front door for a single pattern.  Only a pattern with at least two
;; leading elements can have an ellipse in second position, so that is
;; the only shape inspected here; everything else goes to MATCH-TWO
;; unchanged.

(define-syntax match-one
  (syntax-rules ()
    ((_ subj (head next . tail) getter setter sk fk ids)
     (match-check-ellipse
      next
      ;; second element is an ellipse: collect the variables of HEAD
      ;; and generate the repetition loop
      (match-extract-vars
       head (match-gen-ellipses subj head tail getter setter sk fk ids) ids ())
      ;; otherwise it is an ordinary pattern
      (match-two subj (head next . tail) getter setter sk fk ids)))
    ((_ . args)
     (match-two . args))))

;; This is the guts of the pattern matcher.
;; We are passed a lot of information in the form:
;;
;;   (match-two var pattern getter setter success-k fail-k (ids ...))
;;
;; where VAR is the symbol name of the current variable we are
;; matching, PATTERN is the current pattern, getter and setter are the
;; corresponding accessors (e.g. CAR and SET-CAR! of the pair holding
;; VAR), SUCCESS-K is the success continuation, FAIL-K is the failure
;; continuation (which is just a thunk call and is thus safe to expand
;; multiple times) and IDS are the list of identifiers bound in the
;; pattern so far.
;;
;; The clauses are tried in order, so the keyword forms must stay ahead
;; of the generic pair, vector and catch-all clauses.  `$' appears in
;; the literals list, but no clause of this macro handles a `$' pattern.

(define-syntax match-two
  (syntax-rules (_ ___ quote quasiquote ? $ = and or not set! get!)
    ;; the empty list matches only the empty list
    ((match-two v () g s (sk ...) fk i)
     (if (null? v) (sk ... i) fk))
    ;; quoted datum, compared with EQUAL?
    ((match-two v (quote p) g s (sk ...) fk i)
     (if (equal? v 'p) (sk ... i) fk))
    ((match-two v (quasiquote p) g s sk fk i)
     (match-quasiquote v p g s sk fk i))
    ;; AND: an empty AND always succeeds; otherwise match the first
    ;; sub-pattern and continue with the rest against the same value
    ((match-two v (and) g s (sk ...) fk i) (sk ... i))
    ((match-two v (and p q ...) g s sk fk i)
     (match-one v p g s (match-one v (and q ...) g s sk fk) fk i))
    ;; OR: an empty OR always fails, a single alternative is matched
    ;; directly, several alternatives go through MATCH-GEN-OR
    ((match-two v (or) g s sk fk i) fk)
    ((match-two v (or p) g s sk fk i)
     (match-one v p g s sk fk i))
    ((match-two v (or p ...) g s sk fk i)
     (match-extract-vars (or p ...)
                         (match-gen-or v (p ...) g s sk fk i)
                         i
                         ()))
    ;; NOT: the two continuations trade places; MATCH-DROP-IDS throws
    ;; away whatever identifiers P would have bound
    ((match-two v (not p) g s (sk ...) fk i)
     (match-one v p g s (match-drop-ids fk) (sk ... i) i))
    ;; GET!: bind the name to a thunk wrapping the getter form
    ((match-two v (get! getter) g s (sk ...) fk i)
     (let ((getter (lambda () g))) (sk ... i)))
    ;; SET!: bind the name to a one-argument procedure that appends its
    ;; argument to the setter prefix
    ((match-two v (set! setter) g (s ...) (sk ...) fk i)
     (let ((setter (lambda (x) (s ... x)))) (sk ... i)))
    ;; predicate test, then any further sub-patterns as an AND
    ((match-two v (? pred p ...) g s sk fk i)
     (if (pred v) (match-one v (and p ...) g s sk fk i) fk))
    ;; match P against the result of applying PROC
    ((match-two v (= proc p) g s sk fk i)
     (let ((w (proc v)))
       (match-one w p g s sk fk i)))
    ;; ellipse spelled with the `___' literal
    ((match-two v (p ___ . r) g s sk fk i)
     (match-extract-vars p (match-gen-ellipses v p r g s sk fk i) i ()))
    ;; one-element list
    ((match-two v (p) g s sk fk i)
     (if (and (pair? v) (null? (cdr v)))
       (let ((w (car v)))
         (match-one w p (car v) (set-car! v) sk fk i))
       fk))
    ;; general pair: match the CAR, then the CDR
    ((match-two v (p . q) g s sk fk i)
     (if (pair? v)
       (let ((w (car v)) (x (cdr v)))
         (match-one w p (car v) (set-car! v)
                    (match-one x q (cdr v) (set-cdr! v) sk fk)
                    fk
                    i))
       fk))
    ((match-two v #(p ...) g s sk fk i)
     (match-vector v 0 () (p ...) sk fk i))
    ;; the wildcard matches anything and binds nothing
    ((match-two v _ g s (sk ...) fk i) (sk ... i))
    ;; Not a pair or vector or special literal, test to see if it's a
    ;; new symbol, in which case we just bind it, or if it's an
    ;; already bound symbol or some other literal, in which case we
    ;; compare it with EQUAL?.
    ;;
    ;; The local NEW-SYM? macro lists the already-bound ids as
    ;; literals and uses X itself as its pattern, then is applied to an
    ;; arbitrary symbol: the first clause can match only when X is an
    ;; identifier that is not among those literals.
    ((match-two v x g s (sk ...) fk (id ...))
     (let-syntax
         ((new-sym?
           (syntax-rules (id ...)
             ((new-sym? x sk2 fk2) sk2)
             ((new-sym? y sk2 fk2) fk2))))
       (new-sym? abracadabra  ; thanks Oleg
                 (let ((x v)) (sk ... (id ... x)))
                 (if (equal? v x) (sk ... (id ...)) fk))))
    ))

;; QUASIQUOTE patterns
;;
;; Any operands after the ids form the nesting depth: one extra operand
;; is added for each nested QUASIQUOTE and one is removed for each
;; UNQUOTE or UNQUOTE-SPLICING met while nested.

(define-syntax match-quasiquote
  (syntax-rules (unquote unquote-splicing quasiquote)
    ;; at the outermost level an unquoted form is an ordinary pattern
    ((_ v (unquote p) g s sk fk i)
     (match-one v p g s sk fk i))
    ;; NOTE(review): MATCH-ONE is given five operands here, while every
    ;; other use in this file gives seven (getter and setter after the
    ;; pattern).  It looks as if `g s' is missing both here and in the
    ;; nested MATCH-QUASIQUOTE continuation, so this clause presumably
    ;; cannot expand successfully -- confirm the intended semantics of
    ;; ,@ patterns before changing it.
    ((_ v ((unquote-splicing p) . rest) g s sk fk i)
     (if (pair? v)
       (match-one v
                  (p . tmp)
                  (match-quasiquote tmp rest g s sk fk)
                  fk
                  i)
       fk))
    ;; entering a nested quasiquote: one level deeper
    ((_ v (quasiquote p) g s sk fk i . depth)
     (match-quasiquote v p g s sk fk i #f . depth))
    ;; leaving one level of nesting
    ((_ v (unquote p) g s sk fk i x . depth)
     (match-quasiquote v p g s sk fk i . depth))
    ((_ v (unquote-splicing p) g s sk fk i x . depth)
     (match-quasiquote v p g s sk fk i . depth))
    ;; a pair: match the CAR, then the CDR via MATCH-QUASIQUOTE-STEP
    ((_ v (p . q) g s sk fk i . depth)
     (if (pair? v)
       (let ((w (car v)) (x (cdr v)))
         (match-quasiquote
          w p g s
          (match-quasiquote-step x q g s sk fk depth)
          fk i . depth))
       fk))
    ;; a vector is matched as the list of its elements
    ((_ v #(elt ...) g s sk fk i . depth)
     (if (vector? v)
       (let ((ls (vector->list v)))
         (match-quasiquote ls (elt ...) g s sk fk i . depth))
       fk))
    ;; anything else is a literal datum
    ((_ v x g s sk fk i . depth)
     (match-one v 'x g s sk fk i))))

;; Continuation used for the CDR of a quasiquoted pair.  The depth is
;; carried as a single operand ahead of the ids, which are appended by
;; whoever invokes the continuation, and is spliced back in after them.

(define-syntax match-quasiquote-step
  (syntax-rules ()
    ((match-quasiquote-step x q g s sk fk depth i)
     (match-quasiquote x q g s sk fk i . depth))
    ))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Utilities

;; A CPS utility that takes an expression followed by any number of
;; further forms (the ids appended by the caller) and just expands into
;; the expression.
(define-syntax match-drop-ids
  (syntax-rules ()
    ((_ expr ids ...) expr)))

;; Generating OR clauses just involves binding the success
;; continuation into a thunk which takes the identifiers common to
;; each OR clause, and trying each clause, calling the thunk as soon
;; as we succeed.
;;
;; The final operand is the ((id id-ls) ...) list that
;; MATCH-EXTRACT-VARS appends to its continuation.

(define-syntax match-gen-or
  (syntax-rules ()
    ((_ v p g s (sk ...) fk (i ...) ((id id-ls) ...))
     (let ((sk2 (lambda (id ...) (sk ... (i ... id ...)))))
       (match-gen-or-step
        v p g s (match-drop-ids (sk2 id ...)) fk (i ...))))))

(define-syntax match-gen-or-step
  (syntax-rules ()
    ((_ v () g s sk fk i)
     ;; no OR clauses, call the failure continuation
     fk)
    ((_ v (p) g s sk fk i)
     ;; last (or only) OR clause, just expand normally
     (match-one v p g s sk fk i))
    ((_ v (p . q) g s sk fk i)
     ;; match one and try the remaining on failure
     (match-one v p g s sk (match-gen-or-step v q g s sk fk i) i))
    ))

;; We match a pattern (p ...) by matching the pattern p in a loop on
;; each element of the variable, accumulating the bound ids into lists.

;; Look at the body - it's just a named let loop, matching each
;; element in turn to the same pattern.  This illustrates the
;; simplicity of this generative-style pattern matching.  It would be
;; just as easy to implement a tree searching pattern.

;; (match-gen-ellipses var p tail getter setter success-k fail-k ids
;;                     ((id id-ls) ...))
;;
;; Generates the loop for a pattern P followed by an ellipse.  TAIL
;; holds whatever patterns follow the ellipse; the final operand is the
;; list of (id accumulator) pairs appended by MATCH-EXTRACT-VARS.

(define-syntax match-gen-ellipses
  (syntax-rules ()
    ;; The ellipse is the last thing in the list pattern.
    ((_ v p () g s (sk ...) fk i ((id id-ls) ...))
     (match-check-identifier p
       ;; simplest case equivalent to ( . p), just bind the list
       (let ((p v))
         (if (list? p)
             (sk ... i)
             fk))
       ;; otherwise match each element, consing the bindings of every
       ;; id onto its accumulator and reversing them at the end
       (let loop ((ls v) (id-ls '()) ...)
         (cond
           ((null? ls)
            (let ((id (reverse id-ls)) ...) (sk ... i)))
           ((pair? ls)
            (let ((w (car ls)))
              (match-one w p (car ls) (set-car! ls)
                         (match-drop-ids (loop (cdr ls) (cons id id-ls) ...))
                         fk i)))
           (else
            fk)))))
    ;; Patterns follow the ellipse: P is repeated over all but the last
    ;; TAIL-LEN elements and the rest is matched against the tail.
    ((_ v p (r ...) g s (sk ...) fk i ((id id-ls) ...))
     (match-verify-no-ellipses
      (r ...)
      (let* ((tail-len (length '(r ...)))
             (ls v)
             ;; LENGTH is only defined on proper lists, so guard it:
             ;; LEN is #f for anything else and the match simply fails
             ;; instead of raising an error.
             (len (and (list? ls) (length ls))))
        (if (or (not len) (< len tail-len))
            fk
            (let loop ((ls ls) (n len) (id-ls '()) ...)
              (cond
                ((= n tail-len)
                 (let ((id (reverse id-ls)) ...)
                   (match-one ls (r ...) #f #f (sk ... i) fk i)))
                ((pair? ls)
                 (let ((w (car ls)))
                   (match-one w p (car ls) (set-car! ls)
                              (match-drop-ids
                               (loop (cdr ls) (- n 1) (cons id id-ls) ...))
                              fk
                              i)))
                (else
                 fk)))))))
    ))

;; Expands into SK unless one of the elements of the given pattern
;; list is an ellipse, in which case it expands into a
;; MATCH-SYNTAX-ERROR use.

(define-syntax match-verify-no-ellipses
  (syntax-rules ()
    ((_ (x . y) sk)
     (match-check-ellipse
      x
      (match-syntax-error
       "multiple ellipse patterns not allowed at same level")
      (match-verify-no-ellipses y sk)))
    ((_ x sk) sk)
    ))

;; Vector patterns are just more of the same, with the slight
;; exception that we pass around the current vector index being
;; matched.
;;
;;   (match-vector var index ((pat index) ...) remaining-pats sk fk ids)

(define-syntax match-vector
  (syntax-rules (___)
    ;; exactly two patterns left: the second may be an ellipse
    ((_ v n pats (p q) sk fk i)
     (match-check-ellipse q
                          (match-vector-ellipses v n pats p sk fk i)
                          (match-vector-two v n pats (p q) sk fk i)))
    ((_ v n pats (p ___) sk fk i)
     (match-vector-ellipses v n pats p sk fk i))
    ((_ . x)
     (match-vector-two . x))))

;; Check the exact vector length, then check each element in turn.

(define-syntax match-vector-two
  (syntax-rules ()
    ;; all patterns consumed: N is now the required length
    ((_ v n ((pat index) ...) () sk fk i)
     (if (vector? v)
       (let ((len (vector-length v)))
         (if (= len n)
           (match-vector-step v ((pat index) ...) sk fk i)
           fk))
       fk))
    ;; record the next pattern with its index and move on
    ((_ v n (pats ...) (p . q) sk fk i)
     (match-vector v (+ n 1) (pats ... (p n)) q sk fk i))
    ))

;; Matches each recorded (pattern index) pair against the element at
;; that index, chaining itself as the success continuation.

(define-syntax match-vector-step
  (syntax-rules ()
    ((_ v () (sk ...) fk i) (sk ... i))
    ((_ v ((pat index) . rest) sk fk i)
     (let ((w (vector-ref v index)))
       (match-one w pat (vector-ref v index) (vector-set! v index)
                  (match-vector-step v rest sk fk)
                  fk i)))))

;; With a vector ellipse pattern we first check to see if the vector
;; length is at least the required length.

(define-syntax match-vector-ellipses
  (syntax-rules ()
    ((_ v n ((pat index) ...) p sk fk i)
     (if (vector? v)
       (let ((len (vector-length v)))
         (if (>= len n)
           (match-vector-step v ((pat index) ...)
                              (match-vector-tail v p n len sk fk)
                              fk i)
           fk))
       fk))))

;; Collects the variables of the repeated pattern P and passes them to
;; MATCH-VECTOR-TAIL-TWO.

(define-syntax match-vector-tail
  (syntax-rules ()
    ((_ v p n len sk fk i)
     (match-extract-vars p (match-vector-tail-two v p n len sk fk i) i ()))))

;; Loops over indices N up to (but excluding) LEN, matching every
;; element against P and accumulating each id's bindings into a list.

(define-syntax match-vector-tail-two
  (syntax-rules ()
    ((_ v p n len (sk ...) fk i ((id id-ls) ...))
     (let loop ((j n) (id-ls '()) ...)
       (if (>= j len)
         (let ((id (reverse id-ls)) ...) (sk ... i))
         (let ((w (vector-ref v j)))
           ;; the setter prefix must name VECTOR-SET! so that a
           ;; (set! x) sub-pattern yields a working setter
           (match-one w p (vector-ref v j) (vector-set! v j)
                      (match-drop-ids (loop (+ j 1) (cons id id-ls) ...))
                      fk i)))))))

;; Extract all identifiers in a pattern.  A little more complicated
;; than just looking for symbols, we need to ignore special keywords
;; and not pattern forms (such as the predicate expression in ?
;; patterns).
;;
;; (match-extract-vars pattern continuation (ids ...) (new-vars ...))
;;
;; The continuation is invoked with the accumulated list of
;; (var var-ls) pairs appended as its last operand.

(define-syntax match-extract-vars
  (syntax-rules (_ ___ ? $ = quote quasiquote and or not get! set!)
    ;; skip the non-pattern operand of ?, $ and =
    ((match-extract-vars (? pred . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars ($ rec . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars (= proc p) k i v)
     (match-extract-vars p k i v))
    ;; quoted data binds nothing
    ((match-extract-vars (quote x) (k ...) i v)
     (k ... v))
    ((match-extract-vars (quasiquote x) k i v)
     (match-extract-quasiquote-vars x k i v (#t)))
    ;; for AND, OR and NOT just scan the sub-patterns
    ((match-extract-vars (and . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars (or . p) k i v)
     (match-extract-vars p k i v))
    ((match-extract-vars (not . p) k i v)
     (match-extract-vars p k i v))
    ;; A non-keyword pair, expand the CAR with a continuation to
    ;; expand the CDR.  An ellipse in second position is dropped
    ;; rather than scanned.
    ((match-extract-vars (p q . r) k i v)
     (match-check-ellipse
      q
      (match-extract-vars (p . r) k i v)
      (match-extract-vars p (match-extract-vars-step (q . r) k i v) i ())))
    ((match-extract-vars (p . q) k i v)
     (match-extract-vars p (match-extract-vars-step q k i v) i ()))
    ((match-extract-vars #(p ...) k i v)
     (match-extract-vars (p ...) k i v))
    ((match-extract-vars _ (k ...) i v)    (k ... v))
    ((match-extract-vars ___ (k ...) i v)  (k ... v))
    ;; This is the main part, the only place where we might add a new
    ;; var if it's an unbound symbol.
    ((match-extract-vars p (k ...) (i ...) v)
     (let-syntax
         ((new-sym?
           (syntax-rules (i ...)
             ((new-sym? p sk fk) sk)
             ((new-sym? x sk fk) fk))))
       (new-sym? random-sym-to-match
                 (k ... ((p p-ls) . v))
                 (k ... v))))
    ))

;; Stepper used in the above so it can expand the CAR and CDR
;; separately.

(define-syntax match-extract-vars-step
  (syntax-rules ()
    ((_ p k i v ((v2 v2-ls) ...))
     (match-extract-vars p k (v2 ... . i) ((v2 v2-ls) ... . v)))
    ))

;; Same job for quasiquote patterns.  The last operand is a list with
;; one #t per level of quasiquotation; only forms unquoted at the
;; outermost level are scanned as patterns.

(define-syntax match-extract-quasiquote-vars
  (syntax-rules (quasiquote unquote unquote-splicing)
    ((match-extract-quasiquote-vars (quasiquote x) k i v d)
     (match-extract-quasiquote-vars x k i v (#t . d)))
    ((match-extract-quasiquote-vars (unquote-splicing x) k i v d)
     (match-extract-quasiquote-vars (unquote x) k i v d))
    ((match-extract-quasiquote-vars (unquote x) k i v (#t))
     (match-extract-vars x k i v))
    ((match-extract-quasiquote-vars (unquote x) k i v (#t . d))
     (match-extract-quasiquote-vars x k i v d))
    ((match-extract-quasiquote-vars (x . y) k i v (#t . d))
     (match-extract-quasiquote-vars
      x
      (match-extract-quasiquote-vars-step y k i v d) i ()))
    ((match-extract-quasiquote-vars #(x ...) k i v (#t . d))
     (match-extract-quasiquote-vars (x ...) k i v d))
    ((match-extract-quasiquote-vars x (k ...) i v (#t . d))
     (k ... v))
    ))

(define-syntax match-extract-quasiquote-vars-step
  (syntax-rules ()
    ((_ x k i v d ((v2 v2-ls) ...))
     (match-extract-quasiquote-vars x k (v2 ... . i) ((v2 v2-ls) ... . v) d))
    ))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Gimme some sugar baby.

;; A one-argument procedure that matches its argument.
(define-syntax match-lambda
  (syntax-rules ()
    ((_ clause ...) (lambda (expr) (match expr clause ...)))))

;; A variadic procedure that matches its argument list.
(define-syntax match-lambda*
  (syntax-rules ()
    ((_ clause ...) (lambda expr (match expr clause ...)))))

;; A parenthesised binding list is an ordinary MATCH-LET; anything else
;; in first position is taken as the name of a named let.
(define-syntax match-let
  (syntax-rules ()
    ((_ (vars ...) . body)
     (match-let/helper let () () (vars ...) . body))
    ((_ loop . rest)
     (match-named-let loop () . rest))))

(define-syntax match-letrec
  (syntax-rules ()
    ((_ vars . body) (match-let/helper letrec () () vars . body))))

;; (match-let/helper binder plain-bindings pattern-bindings todo . body)
;;
;; Sorts the bindings into plain (var expr) pairs, which go straight to
;; BINDER (LET or LETREC), and pair or vector patterns, whose
;; expressions are bound to temporaries and then destructured with
;; MATCH-LET*.

(define-syntax match-let/helper
  (syntax-rules ()
    ;; nothing left and no patterns: a plain LET/LETREC
    ((_ let ((var expr) ...) () () . body)
     (let ((var expr) ...) . body))
    ;; nothing left, some patterns: bind, then destructure
    ((_ let ((var expr) ...) ((pat tmp) ...) () . body)
     (let ((var expr) ...)
       (match-let* ((pat tmp) ...)
         . body)))
    ;; a pair pattern
    ((_ let (v ...) (p ...) (((a . b) expr) . rest) . body)
     (match-let/helper
      let (v ... (tmp expr)) (p ... ((a . b) tmp)) rest . body))
    ;; a vector pattern
    ((_ let (v ...) (p ...) ((#(a ...) expr) . rest) . body)
     (match-let/helper
      let (v ... (tmp expr)) (p ... (#(a ...) tmp)) rest . body))
    ;; anything else is treated as a plain variable
    ((_ let (v ...) (p ...) ((a expr) . rest) . body)
     (match-let/helper let (v ... (a expr)) (p ...) rest . body))
    ))

;; Named let whose loop variables are patterns: each pattern gets a
;; temporary loop variable that is destructured inside the body.

(define-syntax match-named-let
  (syntax-rules ()
    ((_ loop ((pat expr var) ...) () . body)
     (let loop ((var expr) ...)
       (match-let ((pat var) ...)
         . body)))
    ((_ loop (v ...) ((pat expr) . rest) . body)
     (match-named-let loop (v ... (pat expr tmp)) rest . body))))

;; Sequential destructuring: one nested MATCH per binding.

(define-syntax match-let*
  (syntax-rules ()
    ((_ () . body)
     (begin . body))
    ((_ ((pat expr) . rest) . body)
     (match expr (pat (match-let* rest . body))))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Not quite portable bits.

;; Matching ellipses `...' is tricky.  A strict interpretation of R5RS
;; would suggest that `...' in the literals list would treat it as a
;; literal in pattern, however no SYNTAX-RULES implementation I'm
;; aware of currently supports this.  SRFI-46 support would make this
;; easy, but SRFI-46 also is widely unsupported.

;; In the meantime we conditionally implement this in whatever
;; low-level macro system is available, defaulting to an
;; implementation which doesn't support `...' and requires the user to
;; match with `___'.

;; This definition uses the custom-ellipsis form of SYNTAX-RULES:
;; `___' is declared as the ellipsis identifier and `...' is listed as
;; a literal, so the first clause matches only when the first operand
;; is the identifier `...'.

(define-syntax match-check-ellipse
  (syntax-rules ___ (...)
    ((_ ... sk fk) sk)
    ((_ x sk fk) fk)))

;; Expands into SK when the first operand is an identifier and into FK
;; otherwise.  Pairs and vectors are rejected directly; for anything
;; else a local macro whose pattern is the operand itself is applied to
;; a symbol, which only an identifier used as a pattern variable can
;; match.

(define-syntax match-check-identifier
  (syntax-rules ()
    ((_ (x . y) sk fk) fk)
    ((_ #(x ...) sk fk) fk)
    ((_ x sk fk)
     (let-syntax
         ((sym?
           (syntax-rules ()
             ((sym? x sk2 fk2) sk2)
             ((sym? y sk2 fk2) fk2))))
       (sym? abracadabra sk fk)))))