From 35873a522a6ba658edea24b36bcbc4d9cd64664e Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 14 Oct 2010 12:52:38 +0200 Subject: [PATCH] import match-bind from guile-lib * tekuti/match-bind.scm: Import from guile-lib. --- tekuti/match-bind.scm | 263 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 tekuti/match-bind.scm diff --git a/tekuti/match-bind.scm b/tekuti/match-bind.scm new file mode 100644 index 0000000..c3f87e7 --- /dev/null +++ b/tekuti/match-bind.scm @@ -0,0 +1,263 @@ +;; (match-bind) -- binding variables from regular expression matches +;; Copyright (C) 2007, 2009, 2010 Andy Wingo + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;;; Commentary: +;; +;; @c +;; Utility functions and syntax constructs for dealing with regular +;; expressions in a concise manner. +;; +;;; Code: + +(define-module (tekuti match-bind) + #:use-module (ice-9 regex) + #:export (match-bind + s/// + s///g)) + +(define (regex:count re-string) + (let lp ((char-class #f) (nparen 0) (in (string->list re-string))) + (cond + ((null? in) nparen) + ((char=? (car in) #\\) + (lp char-class nparen (cddr in))) + ((char=? (car in) #\[) + (lp #t nparen (cdr in))) + ((char=? (car in) #\]) + (lp #f nparen (cdr in))) + ((and (char=? (car in) #\() (not char-class)) + (lp #f (1+ nparen) (cddr in))) + (else + (lp char-class nparen (cdr in)))))) + +(define-syntax match-substring + (syntax-rules () + ((_ m i) + (let* ((pair (vector-ref m (1+ i))) + (start (car pair)) + (end (cdr pair))) + (if (or (= start -1) (= end -1)) + #f + (substring (vector-ref m 0) start end)))))) + +(define *cache* (make-weak-key-hash-table)) +(define (memoize-re regex) + (cond + ((hashq-ref *cache* regex)) + (else + (let ((re (make-regexp regex))) + (hashq-set! *cache* regex re) + re)))) + +(define-syntax match-bind + (lambda (x) + "Match a string against a regular expression, binding lexical +variables to the various parts of the match. + +@var{vars} is a list of names to which to bind the parts of the match. +The first variable of the list will be bound to the entire match, so the +number of variables needed will be equal to the number of open +parentheses (`(') in the pattern, plus one for the whole match. + +@var{consequent} is executed if the given expression @var{str} matches +@var{regex}. If the string does not match, @var{alternate} will be +executed if present. If @var{alternate} is not present, the result of +@code{match-bind} is unspecified. + +Here is a short example: +@example + (define (star-indent line) + \"Returns the number of spaces until the first + star (`*') in the input, or #f if the first + non-space character is not a star.\" + (match-bind \"^( *)\\*.*$\" line (_ spaces) + (string-length spaces) + #f)) +@end example +" + (define (match-bindings m re vars nvars) + (let lp ((in vars) (i 0) (out '())) + (syntax-case in () + (() + (if (not (= i nvars)) + (error "bad number of bindings to match-bind" i nvars) + (reverse out))) + ((v . v*) + (lp #'v* (1+ i) + (with-syntax ((i i) (m m)) + (cons #'(v (match-substring m i)) out)))) + (v (and (identifier? #'v) (<= i nvars)) + (reverse + (cons (with-syntax + (((init ...) + (map (lambda (x) + (with-syntax ((m m) (i (+ i x))) + #'(match-substring m i))) + (iota (- nvars i))))) + #'(v (list init ...))) + out)))))) + (syntax-case x () + ((_ regex str vars consequent) + #'(match-bind regex str vars consequent (if #f #f))) + ((_ regex str vars consequent alternate) + (let ((m #'m)) + (with-syntax ((m m) + (((var val) ...) + (match-bindings m (memoize-re (syntax->datum #'regex)) + #'vars + ;; 1+ for the match:0 binding + (1+ (regex:count + (syntax->datum #'regex)))))) + #'(let* ((m (regexp-exec (memoize-re regex) str))) + (if m + (let ((var val) ...) + consequent) + alternate)))))))) + +(define-macro (make-state-parser states initial) + `(lambda (port) + (let lp ((state ',initial) + (c (read-char port)) + (out '()) + (accum '())) + (case state + ((*eof*) (reverse out)) + ,@(map + (lambda (desc) + (let ((name (car desc)) + (cont (cadr desc)) + (cases (map + (lambda (kase) + (let ((condition (car kase)) + (new-state (cdr kase))) + (cond + ((not (symbol? new-state)) + (error "invalid new-state in spec" new-state)) + ((number? condition) + `((= (length accum) ,condition) ',new-state)) + ((list? condition) + `((memv c ',condition) ',new-state)) + (else + `(,condition ',new-state))))) + (cddr desc)))) + `((,name) + (let ((new-state (cond ((eof-object? c) '*eof*) ,@cases)) + (cont ,cont)) + (if (eq? state new-state) + (lp state (read-char port) out (cons c accum)) + (lp new-state c + (if cont + (cons (cont (reverse accum)) out) + out) + '())))))) + states) + (else (error "invalid state" state (reverse out) (reverse accum))))))) + +(define (make-item-list subst) + (map + (lambda (item) + (cond ((procedure? item) item) + ((string? item) (lambda (match) item)) + (else (error "internal error" item)))) + (call-with-input-string + subst + (make-state-parser + ((string + list->string + ((#\\) . quote-head) + ((#\$) . variable-head) + (else . string)) + (quote-head + #f + (0 . quote-head) + (else . quote)) + (quote + list->string + (0 . quote) + ((#\\) . quote-head) + ((#\$) . variable-head) + (else . string)) + (variable-head + #f + (0 . variable-head) + ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) . variable) + (else . error)) + (variable + (lambda (l) + (let ((i (string->number (list->string l)))) + (lambda (match) + (or (match:substring match i) "")))) + ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) . variable) + ((#\\) . quote-head) + ((#\$) . variable-head) + (else . string))) + string)))) + +(define (s/// pat subst) + "Make a procedure that performs perl-like regular expression +search-and-replace on an input string. + +The regular expression pattern @var{pat} is in the standard regular +expression syntax accepted by @code{make-regexp}. The substitution +string is very similar to perl's @code{s///} operator. Backreferences +are indicated with a dollar sign (@samp{$}), and characters can be +escaped with the backslash. + +@code{s///} returns a procedure of one argument, the input string to be +matched. If the string matches the pattern, it will be returned with the +first matching segment replaced as per the substitution string. +Otherwise the string will be returned unmodified. + +Here are some examples: + +@example + ((s/// \"foo\" \"bar\") \"foo bar baz qux foo\") + @result{} \"bar bar baz qux foo\" + + ((s/// \"zag\" \"bar\") \"foo bar baz qux foo\") + @result{} \"foo bar baz qux foo\" + + ((s/// \"(f(o+)) (zag)?\" \"$1 $2 $3\") + \"foo bar baz qux foo\") + @result{} \"foo oo bar baz qux foo\" +@end example +" + (let ((re (make-regexp pat)) + (substers `(,match:prefix + ,@(make-item-list subst) + ,match:suffix))) + (lambda (string) + (let ((match (regexp-exec re string))) + (if match + (apply string-append (map (lambda (p) (p match)) substers)) + string))))) + +(define (s///g pat subst) + "Make a procedure that performs perl-like global search-and-replace on +an input string. + +The @var{pat} and @var{subst} arguments are as in the non-global +@code{s///}. @xref{match-bind s///,,s///}, for more information. + +@code{s///g} differs from @code{s///} in that it does a global search +and replace, not stopping at the first match. +" + (let ((re (make-regexp pat)) + (items `(pre + ,@(make-item-list subst) + post))) + (lambda (string) + (apply regexp-substitute/global #f re string items))))