summaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorGravatar Andy Wingo2010-10-14 12:52:38 +0200
committerGravatar Andy Wingo2010-11-13 18:47:03 +0100
commit35873a522a6ba658edea24b36bcbc4d9cd64664e (patch)
treea22ae04ba2e847bbd2f7e7db078e621905988b45
parent1a0ff53dfcc01e0ef607f88566ac4e932137dd02 (diff)
downloadtekuti-35873a522a6ba658edea24b36bcbc4d9cd64664e.tar.gz
tekuti-35873a522a6ba658edea24b36bcbc4d9cd64664e.zip
import match-bind from guile-lib
* tekuti/match-bind.scm: Import from guile-lib.
-rw-r--r--tekuti/match-bind.scm263
1 files changed, 263 insertions, 0 deletions
diff --git a/tekuti/match-bind.scm b/tekuti/match-bind.scm
new file mode 100644
index 0000000..c3f87e7
--- /dev/null
+++ b/tekuti/match-bind.scm
@@ -0,0 +1,263 @@
+;; (match-bind) -- binding variables from regular expression matches
+;; Copyright (C) 2007, 2009, 2010 Andy Wingo <wingo at pobox dot com>
+
+;; This program is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;
+;; @c
+;; Utility functions and syntax constructs for dealing with regular
+;; expressions in a concise manner.
+;;
+;;; Code:
+
+(define-module (tekuti match-bind)
+ #:use-module (ice-9 regex)
+ #:export (match-bind
+ s///
+ s///g))
+
+(define (regex:count re-string)
+ (let lp ((char-class #f) (nparen 0) (in (string->list re-string)))
+ (cond
+ ((null? in) nparen)
+ ((char=? (car in) #\\)
+ (lp char-class nparen (cddr in)))
+ ((char=? (car in) #\[)
+ (lp #t nparen (cdr in)))
+ ((char=? (car in) #\])
+ (lp #f nparen (cdr in)))
+ ((and (char=? (car in) #\() (not char-class))
+ (lp #f (1+ nparen) (cddr in)))
+ (else
+ (lp char-class nparen (cdr in))))))
+
+(define-syntax match-substring
+ (syntax-rules ()
+ ((_ m i)
+ (let* ((pair (vector-ref m (1+ i)))
+ (start (car pair))
+ (end (cdr pair)))
+ (if (or (= start -1) (= end -1))
+ #f
+ (substring (vector-ref m 0) start end))))))
+
+(define *cache* (make-weak-key-hash-table))
+(define (memoize-re regex)
+ (cond
+ ((hashq-ref *cache* regex))
+ (else
+ (let ((re (make-regexp regex)))
+ (hashq-set! *cache* regex re)
+ re))))
+
+(define-syntax match-bind
+ (lambda (x)
+ "Match a string against a regular expression, binding lexical
+variables to the various parts of the match.
+
+@var{vars} is a list of names to which to bind the parts of the match.
+The first variable of the list will be bound to the entire match, so the
+number of variables needed will be equal to the number of open
+parentheses (`(') in the pattern, plus one for the whole match.
+
+@var{consequent} is executed if the given expression @var{str} matches
+@var{regex}. If the string does not match, @var{alternate} will be
+executed if present. If @var{alternate} is not present, the result of
+@code{match-bind} is unspecified.
+
+Here is a short example:
+@example
+ (define (star-indent line)
+ \"Returns the number of spaces until the first
+ star (`*') in the input, or #f if the first
+ non-space character is not a star.\"
+ (match-bind \"^( *)\\*.*$\" line (_ spaces)
+ (string-length spaces)
+ #f))
+@end example
+"
+ (define (match-bindings m re vars nvars)
+ (let lp ((in vars) (i 0) (out '()))
+ (syntax-case in ()
+ (()
+ (if (not (= i nvars))
+ (error "bad number of bindings to match-bind" i nvars)
+ (reverse out)))
+ ((v . v*)
+ (lp #'v* (1+ i)
+ (with-syntax ((i i) (m m))
+ (cons #'(v (match-substring m i)) out))))
+ (v (and (identifier? #'v) (<= i nvars))
+ (reverse
+ (cons (with-syntax
+ (((init ...)
+ (map (lambda (x)
+ (with-syntax ((m m) (i (+ i x)))
+ #'(match-substring m i)))
+ (iota (- nvars i)))))
+ #'(v (list init ...)))
+ out))))))
+ (syntax-case x ()
+ ((_ regex str vars consequent)
+ #'(match-bind regex str vars consequent (if #f #f)))
+ ((_ regex str vars consequent alternate)
+ (let ((m #'m))
+ (with-syntax ((m m)
+ (((var val) ...)
+ (match-bindings m (memoize-re (syntax->datum #'regex))
+ #'vars
+ ;; 1+ for the match:0 binding
+ (1+ (regex:count
+ (syntax->datum #'regex))))))
+ #'(let* ((m (regexp-exec (memoize-re regex) str)))
+ (if m
+ (let ((var val) ...)
+ consequent)
+ alternate))))))))
+
+(define-macro (make-state-parser states initial)
+ `(lambda (port)
+ (let lp ((state ',initial)
+ (c (read-char port))
+ (out '())
+ (accum '()))
+ (case state
+ ((*eof*) (reverse out))
+ ,@(map
+ (lambda (desc)
+ (let ((name (car desc))
+ (cont (cadr desc))
+ (cases (map
+ (lambda (kase)
+ (let ((condition (car kase))
+ (new-state (cdr kase)))
+ (cond
+ ((not (symbol? new-state))
+ (error "invalid new-state in spec" new-state))
+ ((number? condition)
+ `((= (length accum) ,condition) ',new-state))
+ ((list? condition)
+ `((memv c ',condition) ',new-state))
+ (else
+ `(,condition ',new-state)))))
+ (cddr desc))))
+ `((,name)
+ (let ((new-state (cond ((eof-object? c) '*eof*) ,@cases))
+ (cont ,cont))
+ (if (eq? state new-state)
+ (lp state (read-char port) out (cons c accum))
+ (lp new-state c
+ (if cont
+ (cons (cont (reverse accum)) out)
+ out)
+ '()))))))
+ states)
+ (else (error "invalid state" state (reverse out) (reverse accum)))))))
+
+(define (make-item-list subst)
+ (map
+ (lambda (item)
+ (cond ((procedure? item) item)
+ ((string? item) (lambda (match) item))
+ (else (error "internal error" item))))
+ (call-with-input-string
+ subst
+ (make-state-parser
+ ((string
+ list->string
+ ((#\\) . quote-head)
+ ((#\$) . variable-head)
+ (else . string))
+ (quote-head
+ #f
+ (0 . quote-head)
+ (else . quote))
+ (quote
+ list->string
+ (0 . quote)
+ ((#\\) . quote-head)
+ ((#\$) . variable-head)
+ (else . string))
+ (variable-head
+ #f
+ (0 . variable-head)
+ ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) . variable)
+ (else . error))
+ (variable
+ (lambda (l)
+ (let ((i (string->number (list->string l))))
+ (lambda (match)
+ (or (match:substring match i) ""))))
+ ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) . variable)
+ ((#\\) . quote-head)
+ ((#\$) . variable-head)
+ (else . string)))
+ string))))
+
+(define (s/// pat subst)
+ "Make a procedure that performs perl-like regular expression
+search-and-replace on an input string.
+
+The regular expression pattern @var{pat} is in the standard regular
+expression syntax accepted by @code{make-regexp}. The substitution
+string is very similar to perl's @code{s///} operator. Backreferences
+are indicated with a dollar sign (@samp{$}), and characters can be
+escaped with the backslash.
+
+@code{s///} returns a procedure of one argument, the input string to be
+matched. If the string matches the pattern, it will be returned with the
+first matching segment replaced as per the substitution string.
+Otherwise the string will be returned unmodified.
+
+Here are some examples:
+
+@example
+ ((s/// \"foo\" \"bar\") \"foo bar baz qux foo\")
+ @result{} \"bar bar baz qux foo\"
+
+ ((s/// \"zag\" \"bar\") \"foo bar baz qux foo\")
+ @result{} \"foo bar baz qux foo\"
+
+ ((s/// \"(f(o+)) (zag)?\" \"$1 $2 $3\")
+ \"foo bar baz qux foo\")
+ @result{} \"foo oo bar baz qux foo\"
+@end example
+"
+ (let ((re (make-regexp pat))
+ (substers `(,match:prefix
+ ,@(make-item-list subst)
+ ,match:suffix)))
+ (lambda (string)
+ (let ((match (regexp-exec re string)))
+ (if match
+ (apply string-append (map (lambda (p) (p match)) substers))
+ string)))))
+
+(define (s///g pat subst)
+ "Make a procedure that performs perl-like global search-and-replace on
+an input string.
+
+The @var{pat} and @var{subst} arguments are as in the non-global
+@code{s///}. @xref{match-bind s///,,s///}, for more information.
+
+@code{s///g} differs from @code{s///} in that it does a global search
+and replace, not stopping at the first match.
+"
+ (let ((re (make-regexp pat))
+ (items `(pre
+ ,@(make-item-list subst)
+ post)))
+ (lambda (string)
+ (apply regexp-substitute/global #f re string items))))