summaryrefslogtreecommitdiffstatshomepage
path: root/tekuti/filters.scm
blob: 7fb7369e26605cf8c4d40582659da767731b44f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
;; Tekuti
;; Copyright (C) 2008 Andy Wingo <wingo at pobox dot com>

;; This program is free software; you can redistribute it and/or    
;; modify it under the terms of the GNU General Public License as   
;; published by the Free Software Foundation; either version 3 of   
;; the License, or (at your option) any later version.              
;;                                                                  
;; This program is distributed in the hope that it will be useful,  
;; but WITHOUT ANY WARRANTY; without even the implied warranty of   
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    
;; GNU General Public License for more details.                     
;;                                                                  
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, contact:
;;
;; Free Software Foundation           Voice:  +1-617-542-5942
;; 59 Temple Place - Suite 330        Fax:    +1-617-542-2652
;; Boston, MA  02111-1307,  USA       gnu@gnu.org

;;; Commentary:
;;
;; This is the main script that will launch tekuti.
;;
;;; Code:

(define-module (tekuti filters)
  #:use-module (sxml simple)
  #:use-module (sxml transform)
  #:use-module (match-bind)
  #:export (wordpress->sxml))

(define blocks '(table thead tfoot caption colgroup tbody tr td th div
                 dl dd dt ul ol li pre select form map area blockquote
                 address math style input p h1 h2 h3 h4 h5 h6))

(define (can-contain-p? tag)
  (memq tag '(div li blockquote)))

(define (inline? tag)
  (not (memq tag blocks)))

(define (wpautop tag body)
  (define (pclose p out)
    (if p (cons (cons 'p (reverse p)) out) out))
  (define (scons x p)
    (if (and (string? x) (string-null? x))
        p
        (cons x (or p '()))))
  (define (pbreak p)
    (if p (cons '(br) p) p))
  
  (let* ((head (if (and (pair? (car body))
                        (eq? (caar body) '@))
                   (list tag (car body))
                   (list tag)))
         (body (if (null? (cdr head)) body (cdr body))))
    (let lp ((p #f) (in body) (out (reverse head)))
      (cond
       ((null? in)
        (reverse (pclose p out)))
       ((string? (car in))
        (match-bind "^([^\n]*)\n(\n*)(.*)$" (car in) (_ head p-or-br? tail)
                    (if (string-null? p-or-br?)
                        (lp (if (string-null? tail)
                                (scons head p)
                                (pbreak (scons head p)))
                            (scons tail (cdr in)) out)
                        (lp #f (scons tail (cdr in))
                            (pclose (scons head p) out)))
                    (lp (cons (car in) (or p '()))
                        (cdr in) out)))
       ((inline? (caar in))
        (lp (scons (car in) p) (cdr in) out))
       (else
        (lp #f (cdr in)
            (cons (car in) (pclose p out))))))))

(define (wordpress->sxml text)
  (catch 'parser-error
(lambda ()
  (let ((sxml (cadr (with-input-from-string (string-append "<div>" text "</div>")
                      xml->sxml))))
    (pre-post-order
     sxml
     `((*default* . ,(lambda (tag . body)
                       (if (can-contain-p? tag)
                           (wpautop tag body)
                           (cons tag body))))
       (*text* . ,(lambda (tag text)
                    text))))))
         (lambda (key . args)
           `(pre "parse error: "
                 ,(with-output-to-string (lambda () (write args)))
                 "\n"
                 ,text))))