blob: 7fb7369e26605cf8c4d40582659da767731b44f2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
;; Tekuti
;; Copyright (C) 2008 Andy Wingo <wingo at pobox dot com>
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 3 of
;; the License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, contact:
;;
;; Free Software Foundation Voice: +1-617-542-5942
;; 59 Temple Place - Suite 330 Fax: +1-617-542-2652
;; Boston, MA 02111-1307, USA gnu@gnu.org
;;; Commentary:
;;
;; This is the main script that will launch tekuti.
;;
;;; Code:
(define-module (tekuti filters)
#:use-module (sxml simple)
#:use-module (sxml transform)
#:use-module (match-bind)
#:export (wordpress->sxml))
(define blocks '(table thead tfoot caption colgroup tbody tr td th div
dl dd dt ul ol li pre select form map area blockquote
address math style input p h1 h2 h3 h4 h5 h6))
(define (can-contain-p? tag)
(memq tag '(div li blockquote)))
(define (inline? tag)
(not (memq tag blocks)))
(define (wpautop tag body)
(define (pclose p out)
(if p (cons (cons 'p (reverse p)) out) out))
(define (scons x p)
(if (and (string? x) (string-null? x))
p
(cons x (or p '()))))
(define (pbreak p)
(if p (cons '(br) p) p))
(let* ((head (if (and (pair? (car body))
(eq? (caar body) '@))
(list tag (car body))
(list tag)))
(body (if (null? (cdr head)) body (cdr body))))
(let lp ((p #f) (in body) (out (reverse head)))
(cond
((null? in)
(reverse (pclose p out)))
((string? (car in))
(match-bind "^([^\n]*)\n(\n*)(.*)$" (car in) (_ head p-or-br? tail)
(if (string-null? p-or-br?)
(lp (if (string-null? tail)
(scons head p)
(pbreak (scons head p)))
(scons tail (cdr in)) out)
(lp #f (scons tail (cdr in))
(pclose (scons head p) out)))
(lp (cons (car in) (or p '()))
(cdr in) out)))
((inline? (caar in))
(lp (scons (car in) p) (cdr in) out))
(else
(lp #f (cdr in)
(cons (car in) (pclose p out))))))))
(define (wordpress->sxml text)
(catch 'parser-error
(lambda ()
(let ((sxml (cadr (with-input-from-string (string-append "<div>" text "</div>")
xml->sxml))))
(pre-post-order
sxml
`((*default* . ,(lambda (tag . body)
(if (can-contain-p? tag)
(wpautop tag body)
(cons tag body))))
(*text* . ,(lambda (tag text)
text))))))
(lambda (key . args)
`(pre "parse error: "
,(with-output-to-string (lambda () (write args)))
"\n"
,text))))
|