Merge remote-tracking branch 'origin/master' into customizations
This commit is contained in:
commit
cc4ce6cad4
7 changed files with 757 additions and 27 deletions
|
@ -10,6 +10,7 @@ SOURCES = \
|
|||
tekuti/filters.scm \
|
||||
tekuti/git.scm \
|
||||
tekuti/index.scm \
|
||||
tekuti/marxdown.scm \
|
||||
tekuti/match-bind.scm \
|
||||
tekuti/mod-lisp.scm \
|
||||
tekuti/page-helpers.scm \
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
;; Tekuti
|
||||
;; Copyright (C) 2008, 2010, 2012 Andy Wingo <wingo at pobox dot com>
|
||||
;; Copyright (C) 2008, 2010, 2012, 2022 Andy Wingo <wingo at pobox dot com>
|
||||
|
||||
;; This program is free software; you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU General Public License as
|
||||
|
@ -60,7 +60,7 @@
|
|||
(raw (assq-ref comment 'raw-content)))
|
||||
(or (case format
|
||||
((wordpress) (false-if-exception (wordpress->sxml raw)))
|
||||
|
||||
((marxdown) (false-if-exception (marxdown->sxml raw)))
|
||||
(else `(pre ,raw)))
|
||||
`(pre ,raw))))))
|
||||
|
||||
|
@ -141,14 +141,17 @@
|
|||
(total-legit-features (feature-count legit-features)))
|
||||
(hash-for-each
|
||||
(lambda (feature _)
|
||||
(let ((bogus-count (hash-ref bogus-features feature 0))
|
||||
(let ((bogus-count (hash-ref bogus-features feature))
|
||||
(legit-count (hash-ref legit-features feature 0)))
|
||||
(hash-set! log-bogosities feature
|
||||
(if (and (> total-bogus-features 0)
|
||||
(> total-legit-features 0))
|
||||
(log (/ (/ (+ bogus-count 0.001) total-bogus-features)
|
||||
(/ (+ legit-count 0.001) total-legit-features)))
|
||||
0))))
|
||||
(cond
|
||||
(bogus-count
|
||||
(hash-set! log-bogosities feature
|
||||
(log (/ (/ (+ bogus-count 0.001) total-bogus-features)
|
||||
(/ (+ legit-count 0.001) total-legit-features)))))
|
||||
(else
|
||||
(hash-set! log-bogosities feature
|
||||
(log (/ (/ 0.01 total-bogus-features)
|
||||
(/ (+ legit-count 0.01) total-legit-features))))))))
|
||||
changed-features)))
|
||||
|
||||
(define (compute-bogus-probability comment log-bogosities bogus-prior
|
||||
|
@ -288,13 +291,19 @@
|
|||
(assq-ref index 'master))))
|
||||
(let ((removed-features (count-features removed))
|
||||
(added-features (count-features added)))
|
||||
;; If a comment passes the spam filter, it gets added, and is
|
||||
;; presumed legitimate. It could then be reverted, in which
|
||||
;; case we should remove its features from the legitimate count
|
||||
;; and add them to the bogus count.
|
||||
(hash-for-each (lambda (k v)
|
||||
(remove-feature! legit-features k v)
|
||||
(add-feature! bogus-features k v))
|
||||
removed-features)
|
||||
(hash-for-each (lambda (k v)
|
||||
(add-feature! legit-features k v)
|
||||
(remove-feature! bogus-features k v))
|
||||
;; Asymmetry with removed-features; the
|
||||
;; comment wasn't previously marked bogus, so
|
||||
;; we just add its features to the legit set.
|
||||
(add-feature! legit-features k v))
|
||||
added-features)
|
||||
(update-bogosities! bogosities removed-features
|
||||
legit-features bogus-features)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
;; Tekuti
|
||||
;; Copyright (C) 2008, 2010, 2012 Andy Wingo <wingo at pobox dot com>
|
||||
;; Copyright (C) 2008, 2010, 2012, 2022 Andy Wingo <wingo at pobox dot com>
|
||||
|
||||
;; This program is free software; you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU General Public License as
|
||||
|
@ -41,7 +41,8 @@
|
|||
compute-legit-comments compute-bogus-comments))
|
||||
|
||||
(define *comment-spec*
|
||||
`((timestamp . ,string->number)))
|
||||
`((timestamp . ,string->number)
|
||||
(format . ,string->symbol)))
|
||||
|
||||
(define (blob->comment encoded-name sha1)
|
||||
(let ((blob (git "show" sha1)))
|
||||
|
@ -91,6 +92,7 @@
|
|||
(let ((format (or (assq-ref comment 'format) 'wordpress)))
|
||||
((case format
|
||||
((wordpress) wordpress->sxml)
|
||||
((marxdown) marxdown->sxml)
|
||||
(else (lambda (text) `(pre ,text))))
|
||||
(comment-raw-content comment))))))
|
||||
|
||||
|
@ -119,7 +121,7 @@
|
|||
`(("author" ,(lambda (x) #f))
|
||||
("email" ,bad-email?)
|
||||
("url" ,bad-url?)
|
||||
("comment" ,bad-user-submitted-xhtml?)
|
||||
("comment" ,bad-user-submitted-marxdown?)
|
||||
("x" ,bad-number?)
|
||||
("submit" ,(lambda (x) #f))))
|
||||
|
||||
|
@ -147,6 +149,7 @@
|
|||
(author . ,(de-newline author))
|
||||
(author_email . ,email)
|
||||
(author_url . ,url)
|
||||
(format . marxdown)
|
||||
(raw-content . ,content))))
|
||||
|
||||
(define (make-new-comment key title comment)
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
(define-module (tekuti config)
|
||||
#:use-module (tekuti util)
|
||||
#:use-module ((sxml ssax) #:select (define-parsed-entity!))
|
||||
#:declarative? #f
|
||||
#:export (*public-scheme* *public-host* *public-port* *public-path-base*
|
||||
*private-host* *private-port* *private-path-base*
|
||||
*git-dir* *git* *debug* *admin-user* *admin-pass*
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
;; Tekuti
|
||||
;; Copyright (C) 2008, 2010, 2011, 2012 Andy Wingo <wingo at pobox dot com>
|
||||
;; Copyright (C) 2008, 2010, 2011, 2012, 2022 Andy Wingo <wingo at pobox dot com>
|
||||
|
||||
;; This program is free software; you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU General Public License as
|
||||
|
@ -27,10 +27,11 @@
|
|||
(define-module (tekuti filters)
|
||||
#:use-module (sxml simple)
|
||||
#:use-module (sxml transform)
|
||||
#:use-module (tekuti marxdown)
|
||||
#:use-module (tekuti match-bind)
|
||||
#:use-module (tekuti util)
|
||||
#:export (wordpress->sxml
|
||||
*allowed-tags* bad-user-submitted-xhtml?))
|
||||
#:export (wordpress->sxml marxdown->sxml
|
||||
*allowed-tags* bad-user-submitted-marxdown?))
|
||||
|
||||
(define blocks '(table thead tfoot caption colgroup tbody tr td th div
|
||||
dl dd dt ul ol li pre select form map area blockquote
|
||||
|
@ -90,6 +91,12 @@
|
|||
(*text* . ,(lambda (tag text)
|
||||
text))))))
|
||||
|
||||
(define (marxdown->sxml text)
|
||||
(smarxdown->shtml (call-with-input-string text marxdown->smarxdown)))
|
||||
|
||||
(define (pre-class? class)
|
||||
(string-prefix? "pre-" class))
|
||||
|
||||
(define *allowed-tags*
|
||||
`((a (href . ,urlish?) title)
|
||||
(abbr title)
|
||||
|
@ -101,9 +108,10 @@
|
|||
(em)
|
||||
(i)
|
||||
(p)
|
||||
(pre)
|
||||
(pre (class . ,pre-class?))
|
||||
(strike)
|
||||
(strong)))
|
||||
(strong)
|
||||
(tt)))
|
||||
|
||||
(define (compile-sxslt-rules tags)
|
||||
(define (ok . body)
|
||||
|
@ -140,13 +148,18 @@
|
|||
`((div ,(compile-sxslt-rules *allowed-tags*)
|
||||
. ,(lambda body body))))
|
||||
|
||||
(define (bad-user-submitted-xhtml? x)
|
||||
(define (bad-user-submitted-marxdown? x)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(pre-post-order (wordpress->sxml x) *valid-xhtml-rules*)
|
||||
(pre-post-order (marxdown->sxml x) *valid-xhtml-rules*)
|
||||
#f)
|
||||
(lambda (key . args)
|
||||
`(div (p (b "Invalid XHTML"))
|
||||
`(div (p (b "Invalid Marxdown"))
|
||||
(p "The input grammar is essentially Markdown. However "
|
||||
"there are some differences, notably that e.g. *emph* "
|
||||
"blocks need to be properly closed and that any "
|
||||
"embedded HTML needs to be well-formed XHTML.")
|
||||
(p "Further information:")
|
||||
,(case key
|
||||
((parser-error)
|
||||
`(div
|
||||
|
@ -168,6 +181,5 @@
|
|||
((bad-attr-value)
|
||||
`(p "XHTML attribute has bad value: " ,(car args)))
|
||||
(else
|
||||
(pk key args)
|
||||
`(p "Jesus knows why, and so do you")))))))
|
||||
`(p "Hey not real knows why, and so do you")))))))
|
||||
|
||||
|
|
701
tekuti/marxdown.scm
Normal file
701
tekuti/marxdown.scm
Normal file
|
@ -0,0 +1,701 @@
|
|||
;; Tekuti
|
||||
;; Copyright (C) 2022 Andy Wingo <wingo at pobox dot com>
|
||||
|
||||
;; This program is free software; you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU General Public License as
|
||||
;; published by the Free Software Foundation; either version 3 of
|
||||
;; the License, or (at your option) any later version.
|
||||
;;
|
||||
;; This program is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with this program; if not, contact:
|
||||
;;
|
||||
;; Free Software Foundation Voice: +1-617-542-5942
|
||||
;; 59 Temple Place - Suite 330 Fax: +1-617-542-2652
|
||||
;; Boston, MA 02111-1307, USA gnu@gnu.org
|
||||
|
||||
;;; Commentary:
|
||||
;;
|
||||
;; "Marxdown" to SXML parser.
|
||||
;;
|
||||
;; Marxdown is a dialect of Markdown, designed for simplicity and
|
||||
;; embeddability. The X in markdown refers to how it processes embedded
|
||||
;; XML.
|
||||
;;
|
||||
;; In some ways, Marxdown is a more restrictive subset of Markdown:
|
||||
;;
|
||||
;; - Marxdown only supports headings with the ### prefix ("ATX
|
||||
;; headings"), and doesn't support so-called "Setext headings" that
|
||||
;; follow the heading with --- or similar. Why bother?
|
||||
;;
|
||||
;; - Marxdown doesn't support indented code blocks, instead only using
|
||||
;; fenced code blocks.
|
||||
;;
|
||||
;; - Markxdown doesn't currently support link titles or reference
|
||||
;; links.
|
||||
;;
|
||||
;; - Marxdown requires each line of a block quote to have the same
|
||||
;; indent and `>` characters, not just the first line.
|
||||
;;
|
||||
;; - Marxdown doesn't support multi-` inline code sequences; just the
|
||||
;; single `.
|
||||
;;
|
||||
;; - ...
|
||||
;;
|
||||
;; Generally speaking, a valid Marxdown document will also be valid
|
||||
;; Markdown, as defined by CommonMark. However there are exceptions.
|
||||
;; Marxdown is less "sloppy", if you will; unlike Markdown, not any
|
||||
;; sequence of characters is valid Marxdown. Parsing Marxdown can raise
|
||||
;; an error.
|
||||
;;
|
||||
;; Notably, Marxdown requires valid nesting. For example, `*_foo*_` is
|
||||
;; an error in Marxdown, as the emphasis markers aren't nested properly.
|
||||
;;
|
||||
;; Also, when Marxdown parses embedded XML, it hands over control of the
|
||||
;; character stream to a validating XML parser which can also throw an
|
||||
;; error. This XML parser doesn't stop at what CommonMark Markdown
|
||||
;; would consider block boundaries. As a consequence, Marxdown cannot
|
||||
;; be embedded inside XML. Due to a limitation which may be relaxed in
|
||||
;; the future, the XML parser doesn't strip off any blockquote prefix.
|
||||
;;
|
||||
;; These limitations come with a benefit: tools can easily consume
|
||||
;; Marxdown and then embed that result in SXML documents of different
|
||||
;; kinds.
|
||||
;;
|
||||
;;; Code:
|
||||
|
||||
(define-module (tekuti marxdown)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (sxml ssax)
|
||||
#:use-module (sxml transform)
|
||||
#:use-module ((srfi srfi-1) #:select (fold))
|
||||
#:export (marxdown->smarxdown
|
||||
smarxdown->shtml))
|
||||
|
||||
(define (parse-one-xml-element port)
|
||||
;; -> seed
|
||||
(define (fdown elem-gi attributes namespaces expected-content seed)
|
||||
'())
|
||||
;; -> seed
|
||||
(define (fup elem-gi attributes namespaces parent-seed seed)
|
||||
(cons `(,elem-gi
|
||||
,@(match namespaces
|
||||
(() '())
|
||||
;; fixme: prohibit?
|
||||
(_ `((*NAMESPACES* ,namespaces))))
|
||||
,@(match attributes
|
||||
(() '())
|
||||
(((attr . value) ...)
|
||||
`((@ . ,(map list attr value)))))
|
||||
. ,(reverse seed))
|
||||
parent-seed))
|
||||
(define (ftext string1 string2 seed)
|
||||
(if (string-null? string2)
|
||||
(cons string1 seed)
|
||||
(cons* string2 string1 seed)))
|
||||
(define parse-element (ssax:make-elem-parser fdown fup ftext ()))
|
||||
|
||||
(let ((token (ssax:read-markup-token port)))
|
||||
(match (xml-token-kind token)
|
||||
('COMMENT #f)
|
||||
('START
|
||||
(let ((elems #f) (entities '()) (namespaces '()) (seed '()))
|
||||
(match (parse-element (xml-token-head token) port elems
|
||||
entities namespaces #t seed)
|
||||
((elt) elt))))
|
||||
(kind (error "unexpected XML token" token)))))
|
||||
|
||||
(define (marxdown->smarxdown port)
|
||||
(define (round-up x y)
|
||||
(* y (ceiling-quotient x y)))
|
||||
|
||||
(define (advance/tab indent)
|
||||
(round-up (1+ indent) 4))
|
||||
|
||||
(define (advance indent)
|
||||
(1+ indent))
|
||||
|
||||
(define (unget1 ch)
|
||||
(unget-char port ch))
|
||||
(define (unget chars)
|
||||
(match chars
|
||||
(()
|
||||
(values))
|
||||
((ch . chars)
|
||||
(unget1 ch)
|
||||
(unget chars))))
|
||||
|
||||
(define (next) (get-char port))
|
||||
(define (peek) (peek-char port))
|
||||
|
||||
(define (next-not-eof ctx)
|
||||
(let ((ch (next)))
|
||||
(if (eof-object? ch)
|
||||
(error "EOF while reading" ctx)
|
||||
ch)))
|
||||
(define (next-line-and-delim)
|
||||
(let lp ((chars '()))
|
||||
(define (finish delim)
|
||||
(cons (reverse-list->string chars) delim))
|
||||
(let ((ch (next)))
|
||||
(cond
|
||||
((eof-object? ch) (finish ch))
|
||||
((eqv? ch #\return)
|
||||
(if (eqv? (peek) #\newline)
|
||||
(finish (next))
|
||||
(lp (cons ch chars))))
|
||||
((eqv? ch #\newline)
|
||||
(finish ch))
|
||||
(else
|
||||
(lp (cons ch chars)))))))
|
||||
|
||||
(define (skip-whitespace k)
|
||||
(let lp ((indent 0))
|
||||
(let ((ch (next)))
|
||||
(case ch
|
||||
((#\space)
|
||||
(lp (advance indent)))
|
||||
((#\tab)
|
||||
(lp (advance/tab indent)))
|
||||
(else
|
||||
(k ch indent))))))
|
||||
|
||||
(define empty-indent '(0))
|
||||
(define (read-indent k)
|
||||
(skip-whitespace
|
||||
(lambda (ch indent)
|
||||
(match ch
|
||||
(#\>
|
||||
(read-indent
|
||||
(lambda (ch indent*)
|
||||
(k ch
|
||||
(cons indent
|
||||
(match indent*
|
||||
((head . tail)
|
||||
;; Account for the #\>.
|
||||
(cons (1+ head) tail))))))))
|
||||
(_
|
||||
(k ch (list indent)))))))
|
||||
|
||||
(define (advance-indent indent)
|
||||
(match indent
|
||||
((indent* ... indent)
|
||||
(append indent* (list (advance indent))))))
|
||||
|
||||
(define (advance-indent/tab indent)
|
||||
(let* ((col (apply + indent))
|
||||
(col* (advance/tab col)))
|
||||
(match indent
|
||||
((indent* ... indent)
|
||||
(append indent* (+ (- col* col) indent))))))
|
||||
|
||||
(define (compare-indents prev new)
|
||||
(match (vector prev new)
|
||||
(#((x . prev-tail) (x . new-tail))
|
||||
(compare-indents prev-tail new-tail))
|
||||
(_
|
||||
(match (vector prev new)
|
||||
(#(() ()) 'same)
|
||||
(#((prev) (new)) (if (< prev new) 'inner 'outer))
|
||||
(#(() _) 'inner-blockquote)
|
||||
(#(_ ()) 'outer-blockquote)
|
||||
(_ 'different-blockquote)))))
|
||||
|
||||
;; indent as list of blockquote
|
||||
|
||||
;; kup ::= (node type info indent) -> _
|
||||
;; knext ::= nodelist -> node
|
||||
|
||||
(define (drop-whitespace-up-to n col kt kf)
|
||||
(define col-end (+ col n))
|
||||
(let lp ((n n))
|
||||
(cond
|
||||
((zero? n) (kt))
|
||||
(else
|
||||
(match (next)
|
||||
(#\space (lp (1- n)))
|
||||
(#\tab
|
||||
(let ((col (advance/tab (- col-end n))))
|
||||
(cond
|
||||
((<= col col-end) (lp (- col-end col)))
|
||||
(else (kt)))))
|
||||
(#\newline
|
||||
;; Sure. Trailing whitespace can be any indent.
|
||||
(unget1 #\newline)
|
||||
(kt))
|
||||
(#\return
|
||||
(lp n))
|
||||
(ch
|
||||
(unless (eof-object? ch) (unget1 ch))
|
||||
(kf)))))))
|
||||
|
||||
(define (drop-whitespace-then-blockquote n col kt kf)
|
||||
(define col-end (+ col n))
|
||||
(let lp ((n n) (kf kf))
|
||||
(let* ((ch (next))
|
||||
(kf (lambda () (unless (eof-object? ch) (unget1 ch)) (kf))))
|
||||
(cond
|
||||
((zero? n)
|
||||
(match ch
|
||||
(#\> (kt kf))
|
||||
(_ (kf))))
|
||||
(else
|
||||
(match ch
|
||||
(#\space (lp (1- n) kf))
|
||||
(#\tab
|
||||
(let ((col (advance/tab (- col-end n))))
|
||||
(cond
|
||||
((<= col col-end) (lp (- col-end col) kf))
|
||||
(else (kf)))))
|
||||
(_ (kf))))))))
|
||||
|
||||
(define (consume-indent indent kt kf)
|
||||
(match indent
|
||||
((0) (kt))
|
||||
(_
|
||||
(let lp ((indent indent) (col 0) (kf kf))
|
||||
(match indent
|
||||
((n) (drop-whitespace-up-to n col kt kf))
|
||||
((n m . indent)
|
||||
(let* ((indent (cons (1- m) indent))
|
||||
(kt (lambda (kf) (lp indent (+ col n 1) kf))))
|
||||
(drop-whitespace-then-blockquote n col kt kf))))))))
|
||||
|
||||
(define (read-pre tag indent k)
|
||||
(let lp ((body '()))
|
||||
(define (finish tail)
|
||||
(k (list 'pre tag (string-concatenate-reverse body tail))))
|
||||
(consume-indent
|
||||
indent
|
||||
(lambda ()
|
||||
(match (next-line-and-delim)
|
||||
((str . delim)
|
||||
(cond
|
||||
((eof-object? delim)
|
||||
(finish (if (eof-object? str) "" str)))
|
||||
((string=? str "```")
|
||||
(finish ""))
|
||||
(else
|
||||
(lp (cons* (string delim) str body)))))))
|
||||
(lambda ()
|
||||
(finish "")))))
|
||||
|
||||
(define (read-link-destination)
|
||||
(match (next-not-eof "link destination")
|
||||
(#\<
|
||||
(let lp ((chars '()))
|
||||
(match (next-not-eof "<>-delimited link")
|
||||
(#\> (reverse-list->string chars))
|
||||
(#\< (error "< inside <>-delimited link"))
|
||||
(#\\
|
||||
(match (next)
|
||||
((? eof-object?) (lp (cons #\\ chars)))
|
||||
(ch (lp (cons ch chars)))))
|
||||
(ch (lp (cons ch chars))))))
|
||||
(ch
|
||||
(unget1 ch)
|
||||
(let lp ((chars '()) (k reverse-list->string))
|
||||
(match (next-not-eof "link destination")
|
||||
(#\) (unget1 #\)) (k chars))
|
||||
((and ch
|
||||
(or #\space #\)
|
||||
(? (lambda (ch)
|
||||
(or (char<? ch (integer->char #x20))
|
||||
(char=? ch (integer->char #x7f)))))))
|
||||
(unget1 ch)
|
||||
(k chars))
|
||||
(#\(
|
||||
(lp (cons #\( chars)
|
||||
(lambda (chars)
|
||||
(match (next-not-eof "link destination")
|
||||
(#\) (lp (cons #\) chars) k))
|
||||
(ch (error "unexpected char" ch))))))
|
||||
(#\\
|
||||
(lp (cons (next-not-eof "link-destination") chars) k))
|
||||
(ch (lp (cons ch chars) k)))))))
|
||||
|
||||
(define (read-link indent continue)
|
||||
(read-text
|
||||
#f indent
|
||||
(lambda (ch)
|
||||
(match ch
|
||||
(#\]
|
||||
(lambda (text)
|
||||
(match (next)
|
||||
(#\(
|
||||
(let ((dest (read-link-destination)))
|
||||
(match (next)
|
||||
(#\)
|
||||
(continue `(link ,dest . ,text)))
|
||||
(ch
|
||||
(error "unexpected after link dest" ch)))))
|
||||
(ch (error "link missing destination URL")))))
|
||||
(_ #f)))
|
||||
(lambda (elts)
|
||||
(error "end-of-block while reading link"))))
|
||||
|
||||
(define (read-emph indent delim continue)
|
||||
(define (delim? ch) (eqv? ch delim))
|
||||
(match (next-not-eof "emphasis")
|
||||
((? delim?)
|
||||
(let ((done? (lambda (ch)
|
||||
(match ch
|
||||
((? delim?)
|
||||
(match (next-not-eof "emphasis")
|
||||
((? delim?) continue)
|
||||
(ch (unget1 ch) #f)))
|
||||
(_ #f)))))
|
||||
(read-text 'strong indent done?
|
||||
(lambda (elt)
|
||||
(error "end of block while reading strong" elt)))))
|
||||
(ch
|
||||
(unget1 ch)
|
||||
(let ((done? (lambda (ch) (and (delim? ch) continue))))
|
||||
(read-text 'emph indent done?
|
||||
(lambda (elt)
|
||||
(error "end of block while reading emph" elt)))))))
|
||||
|
||||
(define (read-code indent continue)
|
||||
(let lp ((chars '()))
|
||||
(match (next-not-eof "backticks")
|
||||
(#\` (continue `(code ,(reverse-list->string chars))))
|
||||
(#\return (lp chars))
|
||||
(#\newline
|
||||
(consume-indent
|
||||
indent
|
||||
(lambda ()
|
||||
(match (next-not-eof "code")
|
||||
((or #\return #\newline)
|
||||
(error "end of block while reading code"))
|
||||
(ch (unget1 ch) (lp (cons #\space chars)))))
|
||||
(lambda () (error "end of block while reading code"))))
|
||||
(ch (lp (cons ch chars))))))
|
||||
|
||||
(define (read-text tag indent done? on-block-end)
|
||||
(let lp ((elts '()))
|
||||
(define (continue elt) (lp (cons elt elts)))
|
||||
(define (finish kdone)
|
||||
(let lp ((elts elts) (out '()))
|
||||
(match elts
|
||||
(() (kdone (if tag (cons tag out) out)))
|
||||
(((? char? ch) . elts)
|
||||
(let lp2 ((elts elts) (chars (list ch)))
|
||||
(match elts
|
||||
(((? char? ch) . elts)
|
||||
(lp2 elts (cons ch chars)))
|
||||
(_
|
||||
(lp elts (cons (list->string chars) out))))))
|
||||
((elt . elts)
|
||||
(lp elts (cons elt out))))))
|
||||
(define (consume-blank-lines-then-finish kdone)
|
||||
(let lp ()
|
||||
(match (next)
|
||||
((? eof-object?) (finish kdone))
|
||||
(#\return (lp))
|
||||
(#\newline
|
||||
(consume-indent indent lp (lambda () (finish kdone))))
|
||||
(ch
|
||||
(unget1 ch)
|
||||
(finish kdone)))))
|
||||
(match (next)
|
||||
((? eof-object?) (finish on-block-end))
|
||||
(#\return (lp elts))
|
||||
(#\newline
|
||||
(consume-indent
|
||||
indent
|
||||
(lambda ()
|
||||
(cond
|
||||
((done? #\newline) => consume-blank-lines-then-finish)
|
||||
(else (lp (cons #\newline elts)))))
|
||||
(lambda ()
|
||||
(finish on-block-end))))
|
||||
((= done? (and kdone (not #f))) (finish kdone))
|
||||
((and (or #\_ #\*) delim) (read-emph indent delim continue))
|
||||
(#\` (read-code indent continue))
|
||||
(#\[
|
||||
(read-link indent continue))
|
||||
(#\<
|
||||
(unget1 #\<)
|
||||
(match (parse-one-xml-element port)
|
||||
(#f (lp elts))
|
||||
(elt (continue `(inline-xml ,elt)))))
|
||||
(#\\ (lp (cons (next-not-eof "backslash") elts)))
|
||||
(#\! (match (next)
|
||||
((? eof-object?) (lp (cons #\! elts)))
|
||||
(#\[
|
||||
(read-link indent
|
||||
(lambda (link)
|
||||
(match link
|
||||
(('link dest . alt)
|
||||
(continue `(image ,dest . ,alt)))))))
|
||||
(ch
|
||||
(unget1 ch)
|
||||
(lp (cons #\! elts)))))
|
||||
(ch (lp (cons ch elts))))))
|
||||
|
||||
(define (read-para indent kup knext)
|
||||
(define (make-continuation reader)
|
||||
(lambda (para)
|
||||
(reader indent kup (lambda (nodelist)
|
||||
(knext (cons para nodelist))))))
|
||||
(define (done? ch)
|
||||
(match ch
|
||||
(#\newline
|
||||
(let lp ((ch (next)))
|
||||
(match ch
|
||||
((? eof-object?)
|
||||
(lambda (para)
|
||||
(kup (knext (list para)) ch empty-indent)))
|
||||
(ch
|
||||
(read-block-type ch #t
|
||||
make-continuation
|
||||
(if (eqv? ch #\newline)
|
||||
(lambda (chars)
|
||||
(unget chars)
|
||||
(make-continuation read-para))
|
||||
(lambda (chars)
|
||||
(unget chars)
|
||||
#f)))))))
|
||||
(_ #f)))
|
||||
(read-text 'para indent done? (make-continuation read-block-list)))
|
||||
|
||||
(define (read-para* chars indent kup knext)
|
||||
(unget chars)
|
||||
(read-para indent kup knext))
|
||||
|
||||
(define (read-heading level indent continue)
|
||||
(let ((continue (lambda (heading)
|
||||
(match heading
|
||||
(('heading . body)
|
||||
(continue `(heading ,level . ,body)))))))
|
||||
(read-text 'heading indent (lambda (ch)
|
||||
(and (eqv? ch #\newline) continue))
|
||||
continue)))
|
||||
|
||||
(define (read-li marker marker-indent marker-size kup knext)
|
||||
(define list-tag
|
||||
(match marker
|
||||
((? number?) 'enumerate)
|
||||
((? char?) 'itemize)))
|
||||
(define (list-tag? tag) (eq? tag list-tag))
|
||||
(read-indent
|
||||
(lambda (ch indent)
|
||||
(match indent
|
||||
((outer . inner)
|
||||
(match marker-indent
|
||||
((marker-outer ... marker-inner)
|
||||
(let ((body-indent
|
||||
(append marker-outer
|
||||
(list (+ marker-inner marker-size outer))
|
||||
inner)))
|
||||
(read-block
|
||||
ch body-indent
|
||||
(lambda (blocks ch next-indent)
|
||||
(read-indented-block
|
||||
ch marker-indent next-indent kup
|
||||
(lambda (nodelist)
|
||||
(knext
|
||||
(match nodelist
|
||||
((((? list-tag?) . items) . nodelist)
|
||||
`((,list-tag (item . ,blocks) . ,items) . ,nodelist))
|
||||
(_
|
||||
`((,list-tag (item . ,blocks)) . ,nodelist)))))))
|
||||
identity)))))))))
|
||||
|
||||
(define (read-block-type ch in-text? kblock ktext)
|
||||
(define (make-continue indent kup knext)
|
||||
(lambda (block)
|
||||
(read-block-list indent kup
|
||||
(lambda (nodelist)
|
||||
(knext (cons block nodelist))))))
|
||||
(match ch
|
||||
(#\#
|
||||
(let lp ((level 1))
|
||||
(match (next-not-eof "heading")
|
||||
(#\#
|
||||
(lp (1+ level)))
|
||||
(#\space
|
||||
(kblock
|
||||
(lambda (indent kup knext)
|
||||
(read-heading level indent (make-continue indent kup knext)))))
|
||||
(#\return (lp level))
|
||||
(#\newline
|
||||
(kblock
|
||||
(lambda (indent kup knext)
|
||||
((make-continue indent kup knext) `(heading ,level)))))
|
||||
(ch
|
||||
(ktext (cons ch (make-list level #\#)))))))
|
||||
(#\`
|
||||
(match (next)
|
||||
((? eof-object?) (ktext '(#\`)))
|
||||
(#\`
|
||||
(match (next)
|
||||
((? eof-object?) (ktext '(#\` #\`)))
|
||||
(#\`
|
||||
(kblock
|
||||
(lambda (indent kup knext)
|
||||
(match (next-line-and-delim)
|
||||
((tag . delim)
|
||||
(cond
|
||||
((eof-object? delim)
|
||||
(error "eof while reading code block"))
|
||||
(else
|
||||
(read-pre (if (string-null? tag) #f tag) indent
|
||||
(make-continue indent kup knext)))))))))
|
||||
(ch
|
||||
(ktext (list ch #\` #\`)))))
|
||||
(ch
|
||||
(ktext (list ch #\`)))))
|
||||
((or #\- #\* #\+)
|
||||
(match (peek)
|
||||
((or #\space #\tab)
|
||||
(kblock (lambda (indent kup knext)
|
||||
(read-li ch indent 1 kup knext))))
|
||||
(_
|
||||
(ktext (list ch)))))
|
||||
((or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
|
||||
(let lp ((chars (list ch)))
|
||||
(let ((ch (next)))
|
||||
(match ch
|
||||
((? eof-object?) (ktext chars))
|
||||
((or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
|
||||
(lp (cons ch chars)))
|
||||
((or #\. #\))
|
||||
;; fixme: record delimiter
|
||||
(match (next)
|
||||
((? eof-object?)
|
||||
(ktext (cons ch chars)))
|
||||
((and ch2 (or #\space #\tab))
|
||||
(unget1 ch2)
|
||||
(if (or (not in-text?) (equal? chars '(#\1)))
|
||||
(kblock
|
||||
(lambda (indent kup knext)
|
||||
(let ((idx (fold (lambda (ch idx)
|
||||
(+ (* idx 10)
|
||||
(- (char->integer ch)
|
||||
(char->integer #\0))))
|
||||
0 chars)))
|
||||
(read-li idx indent (1+ (length chars))
|
||||
kup knext))))
|
||||
(ktext (cons ch chars))))
|
||||
(ch2
|
||||
(ktext (cons* ch2 ch chars)))))
|
||||
(_
|
||||
(ktext (cons ch chars)))))))
|
||||
(#\<
|
||||
(unget1 #\<)
|
||||
(if in-text?
|
||||
(ktext '())
|
||||
(kblock
|
||||
(lambda (indent kup knext)
|
||||
(match (parse-one-xml-element port)
|
||||
(#f (read-block-list indent kup knext))
|
||||
(elt ((make-continue indent kup knext) `(block-xml ,elt))))))))
|
||||
(#\return
|
||||
(read-block-type (next-not-eof "newline") in-text? kblock ktext))
|
||||
(#\newline
|
||||
;; fixme: record loose li
|
||||
(kblock read-block-list))
|
||||
((? eof-object?)
|
||||
(kblock
|
||||
(lambda (indent kup knext)
|
||||
(kup (knext '()) ch empty-indent))))
|
||||
(_
|
||||
(ktext (list ch)))))
|
||||
|
||||
(define (read-block ch indent kup knext)
|
||||
(define (have-block read-block)
|
||||
(read-block indent kup knext))
|
||||
(define (have-text chars)
|
||||
(read-para* chars indent kup knext))
|
||||
(read-block-type ch #f have-block have-text))
|
||||
|
||||
(define (finish-block-list blocks)
|
||||
(match blocks
|
||||
((block) block)
|
||||
(_ `(begin . ,blocks))))
|
||||
(define (finish-block-quote blocks)
|
||||
(match blocks
|
||||
((block) block)
|
||||
(_ `(blockquote . ,blocks))))
|
||||
|
||||
(define (read-indented-block ch outer-indent indent kup knext)
|
||||
(define (recurse finish recurse-indent)
|
||||
(read-indented-block ch recurse-indent indent
|
||||
(lambda (nested ch indent)
|
||||
(read-indented-block ch outer-indent indent
|
||||
kup
|
||||
(lambda (nodelist)
|
||||
(knext (cons nested nodelist)))))
|
||||
finish))
|
||||
(match (compare-indents outer-indent indent)
|
||||
('same
|
||||
(read-block ch indent kup knext))
|
||||
((or 'outer
|
||||
'outer-blockquote
|
||||
'different-blockquote)
|
||||
(kup (knext '()) ch indent))
|
||||
('inner
|
||||
(recurse finish-block-list indent))
|
||||
('inner-blockquote
|
||||
(recurse finish-block-quote
|
||||
(list-head indent (1+ (length outer-indent)))))))
|
||||
|
||||
(define (parse-error reason)
|
||||
(error reason))
|
||||
|
||||
(define (read-block-list outer-indent kup knext)
|
||||
(read-indent
|
||||
(lambda (ch indent)
|
||||
(read-indented-block ch outer-indent indent kup knext))))
|
||||
|
||||
(read-block-list empty-indent
|
||||
(lambda (nodelist ch indent) nodelist)
|
||||
finish-block-list))
|
||||
|
||||
(define* (smarxdown->shtml exp #:key
|
||||
(heading-offset 0)
|
||||
(handle-inline-xml identity)
|
||||
(handle-block-xml identity))
|
||||
(define (transform-inline exp)
|
||||
(match exp
|
||||
(('inline-xml xml) (handle-inline-xml xml))
|
||||
(('code . body) `(tt . ,body))
|
||||
(('emph . body) `(i . ,(map transform-inline body)))
|
||||
(('strong . body) `(b . ,(map transform-inline body)))
|
||||
(('link dest . body) `(a (@ (href ,dest)) . ,(map transform-inline body)))
|
||||
((? string? str) str)))
|
||||
(define (transform-block exp)
|
||||
(match exp
|
||||
(('block-xml xml) (handle-block-xml xml))
|
||||
(('para . body) `(p . ,(map transform-inline body)))
|
||||
(('blockquote . body) `(blockquote . ,(map transform-block body)))
|
||||
(('itemize ('item . item) ...)
|
||||
`(ul . ,(map (lambda (blocks)
|
||||
`(li . ,(map transform-block blocks)))
|
||||
item)))
|
||||
(('enumerate ('item . item) ...)
|
||||
`(ol . ,(map (lambda (blocks)
|
||||
`(li . ,(map transform-block blocks)))
|
||||
item)))
|
||||
(('pre #f . body) `(pre . ,body))
|
||||
(('pre info . body) `(pre (@ (class ,(string-append "pre-" info)))
|
||||
. ,body))
|
||||
(('heading level . body)
|
||||
(let* ((level (+ level heading-offset))
|
||||
(tag (cond ((<= level 0) 'h1)
|
||||
((<= level 6)
|
||||
(string->symbol
|
||||
(string #\h (integer->char
|
||||
(+ level (char->integer #\0))))))
|
||||
(else 'h6))))
|
||||
`(,tag . ,(map transform-inline body))))))
|
||||
(match exp
|
||||
(('begin exp ...)
|
||||
`(div . ,(map transform-block exp)))))
|
|
@ -1,5 +1,5 @@
|
|||
;; Tekuti
|
||||
;; Copyright (C) 2008, 2010, 2011, 2012, 2014, 2021 Andy Wingo <wingo at pobox dot com>
|
||||
;; Copyright (C) 2008, 2010, 2011, 2012, 2014, 2021, 2022 Andy Wingo <wingo at pobox dot com>
|
||||
|
||||
;; This program is free software; you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU General Public License as
|
||||
|
@ -59,6 +59,7 @@
|
|||
`((timestamp . ,string->number)
|
||||
(tags . ,(lambda (v) (string-split/trimming v #\,)))
|
||||
(title . ,identity)
|
||||
(format . ,string->symbol)
|
||||
(comments-closed-timestamp . ,(lambda (str)
|
||||
(if (string-null? str)
|
||||
#f
|
||||
|
@ -139,9 +140,10 @@
|
|||
(lambda ()
|
||||
(case format
|
||||
((wordpress) (wordpress->sxml raw))
|
||||
((marxdown) (marxdown->sxml raw))
|
||||
(else `(pre ,raw))))
|
||||
(lambda args
|
||||
`(pre ,(bad-user-submitted-xhtml? raw))))))
|
||||
`(pre ,(bad-user-submitted-marxdown? raw))))))
|
||||
|
||||
(define (post-readable-date post)
|
||||
(let ((date (time-utc->date
|
||||
|
@ -167,7 +169,7 @@
|
|||
((assq-ref parsed k)
|
||||
=> (lambda (v) (format #t "~a: ~a\n" k v)))))
|
||||
'(timestamp tags status title name comment_status
|
||||
comments-closed-timestamp))))
|
||||
comments-closed-timestamp format))))
|
||||
(content (with-output-to-blob (display (assq-ref parsed 'body))))
|
||||
(key (assq-ref parsed 'key))
|
||||
(message (format #f "~a: \"~a\""
|
||||
|
@ -229,6 +231,7 @@
|
|||
(comments-closed-timestamp . ,comments-closed-timestamp)
|
||||
(timestamp . ,timestamp)
|
||||
(name . ,name)
|
||||
(format . marxdown)
|
||||
(key . ,(string-downcase
|
||||
(uri-encode
|
||||
(string-append (date->string (timestamp->date timestamp)
|
||||
|
|
Loading…
Reference in a new issue