1
0
Fork 0

Merge remote-tracking branch 'origin/master' into customizations

This commit is contained in:
Tom Willemse 2023-09-06 15:17:33 -07:00
commit cc4ce6cad4
7 changed files with 757 additions and 27 deletions

View file

@ -10,6 +10,7 @@ SOURCES = \
tekuti/filters.scm \ tekuti/filters.scm \
tekuti/git.scm \ tekuti/git.scm \
tekuti/index.scm \ tekuti/index.scm \
tekuti/marxdown.scm \
tekuti/match-bind.scm \ tekuti/match-bind.scm \
tekuti/mod-lisp.scm \ tekuti/mod-lisp.scm \
tekuti/page-helpers.scm \ tekuti/page-helpers.scm \

View file

@ -1,5 +1,5 @@
;; Tekuti ;; Tekuti
;; Copyright (C) 2008, 2010, 2012 Andy Wingo <wingo at pobox dot com> ;; Copyright (C) 2008, 2010, 2012, 2022 Andy Wingo <wingo at pobox dot com>
;; This program is free software; you can redistribute it and/or ;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as ;; modify it under the terms of the GNU General Public License as
@ -60,7 +60,7 @@
(raw (assq-ref comment 'raw-content))) (raw (assq-ref comment 'raw-content)))
(or (case format (or (case format
((wordpress) (false-if-exception (wordpress->sxml raw))) ((wordpress) (false-if-exception (wordpress->sxml raw)))
((marxdown) (false-if-exception (marxdown->sxml raw)))
(else `(pre ,raw))) (else `(pre ,raw)))
`(pre ,raw)))))) `(pre ,raw))))))
@ -141,14 +141,17 @@
(total-legit-features (feature-count legit-features))) (total-legit-features (feature-count legit-features)))
(hash-for-each (hash-for-each
(lambda (feature _) (lambda (feature _)
(let ((bogus-count (hash-ref bogus-features feature 0)) (let ((bogus-count (hash-ref bogus-features feature))
(legit-count (hash-ref legit-features feature 0))) (legit-count (hash-ref legit-features feature 0)))
(cond
(bogus-count
(hash-set! log-bogosities feature (hash-set! log-bogosities feature
(if (and (> total-bogus-features 0)
(> total-legit-features 0))
(log (/ (/ (+ bogus-count 0.001) total-bogus-features) (log (/ (/ (+ bogus-count 0.001) total-bogus-features)
(/ (+ legit-count 0.001) total-legit-features))) (/ (+ legit-count 0.001) total-legit-features)))))
0)))) (else
(hash-set! log-bogosities feature
(log (/ (/ 0.01 total-bogus-features)
(/ (+ legit-count 0.01) total-legit-features))))))))
changed-features))) changed-features)))
(define (compute-bogus-probability comment log-bogosities bogus-prior (define (compute-bogus-probability comment log-bogosities bogus-prior
@ -288,13 +291,19 @@
(assq-ref index 'master)))) (assq-ref index 'master))))
(let ((removed-features (count-features removed)) (let ((removed-features (count-features removed))
(added-features (count-features added))) (added-features (count-features added)))
;; If a comment passes the spam filter, it gets added, and is
;; presumed legitimate. It could then be reverted, in which
;; case we should remove its features from the legitimate count
;; and add them to the bogus count.
(hash-for-each (lambda (k v) (hash-for-each (lambda (k v)
(remove-feature! legit-features k v) (remove-feature! legit-features k v)
(add-feature! bogus-features k v)) (add-feature! bogus-features k v))
removed-features) removed-features)
(hash-for-each (lambda (k v) (hash-for-each (lambda (k v)
(add-feature! legit-features k v) ;; Asymmetry with removed-features; the
(remove-feature! bogus-features k v)) ;; comment wasn't previously marked bogus, so
;; we just add its features to the legit set.
(add-feature! legit-features k v))
added-features) added-features)
(update-bogosities! bogosities removed-features (update-bogosities! bogosities removed-features
legit-features bogus-features) legit-features bogus-features)

View file

@ -1,5 +1,5 @@
;; Tekuti ;; Tekuti
;; Copyright (C) 2008, 2010, 2012 Andy Wingo <wingo at pobox dot com> ;; Copyright (C) 2008, 2010, 2012, 2022 Andy Wingo <wingo at pobox dot com>
;; This program is free software; you can redistribute it and/or ;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as ;; modify it under the terms of the GNU General Public License as
@ -41,7 +41,8 @@
compute-legit-comments compute-bogus-comments)) compute-legit-comments compute-bogus-comments))
(define *comment-spec* (define *comment-spec*
`((timestamp . ,string->number))) `((timestamp . ,string->number)
(format . ,string->symbol)))
(define (blob->comment encoded-name sha1) (define (blob->comment encoded-name sha1)
(let ((blob (git "show" sha1))) (let ((blob (git "show" sha1)))
@ -91,6 +92,7 @@
(let ((format (or (assq-ref comment 'format) 'wordpress))) (let ((format (or (assq-ref comment 'format) 'wordpress)))
((case format ((case format
((wordpress) wordpress->sxml) ((wordpress) wordpress->sxml)
((marxdown) marxdown->sxml)
(else (lambda (text) `(pre ,text)))) (else (lambda (text) `(pre ,text))))
(comment-raw-content comment)))))) (comment-raw-content comment))))))
@ -119,7 +121,7 @@
`(("author" ,(lambda (x) #f)) `(("author" ,(lambda (x) #f))
("email" ,bad-email?) ("email" ,bad-email?)
("url" ,bad-url?) ("url" ,bad-url?)
("comment" ,bad-user-submitted-xhtml?) ("comment" ,bad-user-submitted-marxdown?)
("x" ,bad-number?) ("x" ,bad-number?)
("submit" ,(lambda (x) #f)))) ("submit" ,(lambda (x) #f))))
@ -147,6 +149,7 @@
(author . ,(de-newline author)) (author . ,(de-newline author))
(author_email . ,email) (author_email . ,email)
(author_url . ,url) (author_url . ,url)
(format . marxdown)
(raw-content . ,content)))) (raw-content . ,content))))
(define (make-new-comment key title comment) (define (make-new-comment key title comment)

View file

@ -27,6 +27,7 @@
(define-module (tekuti config) (define-module (tekuti config)
#:use-module (tekuti util) #:use-module (tekuti util)
#:use-module ((sxml ssax) #:select (define-parsed-entity!)) #:use-module ((sxml ssax) #:select (define-parsed-entity!))
#:declarative? #f
#:export (*public-scheme* *public-host* *public-port* *public-path-base* #:export (*public-scheme* *public-host* *public-port* *public-path-base*
*private-host* *private-port* *private-path-base* *private-host* *private-port* *private-path-base*
*git-dir* *git* *debug* *admin-user* *admin-pass* *git-dir* *git* *debug* *admin-user* *admin-pass*

View file

@ -1,5 +1,5 @@
;; Tekuti ;; Tekuti
;; Copyright (C) 2008, 2010, 2011, 2012 Andy Wingo <wingo at pobox dot com> ;; Copyright (C) 2008, 2010, 2011, 2012, 2022 Andy Wingo <wingo at pobox dot com>
;; This program is free software; you can redistribute it and/or ;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as ;; modify it under the terms of the GNU General Public License as
@ -27,10 +27,11 @@
(define-module (tekuti filters) (define-module (tekuti filters)
#:use-module (sxml simple) #:use-module (sxml simple)
#:use-module (sxml transform) #:use-module (sxml transform)
#:use-module (tekuti marxdown)
#:use-module (tekuti match-bind) #:use-module (tekuti match-bind)
#:use-module (tekuti util) #:use-module (tekuti util)
#:export (wordpress->sxml #:export (wordpress->sxml marxdown->sxml
*allowed-tags* bad-user-submitted-xhtml?)) *allowed-tags* bad-user-submitted-marxdown?))
(define blocks '(table thead tfoot caption colgroup tbody tr td th div (define blocks '(table thead tfoot caption colgroup tbody tr td th div
dl dd dt ul ol li pre select form map area blockquote dl dd dt ul ol li pre select form map area blockquote
@ -90,6 +91,12 @@
(*text* . ,(lambda (tag text) (*text* . ,(lambda (tag text)
text)))))) text))))))
(define (marxdown->sxml text)
(smarxdown->shtml (call-with-input-string text marxdown->smarxdown)))
(define (pre-class? class)
(string-prefix? "pre-" class))
(define *allowed-tags* (define *allowed-tags*
`((a (href . ,urlish?) title) `((a (href . ,urlish?) title)
(abbr title) (abbr title)
@ -101,9 +108,10 @@
(em) (em)
(i) (i)
(p) (p)
(pre) (pre (class . ,pre-class?))
(strike) (strike)
(strong))) (strong)
(tt)))
(define (compile-sxslt-rules tags) (define (compile-sxslt-rules tags)
(define (ok . body) (define (ok . body)
@ -140,13 +148,18 @@
`((div ,(compile-sxslt-rules *allowed-tags*) `((div ,(compile-sxslt-rules *allowed-tags*)
. ,(lambda body body)))) . ,(lambda body body))))
(define (bad-user-submitted-xhtml? x) (define (bad-user-submitted-marxdown? x)
(catch #t (catch #t
(lambda () (lambda ()
(pre-post-order (wordpress->sxml x) *valid-xhtml-rules*) (pre-post-order (marxdown->sxml x) *valid-xhtml-rules*)
#f) #f)
(lambda (key . args) (lambda (key . args)
`(div (p (b "Invalid XHTML")) `(div (p (b "Invalid Marxdown"))
(p "The input grammar is essentially Markdown. However "
"there are some differences, notably that e.g. *emph* "
"blocks need to be properly closed and that any "
"embedded HTML needs to be well-formed XHTML.")
(p "Further information:")
,(case key ,(case key
((parser-error) ((parser-error)
`(div `(div
@ -168,6 +181,5 @@
((bad-attr-value) ((bad-attr-value)
`(p "XHTML attribute has bad value: " ,(car args))) `(p "XHTML attribute has bad value: " ,(car args)))
(else (else
(pk key args) `(p "Hey not real knows why, and so do you")))))))
`(p "Jesus knows why, and so do you")))))))

701
tekuti/marxdown.scm Normal file
View file

@ -0,0 +1,701 @@
;; Tekuti
;; Copyright (C) 2022 Andy Wingo <wingo at pobox dot com>
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 3 of
;; the License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, contact:
;;
;; Free Software Foundation Voice: +1-617-542-5942
;; 59 Temple Place - Suite 330 Fax: +1-617-542-2652
;; Boston, MA 02111-1307, USA gnu@gnu.org
;;; Commentary:
;;
;; "Marxdown" to SXML parser.
;;
;; Marxdown is a dialect of Markdown, designed for simplicity and
;; embeddability. The X in markdown refers to how it processes embedded
;; XML.
;;
;; In some ways, Marxdown is a more restrictive subset of Markdown:
;;
;; - Marxdown only supports headings with the ### prefix ("ATX
;; headings"), and doesn't support so-called "Setext headings" that
;; follow the heading with --- or similar. Why bother?
;;
;; - Marxdown doesn't support indented code blocks, instead only using
;; fenced code blocks.
;;
;; - Markxdown doesn't currently support link titles or reference
;; links.
;;
;; - Marxdown requires each line of a block quote to have the same
;; indent and `>` characters, not just the first line.
;;
;; - Marxdown doesn't support multi-` inline code sequences; just the
;; single `.
;;
;; - ...
;;
;; Generally speaking, a valid Marxdown document will also be valid
;; Markdown, as defined by CommonMark. However there are exceptions.
;; Marxdown is less "sloppy", if you will; unlike Markdown, not any
;; sequence of characters is valid Marxdown. Parsing Marxdown can raise
;; an error.
;;
;; Notably, Marxdown requires valid nesting. For example, `*_foo*_` is
;; an error in Marxdown, as the emphasis markers aren't nested properly.
;;
;; Also, when Marxdown parses embedded XML, it hands over control of the
;; character stream to a validating XML parser which can also throw an
;; error. This XML parser doesn't stop at what CommonMark Markdown
;; would consider block boundaries. As a consequence, Marxdown cannot
;; be embedded inside XML. Due to a limitation which may be relaxed in
;; the future, the XML parser doesn't strip off any blockquote prefix.
;;
;; These limitations come with a benefit: tools can easily consume
;; Marxdown and then embed that result in SXML documents of different
;; kinds.
;;
;;; Code:
(define-module (tekuti marxdown)
#:use-module (ice-9 match)
#:use-module (ice-9 textual-ports)
#:use-module (ice-9 rdelim)
#:use-module (sxml ssax)
#:use-module (sxml transform)
#:use-module ((srfi srfi-1) #:select (fold))
#:export (marxdown->smarxdown
smarxdown->shtml))
(define (parse-one-xml-element port)
;; -> seed
(define (fdown elem-gi attributes namespaces expected-content seed)
'())
;; -> seed
(define (fup elem-gi attributes namespaces parent-seed seed)
(cons `(,elem-gi
,@(match namespaces
(() '())
;; fixme: prohibit?
(_ `((*NAMESPACES* ,namespaces))))
,@(match attributes
(() '())
(((attr . value) ...)
`((@ . ,(map list attr value)))))
. ,(reverse seed))
parent-seed))
(define (ftext string1 string2 seed)
(if (string-null? string2)
(cons string1 seed)
(cons* string2 string1 seed)))
(define parse-element (ssax:make-elem-parser fdown fup ftext ()))
(let ((token (ssax:read-markup-token port)))
(match (xml-token-kind token)
('COMMENT #f)
('START
(let ((elems #f) (entities '()) (namespaces '()) (seed '()))
(match (parse-element (xml-token-head token) port elems
entities namespaces #t seed)
((elt) elt))))
(kind (error "unexpected XML token" token)))))
(define (marxdown->smarxdown port)
(define (round-up x y)
(* y (ceiling-quotient x y)))
(define (advance/tab indent)
(round-up (1+ indent) 4))
(define (advance indent)
(1+ indent))
(define (unget1 ch)
(unget-char port ch))
(define (unget chars)
(match chars
(()
(values))
((ch . chars)
(unget1 ch)
(unget chars))))
(define (next) (get-char port))
(define (peek) (peek-char port))
(define (next-not-eof ctx)
(let ((ch (next)))
(if (eof-object? ch)
(error "EOF while reading" ctx)
ch)))
(define (next-line-and-delim)
(let lp ((chars '()))
(define (finish delim)
(cons (reverse-list->string chars) delim))
(let ((ch (next)))
(cond
((eof-object? ch) (finish ch))
((eqv? ch #\return)
(if (eqv? (peek) #\newline)
(finish (next))
(lp (cons ch chars))))
((eqv? ch #\newline)
(finish ch))
(else
(lp (cons ch chars)))))))
(define (skip-whitespace k)
(let lp ((indent 0))
(let ((ch (next)))
(case ch
((#\space)
(lp (advance indent)))
((#\tab)
(lp (advance/tab indent)))
(else
(k ch indent))))))
(define empty-indent '(0))
(define (read-indent k)
(skip-whitespace
(lambda (ch indent)
(match ch
(#\>
(read-indent
(lambda (ch indent*)
(k ch
(cons indent
(match indent*
((head . tail)
;; Account for the #\>.
(cons (1+ head) tail))))))))
(_
(k ch (list indent)))))))
(define (advance-indent indent)
(match indent
((indent* ... indent)
(append indent* (list (advance indent))))))
(define (advance-indent/tab indent)
(let* ((col (apply + indent))
(col* (advance/tab col)))
(match indent
((indent* ... indent)
(append indent* (+ (- col* col) indent))))))
(define (compare-indents prev new)
(match (vector prev new)
(#((x . prev-tail) (x . new-tail))
(compare-indents prev-tail new-tail))
(_
(match (vector prev new)
(#(() ()) 'same)
(#((prev) (new)) (if (< prev new) 'inner 'outer))
(#(() _) 'inner-blockquote)
(#(_ ()) 'outer-blockquote)
(_ 'different-blockquote)))))
;; indent as list of blockquote
;; kup ::= (node type info indent) -> _
;; knext ::= nodelist -> node
(define (drop-whitespace-up-to n col kt kf)
(define col-end (+ col n))
(let lp ((n n))
(cond
((zero? n) (kt))
(else
(match (next)
(#\space (lp (1- n)))
(#\tab
(let ((col (advance/tab (- col-end n))))
(cond
((<= col col-end) (lp (- col-end col)))
(else (kt)))))
(#\newline
;; Sure. Trailing whitespace can be any indent.
(unget1 #\newline)
(kt))
(#\return
(lp n))
(ch
(unless (eof-object? ch) (unget1 ch))
(kf)))))))
(define (drop-whitespace-then-blockquote n col kt kf)
(define col-end (+ col n))
(let lp ((n n) (kf kf))
(let* ((ch (next))
(kf (lambda () (unless (eof-object? ch) (unget1 ch)) (kf))))
(cond
((zero? n)
(match ch
(#\> (kt kf))
(_ (kf))))
(else
(match ch
(#\space (lp (1- n) kf))
(#\tab
(let ((col (advance/tab (- col-end n))))
(cond
((<= col col-end) (lp (- col-end col) kf))
(else (kf)))))
(_ (kf))))))))
(define (consume-indent indent kt kf)
(match indent
((0) (kt))
(_
(let lp ((indent indent) (col 0) (kf kf))
(match indent
((n) (drop-whitespace-up-to n col kt kf))
((n m . indent)
(let* ((indent (cons (1- m) indent))
(kt (lambda (kf) (lp indent (+ col n 1) kf))))
(drop-whitespace-then-blockquote n col kt kf))))))))
(define (read-pre tag indent k)
(let lp ((body '()))
(define (finish tail)
(k (list 'pre tag (string-concatenate-reverse body tail))))
(consume-indent
indent
(lambda ()
(match (next-line-and-delim)
((str . delim)
(cond
((eof-object? delim)
(finish (if (eof-object? str) "" str)))
((string=? str "```")
(finish ""))
(else
(lp (cons* (string delim) str body)))))))
(lambda ()
(finish "")))))
(define (read-link-destination)
(match (next-not-eof "link destination")
(#\<
(let lp ((chars '()))
(match (next-not-eof "<>-delimited link")
(#\> (reverse-list->string chars))
(#\< (error "< inside <>-delimited link"))
(#\\
(match (next)
((? eof-object?) (lp (cons #\\ chars)))
(ch (lp (cons ch chars)))))
(ch (lp (cons ch chars))))))
(ch
(unget1 ch)
(let lp ((chars '()) (k reverse-list->string))
(match (next-not-eof "link destination")
(#\) (unget1 #\)) (k chars))
((and ch
(or #\space #\)
(? (lambda (ch)
(or (char<? ch (integer->char #x20))
(char=? ch (integer->char #x7f)))))))
(unget1 ch)
(k chars))
(#\(
(lp (cons #\( chars)
(lambda (chars)
(match (next-not-eof "link destination")
(#\) (lp (cons #\) chars) k))
(ch (error "unexpected char" ch))))))
(#\\
(lp (cons (next-not-eof "link-destination") chars) k))
(ch (lp (cons ch chars) k)))))))
(define (read-link indent continue)
(read-text
#f indent
(lambda (ch)
(match ch
(#\]
(lambda (text)
(match (next)
(#\(
(let ((dest (read-link-destination)))
(match (next)
(#\)
(continue `(link ,dest . ,text)))
(ch
(error "unexpected after link dest" ch)))))
(ch (error "link missing destination URL")))))
(_ #f)))
(lambda (elts)
(error "end-of-block while reading link"))))
(define (read-emph indent delim continue)
(define (delim? ch) (eqv? ch delim))
(match (next-not-eof "emphasis")
((? delim?)
(let ((done? (lambda (ch)
(match ch
((? delim?)
(match (next-not-eof "emphasis")
((? delim?) continue)
(ch (unget1 ch) #f)))
(_ #f)))))
(read-text 'strong indent done?
(lambda (elt)
(error "end of block while reading strong" elt)))))
(ch
(unget1 ch)
(let ((done? (lambda (ch) (and (delim? ch) continue))))
(read-text 'emph indent done?
(lambda (elt)
(error "end of block while reading emph" elt)))))))
(define (read-code indent continue)
(let lp ((chars '()))
(match (next-not-eof "backticks")
(#\` (continue `(code ,(reverse-list->string chars))))
(#\return (lp chars))
(#\newline
(consume-indent
indent
(lambda ()
(match (next-not-eof "code")
((or #\return #\newline)
(error "end of block while reading code"))
(ch (unget1 ch) (lp (cons #\space chars)))))
(lambda () (error "end of block while reading code"))))
(ch (lp (cons ch chars))))))
(define (read-text tag indent done? on-block-end)
(let lp ((elts '()))
(define (continue elt) (lp (cons elt elts)))
(define (finish kdone)
(let lp ((elts elts) (out '()))
(match elts
(() (kdone (if tag (cons tag out) out)))
(((? char? ch) . elts)
(let lp2 ((elts elts) (chars (list ch)))
(match elts
(((? char? ch) . elts)
(lp2 elts (cons ch chars)))
(_
(lp elts (cons (list->string chars) out))))))
((elt . elts)
(lp elts (cons elt out))))))
(define (consume-blank-lines-then-finish kdone)
(let lp ()
(match (next)
((? eof-object?) (finish kdone))
(#\return (lp))
(#\newline
(consume-indent indent lp (lambda () (finish kdone))))
(ch
(unget1 ch)
(finish kdone)))))
(match (next)
((? eof-object?) (finish on-block-end))
(#\return (lp elts))
(#\newline
(consume-indent
indent
(lambda ()
(cond
((done? #\newline) => consume-blank-lines-then-finish)
(else (lp (cons #\newline elts)))))
(lambda ()
(finish on-block-end))))
((= done? (and kdone (not #f))) (finish kdone))
((and (or #\_ #\*) delim) (read-emph indent delim continue))
(#\` (read-code indent continue))
(#\[
(read-link indent continue))
(#\<
(unget1 #\<)
(match (parse-one-xml-element port)
(#f (lp elts))
(elt (continue `(inline-xml ,elt)))))
(#\\ (lp (cons (next-not-eof "backslash") elts)))
(#\! (match (next)
((? eof-object?) (lp (cons #\! elts)))
(#\[
(read-link indent
(lambda (link)
(match link
(('link dest . alt)
(continue `(image ,dest . ,alt)))))))
(ch
(unget1 ch)
(lp (cons #\! elts)))))
(ch (lp (cons ch elts))))))
(define (read-para indent kup knext)
(define (make-continuation reader)
(lambda (para)
(reader indent kup (lambda (nodelist)
(knext (cons para nodelist))))))
(define (done? ch)
(match ch
(#\newline
(let lp ((ch (next)))
(match ch
((? eof-object?)
(lambda (para)
(kup (knext (list para)) ch empty-indent)))
(ch
(read-block-type ch #t
make-continuation
(if (eqv? ch #\newline)
(lambda (chars)
(unget chars)
(make-continuation read-para))
(lambda (chars)
(unget chars)
#f)))))))
(_ #f)))
(read-text 'para indent done? (make-continuation read-block-list)))
(define (read-para* chars indent kup knext)
(unget chars)
(read-para indent kup knext))
(define (read-heading level indent continue)
(let ((continue (lambda (heading)
(match heading
(('heading . body)
(continue `(heading ,level . ,body)))))))
(read-text 'heading indent (lambda (ch)
(and (eqv? ch #\newline) continue))
continue)))
(define (read-li marker marker-indent marker-size kup knext)
(define list-tag
(match marker
((? number?) 'enumerate)
((? char?) 'itemize)))
(define (list-tag? tag) (eq? tag list-tag))
(read-indent
(lambda (ch indent)
(match indent
((outer . inner)
(match marker-indent
((marker-outer ... marker-inner)
(let ((body-indent
(append marker-outer
(list (+ marker-inner marker-size outer))
inner)))
(read-block
ch body-indent
(lambda (blocks ch next-indent)
(read-indented-block
ch marker-indent next-indent kup
(lambda (nodelist)
(knext
(match nodelist
((((? list-tag?) . items) . nodelist)
`((,list-tag (item . ,blocks) . ,items) . ,nodelist))
(_
`((,list-tag (item . ,blocks)) . ,nodelist)))))))
identity)))))))))
(define (read-block-type ch in-text? kblock ktext)
(define (make-continue indent kup knext)
(lambda (block)
(read-block-list indent kup
(lambda (nodelist)
(knext (cons block nodelist))))))
(match ch
(#\#
(let lp ((level 1))
(match (next-not-eof "heading")
(#\#
(lp (1+ level)))
(#\space
(kblock
(lambda (indent kup knext)
(read-heading level indent (make-continue indent kup knext)))))
(#\return (lp level))
(#\newline
(kblock
(lambda (indent kup knext)
((make-continue indent kup knext) `(heading ,level)))))
(ch
(ktext (cons ch (make-list level #\#)))))))
(#\`
(match (next)
((? eof-object?) (ktext '(#\`)))
(#\`
(match (next)
((? eof-object?) (ktext '(#\` #\`)))
(#\`
(kblock
(lambda (indent kup knext)
(match (next-line-and-delim)
((tag . delim)
(cond
((eof-object? delim)
(error "eof while reading code block"))
(else
(read-pre (if (string-null? tag) #f tag) indent
(make-continue indent kup knext)))))))))
(ch
(ktext (list ch #\` #\`)))))
(ch
(ktext (list ch #\`)))))
((or #\- #\* #\+)
(match (peek)
((or #\space #\tab)
(kblock (lambda (indent kup knext)
(read-li ch indent 1 kup knext))))
(_
(ktext (list ch)))))
((or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
(let lp ((chars (list ch)))
(let ((ch (next)))
(match ch
((? eof-object?) (ktext chars))
((or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
(lp (cons ch chars)))
((or #\. #\))
;; fixme: record delimiter
(match (next)
((? eof-object?)
(ktext (cons ch chars)))
((and ch2 (or #\space #\tab))
(unget1 ch2)
(if (or (not in-text?) (equal? chars '(#\1)))
(kblock
(lambda (indent kup knext)
(let ((idx (fold (lambda (ch idx)
(+ (* idx 10)
(- (char->integer ch)
(char->integer #\0))))
0 chars)))
(read-li idx indent (1+ (length chars))
kup knext))))
(ktext (cons ch chars))))
(ch2
(ktext (cons* ch2 ch chars)))))
(_
(ktext (cons ch chars)))))))
(#\<
(unget1 #\<)
(if in-text?
(ktext '())
(kblock
(lambda (indent kup knext)
(match (parse-one-xml-element port)
(#f (read-block-list indent kup knext))
(elt ((make-continue indent kup knext) `(block-xml ,elt))))))))
(#\return
(read-block-type (next-not-eof "newline") in-text? kblock ktext))
(#\newline
;; fixme: record loose li
(kblock read-block-list))
((? eof-object?)
(kblock
(lambda (indent kup knext)
(kup (knext '()) ch empty-indent))))
(_
(ktext (list ch)))))
(define (read-block ch indent kup knext)
(define (have-block read-block)
(read-block indent kup knext))
(define (have-text chars)
(read-para* chars indent kup knext))
(read-block-type ch #f have-block have-text))
(define (finish-block-list blocks)
(match blocks
((block) block)
(_ `(begin . ,blocks))))
(define (finish-block-quote blocks)
(match blocks
((block) block)
(_ `(blockquote . ,blocks))))
(define (read-indented-block ch outer-indent indent kup knext)
(define (recurse finish recurse-indent)
(read-indented-block ch recurse-indent indent
(lambda (nested ch indent)
(read-indented-block ch outer-indent indent
kup
(lambda (nodelist)
(knext (cons nested nodelist)))))
finish))
(match (compare-indents outer-indent indent)
('same
(read-block ch indent kup knext))
((or 'outer
'outer-blockquote
'different-blockquote)
(kup (knext '()) ch indent))
('inner
(recurse finish-block-list indent))
('inner-blockquote
(recurse finish-block-quote
(list-head indent (1+ (length outer-indent)))))))
(define (parse-error reason)
(error reason))
(define (read-block-list outer-indent kup knext)
(read-indent
(lambda (ch indent)
(read-indented-block ch outer-indent indent kup knext))))
(read-block-list empty-indent
(lambda (nodelist ch indent) nodelist)
finish-block-list))
(define* (smarxdown->shtml exp #:key
(heading-offset 0)
(handle-inline-xml identity)
(handle-block-xml identity))
(define (transform-inline exp)
(match exp
(('inline-xml xml) (handle-inline-xml xml))
(('code . body) `(tt . ,body))
(('emph . body) `(i . ,(map transform-inline body)))
(('strong . body) `(b . ,(map transform-inline body)))
(('link dest . body) `(a (@ (href ,dest)) . ,(map transform-inline body)))
((? string? str) str)))
(define (transform-block exp)
(match exp
(('block-xml xml) (handle-block-xml xml))
(('para . body) `(p . ,(map transform-inline body)))
(('blockquote . body) `(blockquote . ,(map transform-block body)))
(('itemize ('item . item) ...)
`(ul . ,(map (lambda (blocks)
`(li . ,(map transform-block blocks)))
item)))
(('enumerate ('item . item) ...)
`(ol . ,(map (lambda (blocks)
`(li . ,(map transform-block blocks)))
item)))
(('pre #f . body) `(pre . ,body))
(('pre info . body) `(pre (@ (class ,(string-append "pre-" info)))
. ,body))
(('heading level . body)
(let* ((level (+ level heading-offset))
(tag (cond ((<= level 0) 'h1)
((<= level 6)
(string->symbol
(string #\h (integer->char
(+ level (char->integer #\0))))))
(else 'h6))))
`(,tag . ,(map transform-inline body))))))
(match exp
(('begin exp ...)
`(div . ,(map transform-block exp)))))

View file

@ -1,5 +1,5 @@
;; Tekuti ;; Tekuti
;; Copyright (C) 2008, 2010, 2011, 2012, 2014, 2021 Andy Wingo <wingo at pobox dot com> ;; Copyright (C) 2008, 2010, 2011, 2012, 2014, 2021, 2022 Andy Wingo <wingo at pobox dot com>
;; This program is free software; you can redistribute it and/or ;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as ;; modify it under the terms of the GNU General Public License as
@ -59,6 +59,7 @@
`((timestamp . ,string->number) `((timestamp . ,string->number)
(tags . ,(lambda (v) (string-split/trimming v #\,))) (tags . ,(lambda (v) (string-split/trimming v #\,)))
(title . ,identity) (title . ,identity)
(format . ,string->symbol)
(comments-closed-timestamp . ,(lambda (str) (comments-closed-timestamp . ,(lambda (str)
(if (string-null? str) (if (string-null? str)
#f #f
@ -139,9 +140,10 @@
(lambda () (lambda ()
(case format (case format
((wordpress) (wordpress->sxml raw)) ((wordpress) (wordpress->sxml raw))
((marxdown) (marxdown->sxml raw))
(else `(pre ,raw)))) (else `(pre ,raw))))
(lambda args (lambda args
`(pre ,(bad-user-submitted-xhtml? raw)))))) `(pre ,(bad-user-submitted-marxdown? raw))))))
(define (post-readable-date post) (define (post-readable-date post)
(let ((date (time-utc->date (let ((date (time-utc->date
@ -167,7 +169,7 @@
((assq-ref parsed k) ((assq-ref parsed k)
=> (lambda (v) (format #t "~a: ~a\n" k v))))) => (lambda (v) (format #t "~a: ~a\n" k v)))))
'(timestamp tags status title name comment_status '(timestamp tags status title name comment_status
comments-closed-timestamp)))) comments-closed-timestamp format))))
(content (with-output-to-blob (display (assq-ref parsed 'body)))) (content (with-output-to-blob (display (assq-ref parsed 'body))))
(key (assq-ref parsed 'key)) (key (assq-ref parsed 'key))
(message (format #f "~a: \"~a\"" (message (format #f "~a: \"~a\""
@ -229,6 +231,7 @@
(comments-closed-timestamp . ,comments-closed-timestamp) (comments-closed-timestamp . ,comments-closed-timestamp)
(timestamp . ,timestamp) (timestamp . ,timestamp)
(name . ,name) (name . ,name)
(format . marxdown)
(key . ,(string-downcase (key . ,(string-downcase
(uri-encode (uri-encode
(string-append (date->string (timestamp->date timestamp) (string-append (date->string (timestamp->date timestamp)