From e62d37ae7f974a0ce9585ca0908a98b5d424b9f4 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sat, 8 Oct 2022 07:51:39 +0000 Subject: [PATCH 01/19] Commit pending changes --- tekuti/classifier.scm | 14 ++++++++++---- tekuti/config.scm | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tekuti/classifier.scm b/tekuti/classifier.scm index 6c8ba9a..2a98c45 100644 --- a/tekuti/classifier.scm +++ b/tekuti/classifier.scm @@ -135,11 +135,17 @@ (total-legit-features (feature-count legit-features))) (hash-for-each (lambda (feature _) - (let ((bogus-count (hash-ref bogus-features feature 0)) + (let ((bogus-count (hash-ref bogus-features feature)) (legit-count (hash-ref legit-features feature 0))) - (hash-set! log-bogosities feature - (log (/ (/ (+ bogus-count 0.001) total-bogus-features) - (/ (+ legit-count 0.001) total-legit-features)))))) + (cond + (bogus-count + (hash-set! log-bogosities feature + (log (/ (/ (+ bogus-count 0.001) total-bogus-features) + (/ (+ legit-count 0.001) total-legit-features))))) + (else + (hash-set! log-bogosities feature + (log (/ (/ 0.01 total-bogus-features) + (/ (+ legit-count 0.01) total-legit-features)))))))) changed-features))) (define (compute-bogus-probability comment log-bogosities bogus-prior diff --git a/tekuti/config.scm b/tekuti/config.scm index 634c482..77fb6ff 100644 --- a/tekuti/config.scm +++ b/tekuti/config.scm @@ -27,6 +27,7 @@ (define-module (tekuti config) #:use-module (tekuti util) #:use-module ((sxml ssax) #:select (define-parsed-entity!)) + #:declarative? #f #:export (*public-scheme* *public-host* *public-port* *public-path-base* *private-host* *private-port* *private-path-base* *git-dir* *git* *debug* *admin-user* *admin-pass* From 1236e8a996ad66e90f83524a2bdcd53362bbfc28 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 21:40:27 +0200 Subject: [PATCH 02/19] Add marxdown Marxdown a new dialect for writing blog posts. We'll see! --- Makefile.am | 1 + tekuti/marxdown.scm | 631 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 632 insertions(+) create mode 100644 tekuti/marxdown.scm diff --git a/Makefile.am b/Makefile.am index 97e9643..3dbca22 100644 --- a/Makefile.am +++ b/Makefile.am @@ -10,6 +10,7 @@ SOURCES = \ tekuti/filters.scm \ tekuti/git.scm \ tekuti/index.scm \ + tekuti/marxdown.scm \ tekuti/match-bind.scm \ tekuti/mod-lisp.scm \ tekuti/page-helpers.scm \ diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm new file mode 100644 index 0000000..39c3f8d --- /dev/null +++ b/tekuti/marxdown.scm @@ -0,0 +1,631 @@ +;; Tekuti +;; Copyright (C) 2022 Andy Wingo + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 3 of +;; the License, or (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program; if not, contact: +;; +;; Free Software Foundation Voice: +1-617-542-5942 +;; 59 Temple Place - Suite 330 Fax: +1-617-542-2652 +;; Boston, MA 02111-1307, USA gnu@gnu.org + +;;; Commentary: +;; +;; "Marxdown" to SXML parser. +;; +;; Marxdown is a dialect of Markdown, designed for simplicity and +;; embeddability. The X in markdown refers to how it processes embedded +;; XML. +;; +;; In some ways, Marxdown is a more restrictive subset of Markdown: +;; +;; - Marxdown only supports headings with the ### prefix ("ATX +;; headings"), and doesn't support so-called "Setext headings" that +;; follow the heading with --- or similar. Why bother? +;; +;; - Marxdown doesn't support indented code blocks, instead only using +;; fenced code blocks. +;; +;; - Markxdown doesn't currently support link titles or reference +;; links. +;; +;; - Marxdown requires each line of a block quote to have the same +;; indent and `>` characters, not just the first line. +;; +;; - Marxdown doesn't support multi-` inline code sequences; just the +;; single `. +;; +;; - ... +;; +;; Generally speaking, a valid Marxdown document will also be valid +;; Markdown, as defined by CommonMark. However there are exceptions. +;; Marxdown is less "sloppy", if you will; unlike Markdown, not any +;; sequence of characters is valid Marxdown. Parsing Marxdown can raise +;; an error. +;; +;; Notably, Marxdown requires valid nesting. For example, `*_foo*_` is +;; an error in Marxdown, as the emphasis markers aren't nested properly. +;; +;; Also, when Marxdown parses embedded XML, it hands over control of the +;; character stream to a validating XML parser which can also throw an +;; error. This XML parser doesn't stop at what CommonMark Markdown +;; would consider block boundaries. As a consequence, Marxdown cannot +;; be embedded inside XML. Due to a limitation which may be relaxed in +;; the future, the XML parser doesn't strip off any blockquote prefix. +;; +;; These limitations come with a benefit: tools can easily consume +;; Marxdown and then embed that result in SXML documents of different +;; kinds. +;; +;;; Code: + +(define-module (tekuti marxdown) + #:use-module (ice-9 match) + #:use-module (ice-9 textual-ports) + #:use-module (ice-9 rdelim) + #:use-module (sxml ssax) + #:use-module (sxml transform) + #:use-module ((srfi srfi-1) #:select (fold)) + #:export (marxdown->smarxdown + smarxdown->shtml)) + +(define (parse-one-xml-element port) + ;; -> seed + (define (fdown elem-gi attributes namespaces expected-content seed) + '()) + ;; -> seed + (define (fup elem-gi attributes namespaces parent-seed seed) + (cons `(,elem-gi + ,@(match namespaces + (() '()) + ;; fixme: prohibit? + (_ `((*NAMESPACES* ,namespaces)))) + ,@(match attributes + (() '()) + (((attr . value) ...) + `((@ ,(map list attr value))))) + . ,(reverse seed)) + parent-seed)) + (define (ftext string1 string2 seed) + (if (string-null? string2) + (cons string1 seed) + (cons* string2 string1 seed))) + (define parse-element (ssax:make-elem-parser fdown fup ftext ())) + + (let ((token (ssax:read-markup-token port))) + (match (xml-token-kind token) + ('COMMENT #f) + ('START + (let ((elems #f) (entities '()) (namespaces '()) (seed '())) + (match (parse-element (xml-token-head token) port elems + entities namespaces #t seed) + ((elt) elt)))) + (kind (error "unexpected XML token" token))))) + +(define (marxdown->smarxdown port) + (define (round-up x y) + (* y (ceiling-quotient x y))) + + (define (advance/tab indent) + (round-up (1+ indent) 4)) + + (define (advance indent) + (1+ indent)) + + (define (unget1 ch) + (unget-char port ch)) + (define (unget chars) + (match chars + (() + (values)) + ((ch . chars) + (unget1 ch) + (unget chars)))) + + (define (next) (get-char port)) + (define (next-not-eof ctx) + (let ((ch (next))) + (if (eof-object? ch) + (error "EOF while reading" ctx) + ch))) + (define (next-line-and-delim) (read-line port 'split)) + (define (peek) (peek-char port)) + + (define (skip-whitespace k) + (let lp ((indent 0)) + (let ((ch (next))) + (case ch + ((#\space) + (lp (advance indent))) + ((#\tab) + (lp (advance/tab indent))) + (else + (k ch indent)))))) + + (define empty-indent '(0)) + (define (read-indent k) + (skip-whitespace + (lambda (ch indent) + (match ch + (#\> + (read-indent + (lambda (ch indent*) + (k ch + (cons indent + (match indent* + ((head . tail) + ;; Account for the #\>. + (cons (1+ head) tail)))))))) + (_ + (k ch (list indent))))))) + + (define (advance-indent indent) + (match indent + ((indent* ... indent) + (append indent* (list (advance indent)))))) + + (define (advance-indent/tab indent) + (let* ((col (apply + indent)) + (col* (advance/tab col))) + (match indent + ((indent* ... indent) + (append indent* (+ (- col* col) indent)))))) + + (define (compare-indents prev new) + (match (vector prev new) + (#((x . prev-tail) (x . new-tail)) + (compare-indents prev-tail new-tail)) + (_ + (match (vector prev new) + (#(() ()) 'same) + (#((prev) (new)) (if (< prev new) 'inner 'outer)) + (#(() _) 'inner-blockquote) + (#(_ ()) 'outer-blockquote) + (_ 'different-blockquote))))) + + ;; indent as list of blockquote + + ;; kup ::= (node type info indent) -> _ + ;; knext ::= nodelist -> node + + (define (drop-whitespace-up-to n col kt kf) + (define col-end (+ col n)) + (let lp ((n n)) + (cond + ((zero? n) (kt)) + (else + (match (next) + (#\space (lp (1- n))) + (#\tab + (let ((col (advance/tab (- col-end n)))) + (cond + ((<= col col-end) (lp (- col-end col))) + (else (kt))))) + (#\newline + ;; Sure. Trailing whitespace can be any indent. + (kt)) + (ch + (unless (eof-object? ch) (unget1 ch)) + (kf))))))) + + (define (drop-whitespace-then-blockquote n col kt kf) + (define col-end (+ col n)) + (let lp ((n n) (kf kf)) + (let* ((ch (next)) + (kf (lambda () (unless (eof-object? ch) (unget1 ch)) (kf)))) + (cond + ((zero? n) + (match ch + (#\> (kt kf)) + (_ (kf)))) + (else + (match ch + (#\space (lp (1- n) kf)) + (#\tab + (let ((col (advance/tab (- col-end n)))) + (cond + ((<= col col-end) (lp (- col-end col) kf)) + (else (kf))))) + (_ (kf)))))))) + + (define (consume-indent indent kt kf) + (match indent + ((0) (kt)) + (_ + (let lp ((indent indent) (col 0) (kf kf)) + (match indent + ((n) (drop-whitespace-up-to n col kt kf)) + ((n m . indent) + (let* ((indent (cons (1- m) indent)) + (kt (lambda (kf) (lp indent (+ col n 1) kf)))) + (drop-whitespace-then-blockquote n col kt kf)))))))) + + (define (read-pre tag indent k) + (let lp ((body '())) + (define (finish tail) + (k (list 'pre tag (string-concatenate-reverse body tail)))) + (consume-indent + indent + (lambda () + (match (next-line-and-delim) + ((str . delim) + (cond + ((eof-object? delim) + (finish (if (eof-object? str) "" str))) + ((string=? str "```") + (finish "")) + (else + (lp (cons* (string delim) str body))))))) + (lambda () + (finish ""))))) + + (define (read-link-destination) + (match (next-not-eof "link destination") + (#\< + (let lp ((chars '())) + (match (next-not-eof "<>-delimited link") + (#\> (reverse-list->string chars)) + (#\< (error "< inside <>-delimited link")) + (#\\ + (match (next) + ((? eof-object?) (lp (cons #\\ chars))) + (ch (lp (cons ch chars))))) + (ch (lp (cons ch chars)))))) + (ch + (unget1 ch) + (let lp ((chars '()) (k reverse-list->string)) + (match (next-not-eof "link destination") + (#\) (unget1 #\)) (k chars)) + ((and ch + (or #\space #\) + (? (lambda (ch) + (or (charchar #x20)) + (char=? ch (integer->char #x7f))))))) + (unget1 ch) + (k chars)) + (#\( + (lp (cons #\( chars) + (lambda (chars) + (match (next-not-eof "link destination") + (#\) (lp (cons #\) chars) k)) + (ch (error "unexpected char" ch)))))) + (#\\ + (lp (cons (next-not-eof "link-destination") chars) k)) + (ch (lp (cons ch chars) k))))))) + + (define (read-link indent continue) + (read-text + #f indent + (lambda (ch) + (match ch + (#\] + (lambda (text) + (match (next) + (#\( + (let ((dest (read-link-destination))) + (match (next) + (#\) + (continue `(link ,dest . ,text))) + (ch + (error "unexpected after link dest" ch))))) + (ch (error "link missing destination URL"))))) + (_ #f))) + (lambda (elts) + (error "end-of-block while reading link")))) + + (define (read-emph indent delim continue) + (define (delim? ch) (eqv? ch delim)) + (match (next-not-eof "emphasis") + ((? delim?) + (let ((done? (lambda (ch) + (match ch + ((? delim?) + (match (next-not-eof "emphasis") + ((? delim?) continue) + (ch (unget1 ch) #f))) + (_ #f))))) + (read-text 'strong indent done? + (lambda (elt) + (error "end of block while reading strong" elt))))) + (ch + (unget1 ch) + (let ((done? (lambda (ch) (and (delim? ch) continue)))) + (read-text 'emph indent done? + (lambda (elt) + (error "end of block while reading emph" elt))))))) + + (define (read-code indent continue) + (let lp ((chars '())) + (match (next-not-eof "backticks") + (#\` (continue `(code ,(reverse-list->string chars)))) + (#\newline + (consume-indent + indent + (lambda () + (match (next-not-eof "code") + (#\newline (error "end of block while reading code")) + (ch (unget1 ch) (lp (cons #\space chars))))) + (lambda () (error "end of block while reading code")))) + (ch (lp (cons ch chars)))))) + + (define (read-text tag indent done? on-block-end) + (let lp ((elts '())) + (define (continue elt) (lp (cons elt elts))) + (define (finish kdone) + (let lp ((elts elts) (out '())) + (match elts + (() (kdone (if tag (cons tag out) out))) + (((? char? ch) . elts) + (let lp2 ((elts elts) (chars (list ch))) + (match elts + (((? char? ch) . elts) + (lp2 elts (cons ch chars))) + (_ + (lp elts (cons (list->string chars) out)))))) + ((elt . elts) + (lp elts (cons elt out)))))) + (match (next) + ((? eof-object?) (finish on-block-end)) + (#\newline + (consume-indent + indent + (lambda () + (cond + ((done? #\newline) => finish) + (else (lp (cons #\newline elts))))) + (lambda () + (finish on-block-end)))) + ((= done? (and kdone (not #f))) (finish kdone)) + ((and (or #\_ #\*) delim) (read-emph indent delim continue)) + (#\` (read-code indent continue)) + (#\[ + (read-link indent continue)) + (#\< + (unget1 #\<) + (match (parse-one-xml-element port) + (#f (lp elts)) + (elt (continue `(inline-xml ,elt))))) + (#\\ (lp (cons (next-not-eof "backslash") elts))) + (#\! (match (next) + ((? eof-object?) (lp (cons #\! elts))) + (#\[ + (read-link indent + (lambda (link) + (match link + (('link dest . alt) + (continue `(image ,dest . ,alt))))))) + (ch (lp (cons* ch #\! elts))))) + (ch (lp (cons ch elts)))))) + + (define (read-para indent kup knext) + (define (make-continuation reader) + (lambda (para) + (reader indent kup (lambda (nodelist) + (knext (cons para nodelist)))))) + (define (done? ch) + (match ch + (#\newline + (let ((ch (next))) + (match ch + ((? eof-object?) + (lambda (para) + (kup (knext (list para)) ch empty-indent))) + (ch + (read-block-type ch #t + make-continuation + (lambda (chars) + (unget chars) + #f)))))) + (_ #f))) + (read-text 'para indent done? (make-continuation read-block-list))) + + (define (read-para* chars indent kup knext) + (unget chars) + (read-para indent kup knext)) + + (define (read-heading level knext) + (error "unimplemented")) + + (define (read-li marker marker-indent marker-size kup knext) + (read-indent + (lambda (ch indent) + (match indent + ((outer . inner) + (match marker-indent + ((marker-outer ... marker-inner) + (let ((body-indent + (append marker-outer + (list (+ marker-inner marker-size outer)) + inner))) + (pk 'hey marker marker-indent marker-size indent body-indent ch) + (read-indented-block + ch marker-indent body-indent + (lambda (elt ch indent) + (pk ch marker-indent indent elt) + (read-indented-block ch marker-indent indent kup + (lambda (nodelist) + (knext (cons elt nodelist))))) + (lambda (nodelist) + (cons* 'li marker nodelist))))))))))) + + (define (read-block ch indent kup knext) + (define (have-block read-block) + (read-block indent kup knext)) + (define (have-text chars) + (read-para* chars indent kup knext)) + (read-block-type ch #f have-block have-text)) + + (define (read-block-type ch in-text? kblock ktext) + (define (make-continue indent kup knext) + (lambda (block) + (read-block-list indent kup + (lambda (nodelist) + (knext (cons block nodelist)))))) + (match ch + (#\# + (let lp ((level 1)) + (match (next-not-eof "heading") + (#\# + (lp (1+ level))) + (#\space + (kblock + (lambda (indent kup knext) + (read-heading level (make-continue indent kup knext))))) + (#\newline + (kblock + (lambda (indent kup knext) + ((make-continue indent kup knext) `(heading ,level))))) + (ch + (ktext (cons ch (make-list level #\#))))))) + (#\` + (match (next) + ((? eof-object?) (ktext '(#\`))) + (#\` + (match (next) + ((? eof-object?) (ktext '(#\` #\`))) + (#\` + (kblock + (lambda (indent kup knext) + (match (next-line-and-delim) + ((tag . delim) + (cond + ((eof-object? delim) + (error "eof while reading code block")) + (else + (read-pre (if (string-null? tag) #f tag) indent + (make-continue indent kup knext))))))))) + (ch + (ktext (list ch #\` #\`))))) + (ch + (ktext (list ch #\`))))) + ((or #\- #\* #\+) + (match (peek) + ((or #\space #\tab) + (kblock (lambda (indent kup knext) + (read-li ch indent 1 kup knext)))) + (_ + (ktext (list ch))))) + ((or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) + (let lp ((chars (list ch))) + (let ((ch (next))) + (match ch + ((? eof-object?) (ktext chars)) + ((or #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) + (lp (cons ch chars))) + ((or #\. #\)) + ;; fixme: record delimiter + (match (next) + ((? eof-object?) + (ktext (cons ch chars))) + ((and ch2 (or #\space #\tab)) + (unget1 ch2) + (if (or (not in-text?) (equal? chars '(#\1))) + (kblock + (lambda (indent kup knext) + (let ((idx (fold (lambda (ch idx) + (+ (* idx 10) + (- (char->integer ch) + (char->integer #\0)))) + 0 chars))) + (read-li idx indent (1+ (length chars)) + kup knext)))) + (ktext (cons ch chars)))) + (ch2 + (ktext (cons* ch2 ch chars))))) + (_ + (ktext (cons ch chars))))))) + (#\< + (unget1 #\<) + (if in-text? + (ktext '()) + (kblock + (lambda (indent kup knext) + (match (parse-one-xml-element port) + (#f (read-block-list indent kup knext)) + (elt ((make-continue indent kup knext) `(block-xml ,elt)))))))) + (#\newline + ;; fixme: record loose li + (kblock read-block-list)) + ((? eof-object?) + (kblock + (lambda (indent kup knext) + (kup (knext '()) ch empty-indent)))) + (_ + (ktext (list ch))))) + + (define (read-indented-block ch outer-indent indent kup knext) + (define (recurse kind recurse-indent) + (read-indented-block ch recurse-indent indent + (lambda (nested ch indent) + (read-indented-block ch outer-indent indent + kup + (lambda (nodelist) + (knext (cons nested nodelist))))) + (lambda (nodelist) + (cons kind nodelist)))) + (match (compare-indents outer-indent indent) + ('same + (read-block ch indent kup knext)) + ((or 'outer + 'outer-blockquote + 'different-blockquote) + (kup (knext '()) ch indent)) + ('inner + (recurse 'begin indent)) + ('inner-blockquote + (recurse 'blockquote (list-head indent (1+ (length outer-indent))))))) + + (define (parse-error reason) + (error reason)) + + (define (read-block-list outer-indent kup knext) + (read-indent + (lambda (ch indent) + (read-indented-block ch outer-indent indent kup knext)))) + + (read-block-list empty-indent + (lambda (nodelist ch indent) nodelist) + (lambda (nodelist) (cons 'top nodelist)))) + +(define* (smarxdown->shtml exp #:key + (heading-offset 0) + (handle-inline-xml identity) + (handle-block-xml identity)) + (define (transform-inline exp) + (match exp + (('inline-xml xml) (handle-inline-xml xml)) + (('code . body) exp) + (('emph . body) `(emph . ,(map transform-inline body))) + (('strong . body) `(string . ,(map transform-inline body))) + (('link dest . body) `(a (@ (href ,dest)) . ,(map transform-inline body))) + ((? string? str) str))) + (define (transform-block exp) + (match exp + (('block-xml xml) (handle-block-xml xml)) + (('para . body) `(p . ,(map transform-inline body))) + (('blockquote . body) `(blockquote . (map transform-block body))) + (('list-item . body) (error "fixme")) + (('pre #f . body) `(pre . ,body)) + (('pre info . body) `(pre (@ (class ,(string-append "pre-" info))) + . ,body)) + (('heading level . body) + (let* ((level (+ level heading-offset)) + (tag (cond ((<= level 0) 'h1) + ((<= level 6) + (string->symbol + (string #\h (integer->char + (+ level (char->integer #\0)))))) + (else 'h6)))) + `(,tag . ,(map transform-inline body)))))) + (match exp + (('top exp ...) + `(top . ,(map transform-block exp))))) From 4374fbf1dcc7c26349dbcd28021ad9f743228ecc Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 22:37:55 +0200 Subject: [PATCH 03/19] Switch to marxdown as default post and comment format --- tekuti/classifier.scm | 6 +++--- tekuti/comment.scm | 7 ++++--- tekuti/filters.scm | 24 ++++++++++++++++-------- tekuti/post.scm | 7 ++++--- 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/tekuti/classifier.scm b/tekuti/classifier.scm index 2a98c45..a15e213 100644 --- a/tekuti/classifier.scm +++ b/tekuti/classifier.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2012, 2022 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -56,11 +56,11 @@ (author ,(assq-ref comment 'author)) (email ,(assq-ref comment 'author_email)) (url ,(assq-ref comment 'author_url)) - ,(let ((format (or (assq-ref comment 'format) 'wordpress)) + ,(let ((format (or (assq-ref comment 'format) 'marxdown)) (raw (assq-ref comment 'raw-content))) (or (case format ((wordpress) (false-if-exception (wordpress->sxml raw))) - + ((marxdown) (false-if-exception (marxdown->sxml raw))) (else `(pre ,raw))) `(pre ,raw)))))) diff --git a/tekuti/comment.scm b/tekuti/comment.scm index db3e3e8..5c716b8 100644 --- a/tekuti/comment.scm +++ b/tekuti/comment.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2012, 2022 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -88,9 +88,10 @@ (a (@ (href ,(string-append "#" (assq-ref comment 'key)))) ,(comment-readable-date comment))) ,(neutralize-links - (let ((format (or (assq-ref comment 'format) 'wordpress))) + (let ((format (or (assq-ref comment 'format) 'marxdown))) ((case format ((wordpress) wordpress->sxml) + ((marxdown) marxdown->sxml) (else (lambda (text) `(pre ,text)))) (comment-raw-content comment)))))) @@ -119,7 +120,7 @@ `(("author" ,(lambda (x) #f)) ("email" ,bad-email?) ("url" ,bad-url?) - ("comment" ,bad-user-submitted-xhtml?) + ("comment" ,bad-user-submitted-marxdown?) ("x" ,bad-number?) ("submit" ,(lambda (x) #f)))) diff --git a/tekuti/filters.scm b/tekuti/filters.scm index be37d0c..2361010 100644 --- a/tekuti/filters.scm +++ b/tekuti/filters.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2011, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2011, 2012, 2022 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -27,10 +27,11 @@ (define-module (tekuti filters) #:use-module (sxml simple) #:use-module (sxml transform) + #:use-module (tekuti marxdown) #:use-module (tekuti match-bind) #:use-module (tekuti util) - #:export (wordpress->sxml - *allowed-tags* bad-user-submitted-xhtml?)) + #:export (wordpress->sxml marxdown->sxml + *allowed-tags* bad-user-submitted-marxdown?)) (define blocks '(table thead tfoot caption colgroup tbody tr td th div dl dd dt ul ol li pre select form map area blockquote @@ -90,6 +91,9 @@ (*text* . ,(lambda (tag text) text)))))) +(define (marxdown->sxml text) + (smarxdown->shtml (call-with-input-string text marxdown->smarxdown))) + (define *allowed-tags* `((a (href . ,urlish?) title) (abbr title) @@ -140,13 +144,18 @@ `((div ,(compile-sxslt-rules *allowed-tags*) . ,(lambda body body)))) -(define (bad-user-submitted-xhtml? x) +(define (bad-user-submitted-marxdown? x) (catch #t (lambda () - (pre-post-order (wordpress->sxml x) *valid-xhtml-rules*) + (pre-post-order (marxdown->sxml x) *valid-xhtml-rules*) #f) (lambda (key . args) - `(div (p (b "Invalid XHTML")) + `(div (p (b "Invalid Marxdown")) + (p "The input grammar is essentially Markdown. However " + "there are some differences, notably that e.g. *emph* " + "blocks need to be properly closed and that any " + "embedded HTML needs to be well-formed XHTML.") + (p "Further information:") ,(case key ((parser-error) `(div @@ -168,6 +177,5 @@ ((bad-attr-value) `(p "XHTML attribute has bad value: " ,(car args))) (else - (pk key args) - `(p "Jesus knows why, and so do you"))))))) + `(p "Hey not real knows why, and so do you"))))))) diff --git a/tekuti/post.scm b/tekuti/post.scm index 993fb07..b8afcba 100644 --- a/tekuti/post.scm +++ b/tekuti/post.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2011, 2012, 2014, 2021 Andy Wingo +;; Copyright (C) 2008, 2010, 2011, 2012, 2014, 2021, 2022 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -133,15 +133,16 @@ (git "show" (string-append (assq-ref post 'sha1) ":content"))) (define (post-sxml-content post) - (let ((format (or (assq-ref post 'format) 'wordpress)) + (let ((format (or (assq-ref post 'format) 'marxdown)) (raw (post-raw-content post))) (catch #t (lambda () (case format ((wordpress) (wordpress->sxml raw)) + ((marxdown) (marxdown->sxml raw)) (else `(pre ,raw)))) (lambda args - `(pre ,(bad-user-submitted-xhtml? raw)))))) + `(pre ,(bad-user-submitted-marxdown? raw)))))) (define (post-readable-date post) (let ((date (time-utc->date From f424227fff15a7f4b368e8dc708b864007250fb9 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 22:42:57 +0200 Subject: [PATCH 04/19] Fix format for comments --- tekuti/comment.scm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tekuti/comment.scm b/tekuti/comment.scm index 5c716b8..6c27fad 100644 --- a/tekuti/comment.scm +++ b/tekuti/comment.scm @@ -88,7 +88,7 @@ (a (@ (href ,(string-append "#" (assq-ref comment 'key)))) ,(comment-readable-date comment))) ,(neutralize-links - (let ((format (or (assq-ref comment 'format) 'marxdown))) + (let ((format (or (assq-ref comment 'format) 'wordpress))) ((case format ((wordpress) wordpress->sxml) ((marxdown) marxdown->sxml) @@ -148,6 +148,7 @@ (author . ,(de-newline author)) (author_email . ,email) (author_url . ,url) + (format . marxdown) (raw-content . ,content)))) (define (make-new-comment key title comment) From 9dd5eba908fb503d9fb73029043ea97d5674f06f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 22:45:35 +0200 Subject: [PATCH 05/19] Fix format for comments --- tekuti/classifier.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekuti/classifier.scm b/tekuti/classifier.scm index a15e213..a8a57de 100644 --- a/tekuti/classifier.scm +++ b/tekuti/classifier.scm @@ -56,7 +56,7 @@ (author ,(assq-ref comment 'author)) (email ,(assq-ref comment 'author_email)) (url ,(assq-ref comment 'author_url)) - ,(let ((format (or (assq-ref comment 'format) 'marxdown)) + ,(let ((format (or (assq-ref comment 'format) 'wordpress)) (raw (assq-ref comment 'raw-content))) (or (case format ((wordpress) (false-if-exception (wordpress->sxml raw))) From ddb029d674be85db93abb1e10c9c5fd1e223cb35 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 22:51:27 +0200 Subject: [PATCH 06/19] Fix format for posts (!) --- tekuti/post.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekuti/post.scm b/tekuti/post.scm index b8afcba..3d06d8b 100644 --- a/tekuti/post.scm +++ b/tekuti/post.scm @@ -133,7 +133,7 @@ (git "show" (string-append (assq-ref post 'sha1) ":content"))) (define (post-sxml-content post) - (let ((format (or (assq-ref post 'format) 'marxdown)) + (let ((format (or (assq-ref post 'format) 'wordpress)) (raw (post-raw-content post))) (catch #t (lambda () From 9720ba7bcf19e88617e5872c873c36bb7458ec4a Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 22:54:07 +0200 Subject: [PATCH 07/19] New posts are marxdown --- tekuti/post.scm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tekuti/post.scm b/tekuti/post.scm index 3d06d8b..444f193 100644 --- a/tekuti/post.scm +++ b/tekuti/post.scm @@ -168,7 +168,7 @@ ((assq-ref parsed k) => (lambda (v) (format #t "~a: ~a\n" k v))))) '(timestamp tags status title name comment_status - comments-closed-timestamp)))) + comments-closed-timestamp format)))) (content (with-output-to-blob (display (assq-ref parsed 'body)))) (key (assq-ref parsed 'key)) (message (format #f "~a: \"~a\"" @@ -230,6 +230,7 @@ (comments-closed-timestamp . ,comments-closed-timestamp) (timestamp . ,timestamp) (name . ,name) + (format . marxdown) (key . ,(string-downcase (uri-encode (string-append (date->string (timestamp->date timestamp) From f920fb678bec3a279059e2fef081278b76e06e6f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 22:57:13 +0200 Subject: [PATCH 08/19] Fix some marxdown bugs --- tekuti/marxdown.scm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index 39c3f8d..205fefe 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -605,14 +605,14 @@ (('inline-xml xml) (handle-inline-xml xml)) (('code . body) exp) (('emph . body) `(emph . ,(map transform-inline body))) - (('strong . body) `(string . ,(map transform-inline body))) + (('strong . body) `(strong . ,(map transform-inline body))) (('link dest . body) `(a (@ (href ,dest)) . ,(map transform-inline body))) ((? string? str) str))) (define (transform-block exp) (match exp (('block-xml xml) (handle-block-xml xml)) (('para . body) `(p . ,(map transform-inline body))) - (('blockquote . body) `(blockquote . (map transform-block body))) + (('blockquote . body) `(blockquote . ,(map transform-block body))) (('list-item . body) (error "fixme")) (('pre #f . body) `(pre . ,body)) (('pre info . body) `(pre (@ (class ,(string-append "pre-" info))) @@ -628,4 +628,4 @@ `(,tag . ,(map transform-inline body)))))) (match exp (('top exp ...) - `(top . ,(map transform-block exp))))) + `(div . ,(map transform-block exp))))) From eebd1b219a9fae9cdbb346c7292ddb69d57652d6 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 23:03:16 +0200 Subject: [PATCH 09/19] Parse formats to symbol --- tekuti/comment.scm | 3 ++- tekuti/post.scm | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tekuti/comment.scm b/tekuti/comment.scm index 6c27fad..872ec64 100644 --- a/tekuti/comment.scm +++ b/tekuti/comment.scm @@ -41,7 +41,8 @@ compute-legit-comments compute-bogus-comments)) (define *comment-spec* - `((timestamp . ,string->number))) + `((timestamp . ,string->number) + (format . ,string->symbol))) (define (blob->comment encoded-name sha1) (let ((blob (git "show" sha1))) diff --git a/tekuti/post.scm b/tekuti/post.scm index 444f193..4ed3687 100644 --- a/tekuti/post.scm +++ b/tekuti/post.scm @@ -59,6 +59,7 @@ `((timestamp . ,string->number) (tags . ,(lambda (v) (string-split/trimming v #\,))) (title . ,identity) + (format . ,string->symbol) (comments-closed-timestamp . ,(lambda (str) (if (string-null? str) #f From 2d102677d1218ec76413252c227e0413d8273214 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 23:10:46 +0200 Subject: [PATCH 10/19] Fix xml attr parsing --- tekuti/marxdown.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index 205fefe..6b43a17 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -92,7 +92,7 @@ ,@(match attributes (() '()) (((attr . value) ...) - `((@ ,(map list attr value))))) + `((@ . ,(map list attr value))))) . ,(reverse seed)) parent-seed)) (define (ftext string1 string2 seed) From 4a9766c6c1f72f571f1ab906a9af786b5eb201e3 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 20 Oct 2022 23:26:35 +0200 Subject: [PATCH 11/19] Fix emph lowering --- tekuti/marxdown.scm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index 6b43a17..2b6457e 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -603,9 +603,9 @@ (define (transform-inline exp) (match exp (('inline-xml xml) (handle-inline-xml xml)) - (('code . body) exp) - (('emph . body) `(emph . ,(map transform-inline body))) - (('strong . body) `(strong . ,(map transform-inline body))) + (('code . body) `(tt . ,body)) + (('emph . body) `(i . ,(map transform-inline body))) + (('strong . body) `(b . ,(map transform-inline body))) (('link dest . body) `(a (@ (href ,dest)) . ,(map transform-inline body))) ((? string? str) str))) (define (transform-block exp) From 5f1620e0e7d4c62ba8ee9b22dc0c9de75c931730 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sat, 22 Oct 2022 09:55:07 +0200 Subject: [PATCH 12/19] Handle CR/LF in marxdown; fix trailing ! in paragraphs --- tekuti/marxdown.scm | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index 2b6457e..a94ba8d 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -213,6 +213,8 @@ (#\newline ;; Sure. Trailing whitespace can be any indent. (kt)) + (#\return + (lp n)) (ch (unless (eof-object? ch) (unget1 ch)) (kf))))))) @@ -347,12 +349,14 @@ (let lp ((chars '())) (match (next-not-eof "backticks") (#\` (continue `(code ,(reverse-list->string chars)))) + (#\return (lp chars)) (#\newline (consume-indent indent (lambda () (match (next-not-eof "code") - (#\newline (error "end of block while reading code")) + ((or #\return #\newline) + (error "end of block while reading code")) (ch (unget1 ch) (lp (cons #\space chars))))) (lambda () (error "end of block while reading code")))) (ch (lp (cons ch chars)))))) @@ -375,6 +379,7 @@ (lp elts (cons elt out)))))) (match (next) ((? eof-object?) (finish on-block-end)) + (#\return (lp elts)) (#\newline (consume-indent indent @@ -403,7 +408,9 @@ (match link (('link dest . alt) (continue `(image ,dest . ,alt))))))) - (ch (lp (cons* ch #\! elts))))) + (ch + (unget1 ch) + (lp (cons #\! elts))))) (ch (lp (cons ch elts)))))) (define (read-para indent kup knext) @@ -413,6 +420,7 @@ (knext (cons para nodelist)))))) (define (done? ch) (match ch + (#\return (done? (next))) (#\newline (let ((ch (next))) (match ch @@ -446,11 +454,9 @@ (append marker-outer (list (+ marker-inner marker-size outer)) inner))) - (pk 'hey marker marker-indent marker-size indent body-indent ch) (read-indented-block ch marker-indent body-indent (lambda (elt ch indent) - (pk ch marker-indent indent elt) (read-indented-block ch marker-indent indent kup (lambda (nodelist) (knext (cons elt nodelist))))) @@ -480,6 +486,7 @@ (kblock (lambda (indent kup knext) (read-heading level (make-continue indent kup knext))))) + (#\return (lp level)) (#\newline (kblock (lambda (indent kup knext) @@ -552,6 +559,8 @@ (match (parse-one-xml-element port) (#f (read-block-list indent kup knext)) (elt ((make-continue indent kup knext) `(block-xml ,elt)))))))) + (#\return + (read-block-type (next-not-eof "newline") in-text? kblock ktext)) (#\newline ;; fixme: record loose li (kblock read-block-list)) From 7652cb17cf75dbed888927e7f501e46ce03ff9ec Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 23 Oct 2022 09:45:52 +0200 Subject: [PATCH 13/19] Implement heading parsing --- tekuti/marxdown.scm | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index a94ba8d..fea595c 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -420,7 +420,6 @@ (knext (cons para nodelist)))))) (define (done? ch) (match ch - (#\return (done? (next))) (#\newline (let ((ch (next))) (match ch @@ -440,8 +439,14 @@ (unget chars) (read-para indent kup knext)) - (define (read-heading level knext) - (error "unimplemented")) + (define (read-heading level indent continue) + (let ((continue (lambda (heading) + (match heading + (('heading . body) + (continue `(heading ,level . ,body))))))) + (read-text 'heading indent (lambda (ch) + (and (eqv? ch #\newline) continue)) + continue))) (define (read-li marker marker-indent marker-size kup knext) (read-indent @@ -485,7 +490,7 @@ (#\space (kblock (lambda (indent kup knext) - (read-heading level (make-continue indent kup knext))))) + (read-heading level indent (make-continue indent kup knext))))) (#\return (lp level)) (#\newline (kblock From 105873854c0d02899e94adbffde985b2de908bdd Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 23 Oct 2022 17:30:36 +0200 Subject: [PATCH 14/19] Fix list parsing --- tekuti/marxdown.scm | 70 +++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index fea595c..3ac390b 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -449,6 +449,11 @@ continue))) (define (read-li marker marker-indent marker-size kup knext) + (define list-tag + (match marker + ((? number?) 'enumerate) + ((? char?) 'itemize))) + (define (list-tag? tag) (eq? tag list-tag)) (read-indent (lambda (ch indent) (match indent @@ -459,21 +464,19 @@ (append marker-outer (list (+ marker-inner marker-size outer)) inner))) - (read-indented-block - ch marker-indent body-indent - (lambda (elt ch indent) - (read-indented-block ch marker-indent indent kup - (lambda (nodelist) - (knext (cons elt nodelist))))) - (lambda (nodelist) - (cons* 'li marker nodelist))))))))))) - - (define (read-block ch indent kup knext) - (define (have-block read-block) - (read-block indent kup knext)) - (define (have-text chars) - (read-para* chars indent kup knext)) - (read-block-type ch #f have-block have-text)) + (read-block + ch body-indent + (lambda (blocks ch next-indent) + (read-indented-block + ch marker-indent next-indent kup + (lambda (nodelist) + (knext + (match nodelist + ((((? list-tag?) . items) . nodelist) + `((,list-tag (item . ,blocks) . ,items) . ,nodelist)) + (_ + `((,list-tag (item . ,blocks)) . ,nodelist))))))) + identity))))))))) (define (read-block-type ch in-text? kblock ktext) (define (make-continue indent kup knext) @@ -576,16 +579,31 @@ (_ (ktext (list ch))))) + (define (read-block ch indent kup knext) + (define (have-block read-block) + (read-block indent kup knext)) + (define (have-text chars) + (read-para* chars indent kup knext)) + (read-block-type ch #f have-block have-text)) + + (define (finish-block-list blocks) + (match blocks + ((block) block) + (_ `(begin . ,blocks)))) + (define (finish-block-quote blocks) + (match blocks + ((block) block) + (_ `(blockquote . ,blocks)))) + (define (read-indented-block ch outer-indent indent kup knext) - (define (recurse kind recurse-indent) + (define (recurse finish recurse-indent) (read-indented-block ch recurse-indent indent (lambda (nested ch indent) (read-indented-block ch outer-indent indent kup (lambda (nodelist) (knext (cons nested nodelist))))) - (lambda (nodelist) - (cons kind nodelist)))) + finish)) (match (compare-indents outer-indent indent) ('same (read-block ch indent kup knext)) @@ -594,9 +612,10 @@ 'different-blockquote) (kup (knext '()) ch indent)) ('inner - (recurse 'begin indent)) + (recurse finish-block-list indent)) ('inner-blockquote - (recurse 'blockquote (list-head indent (1+ (length outer-indent))))))) + (recurse finish-block-quote + (list-head indent (1+ (length outer-indent))))))) (define (parse-error reason) (error reason)) @@ -608,7 +627,7 @@ (read-block-list empty-indent (lambda (nodelist ch indent) nodelist) - (lambda (nodelist) (cons 'top nodelist)))) + finish-block-list)) (define* (smarxdown->shtml exp #:key (heading-offset 0) @@ -627,7 +646,14 @@ (('block-xml xml) (handle-block-xml xml)) (('para . body) `(p . ,(map transform-inline body))) (('blockquote . body) `(blockquote . ,(map transform-block body))) - (('list-item . body) (error "fixme")) + (('itemize ('item . item) ...) + `(ul . ,(map (lambda (blocks) + `(li . ,(map transform-block blocks))) + item))) + (('enumerate ('item . item) ...) + `(ol . ,(map (lambda (blocks) + `(li . ,(map transform-block blocks))) + item))) (('pre #f . body) `(pre . ,body)) (('pre info . body) `(pre (@ (class ,(string-append "pre-" info))) . ,body)) From 304f8839ec1a67df242dd9dd913341d98bb44982 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 23 Oct 2022 21:45:43 +0200 Subject: [PATCH 15/19] Hopefully fix paragraph parsing --- tekuti/marxdown.scm | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index 3ac390b..b88bc44 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -212,6 +212,7 @@ (else (kt))))) (#\newline ;; Sure. Trailing whitespace can be any indent. + (unget1 #\newline) (kt)) (#\return (lp n)) @@ -377,6 +378,16 @@ (lp elts (cons (list->string chars) out)))))) ((elt . elts) (lp elts (cons elt out)))))) + (define (consume-blank-lines-then-finish kdone) + (let lp () + (match (next) + ((? eof-object?) (finish kdone)) + (#\return (lp)) + (#\newline + (consume-indent indent lp (lambda () (finish kdone)))) + (ch + (unget1 ch) + (finish kdone))))) (match (next) ((? eof-object?) (finish on-block-end)) (#\return (lp elts)) @@ -385,7 +396,7 @@ indent (lambda () (cond - ((done? #\newline) => finish) + ((done? #\newline) => consume-blank-lines-then-finish) (else (lp (cons #\newline elts))))) (lambda () (finish on-block-end)))) @@ -421,7 +432,7 @@ (define (done? ch) (match ch (#\newline - (let ((ch (next))) + (let lp ((ch (next))) (match ch ((? eof-object?) (lambda (para) @@ -429,9 +440,13 @@ (ch (read-block-type ch #t make-continuation - (lambda (chars) - (unget chars) - #f)))))) + (if (eqv? ch #\newline) + (lambda (chars) + (unget chars) + (make-continuation read-para)) + (lambda (chars) + (unget chars) + #f))))))) (_ #f))) (read-text 'para indent done? (make-continuation read-block-list))) From ba9a560f01545f2f7d72d0b5b8f6dccbaec14b3d Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 23 Oct 2022 21:48:57 +0200 Subject: [PATCH 16/19] Fix html translation --- tekuti/marxdown.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index b88bc44..c268e6b 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -682,5 +682,5 @@ (else 'h6)))) `(,tag . ,(map transform-inline body)))))) (match exp - (('top exp ...) + (('begin exp ...) `(div . ,(map transform-block exp))))) From 17905ace4456d88ea355b4f5f9bc79a0bafb3926 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 29 Nov 2022 10:16:30 +0100 Subject: [PATCH 17/19] Fix online classifier update --- tekuti/classifier.scm | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tekuti/classifier.scm b/tekuti/classifier.scm index a8a57de..c248234 100644 --- a/tekuti/classifier.scm +++ b/tekuti/classifier.scm @@ -283,13 +283,19 @@ (assq-ref index 'master)))) (let ((removed-features (count-features removed)) (added-features (count-features added))) + ;; If a comment passes the spam filter, it gets added, and is + ;; presumed legitimate. It could then be reverted, in which + ;; case we should remove its features from the legitimate count + ;; and add them to the bogus count. (hash-for-each (lambda (k v) (remove-feature! legit-features k v) (add-feature! bogus-features k v)) removed-features) (hash-for-each (lambda (k v) - (add-feature! legit-features k v) - (remove-feature! bogus-features k v)) + ;; Asymmetry with removed-features; the + ;; comment wasn't previously marked bogus, so + ;; we just add its features to the legit set. + (add-feature! legit-features k v)) added-features) (update-bogosities! bogosities removed-features legit-features bogus-features) From f290d7c2c74868c2218a3b3cb8844fdba56d4a35 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 29 Nov 2022 10:19:11 +0100 Subject: [PATCH 18/19] Allow tt in comments, class in pre --- tekuti/filters.scm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tekuti/filters.scm b/tekuti/filters.scm index 2361010..bccf992 100644 --- a/tekuti/filters.scm +++ b/tekuti/filters.scm @@ -94,6 +94,9 @@ (define (marxdown->sxml text) (smarxdown->shtml (call-with-input-string text marxdown->smarxdown))) +(define (pre-class? class) + (string-prefix? "pre-" class)) + (define *allowed-tags* `((a (href . ,urlish?) title) (abbr title) @@ -105,9 +108,10 @@ (em) (i) (p) - (pre) + (pre (class . ,pre-class?)) (strike) - (strong))) + (strong) + (tt))) (define (compile-sxslt-rules tags) (define (ok . body) From 21793fb83ef51a522acadde51c98299f7e84b293 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sat, 10 Dec 2022 21:41:13 +0100 Subject: [PATCH 19/19] Allow \r in pre terminators --- tekuti/marxdown.scm | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tekuti/marxdown.scm b/tekuti/marxdown.scm index c268e6b..b6cd133 100644 --- a/tekuti/marxdown.scm +++ b/tekuti/marxdown.scm @@ -132,13 +132,28 @@ (unget chars)))) (define (next) (get-char port)) + (define (peek) (peek-char port)) + (define (next-not-eof ctx) (let ((ch (next))) (if (eof-object? ch) (error "EOF while reading" ctx) ch))) - (define (next-line-and-delim) (read-line port 'split)) - (define (peek) (peek-char port)) + (define (next-line-and-delim) + (let lp ((chars '())) + (define (finish delim) + (cons (reverse-list->string chars) delim)) + (let ((ch (next))) + (cond + ((eof-object? ch) (finish ch)) + ((eqv? ch #\return) + (if (eqv? (peek) #\newline) + (finish (next)) + (lp (cons ch chars)))) + ((eqv? ch #\newline) + (finish ch)) + (else + (lp (cons ch chars))))))) (define (skip-whitespace k) (let lp ((indent 0))