From e8eb7aa1763e086c0cbc8453c81332ce5c288939 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 19 Jan 2014 18:56:52 +0100 Subject: [PATCH 01/33] Work around uri-encode changing capitalization * tekuti/page.scm (make-post-key): Downcase, to cope with uri-encode from Guile changing from lower-case hexidecimals to upper-case. Otherwise older post keys wouldn't work... --- tekuti/page.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekuti/page.scm b/tekuti/page.scm index 58ad45e..e351f47 100644 --- a/tekuti/page.scm +++ b/tekuti/page.scm @@ -61,7 +61,7 @@ ;; Encoded twice, so as to form a single path-component. (define (make-post-key . parts) - (uri-encode (encode-and-join-uri-path parts))) + (string-downcase (uri-encode (encode-and-join-uri-path parts)))) (define (not-implemented request . args) (respond `((h1 "Not yet implemented") From 9d435217937963799398a036718a1d6091ed68d8 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 19 Jan 2014 20:53:28 +0100 Subject: [PATCH 02/33] Downcase when creating posts. * tekuti/post.scm (parse-post-data): Downcase when creating posts also. --- tekuti/post.scm | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tekuti/post.scm b/tekuti/post.scm index 6ba9bcf..64b8d91 100644 --- a/tekuti/post.scm +++ b/tekuti/post.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2011, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2011, 2012, 2014 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -196,10 +196,11 @@ (comment_status . ,(if comments-open? "open" "closed")) (timestamp . ,timestamp) (name . ,name) - (key . ,(uri-encode - (string-append (date->string (timestamp->date timestamp) - "~Y/~m/~d/") - (uri-encode name)))))))) + (key . ,(string-downcase + (uri-encode + (string-append (date->string (timestamp->date timestamp) + "~Y/~m/~d/") + (uri-encode name))))))))) (define (make-new-post post-data) (munge-post #f (parse-post-data post-data))) From 89da28b7527c74cbaeb61a7a8a1684a3b1441240 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 9 Oct 2014 20:38:18 +0200 Subject: [PATCH 03/33] Better mobile experience * tekuti/template.scm (templatize): Render using device width as viewport, for better mobile experience. --- tekuti/template.scm | 1 + 1 file changed, 1 insertion(+) diff --git a/tekuti/template.scm b/tekuti/template.scm index 79293ef..d9101b1 100644 --- a/tekuti/template.scm +++ b/tekuti/template.scm @@ -54,6 +54,7 @@ (head (title ,title) (meta (@ (name "Generator") (content "An unholy concoction of parenthetical guile"))) + (meta (@ (name "viewport") (content "width=device-width"))) (link (@ (rel "stylesheet") (type "text/css") (media "screen") From c087e7b6fa79a8d0a92beb9d3d74665d5ac919d2 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 15 Oct 2014 09:35:36 +0200 Subject: [PATCH 04/33] Scheme-relative URL * tekuti/template.scm (templatize): Use scheme-relative URL for tekuti. --- tekuti/template.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekuti/template.scm b/tekuti/template.scm index d9101b1..771f400 100644 --- a/tekuti/template.scm +++ b/tekuti/template.scm @@ -71,5 +71,5 @@ (div (@ (id "content")) ,@body) (div (@ (id "footer")) "powered by " - (a (@ (href "http://wingolog.org/software/tekuti/")) + (a (@ (href "//wingolog.org/software/tekuti/")) "tekuti")))))) From c631aaf35c9a9158ba5032598be8216891f1be76 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 15 Oct 2014 09:46:53 +0200 Subject: [PATCH 05/33] Allow scheme-relative URIs in urlish? * tekuti/util.scm (urlish?): Allow scheme-relative URIs. --- tekuti/util.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tekuti/util.scm b/tekuti/util.scm index e77d255..3ee0637 100644 --- a/tekuti/util.scm +++ b/tekuti/util.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2011, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2011, 2012, 2014 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -62,7 +62,7 @@ (define (urlish? x) (let ((uri (string->uri x))) (and uri - (memq (uri-scheme uri) '(http https)) + (memq (uri-scheme uri) '(#f http https)) (uri-host uri) #t))) From ab54aca59fcf18cfbb08eefe4d02a1ba3fa64e7e Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 15 Oct 2014 12:08:51 +0200 Subject: [PATCH 06/33] Support for relative URIs for https + http access. * tekuti/page-helpers.scm (ensure-uri): Rename from ensure-public-uri. A URI is absolute by definition. (ensure-uri-reference): New helper. Requires git guile. (respond): Use ensure-URI-reference to get the reference. (atom-header, atom-entry): Use ensure-uri. * tekuti/page.scm (atom-feed-from-posts): Pass the URI scheme from the request-uri when reconstructing * tekuti/config.scm (*public-scheme*): New config variable, defaulting to http. --- tekuti/config.scm | 5 +++-- tekuti/page-helpers.scm | 22 ++++++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/tekuti/config.scm b/tekuti/config.scm index 65fd182..27b6fa6 100644 --- a/tekuti/config.scm +++ b/tekuti/config.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2012, 2014 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -27,13 +27,14 @@ (define-module (tekuti config) #:use-module (tekuti util) #:use-module ((sxml ssax) #:select (define-parsed-entity!)) - #:export (*public-host* *public-port* *public-path-base* + #:export (*public-scheme* *public-host* *public-port* *public-path-base* *private-host* *private-port* *private-path-base* *git-dir* *git* *debug* *admin-user* *admin-pass* *css-file* *navbar-links* *navbar-infix* *title* *subtitle* *name* *server-impl* *server-impl-args*)) +(define *public-scheme* 'http) (define *public-host* "127.0.0.1") (define *public-port* 8080) (define *public-path-base* '()) diff --git a/tekuti/page-helpers.scm b/tekuti/page-helpers.scm index ab0a5e7..47f8cc4 100644 --- a/tekuti/page-helpers.scm +++ b/tekuti/page-helpers.scm @@ -65,13 +65,23 @@ ((_ tail) tail))) -(define (ensure-public-uri x) +(define (ensure-uri x) (cond ((uri? x) x) ((string? x) - (build-uri 'http #:host *public-host* #:port *public-port* #:path x)) + (build-uri *public-scheme* #:host *public-host* #:port *public-port* + #:path x)) ((list? x) - (ensure-public-uri (relurl x))) + (ensure-uri (relurl x))) + (else (error "can't turn into a uri" x)))) + +(define (ensure-uri-reference x) + (cond + ((uri? x) x) + ((string? x) + (build-uri-reference #:path x)) + ((list? x) + (ensure-uri-reference (relurl x))) (else (error "can't turn into a uri" x)))) (define* (respond #:optional body #:key @@ -88,7 +98,7 @@ (values (build-response #:code status #:headers (build-headers - location (and=> redirect ensure-public-uri) + location (and=> redirect ensure-uri-reference) last-modified last-modified content-type (cons content-type content-type-params) date (current-date) @@ -377,7 +387,7 @@ (define (atom-header last-modified) (define (relurl . tail) - (uri->string (ensure-public-uri tail))) + (uri->string (ensure-uri tail))) `(feed (@ (xmlns "http://www.w3.org/2005/Atom") (xml:base ,(relurl))) (title (@ (type "text")) ,*title*) @@ -396,7 +406,7 @@ (define (atom-entry post) (define (relurl . tail) - (uri->string (ensure-public-uri tail))) + (uri->string (ensure-uri tail))) `(entry (author (name ,*name*) (uri ,(relurl))) (title (@ (type "text")) ,(post-title post)) From 6d491994f7fb60bac59ee006e08e4e34f0714d47 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 22 Oct 2014 15:29:28 +0200 Subject: [PATCH 07/33] ensure-uri-reference works with older Guiles * tekuti/page-helpers.scm (ensure-uri-reference): Fix to work with Guile without build-uri-reference. --- tekuti/page-helpers.scm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tekuti/page-helpers.scm b/tekuti/page-helpers.scm index 47f8cc4..5c68013 100644 --- a/tekuti/page-helpers.scm +++ b/tekuti/page-helpers.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2012, 2014 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -79,7 +79,10 @@ (cond ((uri? x) x) ((string? x) - (build-uri-reference #:path x)) + (if (defined? 'build-uri-reference) + (build-uri-reference #:path x) + ;; Absolute URIs on older Guile. + (ensure-uri x))) ((list? x) (ensure-uri-reference (relurl x))) (else (error "can't turn into a uri" x)))) From 68e7b82b21ebe0c6deca08fb65f98198e86fd07b Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 22 Oct 2014 15:46:35 +0200 Subject: [PATCH 08/33] Add example config * example-config: New file --- example-config | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 example-config diff --git a/example-config b/example-config new file mode 100644 index 0000000..8db9eea --- /dev/null +++ b/example-config @@ -0,0 +1,17 @@ +;; -*- scheme -*- +(set! *navbar-links* + '(("about" . "/about/") + ("cat pictures" . "/cat-pictures/"))) +(set! *navbar-infix* " | ") +(set! *public-host* "example.com") +(set! *public-port* 80) +(set! *public-path-base* '()) +(set! *private-host* "127.0.0.1") +(set! *private-port* 8080) +(set! *private-path-base* '()) + +(define *admin-pass* "password") +(define *title* "kittens") +(define *subtitle* "all about kittens") +(define *name* "Jane Doe") + From e45cd521f92f0f96da7e67aef6f885ed3251e901 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 22 Oct 2014 15:50:31 +0200 Subject: [PATCH 09/33] Add example nginx config. * nginx-config: New file. --- nginx-config | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 nginx-config diff --git a/nginx-config b/nginx-config new file mode 100644 index 0000000..311bb2e --- /dev/null +++ b/nginx-config @@ -0,0 +1,24 @@ +server { + listen 80 default_server; + listen [::]:80 default_server ipv6only=on; + + root /var/www/example.com; + index index.html; + + # Make site accessible from http://example.com/ + server_name example.com; + + location = / { proxy_pass http://127.0.0.1:8080/; } + location /archives { proxy_pass http://127.0.0.1:8080/archives; } + location /feed { proxy_pass http://127.0.0.1:8080/feed; } + location /search { proxy_pass http://127.0.0.1:8080/search; } + location /tags { proxy_pass http://127.0.0.1:8080/tags; } + location /debug { proxy_pass http://127.0.0.1:8080/debug; } + location /admin { proxy_pass http://127.0.0.1:8080/admin; } + + location / { + # First attempt to serve request as file, then + # as directory, then fall back to displaying a 404. + try_files $uri $uri/ =404; + } +} From 7ecade64bd4f7272d1bc8bf3c6058cae1a02b2b3 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 22 Oct 2014 16:14:59 +0200 Subject: [PATCH 10/33] Fix expanduser on absolute paths. * tekuti/util.scm (expanduser): Fix when given absolute paths. --- tekuti/util.scm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tekuti/util.scm b/tekuti/util.scm index 3ee0637..2a6d6fb 100644 --- a/tekuti/util.scm +++ b/tekuti/util.scm @@ -103,7 +103,8 @@ (define (expanduser path) (let ((parts (string-split path #\/))) - (if (eqv? (string-ref (car parts) 0) #\~) + (if (and (not (string-null? (car parts))) + (eqv? (string-ref (car parts) 0) #\~)) (let ((user (if (= (string-length (car parts)) 1) (passwd:name (getpwuid (geteuid))) (substring (car parts) 1)))) From 20e765829504cbbbb5e3652dc1d5ee559a553ad0 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 17 Dec 2014 11:05:07 +0100 Subject: [PATCH 11/33] Add ability to share draft posts As a side effect of this change, any pending posts in your repository that are in "draft" status will effectively move into the new "draft" state, which means they can be reached by their URL. That's not a big deal though because those URLs are not trivial to guess. * tekuti/page-helpers.scm (post-editing-form): Add radio button to choose post status. * tekuti/post.scm (post-public?, post-draft?, post-private?): New accessors for post status. (post-from-key): Add #:allow-draft? keyword argument. * tekuti/page.scm (page-show-post): Allow direct access to draft posts. --- tekuti/page-helpers.scm | 27 +++++++++++++++++++-------- tekuti/page.scm | 3 ++- tekuti/post.scm | 17 +++++++++++++---- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/tekuti/page-helpers.scm b/tekuti/page-helpers.scm index 5c68013..b4606e2 100644 --- a/tekuti/page-helpers.scm +++ b/tekuti/page-helpers.scm @@ -177,7 +177,7 @@ "")))) (label (@ (for "tags")) " <- tags, comma-separated")) (p (input (@ (name "date") (type "text") - (value ,(if (and=> post post-published?) + (value ,(if (and=> post post-public?) (timestamp->rfc822-date (post-timestamp post)) "")))) (label (@ (for "date")) " <- date (empty == now)")) @@ -187,16 +187,27 @@ (label (@ (for "comments")) " comments open?")) (div (textarea (@ (name "body") (rows "20") (cols "60")) ,(if post (post-raw-content post) ""))) - (input (@ (type "submit") (name "status") - (value "publish"))) - " " - (input (@ (type "submit") (name "status") - (value "draft")))) + (p (label (input (@ (type "radio") (name "status") (value "private") + ,@(if (or (not post) (post-private? post)) + '((checked "checked")) + '()))) + "Private (Only visible to admin)") (br) + (label (input (@ (type "radio") (name "status") (value "draft") + ,@(if (or (not post) (post-draft? post)) + '((checked "checked")) + '()))) + "Draft (Only accessible via direct link)") (br) + (label (input (@ (type "radio") (name "status") (value "publish") + ,@(if (or (not post) (post-public? post)) + '((checked "checked")) + '()))) + "Public") (br)) + (p (input (@ (type "submit") + (value ,(if post "Modify post" "New post")))))) ,@(if post `((form (@ (method "POST") (action ,(relurl `("admin" "delete-post" ,(post-key post))))) - " " - (input (@ (type "submit") (name "delete") (value "delete")))) + (p (input (@ (type "submit") (name "delete") (value "delete"))))) ,@(let ((l (comments-sxml-content-edit post))) (if (null? l) l `((h2 "comments") diff --git a/tekuti/page.scm b/tekuti/page.scm index e351f47..fe8294f 100644 --- a/tekuti/page.scm +++ b/tekuti/page.scm @@ -193,7 +193,8 @@ (define (page-show-post request body index year month day post) (cond - ((post-from-key index (make-post-key year month day post)) + ((post-from-key index (make-post-key year month day post) + #:allow-draft? #t) => (lambda (post) (respond `(,(post-sidebar post index) ,(show-post post #t)) diff --git a/tekuti/post.scm b/tekuti/post.scm index 64b8d91..62d8fcb 100644 --- a/tekuti/post.scm +++ b/tekuti/post.scm @@ -37,7 +37,8 @@ #:use-module (srfi srfi-19) #:export (post-from-key - post-tags post-timestamp post-key post-published? + post-tags post-timestamp post-key + post-public? post-draft? post-private? post-comments-open? post-comments post-sxml-content post-readable-date post-n-comments post-raw-content @@ -79,9 +80,11 @@ ;;; pulling posts out of the index ;;; -(define* (post-from-key index key #:key allow-unpublished?) +(define* (post-from-key index key #:key allow-unpublished? allow-draft?) (let ((post (hash-ref (assq-ref index 'posts) key))) - (if (and post (or (post-published? post) allow-unpublished?)) + (if (and post (or (post-public? post) + (and (post-draft? post) allow-draft?) + allow-unpublished?)) post #f))) @@ -89,9 +92,15 @@ ;;; accessors ;;; -(define (post-published? post-alist) +(define (post-public? post-alist) (equal? (assq-ref post-alist 'status) "publish")) +(define (post-draft? post-alist) + (equal? (assq-ref post-alist 'status) "draft")) + +(define (post-private? post-alist) + (equal? (assq-ref post-alist 'status) "private")) + (define (post-timestamp post-alist) (assq-ref post-alist 'timestamp)) From acc13819e18fdf02e1805fae9840db81b982381c Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 17 Dec 2014 11:14:54 +0100 Subject: [PATCH 12/33] Minor aesthetic fixups * tekuti/page-helpers.scm (post-editing-form): Aesthetic fixups. --- tekuti/page-helpers.scm | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tekuti/page-helpers.scm b/tekuti/page-helpers.scm index b4606e2..8b1e0ea 100644 --- a/tekuti/page-helpers.scm +++ b/tekuti/page-helpers.scm @@ -191,23 +191,30 @@ ,@(if (or (not post) (post-private? post)) '((checked "checked")) '()))) - "Private (Only visible to admin)") (br) + "private (only visible to admin)") (br) (label (input (@ (type "radio") (name "status") (value "draft") ,@(if (or (not post) (post-draft? post)) '((checked "checked")) '()))) - "Draft (Only accessible via direct link)") (br) + "draft (only accessible via " + ,(if post + `(a (@ (href ,(post-url post))) + ,(post-url post)) + "direct link") + ")") + + (br) (label (input (@ (type "radio") (name "status") (value "publish") ,@(if (or (not post) (post-public? post)) '((checked "checked")) '()))) - "Public") (br)) + "public")) (p (input (@ (type "submit") - (value ,(if post "Modify post" "New post")))))) + (value ,(if post "modify post" "new post")))))) ,@(if post `((form (@ (method "POST") (action ,(relurl `("admin" "delete-post" ,(post-key post))))) - (p (input (@ (type "submit") (name "delete") (value "delete"))))) + (p (input (@ (type "submit") (value "delete post"))))) ,@(let ((l (comments-sxml-content-edit post))) (if (null? l) l `((h2 "comments") From cf10aa7056d6f53f84e74f9a687b1b2291c61cc4 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 17 Dec 2014 11:19:49 +0100 Subject: [PATCH 13/33] New posts start in private status * tekuti/page-helpers.scm (post-editing-form): Fix status of new posts. --- tekuti/page-helpers.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tekuti/page-helpers.scm b/tekuti/page-helpers.scm index 8b1e0ea..329d420 100644 --- a/tekuti/page-helpers.scm +++ b/tekuti/page-helpers.scm @@ -193,7 +193,7 @@ '()))) "private (only visible to admin)") (br) (label (input (@ (type "radio") (name "status") (value "draft") - ,@(if (or (not post) (post-draft? post)) + ,@(if (and post (post-draft? post)) '((checked "checked")) '()))) "draft (only accessible via " @@ -205,7 +205,7 @@ (br) (label (input (@ (type "radio") (name "status") (value "publish") - ,@(if (or (not post) (post-public? post)) + ,@(if (and post (post-public? post)) '((checked "checked")) '()))) "public")) From 7643585d124e8568a755eb527efdc0c5fef669f2 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 28 Aug 2016 10:40:36 +0200 Subject: [PATCH 14/33] Fix 304 edge case in atom feed * tekuti/page.scm (atom-feed-from-posts): Don't bomb if the client sends a if-modified-since but we have no posts. --- tekuti/page.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tekuti/page.scm b/tekuti/page.scm index fe8294f..9d73637 100644 --- a/tekuti/page.scm +++ b/tekuti/page.scm @@ -322,7 +322,7 @@ (post-timestamp (car posts))))) (cond ((let ((since (request-if-modified-since request))) - (and since (>= (date->timestamp since) last-modified))) + (and since last-modified (>= (date->timestamp since) last-modified))) (respond #f #:status 304 #:last-modified (timestamp->date last-modified) #:etag (assq-ref index 'master))) From e700e4af7a0a17b070fa86b2582f0d5ae18eff68 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Sun, 28 Aug 2016 10:58:34 +0200 Subject: [PATCH 15/33] Fix spurious 304 error * tekuti/cache.scm (make-entry): Only return a cached not-modified response if the request was conditional. --- tekuti/cache.scm | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tekuti/cache.scm b/tekuti/cache.scm index f7ef6a6..7e7d649 100644 --- a/tekuti/cache.scm +++ b/tekuti/cache.scm @@ -72,17 +72,21 @@ (lambda (request) (and (equal? (request-uri request) uri) (eq? (request-method request) method) - (let ((last-modified (response-last-modified response)) + (let ((request-etags (request-if-none-match request)) (since (request-if-modified-since request))) - (if (and last-modified since) - (time<=? (date->time-utc last-modified) - (date->time-utc since)) - #t)) - (let ((etag (response-etag response)) - (match (request-if-none-match request))) - (if (and etag match) - (and (list? match) (member etag match)) - #t)) + (and + ;; Only return a 304 if the request is conditional. + (or request-etags since) + ;; If the request specifies an etag set, honor it. + (or (not request-etags) + (and (list? request-etags) + (member (response-etag response) request-etags))) + ;; Likewise for if-modified-since. + (or (not since) + (let ((last-modified (response-last-modified response))) + (and last-modified + (time<=? (date->time-utc last-modified) + (date->time-utc since))))))) (cons response body)))) ((200) (lambda (request) From 01ca951b7615b0d282fd1504ab12b3775408de72 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 2 Mar 2017 15:28:55 +0000 Subject: [PATCH 16/33] Improve security question. Fix reindexing. --- tekuti/comment.scm | 11 +++++++---- tekuti/git.scm | 2 +- tekuti/index.scm | 24 ++++++++++++++---------- tekuti/page-helpers.scm | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/tekuti/comment.scm b/tekuti/comment.scm index dda9388..2993b35 100644 --- a/tekuti/comment.scm +++ b/tekuti/comment.scm @@ -30,6 +30,7 @@ #:use-module (tekuti util) #:use-module (tekuti filters) #:use-module (tekuti post) + #:use-module (ice-9 match) #:use-module (srfi srfi-1) #:use-module (srfi srfi-19) #:use-module (sxml transform) @@ -95,10 +96,12 @@ `(p "Bad URL. (Only http and https are allowed.)"))) (define (bad-number? x) - (if (string->number x) - #f - '(p "Bad number. Give me something that Scheme's " - (tt "string->number") " will like."))) + (match (string->number x) + (#f '(p "Bad number. Give me something that Scheme's " + (tt "string->number") " will like.")) + (n (if (and (real? n) (<= 34 n 42)) + #f + '(p "Number not between 34 and 42."))))) (define *new-comment-spec* `(("author" ,(lambda (x) #f)) diff --git a/tekuti/git.scm b/tekuti/git.scm index 3c9df31..9fc1f63 100644 --- a/tekuti/git.scm +++ b/tekuti/git.scm @@ -171,7 +171,7 @@ (let ((s (string-trim-both (call-with-input-file rev read-line)))) (and (= (string-length s) 40) s))) - (git "rev-parse" rev))) + (string-trim-both (git "rev-parse" rev)))) (define (git-rev-list rev n) (let lp ((lines (string-split diff --git a/tekuti/index.scm b/tekuti/index.scm index 4d7d4fe..df02f89 100644 --- a/tekuti/index.scm +++ b/tekuti/index.scm @@ -25,6 +25,7 @@ ;;; Code: (define-module (tekuti index) + #:use-module (ice-9 match) #:use-module ((srfi srfi-1) #:select (fold)) #:use-module (system repl error-handling) #:use-module (tekuti util) @@ -80,16 +81,19 @@ new)) (define (read-index) - (let* ((ref (false-if-git-error (git-rev-parse "refs/heads/index"))) - (dents (if ref (git-ls-tree ref #f) '()))) - (acons 'index ref - (and (and-map (lambda (spec) - (assoc (symbol->string (car spec)) dents)) - index-specs) - (map (lambda (dent) - (cons (string->symbol (car dent)) - (blob->index (car dent) (cadr dent)))) - dents))))) + (match (false-if-git-error (git-rev-parse "refs/heads/index")) + (#f (maybe-reindex '())) + (ref + (let ((dents (git-ls-tree ref #f))) + (if (and-map (lambda (spec) + (assoc (symbol->string (car spec)) dents)) + index-specs) + (acons 'index ref + (map (lambda (dent) + (cons (string->symbol (car dent)) + (blob->index (car dent) (cadr dent)))) + dents)) + (maybe-reindex (acons 'index ref '()))))))) (define (maybe-reindex old-index) (let ((master (git-rev-parse "refs/heads/master"))) diff --git a/tekuti/page-helpers.scm b/tekuti/page-helpers.scm index 329d420..fce30e4 100644 --- a/tekuti/page-helpers.scm +++ b/tekuti/page-helpers.scm @@ -257,7 +257,7 @@ " " (label (@ (for "url")) (small "Website"))) (p (input (@ (type "text") (name "x") (value "") (size "22") (tabindex "3"))) - " " (label (@ (for "x")) (small "What's your favorite number?"))) + " " (label (@ (for "x")) (small "What's a number between 34 and 42?"))) ;(p (small "allowed tags: ")) (p (textarea (@ (name "comment") (id "comment") (cols "65") (rows "10") (tabindex "4")) From 42b77efe6af4c7fa7186764e1ee3ca99da41914f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 3 Mar 2017 17:13:36 +0100 Subject: [PATCH 17/33] Neutralize links in comments. --- tekuti/comment.scm | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tekuti/comment.scm b/tekuti/comment.scm index 2993b35..894bdb0 100644 --- a/tekuti/comment.scm +++ b/tekuti/comment.scm @@ -65,6 +65,16 @@ (define (comment-raw-content comment) (assq-ref comment 'raw-content)) +(define (neutralize-links sxml) + (pre-post-order + sxml + `((a . ,(lambda sxml + (match sxml + (('a ('@ . attrs) . body) + `(a (@ (rel "external nofollow") . ,attrs) . ,body))))) + (*default* . ,(lambda sxml sxml)) + (*text* . ,(lambda (tag text) text))))) + (define (comment-sxml-content comment) `(li (@ (class "alt") (id ,(assq-ref comment 'key))) (cite ,(let ((url (assq-ref comment 'author_url)) @@ -76,11 +86,12 @@ (small (@ (class "commentmetadata")) (a (@ (href ,(string-append "#" (assq-ref comment 'key)))) ,(comment-readable-date comment))) - ,(let ((format (or (assq-ref comment 'format) 'wordpress))) - ((case format - ((wordpress) wordpress->sxml) - (else (lambda (text) `(pre ,text)))) - (comment-raw-content comment))))) + ,(neutralize-links + (let ((format (or (assq-ref comment 'format) 'wordpress))) + ((case format + ((wordpress) wordpress->sxml) + (else (lambda (text) `(pre ,text)))) + (comment-raw-content comment)))))) (define (comment-timestamp comment-alist) (or (assq-ref comment-alist 'timestamp) #f)) From 5e603daa9ea232a35397ab74f87260eebe727a97 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 6 Mar 2017 09:25:39 +0100 Subject: [PATCH 18/33] Add bogus comment detector. * tekuti/classifier.scm: New file. * Makefile.am: Add new file. * tekuti/comment.scm: * tekuti/git.scm: * tekuti/index.scm: * tekuti/page.scm: Update to include bogus comment detector. --- Makefile.am | 1 + tekuti/classifier.scm | 302 ++++++++++++++++++++++++++++++++++++++++++ tekuti/comment.scm | 94 ++++++++++--- tekuti/git.scm | 18 ++- tekuti/index.scm | 79 ++++++----- tekuti/page.scm | 29 +++- 6 files changed, 453 insertions(+), 70 deletions(-) create mode 100644 tekuti/classifier.scm diff --git a/Makefile.am b/Makefile.am index 521c995..97e9643 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,6 +4,7 @@ SOURCES = \ tekuti/base64.scm \ tekuti/boot.scm \ tekuti/cache.scm \ + tekuti/classifier.scm \ tekuti/comment.scm \ tekuti/config.scm \ tekuti/filters.scm \ diff --git a/tekuti/classifier.scm b/tekuti/classifier.scm new file mode 100644 index 0000000..6c8ba9a --- /dev/null +++ b/tekuti/classifier.scm @@ -0,0 +1,302 @@ +;; Tekuti +;; Copyright (C) 2008, 2010, 2012 Andy Wingo + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 3 of +;; the License, or (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program; if not, contact: +;; +;; Free Software Foundation Voice: +1-617-542-5942 +;; 59 Temple Place - Suite 330 Fax: +1-617-542-2652 +;; Boston, MA 02111-1307, USA gnu@gnu.org + +;;; Commentary: +;; +;; Comments -- pulling them out of the database, and making new ones. +;; +;;; Code: + +(define-module (tekuti classifier) + #:use-module (tekuti comment) + #:use-module (tekuti filters) + #:use-module (tekuti git) + #:use-module (tekuti util) + #:use-module (ice-9 match) + #:use-module (srfi srfi-11) + #:use-module (srfi srfi-9) + #:export (reindex-legit-comments + reindex-bogus-comments + reindex-classifier + comment-is-bogus?)) + +(define (tokenize-comment comment) + (define (decorate-tokens decorator tokens) + (map (lambda (token) (string-append decorator token)) tokens)) + (define (tokenize exp) + (match exp + ((or () #f) '()) + ((? string?) (string-tokenize exp char-set:letter+digit)) + (((or 'comment 'div 'p) . body) + (tokenize body)) + (((? symbol? tag) . body) + (decorate-tokens (string-append (symbol->string tag) ".") + (tokenize body))) + ((head . tail) (append (tokenize head) (tokenize tail))) + (_ (pk 'what exp comment) '()))) + (tokenize + `(comment + (author ,(assq-ref comment 'author)) + (email ,(assq-ref comment 'author_email)) + (url ,(assq-ref comment 'author_url)) + ,(let ((format (or (assq-ref comment 'format) 'wordpress)) + (raw (assq-ref comment 'raw-content))) + (or (case format + ((wordpress) (false-if-exception (wordpress->sxml raw))) + + (else `(pre ,raw))) + `(pre ,raw)))))) + +(define (fold-features comment f seed) + (let lp ((tokens (tokenize-comment comment)) (seed seed)) + (define (make-3-gram a b c) + (string-append a " " (or b "") " " (or c ""))) + (define (make-2-gram a b) + (string-append a " " (or b ""))) + (define (make-1-gram a) + a) + (match tokens + ((a . tokens) + (lp tokens + (or (f (match tokens + ((b c . _) (make-3-gram a b c)) + ((b) (make-3-gram a b #f)) + (() (make-3-gram a #f #f))) + (or (f (match tokens + ((b . _) (make-2-gram a b)) + (() (make-2-gram a #f))) + (f (make-1-gram a) + seed)) + seed)) + (f (make-1-gram a) + seed)))) + (() seed)))) + +(define (count-features comments) + (let ((counts (make-hash-table))) + (hash-for-each + (lambda (comment-sha1 comment-name) + (fold-features (blob->comment comment-name comment-sha1) + (lambda (feature counts) + (add-feature! counts feature) + counts) + counts)) + comments) + counts)) + +;; A feature's bogosity is the probability that a bogus document +;; contains that feature, divided by the probability that a legit +;; document contains the feature. +(define (compute-log-bogosities legit-features bogus-features) + (define (feature-count table) + (hash-fold (lambda (feature count sum) (+ count sum)) 0 table)) + (let ((total-bogus-features (feature-count bogus-features)) + (total-legit-features (feature-count legit-features)) + (log-bogosities (make-hash-table))) + (hash-for-each + (lambda (feature bogus-count) + (let ((legit-count (hash-ref legit-features feature 0))) + (hash-set! log-bogosities feature + (log (/ (/ (+ bogus-count 0.001) total-bogus-features) + (/ (+ legit-count 0.001) total-legit-features)))))) + bogus-features) + (hash-for-each + (lambda (feature legit-count) + (let ((bogus-count (hash-ref bogus-features feature))) + (unless bogus-count + (hash-set! log-bogosities feature + (log (/ (/ 0.01 total-bogus-features) + (/ (+ legit-count 0.01) total-legit-features))))))) + legit-features) + log-bogosities)) + +(define (update-bogosities! log-bogosities changed-features + legit-features bogus-features) + (define (feature-count table) + (hash-fold (lambda (feature count sum) (+ count sum)) 0 table)) + (let ((total-bogus-features (feature-count bogus-features)) + (total-legit-features (feature-count legit-features))) + (hash-for-each + (lambda (feature _) + (let ((bogus-count (hash-ref bogus-features feature 0)) + (legit-count (hash-ref legit-features feature 0))) + (hash-set! log-bogosities feature + (log (/ (/ (+ bogus-count 0.001) total-bogus-features) + (/ (+ legit-count 0.001) total-legit-features)))))) + changed-features))) + +(define (compute-bogus-probability comment log-bogosities bogus-prior + feature-limit) + (let ((v (make-vector feature-limit 0.0))) + (define (add-bogosity! log-bogosity) + (let ((mag (abs log-bogosity))) + (when (< (abs (vector-ref v 0)) mag) + (let lp ((idx 0)) + (let ((next (1+ idx))) + (cond + ((and (< next (vector-length v)) + (< (abs (vector-ref v next)) mag)) + (vector-set! v idx (vector-ref v next)) + (lp (1+ idx))) + (else + (vector-set! v idx log-bogosity)))))))) + (fold-features comment + (lambda (feature _) + (add-bogosity! (hash-ref log-bogosities feature 0.0))) + #f) + (let* ((ratio (exp (+ (log (/ bogus-prior (- 1.0 bogus-prior))) + (apply + (vector->list v)))))) + (/ ratio (+ ratio 1.0))))) + +(define (compute-differing-comments old-master new-master) + ;; sha1 -> name + (define (compute-hash-diff old new) + (let ((removed (make-hash-table)) + (added (make-hash-table))) + (hash-for-each (lambda (post-sha1 post-name) + (unless (hash-ref new post-sha1) + (hash-set! removed post-sha1 post-name))) + old) + (hash-for-each (lambda (post-sha1 post-name) + (unless (hash-ref old post-sha1) + (hash-set! added post-sha1 post-name))) + new) + (values removed added))) + (define* (git-ls-tree->hash ref kind #:optional (out (make-hash-table))) + (for-each + (match-lambda + ((name sha1 (? (lambda (x) (eq? x kind)))) + (hash-set! out sha1 name))) + (git-ls-tree ref #f)) + out) + (let ((old-post-trees (git-ls-tree->hash old-master 'tree)) + (new-post-trees (git-ls-tree->hash new-master 'tree))) + (let-values (((removed-post-trees added-post-trees) + (compute-hash-diff old-post-trees new-post-trees))) + (let ((old-comments (make-hash-table)) + (new-comments (make-hash-table))) + (hash-for-each + (lambda (sha1 name) + (git-ls-tree->hash (string-append sha1 ":comments") 'blob + old-comments)) + removed-post-trees) + (hash-for-each + (lambda (sha1 name) + (git-ls-tree->hash (string-append sha1 ":comments") 'blob + new-comments)) + added-post-trees) + (compute-hash-diff old-comments new-comments))))) + +(define (reindex-legit-comments old-index index) + (let ((old-legit-comments (assq-ref old-index 'legit-comments)) + (old-master (assq-ref old-index 'master)) + (new-master (assq-ref index 'master))) + (if old-legit-comments + (let-values (((removed added) + (compute-differing-comments old-master new-master))) + (hash-for-each (lambda (k v) + (hash-remove! old-legit-comments k)) + removed) + (hash-for-each (lambda (k v) + (hash-set! old-legit-comments k v)) + added) + old-legit-comments) + (compute-legit-comments new-master)))) + +(define (reindex-bogus-comments old-index index) + (let ((old-bogus-comments (assq-ref old-index 'bogus-comments)) + (old-classifier (assq-ref old-index 'classifier)) + (old-master (assq-ref old-index 'master)) + (new-master (assq-ref index 'master))) + (if old-bogus-comments + (let-values (((removed added) + (compute-differing-comments old-master new-master))) + (hash-for-each (lambda (k v) + (hash-remove! old-bogus-comments k)) + added) + (hash-for-each (lambda (k v) + (hash-set! old-bogus-comments k v)) + removed) + old-bogus-comments) + (compute-bogus-comments new-master (assq-ref index 'legit-comments))))) + +(define-record-type + (make-classifier legit-prior legit-features bogus-features bogosities) + classifier? + (legit-prior classifier-legit-prior) + (legit-features classifier-legit-features) + (bogus-features classifier-bogus-features) + (bogosities classifier-bogosities)) + +(define (rebuild-classifier legit-comments bogus-comments) + (format #t "Rebuilding bogus comment classifier...\n") + (with-time-debugging + (let* ((legit-count (hash-count (const #t) legit-comments)) + (bogus-count (hash-count (const #t) bogus-comments)) + (legit-prior (/ legit-count (+ legit-count bogus-count 0.0))) + (legit-features (count-features legit-comments)) + (bogus-features (count-features bogus-comments)) + (bogosities (compute-log-bogosities legit-features bogus-features))) + (make-classifier legit-prior legit-features bogus-features bogosities)))) + +(define* (add-feature! features feature #:optional (count 1)) + (let ((h (hash-create-handle! features feature 0))) + (set-cdr! h (+ (cdr h) count)))) + +(define* (remove-feature! features feature #:optional (count 1)) + (let ((h (hash-get-handle features feature))) + (when h + (set-cdr! h (- (cdr h) count))))) + +(define (reindex-classifier old-index index) + (match (assq-ref old-index 'classifier) + (#f + (rebuild-classifier (assq-ref index 'legit-comments) + (assq-ref index 'bogus-comments))) + ((and classifier + ($ legit-prior legit-features bogus-features bogosities)) + (let-values (((removed added) + (compute-differing-comments (assq-ref old-index 'master) + (assq-ref index 'master)))) + (let ((removed-features (count-features removed)) + (added-features (count-features added))) + (hash-for-each (lambda (k v) + (remove-feature! legit-features k v) + (add-feature! bogus-features k v)) + removed-features) + (hash-for-each (lambda (k v) + (add-feature! legit-features k v) + (remove-feature! bogus-features k v)) + added-features) + (update-bogosities! bogosities removed-features + legit-features bogus-features) + (update-bogosities! bogosities added-features + legit-features bogus-features))) + classifier))) + +(define* (comment-bogus-probability classifier comment #:key (feature-limit 20)) + (let* ((log-bogosities (classifier-bogosities classifier)) + (bogus-prior (- 1.0 (classifier-legit-prior classifier)))) + (compute-bogus-probability comment log-bogosities + bogus-prior feature-limit))) + +(define* (comment-is-bogus? index comment #:key (threshold 0.5)) + (let ((classifier (assq-ref index 'classifier))) + (> (comment-bogus-probability classifier comment) threshold))) diff --git a/tekuti/comment.scm b/tekuti/comment.scm index 894bdb0..db3e3e8 100644 --- a/tekuti/comment.scm +++ b/tekuti/comment.scm @@ -37,7 +37,8 @@ #:use-module (tekuti match-bind) #:export (blob->comment comment-sxml-content comment-timestamp comment-readable-date bad-new-comment-post? - make-new-comment delete-comment)) + parse-new-comment make-new-comment delete-comment + compute-legit-comments compute-bogus-comments)) (define *comment-spec* `((timestamp . ,string->number))) @@ -137,31 +138,38 @@ (define de-newline (s///g "[\n\r]" " ")) -(define (make-new-comment key title post-data) +(define (parse-new-comment post-data) (let ((content (assoc-ref post-data "comment")) (author (assoc-ref post-data "author")) (email (assoc-ref post-data "email")) (url (assoc-ref post-data "url"))) - (let ((sha1 (with-output-to-blob - (for-each - (lambda (pair) - (format #t "~a: ~a\n" (car pair) (cdr pair))) - `((timestamp . ,(time-second (current-time))) - (author . ,(de-newline author)) - (author_email . ,email) - (author_url . ,url))) - (display "\n") - (display content))) - (message (format #f "comment on \"~a\" by ~a" title author))) - (git-update-ref - "refs/heads/master" - (lambda (master) - (git-commit-tree (munge-tree1 master - 'create - (list key "comments") - (list sha1 sha1 'blob)) - master message #f)) - 5)))) + `((timestamp . ,(time-second (current-time))) + (author . ,(de-newline author)) + (author_email . ,email) + (author_url . ,url) + (raw-content . ,content)))) + +(define (make-new-comment key title comment) + (let ((sha1 (with-output-to-blob + (for-each + (match-lambda + ((k . v) + (unless (eq? k 'raw-content) + (format #t "~a: ~a\n" k v)))) + comment) + (display "\n") + (display (assq-ref comment 'raw-content)))) + (message (format #f "comment on \"~a\" by ~a" title + (assq-ref comment 'author)))) + (git-update-ref + "refs/heads/master" + (lambda (master) + (git-commit-tree (munge-tree1 master + 'create + (list key "comments") + (list sha1 sha1 'blob)) + master message #f)) + 5))) (define (delete-comment post id) (let ((key (post-key post)) @@ -174,3 +182,45 @@ `(,id)) master message #f)) 5))) + +(define (compute-legit-comments master-ref) + ;; sha1 -> #t + (define legit (make-hash-table)) + (pk 'computing-legit) + (for-each + (match-lambda + ((post-name post-sha1 'tree) + (for-each + (match-lambda + ((comment-name comment-sha1 'blob) + (hash-set! legit comment-sha1 comment-name))) + (git-ls-tree (string-append post-sha1 ":comments") #f)))) + (git-ls-tree master-ref #f)) + (pk 'done legit)) + +(define (compute-bogus-comments master-ref legit) + ;; sha1 -> #t + (define visited-trees (make-hash-table)) + (define bogus (make-hash-table)) + (pk 'computing-bogus) + (fold-commits + (lambda (rev commit _) + (pk 'computing-bogus rev) + (for-each + (match-lambda + ((post-name post-sha1 'tree) + (unless (hash-ref visited-trees post-sha1) + (hash-set! visited-trees post-sha1 #t) + (for-each + (match-lambda + ((comment-name comment-sha1 'blob) + (unless (or (hash-ref legit comment-sha1) + (hash-ref bogus comment-sha1)) + (hash-set! bogus comment-sha1 comment-name))) + (_ #f)) + (git-ls-tree (string-append post-sha1 ":comments") #f)))) + (_ #f)) + (git-ls-tree (assq-ref commit 'tree) #f))) + (assq-ref (parse-commit master-ref) 'parent) + #f) + bogus) diff --git a/tekuti/git.scm b/tekuti/git.scm index 9fc1f63..8324344 100644 --- a/tekuti/git.scm +++ b/tekuti/git.scm @@ -27,6 +27,7 @@ (define-module (tekuti git) #:use-module (ice-9 rdelim) #:use-module (ice-9 popen) + #:use-module (ice-9 textual-ports) #:use-module (tekuti util) #:use-module (tekuti config) #:use-module (tekuti match-bind) @@ -44,6 +45,7 @@ git-commit-tree git-rev-list git-revert munge-tree munge-tree1 parse-commit commit-utc-timestamp + fold-commits with-output-to-blob with-input-from-blob)) @@ -77,7 +79,7 @@ (define (prepend-env args) (if (null? env) args - (cons "/usr/bin/env" (append env args)))) + (cons "/run/current-system/profile/bin/env" (append env args)))) (define (redirect-input args) (if input-file (list "/bin/sh" "-c" @@ -86,11 +88,7 @@ args)) (let* ((real-args (trc (redirect-input (prepend-env args)))) (pipe (apply open-pipe* OPEN_READ real-args)) - (output (begin - (let ((bv (get-bytevector-all pipe))) - (if (eof-object? bv) - "" - (utf8->string bv))))) + (output (get-string-all pipe)) (ret (close-pipe pipe))) (case (status:exit-val ret) ((0) output) @@ -335,6 +333,14 @@ "^([^ ]+) (.*)$" (_ k v) (cons (string->symbol k) v)))))) +(define (fold-commits f rev seed) + (let lp ((rev (git-rev-parse rev)) (seed seed)) + (if rev + (let ((commit (parse-commit rev))) + (lp (assq-ref commit 'parent) + (f rev commit seed))) + seed))) + (define (commit-utc-timestamp commit) (match-bind "^(.*) ([0-9]+) ([+-][0-9]+)" (assq-ref (parse-commit commit) 'committer) diff --git a/tekuti/index.scm b/tekuti/index.scm index df02f89..103e344 100644 --- a/tekuti/index.scm +++ b/tekuti/index.scm @@ -33,46 +33,48 @@ #:use-module (tekuti post) #:use-module (tekuti tags) #:use-module (tekuti cache) + #:use-module (tekuti classifier) #:export (maybe-reindex read-index update-index)) +;; Additionally an index has an "index" field, indicating the commit +;; that it was saved in, and a "master" field, indicating the commit +;; that it indexes. (define index-specs - `((posts ,reindex-posts ,write-hash ,read-hash) + `((master #f ,write ,read) + (posts ,reindex-posts ,write-hash ,read-hash) (posts-by-date ,reindex-posts-by-date ,write ,read) (tags ,reindex-tags ,write-hash ,read-hash) - (cache ,(lambda _ (make-empty-cache)) ,(lambda (x) #f) ,(lambda () '())))) + (legit-comments ,reindex-legit-comments ,write-hash ,read-hash) + (bogus-comments ,reindex-bogus-comments ,write-hash ,read-hash) + (classifier ,reindex-classifier #f #f) + (cache ,(lambda _ (make-empty-cache)) #f #f))) (define (reindex oldindex master) + ;; Leave off "index" field. (with-time-debugging - (fold (lambda (pair index) - (acons (car pair) ((cadr pair) oldindex index) - index)) + (fold (lambda (spec index) + (match spec + ((key reindex write read) + (acons key (with-time-debugging (begin (pk key) (reindex oldindex index))) index)))) (acons 'master master '()) - index-specs))) - -(define (assoc-list-ref alist key n default) - (let ((l (assoc key alist))) - (if l (list-ref l n) default))) - -(define (index->blob key value) - (with-output-to-blob - ((assoc-list-ref index-specs key 2 write) value))) - -(define (blob->index name sha1) - (with-input-from-blob - sha1 - ((assoc-list-ref index-specs (string->symbol name) 3 read)))) + ;; Skip past "master" as we handle that one specially. + (match index-specs + ((('master . _) . specs) specs))))) (define (write-index index oldref) (let ((new (git-commit-tree (git-mktree (let lp ((index index)) - (cond - ((null? index) '()) - ((eq? (caar index) 'index) (lp (cdr index))) - (else (cons (list (caar index) - (index->blob (caar index) (cdar index)) - 'blob) - (lp (cdr index))))))) + (match index + (() '()) + (((k . v) . index) + (match (assq k index-specs) + ((_ reindex write read) + (if write + (cons (list k (with-output-to-blob (write v)) 'blob) + (lp index)) + (lp index))) + (_ (lp index))))))) oldref "reindex\n" (commit-utc-timestamp (assq-ref index 'master))))) (or (false-if-git-error @@ -81,19 +83,26 @@ new)) (define (read-index) + (pk 'reading-index) (match (false-if-git-error (git-rev-parse "refs/heads/index")) (#f (maybe-reindex '())) (ref (let ((dents (git-ls-tree ref #f))) - (if (and-map (lambda (spec) - (assoc (symbol->string (car spec)) dents)) - index-specs) - (acons 'index ref - (map (lambda (dent) - (cons (string->symbol (car dent)) - (blob->index (car dent) (cadr dent)))) - dents)) - (maybe-reindex (acons 'index ref '()))))))) + (fold (lambda (spec index) + (match spec + ((key reindex write read) + (pk 'read-index-key key) + (acons key + (cond + ((and read (assoc (symbol->string key) dents)) + => (match-lambda + ((_ sha1 'blob) + (with-input-from-blob sha1 (read))))) + (else + (reindex '() index))) + index)))) + `((index . ,ref)) + index-specs))))) (define (maybe-reindex old-index) (let ((master (git-rev-parse "refs/heads/master"))) diff --git a/tekuti/page.scm b/tekuti/page.scm index 9d73637..777b281 100644 --- a/tekuti/page.scm +++ b/tekuti/page.scm @@ -30,6 +30,7 @@ #:use-module (tekuti git) #:use-module (tekuti post) #:use-module (tekuti comment) + #:use-module (tekuti classifier) #:use-module (web uri) #:use-module (web request) #:use-module (tekuti request) @@ -208,13 +209,27 @@ (cond ((post-from-key index (make-post-key year month day name)) => (lambda (post) - (cond - ((bad-new-comment-post? data) - => (lambda (reason) - (respond `((p "Bad post data: " ,(pk reason)))))) - (else - (let ((comment (make-new-comment (post-key post) (post-title post) - data))) + (let ((comment (parse-new-comment data))) + (cond + ((bad-new-comment-post? data) + => (lambda (reason) + (respond `((p "Bad post data: " ,(pk reason)))))) + ((comment-is-bogus? index comment) + (respond `((p "Comment appears to be bogus; ignoring.") + (p "I'm testing out a new automated bogus " + "comment detector. If you feel your comment " + "was caught unfairly, tweet it to me or send " + "it by email. Or press back and reword it.") + (p "If you are a spammer, note that I fixed " + "the comment renderer to properly add " + (tt "rel='external nofollow'") " on all " + "links in comments. Go take a look at any " + "comment with a link to see for yourself. " + "Trying to linkbomb this site probably won't " + "give you any link juice so it's not worth " + "the trouble to either one of us :)")))) + (else + (make-new-comment (post-key post) (post-title post) comment) ;; nb: at this point, `post' is out-of-date (respond `((p "Comment posted, thanks.")) #:redirect (post-url post #:fragment "comments") From 493fe697baad9168e77970d7dc3f2bad12c7b26f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 6 Mar 2017 11:17:39 +0100 Subject: [PATCH 19/33] Update parsed entities --- tekuti/config.scm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tekuti/config.scm b/tekuti/config.scm index 27b6fa6..93f1880 100644 --- a/tekuti/config.scm +++ b/tekuti/config.scm @@ -59,9 +59,9 @@ (define *server-impl-args* (lambda () `(#:host ,*private-host* #:port ,*private-port*))) -(define-parsed-entity! 'agrave 224) -(define-parsed-entity! 'laquo 171) -(define-parsed-entity! 'mdash 8212) -(define-parsed-entity! 'nbsp 160) -(define-parsed-entity! 'raquo 187) -(define-parsed-entity! 'uacute 250) +(define-parsed-entity! 'agrave (string (integer->char 224))) +(define-parsed-entity! 'laquo (string (integer->char 171))) +(define-parsed-entity! 'mdash (string (integer->char 8212))) +(define-parsed-entity! 'nbsp (string (integer->char 160))) +(define-parsed-entity! 'raquo (string (integer->char 187))) +(define-parsed-entity! 'uacute (string (integer->char 250))) From ff529670e196b5f5418dc2802eb5ba30816b86b6 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 6 Mar 2017 14:18:37 +0000 Subject: [PATCH 20/33] Fix env invocation --- tekuti/git.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tekuti/git.scm b/tekuti/git.scm index 8324344..cad4e0c 100644 --- a/tekuti/git.scm +++ b/tekuti/git.scm @@ -1,5 +1,5 @@ ;; Tekuti -;; Copyright (C) 2008, 2010, 2011, 2012 Andy Wingo +;; Copyright (C) 2008, 2010, 2011, 2012, 2017 Andy Wingo ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -79,7 +79,7 @@ (define (prepend-env args) (if (null? env) args - (cons "/run/current-system/profile/bin/env" (append env args)))) + (cons "env" (append env args)))) (define (redirect-input args) (if input-file (list "/bin/sh" "-c" From b16c08a385522ea689beb09101fe23a002f9235a Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 31 May 2019 15:31:04 +0200 Subject: [PATCH 21/33] Serialize as HTML rather than XHTML * tekuti/page-helpers.scm (html-doctype): Relax to HTML doctype. (define-tag-set, void-element?, template-element?, raw-text-element?) (escapable-raw-text-element?, foreign-element?, make-char-quotator) (attribute-value-empty?, attribute-value-needs-quotes-chars) (attribute-value-needs-quotes?, print-attribute-value/quoted) (print-text/quoted, shtml->html): Emit HTML by default. This will allow documents to contain