Add bogus comment detector.
* tekuti/classifier.scm: New file. * Makefile.am: Add new file. * tekuti/comment.scm: * tekuti/git.scm: * tekuti/index.scm: * tekuti/page.scm: Update to include bogus comment detector.
This commit is contained in:
parent
42b77efe6a
commit
5e603daa9e
6 changed files with 453 additions and 70 deletions
|
@ -4,6 +4,7 @@ SOURCES = \
|
|||
tekuti/base64.scm \
|
||||
tekuti/boot.scm \
|
||||
tekuti/cache.scm \
|
||||
tekuti/classifier.scm \
|
||||
tekuti/comment.scm \
|
||||
tekuti/config.scm \
|
||||
tekuti/filters.scm \
|
||||
|
|
302
tekuti/classifier.scm
Normal file
302
tekuti/classifier.scm
Normal file
|
@ -0,0 +1,302 @@
|
|||
;; Tekuti
|
||||
;; Copyright (C) 2008, 2010, 2012 Andy Wingo <wingo at pobox dot com>
|
||||
|
||||
;; This program is free software; you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU General Public License as
|
||||
;; published by the Free Software Foundation; either version 3 of
|
||||
;; the License, or (at your option) any later version.
|
||||
;;
|
||||
;; This program is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with this program; if not, contact:
|
||||
;;
|
||||
;; Free Software Foundation Voice: +1-617-542-5942
|
||||
;; 59 Temple Place - Suite 330 Fax: +1-617-542-2652
|
||||
;; Boston, MA 02111-1307, USA gnu@gnu.org
|
||||
|
||||
;;; Commentary:
|
||||
;;
|
||||
;; Comments -- pulling them out of the database, and making new ones.
|
||||
;;
|
||||
;;; Code:
|
||||
|
||||
(define-module (tekuti classifier)
|
||||
#:use-module (tekuti comment)
|
||||
#:use-module (tekuti filters)
|
||||
#:use-module (tekuti git)
|
||||
#:use-module (tekuti util)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (srfi srfi-11)
|
||||
#:use-module (srfi srfi-9)
|
||||
#:export (reindex-legit-comments
|
||||
reindex-bogus-comments
|
||||
reindex-classifier
|
||||
comment-is-bogus?))
|
||||
|
||||
(define (tokenize-comment comment)
|
||||
(define (decorate-tokens decorator tokens)
|
||||
(map (lambda (token) (string-append decorator token)) tokens))
|
||||
(define (tokenize exp)
|
||||
(match exp
|
||||
((or () #f) '())
|
||||
((? string?) (string-tokenize exp char-set:letter+digit))
|
||||
(((or 'comment 'div 'p) . body)
|
||||
(tokenize body))
|
||||
(((? symbol? tag) . body)
|
||||
(decorate-tokens (string-append (symbol->string tag) ".")
|
||||
(tokenize body)))
|
||||
((head . tail) (append (tokenize head) (tokenize tail)))
|
||||
(_ (pk 'what exp comment) '())))
|
||||
(tokenize
|
||||
`(comment
|
||||
(author ,(assq-ref comment 'author))
|
||||
(email ,(assq-ref comment 'author_email))
|
||||
(url ,(assq-ref comment 'author_url))
|
||||
,(let ((format (or (assq-ref comment 'format) 'wordpress))
|
||||
(raw (assq-ref comment 'raw-content)))
|
||||
(or (case format
|
||||
((wordpress) (false-if-exception (wordpress->sxml raw)))
|
||||
|
||||
(else `(pre ,raw)))
|
||||
`(pre ,raw))))))
|
||||
|
||||
(define (fold-features comment f seed)
|
||||
(let lp ((tokens (tokenize-comment comment)) (seed seed))
|
||||
(define (make-3-gram a b c)
|
||||
(string-append a " " (or b "") " " (or c "")))
|
||||
(define (make-2-gram a b)
|
||||
(string-append a " " (or b "")))
|
||||
(define (make-1-gram a)
|
||||
a)
|
||||
(match tokens
|
||||
((a . tokens)
|
||||
(lp tokens
|
||||
(or (f (match tokens
|
||||
((b c . _) (make-3-gram a b c))
|
||||
((b) (make-3-gram a b #f))
|
||||
(() (make-3-gram a #f #f)))
|
||||
(or (f (match tokens
|
||||
((b . _) (make-2-gram a b))
|
||||
(() (make-2-gram a #f)))
|
||||
(f (make-1-gram a)
|
||||
seed))
|
||||
seed))
|
||||
(f (make-1-gram a)
|
||||
seed))))
|
||||
(() seed))))
|
||||
|
||||
(define (count-features comments)
|
||||
(let ((counts (make-hash-table)))
|
||||
(hash-for-each
|
||||
(lambda (comment-sha1 comment-name)
|
||||
(fold-features (blob->comment comment-name comment-sha1)
|
||||
(lambda (feature counts)
|
||||
(add-feature! counts feature)
|
||||
counts)
|
||||
counts))
|
||||
comments)
|
||||
counts))
|
||||
|
||||
;; A feature's bogosity is the probability that a bogus document
|
||||
;; contains that feature, divided by the probability that a legit
|
||||
;; document contains the feature.
|
||||
(define (compute-log-bogosities legit-features bogus-features)
|
||||
(define (feature-count table)
|
||||
(hash-fold (lambda (feature count sum) (+ count sum)) 0 table))
|
||||
(let ((total-bogus-features (feature-count bogus-features))
|
||||
(total-legit-features (feature-count legit-features))
|
||||
(log-bogosities (make-hash-table)))
|
||||
(hash-for-each
|
||||
(lambda (feature bogus-count)
|
||||
(let ((legit-count (hash-ref legit-features feature 0)))
|
||||
(hash-set! log-bogosities feature
|
||||
(log (/ (/ (+ bogus-count 0.001) total-bogus-features)
|
||||
(/ (+ legit-count 0.001) total-legit-features))))))
|
||||
bogus-features)
|
||||
(hash-for-each
|
||||
(lambda (feature legit-count)
|
||||
(let ((bogus-count (hash-ref bogus-features feature)))
|
||||
(unless bogus-count
|
||||
(hash-set! log-bogosities feature
|
||||
(log (/ (/ 0.01 total-bogus-features)
|
||||
(/ (+ legit-count 0.01) total-legit-features)))))))
|
||||
legit-features)
|
||||
log-bogosities))
|
||||
|
||||
(define (update-bogosities! log-bogosities changed-features
|
||||
legit-features bogus-features)
|
||||
(define (feature-count table)
|
||||
(hash-fold (lambda (feature count sum) (+ count sum)) 0 table))
|
||||
(let ((total-bogus-features (feature-count bogus-features))
|
||||
(total-legit-features (feature-count legit-features)))
|
||||
(hash-for-each
|
||||
(lambda (feature _)
|
||||
(let ((bogus-count (hash-ref bogus-features feature 0))
|
||||
(legit-count (hash-ref legit-features feature 0)))
|
||||
(hash-set! log-bogosities feature
|
||||
(log (/ (/ (+ bogus-count 0.001) total-bogus-features)
|
||||
(/ (+ legit-count 0.001) total-legit-features))))))
|
||||
changed-features)))
|
||||
|
||||
(define (compute-bogus-probability comment log-bogosities bogus-prior
|
||||
feature-limit)
|
||||
(let ((v (make-vector feature-limit 0.0)))
|
||||
(define (add-bogosity! log-bogosity)
|
||||
(let ((mag (abs log-bogosity)))
|
||||
(when (< (abs (vector-ref v 0)) mag)
|
||||
(let lp ((idx 0))
|
||||
(let ((next (1+ idx)))
|
||||
(cond
|
||||
((and (< next (vector-length v))
|
||||
(< (abs (vector-ref v next)) mag))
|
||||
(vector-set! v idx (vector-ref v next))
|
||||
(lp (1+ idx)))
|
||||
(else
|
||||
(vector-set! v idx log-bogosity))))))))
|
||||
(fold-features comment
|
||||
(lambda (feature _)
|
||||
(add-bogosity! (hash-ref log-bogosities feature 0.0)))
|
||||
#f)
|
||||
(let* ((ratio (exp (+ (log (/ bogus-prior (- 1.0 bogus-prior)))
|
||||
(apply + (vector->list v))))))
|
||||
(/ ratio (+ ratio 1.0)))))
|
||||
|
||||
(define (compute-differing-comments old-master new-master)
|
||||
;; sha1 -> name
|
||||
(define (compute-hash-diff old new)
|
||||
(let ((removed (make-hash-table))
|
||||
(added (make-hash-table)))
|
||||
(hash-for-each (lambda (post-sha1 post-name)
|
||||
(unless (hash-ref new post-sha1)
|
||||
(hash-set! removed post-sha1 post-name)))
|
||||
old)
|
||||
(hash-for-each (lambda (post-sha1 post-name)
|
||||
(unless (hash-ref old post-sha1)
|
||||
(hash-set! added post-sha1 post-name)))
|
||||
new)
|
||||
(values removed added)))
|
||||
(define* (git-ls-tree->hash ref kind #:optional (out (make-hash-table)))
|
||||
(for-each
|
||||
(match-lambda
|
||||
((name sha1 (? (lambda (x) (eq? x kind))))
|
||||
(hash-set! out sha1 name)))
|
||||
(git-ls-tree ref #f))
|
||||
out)
|
||||
(let ((old-post-trees (git-ls-tree->hash old-master 'tree))
|
||||
(new-post-trees (git-ls-tree->hash new-master 'tree)))
|
||||
(let-values (((removed-post-trees added-post-trees)
|
||||
(compute-hash-diff old-post-trees new-post-trees)))
|
||||
(let ((old-comments (make-hash-table))
|
||||
(new-comments (make-hash-table)))
|
||||
(hash-for-each
|
||||
(lambda (sha1 name)
|
||||
(git-ls-tree->hash (string-append sha1 ":comments") 'blob
|
||||
old-comments))
|
||||
removed-post-trees)
|
||||
(hash-for-each
|
||||
(lambda (sha1 name)
|
||||
(git-ls-tree->hash (string-append sha1 ":comments") 'blob
|
||||
new-comments))
|
||||
added-post-trees)
|
||||
(compute-hash-diff old-comments new-comments)))))
|
||||
|
||||
(define (reindex-legit-comments old-index index)
|
||||
(let ((old-legit-comments (assq-ref old-index 'legit-comments))
|
||||
(old-master (assq-ref old-index 'master))
|
||||
(new-master (assq-ref index 'master)))
|
||||
(if old-legit-comments
|
||||
(let-values (((removed added)
|
||||
(compute-differing-comments old-master new-master)))
|
||||
(hash-for-each (lambda (k v)
|
||||
(hash-remove! old-legit-comments k))
|
||||
removed)
|
||||
(hash-for-each (lambda (k v)
|
||||
(hash-set! old-legit-comments k v))
|
||||
added)
|
||||
old-legit-comments)
|
||||
(compute-legit-comments new-master))))
|
||||
|
||||
(define (reindex-bogus-comments old-index index)
|
||||
(let ((old-bogus-comments (assq-ref old-index 'bogus-comments))
|
||||
(old-classifier (assq-ref old-index 'classifier))
|
||||
(old-master (assq-ref old-index 'master))
|
||||
(new-master (assq-ref index 'master)))
|
||||
(if old-bogus-comments
|
||||
(let-values (((removed added)
|
||||
(compute-differing-comments old-master new-master)))
|
||||
(hash-for-each (lambda (k v)
|
||||
(hash-remove! old-bogus-comments k))
|
||||
added)
|
||||
(hash-for-each (lambda (k v)
|
||||
(hash-set! old-bogus-comments k v))
|
||||
removed)
|
||||
old-bogus-comments)
|
||||
(compute-bogus-comments new-master (assq-ref index 'legit-comments)))))
|
||||
|
||||
(define-record-type <classifier>
|
||||
(make-classifier legit-prior legit-features bogus-features bogosities)
|
||||
classifier?
|
||||
(legit-prior classifier-legit-prior)
|
||||
(legit-features classifier-legit-features)
|
||||
(bogus-features classifier-bogus-features)
|
||||
(bogosities classifier-bogosities))
|
||||
|
||||
(define (rebuild-classifier legit-comments bogus-comments)
|
||||
(format #t "Rebuilding bogus comment classifier...\n")
|
||||
(with-time-debugging
|
||||
(let* ((legit-count (hash-count (const #t) legit-comments))
|
||||
(bogus-count (hash-count (const #t) bogus-comments))
|
||||
(legit-prior (/ legit-count (+ legit-count bogus-count 0.0)))
|
||||
(legit-features (count-features legit-comments))
|
||||
(bogus-features (count-features bogus-comments))
|
||||
(bogosities (compute-log-bogosities legit-features bogus-features)))
|
||||
(make-classifier legit-prior legit-features bogus-features bogosities))))
|
||||
|
||||
(define* (add-feature! features feature #:optional (count 1))
|
||||
(let ((h (hash-create-handle! features feature 0)))
|
||||
(set-cdr! h (+ (cdr h) count))))
|
||||
|
||||
(define* (remove-feature! features feature #:optional (count 1))
|
||||
(let ((h (hash-get-handle features feature)))
|
||||
(when h
|
||||
(set-cdr! h (- (cdr h) count)))))
|
||||
|
||||
(define (reindex-classifier old-index index)
|
||||
(match (assq-ref old-index 'classifier)
|
||||
(#f
|
||||
(rebuild-classifier (assq-ref index 'legit-comments)
|
||||
(assq-ref index 'bogus-comments)))
|
||||
((and classifier
|
||||
($ <classifier> legit-prior legit-features bogus-features bogosities))
|
||||
(let-values (((removed added)
|
||||
(compute-differing-comments (assq-ref old-index 'master)
|
||||
(assq-ref index 'master))))
|
||||
(let ((removed-features (count-features removed))
|
||||
(added-features (count-features added)))
|
||||
(hash-for-each (lambda (k v)
|
||||
(remove-feature! legit-features k v)
|
||||
(add-feature! bogus-features k v))
|
||||
removed-features)
|
||||
(hash-for-each (lambda (k v)
|
||||
(add-feature! legit-features k v)
|
||||
(remove-feature! bogus-features k v))
|
||||
added-features)
|
||||
(update-bogosities! bogosities removed-features
|
||||
legit-features bogus-features)
|
||||
(update-bogosities! bogosities added-features
|
||||
legit-features bogus-features)))
|
||||
classifier)))
|
||||
|
||||
(define* (comment-bogus-probability classifier comment #:key (feature-limit 20))
|
||||
(let* ((log-bogosities (classifier-bogosities classifier))
|
||||
(bogus-prior (- 1.0 (classifier-legit-prior classifier))))
|
||||
(compute-bogus-probability comment log-bogosities
|
||||
bogus-prior feature-limit)))
|
||||
|
||||
(define* (comment-is-bogus? index comment #:key (threshold 0.5))
|
||||
(let ((classifier (assq-ref index 'classifier)))
|
||||
(> (comment-bogus-probability classifier comment) threshold)))
|
|
@ -37,7 +37,8 @@
|
|||
#:use-module (tekuti match-bind)
|
||||
#:export (blob->comment comment-sxml-content comment-timestamp
|
||||
comment-readable-date bad-new-comment-post?
|
||||
make-new-comment delete-comment))
|
||||
parse-new-comment make-new-comment delete-comment
|
||||
compute-legit-comments compute-bogus-comments))
|
||||
|
||||
(define *comment-spec*
|
||||
`((timestamp . ,string->number)))
|
||||
|
@ -137,31 +138,38 @@
|
|||
|
||||
(define de-newline (s///g "[\n\r]" " "))
|
||||
|
||||
(define (make-new-comment key title post-data)
|
||||
(define (parse-new-comment post-data)
|
||||
(let ((content (assoc-ref post-data "comment"))
|
||||
(author (assoc-ref post-data "author"))
|
||||
(email (assoc-ref post-data "email"))
|
||||
(url (assoc-ref post-data "url")))
|
||||
(let ((sha1 (with-output-to-blob
|
||||
(for-each
|
||||
(lambda (pair)
|
||||
(format #t "~a: ~a\n" (car pair) (cdr pair)))
|
||||
`((timestamp . ,(time-second (current-time)))
|
||||
(author . ,(de-newline author))
|
||||
(author_email . ,email)
|
||||
(author_url . ,url)))
|
||||
(display "\n")
|
||||
(display content)))
|
||||
(message (format #f "comment on \"~a\" by ~a" title author)))
|
||||
(git-update-ref
|
||||
"refs/heads/master"
|
||||
(lambda (master)
|
||||
(git-commit-tree (munge-tree1 master
|
||||
'create
|
||||
(list key "comments")
|
||||
(list sha1 sha1 'blob))
|
||||
master message #f))
|
||||
5))))
|
||||
`((timestamp . ,(time-second (current-time)))
|
||||
(author . ,(de-newline author))
|
||||
(author_email . ,email)
|
||||
(author_url . ,url)
|
||||
(raw-content . ,content))))
|
||||
|
||||
(define (make-new-comment key title comment)
|
||||
(let ((sha1 (with-output-to-blob
|
||||
(for-each
|
||||
(match-lambda
|
||||
((k . v)
|
||||
(unless (eq? k 'raw-content)
|
||||
(format #t "~a: ~a\n" k v))))
|
||||
comment)
|
||||
(display "\n")
|
||||
(display (assq-ref comment 'raw-content))))
|
||||
(message (format #f "comment on \"~a\" by ~a" title
|
||||
(assq-ref comment 'author))))
|
||||
(git-update-ref
|
||||
"refs/heads/master"
|
||||
(lambda (master)
|
||||
(git-commit-tree (munge-tree1 master
|
||||
'create
|
||||
(list key "comments")
|
||||
(list sha1 sha1 'blob))
|
||||
master message #f))
|
||||
5)))
|
||||
|
||||
(define (delete-comment post id)
|
||||
(let ((key (post-key post))
|
||||
|
@ -174,3 +182,45 @@
|
|||
`(,id))
|
||||
master message #f))
|
||||
5)))
|
||||
|
||||
(define (compute-legit-comments master-ref)
|
||||
;; sha1 -> #t
|
||||
(define legit (make-hash-table))
|
||||
(pk 'computing-legit)
|
||||
(for-each
|
||||
(match-lambda
|
||||
((post-name post-sha1 'tree)
|
||||
(for-each
|
||||
(match-lambda
|
||||
((comment-name comment-sha1 'blob)
|
||||
(hash-set! legit comment-sha1 comment-name)))
|
||||
(git-ls-tree (string-append post-sha1 ":comments") #f))))
|
||||
(git-ls-tree master-ref #f))
|
||||
(pk 'done legit))
|
||||
|
||||
(define (compute-bogus-comments master-ref legit)
|
||||
;; sha1 -> #t
|
||||
(define visited-trees (make-hash-table))
|
||||
(define bogus (make-hash-table))
|
||||
(pk 'computing-bogus)
|
||||
(fold-commits
|
||||
(lambda (rev commit _)
|
||||
(pk 'computing-bogus rev)
|
||||
(for-each
|
||||
(match-lambda
|
||||
((post-name post-sha1 'tree)
|
||||
(unless (hash-ref visited-trees post-sha1)
|
||||
(hash-set! visited-trees post-sha1 #t)
|
||||
(for-each
|
||||
(match-lambda
|
||||
((comment-name comment-sha1 'blob)
|
||||
(unless (or (hash-ref legit comment-sha1)
|
||||
(hash-ref bogus comment-sha1))
|
||||
(hash-set! bogus comment-sha1 comment-name)))
|
||||
(_ #f))
|
||||
(git-ls-tree (string-append post-sha1 ":comments") #f))))
|
||||
(_ #f))
|
||||
(git-ls-tree (assq-ref commit 'tree) #f)))
|
||||
(assq-ref (parse-commit master-ref) 'parent)
|
||||
#f)
|
||||
bogus)
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
(define-module (tekuti git)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (ice-9 popen)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:use-module (tekuti util)
|
||||
#:use-module (tekuti config)
|
||||
#:use-module (tekuti match-bind)
|
||||
|
@ -44,6 +45,7 @@
|
|||
git-commit-tree git-rev-list git-revert
|
||||
|
||||
munge-tree munge-tree1 parse-commit commit-utc-timestamp
|
||||
fold-commits
|
||||
|
||||
with-output-to-blob with-input-from-blob))
|
||||
|
||||
|
@ -77,7 +79,7 @@
|
|||
(define (prepend-env args)
|
||||
(if (null? env)
|
||||
args
|
||||
(cons "/usr/bin/env" (append env args))))
|
||||
(cons "/run/current-system/profile/bin/env" (append env args))))
|
||||
(define (redirect-input args)
|
||||
(if input-file
|
||||
(list "/bin/sh" "-c"
|
||||
|
@ -86,11 +88,7 @@
|
|||
args))
|
||||
(let* ((real-args (trc (redirect-input (prepend-env args))))
|
||||
(pipe (apply open-pipe* OPEN_READ real-args))
|
||||
(output (begin
|
||||
(let ((bv (get-bytevector-all pipe)))
|
||||
(if (eof-object? bv)
|
||||
""
|
||||
(utf8->string bv)))))
|
||||
(output (get-string-all pipe))
|
||||
(ret (close-pipe pipe)))
|
||||
(case (status:exit-val ret)
|
||||
((0) output)
|
||||
|
@ -335,6 +333,14 @@
|
|||
"^([^ ]+) (.*)$" (_ k v)
|
||||
(cons (string->symbol k) v))))))
|
||||
|
||||
(define (fold-commits f rev seed)
|
||||
(let lp ((rev (git-rev-parse rev)) (seed seed))
|
||||
(if rev
|
||||
(let ((commit (parse-commit rev)))
|
||||
(lp (assq-ref commit 'parent)
|
||||
(f rev commit seed)))
|
||||
seed)))
|
||||
|
||||
(define (commit-utc-timestamp commit)
|
||||
(match-bind
|
||||
"^(.*) ([0-9]+) ([+-][0-9]+)" (assq-ref (parse-commit commit) 'committer)
|
||||
|
|
|
@ -33,46 +33,48 @@
|
|||
#:use-module (tekuti post)
|
||||
#:use-module (tekuti tags)
|
||||
#:use-module (tekuti cache)
|
||||
#:use-module (tekuti classifier)
|
||||
#:export (maybe-reindex read-index update-index))
|
||||
|
||||
;; Additionally an index has an "index" field, indicating the commit
|
||||
;; that it was saved in, and a "master" field, indicating the commit
|
||||
;; that it indexes.
|
||||
(define index-specs
|
||||
`((posts ,reindex-posts ,write-hash ,read-hash)
|
||||
`((master #f ,write ,read)
|
||||
(posts ,reindex-posts ,write-hash ,read-hash)
|
||||
(posts-by-date ,reindex-posts-by-date ,write ,read)
|
||||
(tags ,reindex-tags ,write-hash ,read-hash)
|
||||
(cache ,(lambda _ (make-empty-cache)) ,(lambda (x) #f) ,(lambda () '()))))
|
||||
(legit-comments ,reindex-legit-comments ,write-hash ,read-hash)
|
||||
(bogus-comments ,reindex-bogus-comments ,write-hash ,read-hash)
|
||||
(classifier ,reindex-classifier #f #f)
|
||||
(cache ,(lambda _ (make-empty-cache)) #f #f)))
|
||||
|
||||
(define (reindex oldindex master)
|
||||
;; Leave off "index" field.
|
||||
(with-time-debugging
|
||||
(fold (lambda (pair index)
|
||||
(acons (car pair) ((cadr pair) oldindex index)
|
||||
index))
|
||||
(fold (lambda (spec index)
|
||||
(match spec
|
||||
((key reindex write read)
|
||||
(acons key (with-time-debugging (begin (pk key) (reindex oldindex index))) index))))
|
||||
(acons 'master master '())
|
||||
index-specs)))
|
||||
|
||||
(define (assoc-list-ref alist key n default)
|
||||
(let ((l (assoc key alist)))
|
||||
(if l (list-ref l n) default)))
|
||||
|
||||
(define (index->blob key value)
|
||||
(with-output-to-blob
|
||||
((assoc-list-ref index-specs key 2 write) value)))
|
||||
|
||||
(define (blob->index name sha1)
|
||||
(with-input-from-blob
|
||||
sha1
|
||||
((assoc-list-ref index-specs (string->symbol name) 3 read))))
|
||||
;; Skip past "master" as we handle that one specially.
|
||||
(match index-specs
|
||||
((('master . _) . specs) specs)))))
|
||||
|
||||
(define (write-index index oldref)
|
||||
(let ((new (git-commit-tree
|
||||
(git-mktree
|
||||
(let lp ((index index))
|
||||
(cond
|
||||
((null? index) '())
|
||||
((eq? (caar index) 'index) (lp (cdr index)))
|
||||
(else (cons (list (caar index)
|
||||
(index->blob (caar index) (cdar index))
|
||||
'blob)
|
||||
(lp (cdr index)))))))
|
||||
(match index
|
||||
(() '())
|
||||
(((k . v) . index)
|
||||
(match (assq k index-specs)
|
||||
((_ reindex write read)
|
||||
(if write
|
||||
(cons (list k (with-output-to-blob (write v)) 'blob)
|
||||
(lp index))
|
||||
(lp index)))
|
||||
(_ (lp index)))))))
|
||||
oldref "reindex\n"
|
||||
(commit-utc-timestamp (assq-ref index 'master)))))
|
||||
(or (false-if-git-error
|
||||
|
@ -81,19 +83,26 @@
|
|||
new))
|
||||
|
||||
(define (read-index)
|
||||
(pk 'reading-index)
|
||||
(match (false-if-git-error (git-rev-parse "refs/heads/index"))
|
||||
(#f (maybe-reindex '()))
|
||||
(ref
|
||||
(let ((dents (git-ls-tree ref #f)))
|
||||
(if (and-map (lambda (spec)
|
||||
(assoc (symbol->string (car spec)) dents))
|
||||
index-specs)
|
||||
(acons 'index ref
|
||||
(map (lambda (dent)
|
||||
(cons (string->symbol (car dent))
|
||||
(blob->index (car dent) (cadr dent))))
|
||||
dents))
|
||||
(maybe-reindex (acons 'index ref '())))))))
|
||||
(fold (lambda (spec index)
|
||||
(match spec
|
||||
((key reindex write read)
|
||||
(pk 'read-index-key key)
|
||||
(acons key
|
||||
(cond
|
||||
((and read (assoc (symbol->string key) dents))
|
||||
=> (match-lambda
|
||||
((_ sha1 'blob)
|
||||
(with-input-from-blob sha1 (read)))))
|
||||
(else
|
||||
(reindex '() index)))
|
||||
index))))
|
||||
`((index . ,ref))
|
||||
index-specs)))))
|
||||
|
||||
(define (maybe-reindex old-index)
|
||||
(let ((master (git-rev-parse "refs/heads/master")))
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#:use-module (tekuti git)
|
||||
#:use-module (tekuti post)
|
||||
#:use-module (tekuti comment)
|
||||
#:use-module (tekuti classifier)
|
||||
#:use-module (web uri)
|
||||
#:use-module (web request)
|
||||
#:use-module (tekuti request)
|
||||
|
@ -208,13 +209,27 @@
|
|||
(cond
|
||||
((post-from-key index (make-post-key year month day name))
|
||||
=> (lambda (post)
|
||||
(cond
|
||||
((bad-new-comment-post? data)
|
||||
=> (lambda (reason)
|
||||
(respond `((p "Bad post data: " ,(pk reason))))))
|
||||
(else
|
||||
(let ((comment (make-new-comment (post-key post) (post-title post)
|
||||
data)))
|
||||
(let ((comment (parse-new-comment data)))
|
||||
(cond
|
||||
((bad-new-comment-post? data)
|
||||
=> (lambda (reason)
|
||||
(respond `((p "Bad post data: " ,(pk reason))))))
|
||||
((comment-is-bogus? index comment)
|
||||
(respond `((p "Comment appears to be bogus; ignoring.")
|
||||
(p "I'm testing out a new automated bogus "
|
||||
"comment detector. If you feel your comment "
|
||||
"was caught unfairly, tweet it to me or send "
|
||||
"it by email. Or press back and reword it.")
|
||||
(p "If you are a spammer, note that I fixed "
|
||||
"the comment renderer to properly add "
|
||||
(tt "rel='external nofollow'") " on all "
|
||||
"links in comments. Go take a look at any "
|
||||
"comment with a link to see for yourself. "
|
||||
"Trying to linkbomb this site probably won't "
|
||||
"give you any link juice so it's not worth "
|
||||
"the trouble to either one of us :)"))))
|
||||
(else
|
||||
(make-new-comment (post-key post) (post-title post) comment)
|
||||
;; nb: at this point, `post' is out-of-date
|
||||
(respond `((p "Comment posted, thanks."))
|
||||
#:redirect (post-url post #:fragment "comments")
|
||||
|
|
Loading…
Reference in a new issue