Fix some “Numerical overflow” errors
When starting a new blog, or running against some very old data, there is no data available on bogus or legitimate comments. When this happens, Tekuti tries to divide by 0, and to divide 0 by other things, in a few places, because it assumes that there is at least _some_ data about bogus and legitimate comments. I’m not 100% sure what these calculations are expected to produce, so I might have chosen the wrong solution, but it seems to me that if there is no data available, all we know is that there is nothing (0). This fixes an issue both when trying to comment on a fresh new dataset and when commenting on a dataset that hasn’t been touched since 2014.
This commit is contained in:
parent
480c050275
commit
6da1ed8fd2
1 changed file with 18 additions and 7 deletions
|
@ -114,16 +114,22 @@
|
|||
(lambda (feature bogus-count)
|
||||
(let ((legit-count (hash-ref legit-features feature 0)))
|
||||
(hash-set! log-bogosities feature
|
||||
(if (and (> total-bogus-features 0)
|
||||
(> total-legit-features 0))
|
||||
(log (/ (/ (+ bogus-count 0.001) total-bogus-features)
|
||||
(/ (+ legit-count 0.001) total-legit-features))))))
|
||||
(/ (+ legit-count 0.001) total-legit-features)))
|
||||
0))))
|
||||
bogus-features)
|
||||
(hash-for-each
|
||||
(lambda (feature legit-count)
|
||||
(let ((bogus-count (hash-ref bogus-features feature)))
|
||||
(unless bogus-count
|
||||
(hash-set! log-bogosities feature
|
||||
(if (and (> total-bogus-features 0)
|
||||
(> total-legit-features 0))
|
||||
(log (/ (/ 0.01 total-bogus-features)
|
||||
(/ (+ legit-count 0.01) total-legit-features)))))))
|
||||
(/ (+ legit-count 0.01) total-legit-features)))
|
||||
0)))))
|
||||
legit-features)
|
||||
log-bogosities))
|
||||
|
||||
|
@ -138,8 +144,11 @@
|
|||
(let ((bogus-count (hash-ref bogus-features feature 0))
|
||||
(legit-count (hash-ref legit-features feature 0)))
|
||||
(hash-set! log-bogosities feature
|
||||
(if (and (> total-bogus-features 0)
|
||||
(> total-legit-features 0))
|
||||
(log (/ (/ (+ bogus-count 0.001) total-bogus-features)
|
||||
(/ (+ legit-count 0.001) total-legit-features))))))
|
||||
(/ (+ legit-count 0.001) total-legit-features)))
|
||||
0))))
|
||||
changed-features)))
|
||||
|
||||
(define (compute-bogus-probability comment log-bogosities bogus-prior
|
||||
|
@ -250,7 +259,9 @@
|
|||
(with-time-debugging
|
||||
(let* ((legit-count (hash-count (const #t) legit-comments))
|
||||
(bogus-count (hash-count (const #t) bogus-comments))
|
||||
(legit-prior (/ legit-count (+ legit-count bogus-count 0.0)))
|
||||
(legit-prior (if (> legit-count 0)
|
||||
(/ legit-count (+ legit-count bogus-count 0.0))
|
||||
0))
|
||||
(legit-features (count-features legit-comments))
|
||||
(bogus-features (count-features bogus-comments))
|
||||
(bogosities (compute-log-bogosities legit-features bogus-features)))
|
||||
|
|
Loading…
Reference in a new issue