255 lines
8.2 KiB
EmacsLisp
255 lines
8.2 KiB
EmacsLisp
;;; fuzzy.el --- Fuzzy matching utilities
|
||
|
||
;; Copyright (C) 2010 Tomohiro Matsuyama
|
||
|
||
;; Author: Tomohiro Matsuyama <m2ym.pub@gmail.com>
|
||
;; Keywords: convenience
|
||
|
||
;; This program is free software; you can redistribute it and/or modify
|
||
;; it under the terms of the GNU General Public License as published by
|
||
;; the Free Software Foundation, either version 3 of the License, or
|
||
;; (at your option) any later version.
|
||
|
||
;; This program is distributed in the hope that it will be useful,
|
||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
;; GNU General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
;;; Commentary:
|
||
|
||
;;
|
||
|
||
;;; Code:
|
||
|
||
(eval-when-compile
|
||
(require 'cl))
|
||
(require 'regexp-opt)
|
||
|
||
(defgroup fuzzy nil
|
||
"Fuzzy matching utilities."
|
||
:group 'convenience
|
||
:prefix "fuzzy-")
|
||
|
||
(defcustom fuzzy-accept-error-rate 0.10
|
||
"Error threshold."
|
||
:group 'fuzzy)
|
||
|
||
(defvar fuzzy-accept-length-difference 2)
|
||
|
||
(defvar fuzzy-regexp-some-char (format "\\w\\{0,%s\\}" fuzzy-accept-length-difference))
|
||
|
||
|
||
|
||
;;; Functions
|
||
|
||
(defun fuzzy-reverse-string (string)
|
||
(apply 'string (nreverse (append string nil))))
|
||
|
||
(defun fuzzy-regexp-compile (string)
|
||
(labels ((oddp (n) (eq (logand n 1) 1))
|
||
(evenp (n) (eq (logand n 1) 0))
|
||
(opt (n) (regexp-opt-charset (append (substring string
|
||
(max 0 (- n 1))
|
||
(min (length string) (+ n 2))) nil))))
|
||
(concat
|
||
"\\("
|
||
(loop for i below (length string)
|
||
for c = (if (evenp i) (opt i) fuzzy-regexp-some-char)
|
||
concat c)
|
||
"\\|"
|
||
(loop for i below (length string)
|
||
for c = (if (oddp i) (opt i) fuzzy-regexp-some-char)
|
||
concat c)
|
||
"\\)")))
|
||
|
||
(defalias 'fuzzy-edit-distance 'fuzzy-jaro-winkler-distance)
|
||
|
||
(defun fuzzy-jaro-winkler-distance (s1 s2)
|
||
"http://en.wikipedia.org/wiki/Jaro-Winkler_distance"
|
||
(let* ((l1 (length s1))
|
||
(l2 (length s2))
|
||
(r (max 1 (1- (/ (max l1 l2) 2))))
|
||
(m 0)
|
||
(tr 0)
|
||
(p 0)
|
||
cs1 cs2)
|
||
(loop with seen = (make-vector l2 nil)
|
||
for i below l1
|
||
for c1 = (aref s1 i) do
|
||
(loop for j from (max 0 (- i r)) below (min l2 (+ i r))
|
||
for c2 = (aref s2 j)
|
||
if (and (char-equal c1 c2)
|
||
(null (aref seen j))) do
|
||
(push c1 cs1)
|
||
(aset seen j c2)
|
||
(incf m)
|
||
and return nil)
|
||
finally
|
||
(setq cs1 (nreverse cs1)
|
||
cs2 (loop for i below l2
|
||
for c = (aref seen i)
|
||
if c collect c)))
|
||
(loop for c1 in cs1
|
||
for c2 in cs2
|
||
if (not (char-equal c1 c2)) do
|
||
(incf tr))
|
||
(loop for i below (min m 5)
|
||
for c1 across s1
|
||
for c2 across s2
|
||
while (char-equal c1 c2) do
|
||
(incf p))
|
||
(if (eq m 0)
|
||
0.0
|
||
(setq m (float m))
|
||
(let* ((dj (/ (+ (/ m l1) (/ m l2) (/ (- m (/ tr 2)) m)) 3))
|
||
(dw (+ dj (* p 0.1 (- 1 dj)))))
|
||
dw))))
|
||
|
||
;; this function should be compiled
|
||
(byte-compile 'fuzzy-jaro-winkler-distance)
|
||
|
||
(defun fuzzy-match (s1 s2 &optional function)
|
||
(or function (setq function 'fuzzy-edit-distance))
|
||
(and (<= (abs (- (length s1) (length s2)))
|
||
fuzzy-accept-length-difference)
|
||
(>= (funcall function s1 s2)
|
||
(- 1 fuzzy-accept-error-rate))))
|
||
|
||
(defun fuzzy-all-completions (string collection)
|
||
"all-completions family with fuzzy matching."
|
||
(loop with length = (length string)
|
||
for str in collection
|
||
for s = (substring str 0 (min (length str)
|
||
(+ length fuzzy-accept-length-difference)))
|
||
if (fuzzy-match string s)
|
||
collect str))
|
||
|
||
|
||
|
||
;;; Search and Incremental Search
|
||
|
||
(defvar fuzzy-search-cache nil)
|
||
(defvar fuzzy-search-cache-string nil)
|
||
|
||
(defun fuzzy-search-cache-activate ()
|
||
(setq fuzzy-search-cache (make-hash-table))
|
||
(setq fuzzy-search-cache-string nil))
|
||
|
||
(defun fuzzy-search-cache-deactive ()
|
||
(setq fuzzy-search-cache nil)
|
||
(setq fuzzy-search-cache-string nil))
|
||
|
||
(defun fuzzy-search-edit-distance (s1 s2)
|
||
(or (and fuzzy-search-cache
|
||
(cond
|
||
((null fuzzy-search-cache-string)
|
||
(setq fuzzy-search-cache-string s1)
|
||
nil)
|
||
((not (equal fuzzy-search-cache-string s1))
|
||
(setq fuzzy-search-cache-string s1)
|
||
(clrhash fuzzy-search-cache)
|
||
nil)
|
||
(t))
|
||
(gethash s2 fuzzy-search-cache))
|
||
(let ((d (fuzzy-edit-distance s1 s2)))
|
||
(if fuzzy-search-cache
|
||
(puthash s2 d fuzzy-search-cache))
|
||
d)))
|
||
|
||
(defun fuzzy-search-match (s1 s2)
|
||
(fuzzy-match s1 s2 'fuzzy-search-edit-distance))
|
||
|
||
(defun fuzzy-search-forward (string &optional bound noerror count)
|
||
(let* ((regexp (fuzzy-regexp-compile string))
|
||
match-data)
|
||
(save-excursion
|
||
(while (and (null match-data)
|
||
(re-search-forward regexp bound t))
|
||
(if (fuzzy-search-match string (match-string 1))
|
||
(setq match-data (match-data))
|
||
(goto-char (1+ (match-beginning 1))))))
|
||
(when match-data
|
||
(store-match-data match-data)
|
||
(goto-char (match-end 1)))))
|
||
|
||
(defun fuzzy-search-backward (string &optional bound noerror count)
|
||
(let* ((regexp (fuzzy-regexp-compile string))
|
||
match-data begin end)
|
||
(save-excursion
|
||
(while (and (null match-data)
|
||
(re-search-backward regexp bound t))
|
||
(setq begin (match-beginning 1)
|
||
end (match-end 1))
|
||
(store-match-data nil)
|
||
(goto-char (max (point-min) (- begin (* (length string) 2))))
|
||
(while (re-search-forward regexp end t)
|
||
(if (fuzzy-search-match string (match-string 1))
|
||
(setq match-data (match-data))
|
||
(goto-char (1+ (match-beginning 1)))))
|
||
(unless match-data
|
||
(goto-char begin)))
|
||
(if match-data
|
||
(progn
|
||
(store-match-data match-data)
|
||
(goto-char (match-beginning 1)))
|
||
(store-match-data nil)))))
|
||
|
||
(defvar fuzzy-isearch nil)
|
||
(defvar fuzzy-isearch-failed-count 0)
|
||
(defvar fuzzy-isearch-enabled 'on-failed)
|
||
(defvar fuzzy-isearch-original-search-fun nil)
|
||
(defvar fuzzy-isearch-prefix "[FUZZY] ")
|
||
|
||
(defun fuzzy-isearch-activate ()
|
||
(setq fuzzy-isearch t)
|
||
(setq fuzzy-isearch-failed-count 0)
|
||
(fuzzy-search-cache-activate))
|
||
|
||
(defun fuzzy-isearch-deactivate ()
|
||
(setq fuzzy-isearch nil)
|
||
(setq fuzzy-isearch-failed-count 0)
|
||
(fuzzy-search-cache-deactive))
|
||
|
||
(defun fuzzy-isearch ()
|
||
(cond (isearch-word
|
||
(if isearch-forward 'word-search-forward 'word-search-backward))
|
||
(isearch-regexp
|
||
(if isearch-forward 're-search-forward 're-search-backward))
|
||
((or fuzzy-isearch
|
||
(eq fuzzy-isearch-enabled 'always)
|
||
(and (eq fuzzy-isearch-enabled 'on-failed)
|
||
(null isearch-success)
|
||
isearch-wrapped
|
||
(> (setq fuzzy-isearch-failed-count (1+ fuzzy-isearch-failed-count))
|
||
1)))
|
||
(unless fuzzy-isearch
|
||
;(goto-char isearch-opoint)
|
||
(fuzzy-isearch-activate))
|
||
(if isearch-forward 'fuzzy-search-forward 'fuzzy-search-backward))
|
||
(t
|
||
(if isearch-forward 'search-forward 'search-backward))))
|
||
|
||
(defun fuzzy-isearch-end-hook ()
|
||
(fuzzy-isearch-deactivate))
|
||
|
||
(defun turn-on-fuzzy-isearch ()
|
||
(interactive)
|
||
(setq fuzzy-isearch-original-search-fun isearch-search-fun-function)
|
||
(setq isearch-search-fun-function 'fuzzy-isearch)
|
||
(add-hook 'isearch-mode-end-hook 'fuzzy-isearch-end-hook))
|
||
|
||
(defun turn-off-fuzzy-isearch ()
|
||
(interactive)
|
||
(setq isearch-search-fun-function fuzzy-isearch-original-search-fun)
|
||
(remove-hook 'isearch-mode-end-hook 'fuzzy-isearch-end-hook))
|
||
|
||
(defadvice isearch-message-prefix (after fuzzy-isearch-message-prefix activate)
|
||
(if fuzzy-isearch
|
||
(setq ad-return-value (concat fuzzy-isearch-prefix ad-return-value))
|
||
ad-return-value))
|
||
|
||
(provide 'fuzzy)
|
||
;;; fuzzy.el ends here
|