summaryrefslogtreecommitdiffstats
path: root/emacs.d/nxhtml/util/idn.el
blob: 21f7a4cd16a363f8d73e85ff180b623c093186e9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
;;; idn.el --- Recommended Identifier Profiles for IDN
;;
;; Author: Lennart Borgman (lennart O borgman A gmail O com)
;; Created: 2010-03-24 Wed
;; Version: 0.1
;; Last-Updated: 2010-03-26 Fri
;; URL:
;; Keywords:
;; Compatibility:
;;
;; Features that might be required by this library:
;;
;;   `nxhtml-base'.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;; Commentary:
;;
;; Functions for handling IDN chars defined by
;; `http://www.unicode.org/reports/tr39/'.
;;
;; See `idn-is-recommended'.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;; Change log:
;;
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 3, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth
;; Floor, Boston, MA 02110-1301, USA.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;; Code:

;; Fix-me: You have to change this if you are not using nXhtml:
(require 'nxhtml-base)
;; Directory where `idn-init' looks for the data file idnchars.txt.
;; It is the subdirectory etc/uts39/ of `nxhtml-install-dir', which is
;; why `nxhtml-base' is required above.
(defvar uts39-datadir (expand-file-name "etc/uts39/" nxhtml-install-dir))

(defun idn-init (bv)
  "Mark the chars listed in idnchars.txt as set in bool-vector BV.
The file idnchars.txt is looked up in `uts39-datadir'.  Every
line that starts with a four digit hexadecimal code point NNNN,
or with a range NNNN..MMMM, sets the corresponding elements of BV
to t.  Entries whose code points have more than four hex digits
are skipped instead of being misread as their four digit prefix.

If a buffer already visits the file that buffer is used and its
point and restriction are left untouched.  Otherwise the file is
visited temporarily and the buffer is killed afterwards.

Return t if the file was read.  If the file does not exist show
a message and return nil without changing BV."
  (save-match-data
    (let* ((idnchars-file (expand-file-name "idnchars.txt" uts39-datadir))
           (idnchars-old (find-buffer-visiting idnchars-file))
           (idnchars-buf (or idnchars-old
                             (if (file-exists-p idnchars-file)
                                 (find-file-noselect idnchars-file)
                               ;; `message' returns its (non-nil) string, so
                               ;; return nil explicitly to mean "no buffer".
                               (message "Can't find file %S" idnchars-file)
                               nil)))
           (range-patt (rx bol
                           (group (repeat 4 (any xdigit)))
                           (optional ".."
                                     (group (repeat 4 (any xdigit))))))
           (num-idn 0))
      (when idnchars-buf
        (with-current-buffer idnchars-buf
          (save-excursion
            (save-restriction
              (widen)
              (goto-char (point-min))
              (while (re-search-forward range-patt nil t)
                ;; Group 1 is the start of the range, group 2 (optional)
                ;; the end.  Fetch them before `looking-at' below
                ;; clobbers the match data.
                (let* ((str-beg (match-string 1))
                       (str-end (match-string 2))
                       (beg (string-to-number str-beg 16))
                       (end (if str-end (string-to-number str-end 16) beg)))
                  ;; A hex digit right after the match means the code
                  ;; point has more than four digits, so what we matched
                  ;; is only a prefix of it.  Skip such entries.
                  (unless (looking-at "[[:xdigit:]]")
                    (dotimes (ii (1+ (- end beg)))
                      (aset bv (+ ii beg) t)
                      (setq num-idn (1+ num-idn)))))))))
        (unless idnchars-old (kill-buffer idnchars-buf))
        (message "Found %d IDN chars" num-idn)
        t))))

(defconst idn-char-vector
  (let ((flags (make-bool-vector (* 256 256) nil)))
    (if (not (idn-init flags))
        ;; The data could not be loaded, leave the constant as nil.
        nil
      ;; Quick sanity check of the loaded data: element 45 (#x2D)
      ;; must be set and its neighbour 44 (#x2C) must not be.
      (when (or (aref flags 44)
                (not (aref flags 45)))
        (message "idn-char-vector: Bad idn data in file idnchars.txt"))
      flags))
  "Boolean vector with recommended IDN chars.")


;;(idn-is-recommended 0)
;;(idn-is-recommended 65535)
(defsubst idn-is-recommended (char)
  "Return t if character CHAR is a recommended IDN char.
Return nil otherwise.  This includes the cases where CHAR is not
an integer, where CHAR is outside the range covered by
`idn-char-vector', and where that vector is not available.

See URL `http://www.unicode.org/reports/tr39/'.

Data is initialized from the file idnchars.txt in the directory
`uts39-datadir'.  This file is fetched from the above URL."
  ;; Guard the `aref': an out-of-range index, a nil CHAR (for example
  ;; from `char-after' at end of buffer) or a nil `idn-char-vector'
  ;; would otherwise signal an error.
  (and (integerp char)
       (bool-vector-p idn-char-vector)
       (>= char 0)
       (< char (length idn-char-vector))
       (aref idn-char-vector char)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Below are some helper functions that can be commented out.

;;(global-set-key [f9] 'idn-char-at-point)
(defun idn-char-at-point (pos)
  "Tell if char at POS is a recommended IDN char.
Default POS is current point.

If there is no char at POS, for example at the end of the buffer,
just say so."
  (interactive "d")
  (let ((this-char (char-after pos)))
    (if (not this-char)
        ;; `char-after' returns nil when POS has no char after it.
        ;; Neither `idn-is-recommended' nor the %x format below can be
        ;; relied on to accept nil.
        (message "No char at position %s" pos)
      (message "IDN char at point: %s (#%000x)"
               (idn-is-recommended this-char)
               this-char))))

(defun idn-list-chars ()
  "Show all IDN chars.
The chars set in `idn-char-vector' are listed in the help buffer,
followed by a count of them.
For more info see `idn-is-recommended'.

Note: This may crash Emacs currently, at least on w32."
  (interactive)
  ;; `interactive-p' is obsolete since Emacs 23.2.  Ask before entering
  ;; the macro body below so the answer is about this function's call.
  (let ((interactive (called-interactively-p 'interactive)))
    (with-output-to-temp-buffer (help-buffer)
      (help-setup-xref (list #'idn-list-chars) interactive)
      (with-current-buffer (help-buffer)
        (insert
         "Recommended Identifier Characters for IDN:\n\n")
        (let ((col 0)
              (cnt 0))
          (dotimes (nn (length idn-char-vector))
            (when (aref idn-char-vector nn)
              (setq cnt (1+ cnt))
              ;; Start a new line whenever the column counter wraps.
              (setq col (mod (1+ col) 20))
              (when (= col 0) (insert "\n "))
              ;; `insert' accepts a char directly.
              (insert " " nn)))
          (insert "\n\n"
                  (format "There were %d IDN chars defined in `idn-char-vector'." cnt)))))))

(provide 'idn)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; idn.el ends here