;------------------------------------------------------------------------------ ; Emacs の内部コードと通常使われる文字コードとの相互換算 ; $Id$ ;****************************************************************************** ;********************************** 基礎情報 ********************************** ;--------------------- Emacs 21.3 が対応している文字セット -------------------- ; 1バイト 2バイト ; 1 eight-bit-control(*) japanese-jisx0208-1978 (*)A0-FFはeight-bit-graphic ; 2 - chinese-gb2312 ; 3 - japanese-jisx0208 ; 4 - korean-ksc5601 ; 5 - japanese-jisx0212 ; 6 - chinese-cns11643-1 ; 7 - chinese-cns11643-2 ; 8 - japanese-jisx0213-1 ; 9 - chinese-big5-1 ; 10 - chinese-big5-2 ; 16 indian-1-column ; 17 latin-iso8859-1 tibetan-1-column ; 18 latin-iso8859-2 mule-unicode-2500-33ff ; 19 latin-iso8859-3 mule-unicode-e000-ffff ; 20 latin-iso8859-4 mule-unicode-0100-24ff ; 21 thai-tis620 ethiopic ; 22 greek-iso8859-7 chinese-cns11643-3 ; 23 arabic-iso8859-6 chinese-cns11643-4 ; 24 hebrew-iso8859-8 chinese-cns11643-5 ; 25 katakana-jisx0201 chinese-cns11643-6 ; 26 latin-jisx0201 chinese-cns11643-7 ; 27 indian-2-column ; 28 cyrillic-iso8859-5 tibetan ; 29 latin-iso8859-9 ; 30 latin-iso8859-15 japanese-jisx0213-2 ; 31 latin-iso8859-14 ; 32 japanese-jisx0208-1978 ; 33 chinese-gb2312 ; 34 japanese-jisx0208 ; 35 korean-ksc5601 ; 36 japanese-jisx0212 ; 37 chinese-cns11643-1 ; 38 chinese-cns11643-2 ; 39 japanese-jisx0213-1 ; 40 chinese-big5-1 ; 41 chinese-big5-2 ; 48 chinese-sisheng ; 49 ipa ※Mule 2.3 では Right-to-Left ; 50 vietnamese-viscii-lower ; 51 vietnamese-viscii-upper ; 52 arabic-digit ; 53 arabic-1-column ; 54 ascii-right-to-left ※Mule 2.3 では IPA ; 55 lao - ; 112 arabic-2-column ; 113 indian-is13194 ;--------------------- Mule 2.3 が対応している各文字セット -------------------- ; 1 Japanese Old ; 2 Chinese ; 3 Japanese ; 4 Korean ; 5 Japanese Supplement ; 6 CNS Plane1 ; 7 CNS Plane2 ; 8 ; 9 Big5 Level 1 ; 10 Big5 Level 2 ; 11 1-byte/1-column private character sets ; 12 1-byte/2-column private character sets ; 13 2-byte/1-column private character sets ; 14 2-byte/2-column private character sets ; ; 17 Latin-1 ; 18 Latin-2 ; 19 Latin-3 ; 20 Latin-4 ; 21 Thai Ethio ; 22 Greek CNS Plane3 ; 23 Arabic CNS Plane4 ; 24 Hebrew CNS Plane5 ; 25 Japanese Katakana CNS Plane6 ; 26 Japanese Roman CNS Plane7 ; 27 ; 28 Cyrillic ; 29 Latin-5 ; ; 32 Japanese Old ; 33 Chinese ; 34 Japanese ; 35 Korean ; 36 Japanese Supplement ; 37 CNS Plane1 ; 38 CNS Plane2 ; 39 ; 40 Big5 Level 1 ; 41 Big5 Level 2 ; 42 1-byte/1-column private character sets ; 43 1-byte/2-column private character sets ; 44 2-byte/1-column private character sets ; 45 2-byte/2-column private character sets ; ; 48 PinYin-ZhuYin ; 49 Right-to-Left ※Emacs 21 では IPA ; 50 VISCII lower ; 51 VISCII upper ; 52 Arabic digit ; 53 1-column Arabic ; 54 IPA for Mule ※Emacs 21 では Right-to-Left ; ; 112 2-column Arabic ;----------------------------- 各文字コードの構造 ----------------------------- ; CNS 11643(EUC-TW)の構造 ; G0…ASCII ; G1…CNS 11643 第1面 ; G2…CNS 11643 第2面以降(8E n xx xx; n=A2〜A7) ;------------------------------------ 共通 ------------------------------------ ; 最近の Emacs なら string-to-int で済むのだが… (autoload 'hexl-hex-string-to-integer "hexl") ;7 ビットコードをシフト JIS に変換…plane 2 は未実装 (defun jis-to-sjis (H L &optional plane) (or plane (setq plane 1)) (setq H (- H ?\x21)) (setq L (- L ?\x21)) (setq L (+ (* (% H 2) 94) L ?\x40)) (if (>= L ?\x7F) (setq L (1+ L))) (setq H (+ (/ H 2) ?\x81)) (if (>= H ?\x9F) (setq H (+ H ?\x40))) (+ (* H 256) L) ) ; (message "%x" (jis-to-sjis 36 34)) テスト用 ;----------------------------- Emacs 20 以降の場合 ---------------------------- (cond ((fboundp 'split-char) (fset 'ds-char-charset (symbol-function 'char-charset)) (defun ds-char-description (c) (charset-short-name (char-charset c)) ) (defun ds-char-to-7bit (c) (let ((L (split-char c))) (if (nth 2 L) (+ (* 256 (nth 1 L)) (nth 2 L)) (nth 1 L) ) )) (defun ds-char-to-euctw (c) (let ( (pln (ds-char-charset c)) ) (cond ((eq pln 'chinese-cns11643-1) (logior (ds-char-to-7bit c) ?\x8080)) ((eq pln 'chinese-cns11643-2) (logior (ds-char-to-7bit c) ?\xA28080)) ((eq pln 'chinese-cns11643-3) (logior (ds-char-to-7bit c) ?\xA38080)) ((eq pln 'chinese-cns11643-4) (logior (ds-char-to-7bit c) ?\xA48080)) ((eq pln 'chinese-cns11643-5) (logior (ds-char-to-7bit c) ?\xA58080)) ((eq pln 'chinese-cns11643-6) (logior (ds-char-to-7bit c) ?\xA68080)) ((eq pln 'chinese-cns11643-7) (logior (ds-char-to-7bit c) ?\xA78080)) (t nil) ) )) (defun ds-char-to-sjis (c) (setq c (split-char c)) (cond ((eq (car c) 'japanese-jisx0208-1978) (jis-to-sjis (nth 1 c) (nth 2 c) 1)) ((eq (car c) 'japanese-jisx0208) (jis-to-sjis (nth 1 c) (nth 2 c) 1)) ((eq (car c) 'japanese-jisx0213-1) (jis-to-sjis (nth 1 c) (nth 2 c) 1)) ;((eq (car c) 'japanese-jisx0213-2) ; (jis-to-sjis (nth 1 c) (nth 2 c) 2)) ;第二面は今のところ jis-to-sjis がサポートしていないので ;コメントアウト ) ) (defun ds-char-to-big5 (c) nil ) ) ;------------------------------- Mule 2.3 の場合 ------------------------------ ((boundp 'lc-jp) ; cond (fset 'ds-char-charset (symbol-function 'char-leading-char)) (defun ds-char-description (c) (or (char-description c) "ASCII") ) (defun ds-char-to-7bit (c) (let ( (zeroth (char-component c 0)) (first (char-component c 1)) (second (char-component c 2)) ) (cond ((= first 0) (logand 127 zeroth)) ((= second 0) (logand 127 first )) (t (logand ?\x7F7F (+ (* first 256) second))) ) )) (defun ds-char-to-euctw (c) (let ( (plane (ds-char-charset c)) ) (cond ((= plane lc-cns1) (logior (ds-char-to-7bit c) ?\x8080)) ((= plane lc-cns2) (logior (ds-char-to-7bit c) ?\xA28080)) ((= plane lc-cns3) (logior (ds-char-to-7bit c) ?\xA38080)) ((= plane lc-cns4) (logior (ds-char-to-7bit c) ?\xA48080)) ((= plane lc-cns5) (logior (ds-char-to-7bit c) ?\xA58080)) ((= plane lc-cns6) (logior (ds-char-to-7bit c) ?\xA68080)) ((= plane lc-cns7) (logior (ds-char-to-7bit c) ?\xA78080)) (t nil) ) )) (defun ds-char-to-sjis (c) (let ( (charset (char-component c 0)) (first (logand (char-component c 1) ?\x7F)) (second (logand (char-component c 2) ?\x7F)) ) (cond ((= charset lc-jp ) (jis-to-sjis first second 1)) ((= charset lc-jpold) (jis-to-sjis first second 1)) ) )) (defun ds-char-to-big5 (c) nil ) )) ;-------------------------- 文字コードを表示する関数 -------------------------- (defun ds-describe-char (&optional c) (interactive) (or c (setq c (char-after (point)))) (if (integerp c) (let (7bit a) (setq 7bit (ds-char-to-7bit c)) (message "%s, %s, 7bit=0x%x, 8bit=0x%x%s" (ds-char-description c) (cond ((< c 32) "") ((and (>= c 127) (< c 160)) "") (t (format "'%c'" c)) ) 7bit (if (< 7bit ?\x100) (logior ?\x80 7bit) (logior ?\x8080 7bit) ) (cond ((setq a (ds-char-to-sjis c)) (format ", SJIS=%x" a)) ((setq a (ds-char-to-big5 c)) (format ", Big5=%x" a)) ((setq a (ds-char-to-euctw c)) (format ", EUC=%x" a)) (t "") ) ) )) ) ;------------------------------------------------------------------------------ (provide 'ds-char)