summaryrefslogtreecommitdiff
path: root/Functions/Zle/insert-composed-char
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2005-08-22 09:27:57 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2005-08-22 09:27:57 +0000
commit7df83c6a1c5304506c2fd7d0444ad567493da719 (patch)
tree2bda8b80221626ab28bdb108d026229f0f3e24b9 /Functions/Zle/insert-composed-char
parent5b12bb382726ae2120d109fe1d55a548313ab8c0 (diff)
downloadzsh-7df83c6a1c5304506c2fd7d0444ad567493da719.tar.gz
zsh-7df83c6a1c5304506c2fd7d0444ad567493da719.zip
21676: insert-unicode-char now insert-composed-char.
Also uses RFC 1345.
Diffstat (limited to 'Functions/Zle/insert-composed-char')
-rw-r--r--Functions/Zle/insert-composed-char407
1 files changed, 407 insertions, 0 deletions
diff --git a/Functions/Zle/insert-composed-char b/Functions/Zle/insert-composed-char
new file mode 100644
index 000000000..60a42e089
--- /dev/null
+++ b/Functions/Zle/insert-composed-char
@@ -0,0 +1,407 @@
+# Accented characters. Inputs two keys. There are two types: those
+# with a base character followed by an accent (see below for codes for
+# accents), and those with a two-character mnemonic for the composed
+# character. These are (with the exception of the Euro) the codes
+# given by RFC 1345. Note that some codes in RFC 1345 require three
+# characters to be input; none of these are handled.
+#
+# For best results zsh should have been built with support for
+# multibyte characters (--enable-multibyte), but single character sets
+# also work.
+#
+# Outputs the character converted from Unicode into the local representation.
+# (The conversion is done within the shell, using whatever facilities
+# the C library provides.)
+#
+# When used as a zle widget, the character is inserted at the cursor
+# position. With a numeric argument, preview in status line; outside zle,
+# print character (and newline) to standard output.
+#
+# The set of accented characters is reasonably complete up to U+0180, the
+# set of special characters less so. However, it mostly gives up at that
+# point. Adding new Unicode characters is easy, however. Please send any
+# additions to zsh-workers@sunsite.dk .
+#
+# Some of the accent codes are a little more obscure than others.
+# ! Grave
+# ' Acute
+# > Circumflex
+# ? Tilde
+# - Macron. (A horizontal bar over the letter.)
+# ( Breve. (A shallow dish shape over the letter.)
+# . Dot above, or no dot with lower case i, or dot in the middle of L or l.
+# : Diaeresis (Umlaut)
+# , Cedilla
+# _ Underline (none of these currently)
+# / Stroke through character
+# " Double acute
+# ; Ogonek. (A little forward facing hook at the bottom right
+# of the character.)
+# < Caron. (A little v over the letter.)
+# 0 Circle
+# 2 Hook
+# 9 Horn
+# Hence A! is upper case A with a grave, c, is lower case c with cedilla.
+#
+# Some other composed characters:
+# Various ligatures:
+# AE ae OE oe IJ ij
+#
+# ASCII characters not on all keyboards:
+# <( [
+# // \
+# )> ]
+# (! {
+# !! |
+# !) }
+# '? ~
+#
+# Special letters:
+# ss Eszett (scharfes S)
+# D- d- TH th Eth and thorn
+# kk kra
+# 'n 'n
+# NG ng ng
+# OI oi OI
+# yr yr
+# ED ezh
+#
+# Currency symbols:
+# Ct Cent
+# Pd Pound sterling
+# Cu Currency
+# Ye Yen
+# Eu Euro (not in RFC 1345 but logical)
+#
+# Punctuation
+# !I Inverted !
+# BB Broken vertical bar
+# SE Section
+# Co Copyright
+# -a Spanish feminine ordinal indicator
+# << Left guillemet
+# -- Soft hyphen
+# Rg Registered trade mark
+# PI Pilcrow (paragraph)
+# -o Spanish masculine ordinal indicator
+# >> Right guillemet
+# ?I Inverted question mark
+# '6 Left single quote
+# '9 Right single quote
+# .9 "Right" low quote
+# 9' Reversed "right" quote
+# "6 Left double quote
+# "9 Right double quote
+# :9 "Right" low double quote
+# 9" Reversed "right" double quote
+# /- Dagger
+# /= Double dagger
+#
+# Mathematical
+# DG Degree
+# +- +/-
+# 2S Superscript 2
+# 3S Superscript 3
+# My Micro
+# .M Middle dot
+# 1S Superscript 1
+# 14 Quarter
+# 12 Half
+# 34 Three quarters
+# *X Multiplication
+# -: Division
+#
+# Accents with no base character
+# '> Circumflex (caret)
+# '! Grave (backtick)
+# ', Cedilla
+# ': Diaeresis (Umlaut)
+# 'm Macron
+# '' Acute
+
+emulate -LR zsh  # native zsh option set, local to this function
+setopt cbases extendedglob printeightbit
+
+local accent basechar ochar error  # second key, first key, output character, error reporter
+
+if [[ -n $WIDGET ]]; then  # WIDGET is non-empty when running as a zle widget
+ error=(zle -M)  # report problems through zle -M
+else
+ error=print  # outside zle, report problems with print
+fi
+
+if (( ${+zsh_accented_chars} == 0 )); then  # build the table only if it is not already set
+ # The associative array zsh_accented_chars is indexed by the
+ # accent, i.e. the second key typed. The values are sets of
+ # character / Unicode pairs for the character with the given accent.
+ # The Unicode value is a hex index with no base discriminator;
+ # essentially a UCS-4 index with the leading zeroes suppressed.
+ typeset -gA zsh_accented_chars
+
+ # grave (second key !)
+ accent=\!
+ zsh_accented_chars[$accent]="\
+A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \
+"
+ # acute (second key '); i maps to U+00ED, since U+00EC is the grave form
+ accent=\'
+ zsh_accented_chars[$accent]="\
+A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i ED o F3 u FA y FD C 106 c 107 \
+L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \
+"
+ # circumflex (second key >)
+ accent=\>
+ zsh_accented_chars[$accent]="\
+A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11d \
+H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \
+"
+ # tilde (second key ?); E and e with tilde are U+1EBC / U+1EBD (U+00CB is E diaeresis)
+ accent=\?
+ zsh_accented_chars[$accent]="\
+A C3 E 1EBC N D1 O D5 a E3 e 1EBD n F1 o F5 I 128 i 129 U 168 u 169 \
+"
+ # macron (second key -); d- and D- give eth rather than a macron form
+ accent=-
+ zsh_accented_chars[$accent]="\
+A 100 a 101 d F0 D D0 E 112 e 113 I 12a i 12b O 14C o 14D U 16A u 16B \
+"
+ # breve (second key "(")
+ accent=\(
+ zsh_accented_chars[$accent]="\
+A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \
+"
+ # dot above (second key .), small i with no dot, or l with middle dot
+ accent=.
+ zsh_accented_chars[$accent]="\
+C 10A c 10b E 116 e 117 G 120 g 121 I 130 i 131 L 13F l 140 Z 17B z 17C \
+"
+ # diaeresis / Umlaut (second key :); includes E (U+00CB) alongside e (U+00EB)
+ accent=:
+ zsh_accented_chars[$accent]="\
+A C4 E CB I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \
+"
+ # cedilla (second key ,)
+ accent=,
+ zsh_accented_chars[$accent]="\
+C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \
+S 15E s 15F T 162 t 163 \
+"
+ # underline (_) would go here
+ # stroke through (second key /)
+ accent=/
+ zsh_accented_chars[$accent]="\
+O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \
+"
+ # double acute (second key ")
+ accent=\"
+ zsh_accented_chars[$accent]="\
+O 150 o 151 U 170 u 171\
+"
+ # ogonek (second key ;)
+ accent=\;
+ zsh_accented_chars[$accent]="\
+A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \
+"
+ # caron (second key <)
+ accent=\<
+ zsh_accented_chars[$accent]="\
+C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \
+S 160 s 161 T 164 t 165 Z 17D z 17E \
+"
+ # ring above (second key 0)
+ accent=0
+ zsh_accented_chars[$accent]="\
+A C5 a E5 U 16E u 16F \
+"
+ # hook above (second key 2); capital / small pairs are U+1EA2/1EA3 and U+1EBA/1EBB
+ accent=2
+ zsh_accented_chars[$accent]="\
+A 1EA2 a 1EA3 E 1EBA e 1EBB \
+"
+ # horn (second key 9), also right quotation marks ('9 .9 "9 :9)
+ accent=9
+ zsh_accented_chars[$accent]="\
+O 1A0 o 1A1 U 1Af u 1b0 ' 2019 . 201A \" 201D : 201E \
+"
+ # left quotation marks ('6 and "6)
+ accent=6
+ zsh_accented_chars[$accent]="\
+' 2018 \" 201C \
+"
+ # reversed quotation marks (9' and 9"), appended to the acute and double acute tables
+ accent=\'
+ zsh_accented_chars[$accent]+=" \
+9 201B \
+"
+ accent=\"
+ zsh_accented_chars[$accent]+=" \
+9 201F \
+"
+
+ # ligature with E (AE, OE); keyed by upper case E so the e table below cannot overwrite it
+ accent=E
+ zsh_accented_chars[$accent]="\
+A C6 O 152 \
+"
+ # ligature with e (ae, oe)
+ accent=e
+ zsh_accented_chars[$accent]="\
+a E6 o 153 \
+"
+ # ligature with J (IJ)
+ accent=J
+ zsh_accented_chars[$accent]="\
+I 132 \
+"
+ # ligature with j (ij)
+ accent=j
+ zsh_accented_chars[$accent]="\
+i 133 \
+"
+ # eszett (ss)
+ accent=s
+ zsh_accented_chars[$accent]="\
+s DF \
+"
+ # upper case thorn (TH)
+ accent=H
+ zsh_accented_chars[$accent]="\
+T DE \
+"
+ # lower case thorn (th)
+ accent=h
+ zsh_accented_chars[$accent]="\
+t FE \
+"
+
+ # Remaining characters are handled as separate pairs, appended with +=.
+ # We need to remember that the assoc array is keyed by the second character.
+ # Left square bracket
+ accent=\(
+ zsh_accented_chars[$accent]+=" < 5B"
+ # Reverse solidus (backslash to you and me).
+ accent=/
+ zsh_accented_chars[$accent]+=" / 5C"
+ # Right square bracket, circumflex
+ accent=\>
+ zsh_accented_chars[$accent]+=" ) 5D ' 5E"
+ # Grave accent
+ accent=\!
+ zsh_accented_chars[$accent]+=" ' 60"
+ # digraphs for (usually) standard characters {, |, }, ~
+ accent=\!
+ zsh_accented_chars[$accent]+=" ( 7B"
+ zsh_accented_chars[$accent]+=" ! 7C"
+ accent=\)
+ zsh_accented_chars[$accent]+=" ! 7D"
+ accent=\?
+ zsh_accented_chars[$accent]+=" ' 7E"
+ # non-breaking space
+ zsh_accented_chars[S]+=" N A0"
+ # inverted exclamation mark
+ zsh_accented_chars[I]+=" ! A1"
+ # cent
+ zsh_accented_chars[t]+=" C A2"
+ # pound sterling
+ zsh_accented_chars[d]+=" P A3"
+ # currency
+ zsh_accented_chars[u]+=" C A4"
+ # yen
+ zsh_accented_chars[e]+=" Y A5"
+ # broken bar
+ zsh_accented_chars[B]+=" B A6"
+ # section
+ zsh_accented_chars[E]+=" S A7"
+ # lonely diaeresis
+ zsh_accented_chars[:]+=" ' A8"
+ # copyright
+ zsh_accented_chars[o]+=" C A9"
+ # spanish feminine ordinal
+ zsh_accented_chars[a]+=" - AA"
+ # left guillemet
+ accent=\<
+ zsh_accented_chars[$accent]+=" < AB"
+ zsh_accented_chars[O]+=" N AC"  # U+00AC, typed as NO
+ # soft hyphen
+ zsh_accented_chars[-]+=" - AD"
+ # registered
+ zsh_accented_chars[g]+=" R AE"
+ # lonely macron
+ zsh_accented_chars[m]+=" ' AF"
+ # degree
+ zsh_accented_chars[G]+=" D B0"
+ # +/-
+ zsh_accented_chars[-]+=" + B1"
+ # superscripts
+ zsh_accented_chars[S]+=" 2 B2 3 B3"
+ # lonely acute
+ accent=\'
+ zsh_accented_chars[$accent]+=" ' B4"
+ # micro
+ zsh_accented_chars[y]+=" M B5"
+ # pilcrow (paragraph)
+ zsh_accented_chars[I]+=" P B6"
+ # Middle dot
+ zsh_accented_chars[M]+=" . B7"
+ # Lonely cedilla
+ zsh_accented_chars[,]+=" ' B8"
+ # Superscript one
+ zsh_accented_chars[S]+=" 1 B9"
+ # spanish masculine ordinal
+ zsh_accented_chars[o]+=" - BA"
+ # right guillemet
+ accent=\>
+ zsh_accented_chars[$accent]+=" > BB"
+ # fractions
+ zsh_accented_chars[4]+=" 1 BC 3 BE"
+ zsh_accented_chars[2]+=" 1 BD"
+ # inverted question mark
+ zsh_accented_chars[I]+=" ? BF"
+ # multiplication
+ zsh_accented_chars[X]+=" * D7"
+ # division
+ zsh_accented_chars[:]+=" - F7"
+ # kra
+ zsh_accented_chars[k]+=" k 138"
+ # apostrophe n
+ zsh_accented_chars[n]+=" ' 149"
+ # Lappish ng
+ zsh_accented_chars[G]+=" N 14A"
+ zsh_accented_chars[g]+=" n 14B"
+ # OI
+ zsh_accented_chars[I]+=" O 1A2"
+ zsh_accented_chars[i]+=" o 1A3"
+ # yr
+ zsh_accented_chars[r]+=" y 1A6"
+ # ezh
+ zsh_accented_chars[D]+=" E 1B7"
+ # euro (I invented this but it's logical)
+ zsh_accented_chars[u]+=" E 20AC"
+ # dagger and double dagger
+ zsh_accented_chars[-]+=" / 2020"
+ zsh_accented_chars[=]+=" / 2021"
+fi
+
+read -k basechar || return 1  # first key: the base character
+read -k accent || return 1  # second key: the accent, which is the table key
+
+local -A charmap
+charmap=(${=zsh_accented_chars[$accent]})  # split the "char hex" list into key/value pairs
+
+if [[ ${#charmap} -eq 0 || -z $charmap[$basechar] ]]; then  # no table for this accent, or no entry for this base
+ $error "Combination ${basechar}${accent} is not available."
+ return 1
+fi
+
+if [[ -z $WIDGET ]]; then  # not running as a zle widget: print the character
+ [[ -t 1 ]] && print  # when stdout is a terminal, emit a newline first
+ print "\U${(l.8..0.)charmap[$basechar]}"  # hex value left-padded with zeroes to 8 digits
+else
+ ochar="$(print -n "\U${(l.8..0.)charmap[$basechar]}")"
+
+ if (( ${+NUMERIC} )); then  # numeric argument given: show the character via $error instead of inserting
+ $error "Character ${(l.8..0.)charmap[$basechar]}: $ochar"
+ else
+ LBUFFER+=$ochar  # append to the text left of the cursor
+ fi
+fi