summaryrefslogtreecommitdiff
path: root/cfa635/charset.go
diff options
context:
space:
mode:
Diffstat (limited to 'cfa635/charset.go')
-rw-r--r--cfa635/charset.go397
1 files changed, 397 insertions, 0 deletions
diff --git a/cfa635/charset.go b/cfa635/charset.go
new file mode 100644
index 0000000..5521b3b
--- /dev/null
+++ b/cfa635/charset.go
@@ -0,0 +1,397 @@
+// Copyright 2022 Benjamin Barenblat
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+package cfa635
+
+import (
+ "unicode"
+ "unicode/utf8"
+
+ "golang.org/x/text/runes"
+ "golang.org/x/text/transform"
+)
+
+// NewEncoder returns a Transformer that converts UTF-8 to the CFA635 display
+// character set. The Transformer uses ¿ as the replacement character.
+//
+// The returned Transformer is lossy, converting various Unicode code points to
+// the same byte. For example, U+DF LATIN SMALL LETTER SHARP S (ß) and U+03B2
+// GREEK SMALL LETTER BETA (β) are both converted to 0xbe.
+//
+// The returned Transformer will never map anything to bytes in the range 0x00,
+// …, 0x0f.
+func NewEncoder() transform.Transformer {
+ identityMapped := unicode.RangeTable{
+ R16: []unicode.Range16{
+ {0x20, 0x23, 1},
+ {0x25, 0x3f, 1},
+ {0x41, 0x5a, 1},
+ {0x61, 0x7a, 1}},
+ R32: nil,
+ LatinOffset: 83,
+ }
+ return runes.If(runes.In(&identityMapped), nil, encode{})
+}
+
+type encode struct{}
+
+func (_ encode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ for nDst < len(dst) && nSrc < len(src) {
+ r, rLen := utf8.DecodeRune(src[nSrc:])
+ if r == utf8.RuneError {
+ err = transform.ErrShortSrc
+ break
+ }
+ dst[nDst] = encode1(r)
+ nDst++
+ nSrc += rLen
+ }
+ return
+}
+
+func (_ encode) Reset() {}
+
// encode1 returns the CFA635 byte used to display c. Runes without a case
// below yield 0x60, the display's ¿ glyph, which doubles as the replacement
// character. No case returns a byte below 0x10.
//
// The switch does not cover the ASCII runes that NewEncoder passes through
// unchanged (most punctuation, digits and letters); NewEncoder filters those
// out before they reach this function.
//
// Code points that are canonically equivalent to another Unicode character are
// written as numeric constants rather than rune literals, so that a tool that
// normalizes the source text cannot collapse two cases into a duplicate or
// turn a single rune into a multi-rune literal.
func encode1(c rune) byte {
	switch c {
	case '⏵', '▶', '▸', '►', '⯈':
		return 0x10
	case '⏴', '◀', '⯇':
		return 0x11
	case '⏫':
		return 0x12
	case '⏬':
		return 0x13
	case '«', '≪', '《':
		return 0x14
	case '»', '≫', '》':
		return 0x15
	case '↖', '⬉', '⭦':
		return 0x16
	case '↗', '⬈', '⭧':
		return 0x17
	case '↙', '⬋', '⭩':
		return 0x18
	case '↘', '⬊', '⭨':
		return 0x19
	case '⏶', '▲', '▴':
		return 0x1a
	case '⏷', '▼', '▾':
		return 0x1b
	case '↲', '↵', '⏎', '⮐':
		return 0x1c
	case '^', '˄', 'ˆ', '⌃':
		return 0x1d
	case 'ᵛ':
		return 0x1e
	// Assorted Unicode space characters, plus U+2060 WORD JOINER.
	case 0xa0, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x2060, 0x3000:
		return 0x20
	case 0x01c3: // LATIN LETTER RETROFLEX CLICK, which looks like '!'
		return 0x21
	case 'ʺ', '˝', '״', '″', '〃':
		return 0x22
	case '℔', '⌗', '♯', '⧣':
		return 0x23
	case '¤':
		return 0x24
	case '٪', '⁒':
		return 0x25
	case 'ʹ', 'ʼ', 'ˈ', '׳', '‘', '’', '′', 'ꞌ':
		return 0x27
	case '٭', '∗', '⚹':
		return 0x2a
	case '˖':
		return 0x2b
	case '‚':
		return 0x2c
	case '˗', '‐', '‑', '‒', '–', '−', '𐆑':
		return 0x2d
	case '․':
		return 0x2e
	case '⁄', '∕', '⟋':
		return 0x2f
	case '։', '׃', '፡', '∶', '꞉':
		return 0x3a
	case 0x3b, 0x037e: // ';' and GREEK QUESTION MARK
		return 0x3b
	case '˂', '‹', 0x2329, '⟨', 0x3008: // incl. LEFT-POINTING ANGLE BRACKET, LEFT ANGLE BRACKET
		return 0x3c
	case '᐀', '⹀', '゠', '꞊', '𐆐', '🟰':
		return 0x3d
	case '˃', '›', 0x232a, '⟩', 0x3009: // incl. RIGHT-POINTING ANGLE BRACKET, RIGHT ANGLE BRACKET
		return 0x3e
	case '¡':
		return 0x40
	case 'Ä':
		return 0x5b
	case 'Ö':
		return 0x5c
	case 'Ñ':
		return 0x5d
	case 'Ü':
		return 0x5e
	case '§':
		// § has its own slot between Ü (0x5e) and ¿ (0x60) in the display's
		// character table; 0x5d is Ñ.
		return 0x5f
	case '¿':
		return 0x60
	case 'ä':
		return 0x7b
	case 'ö':
		return 0x7c
	case 'ñ':
		return 0x7d
	case 'ü':
		return 0x7e
	case 'à':
		return 0x7f
	case '°', '˚', 'ᴼ', 'ᵒ', '⁰':
		return 0x80
	case '¹':
		return 0x81
	case '²':
		return 0x82
	case '³':
		return 0x83
	case '⁴':
		return 0x84
	case '⁵':
		return 0x85
	case '⁶':
		return 0x86
	case '⁷':
		return 0x87
	case '⁸':
		return 0x88
	case '⁹':
		return 0x89
	case '½':
		return 0x8a
	case '¼':
		return 0x8b
	case '±':
		return 0x8c
	case '≥':
		return 0x8d
	case '≤':
		return 0x8e
	case 'µ', 'μ':
		return 0x8f
	case '♪', 0x1d160: // incl. MUSICAL SYMBOL EIGHTH NOTE
		return 0x90
	case '♬':
		return 0x91
	case '🔔', '🕭':
		return 0x92
	case '♥', '❤', '💙', '💚', '💛', '💜', '🖤', '🤎', '🧡':
		return 0x93
	case '◆', '♦':
		return 0x94
	case '𐎂':
		return 0x95
	case '「':
		return 0x96
	case '」':
		return 0x97
	case '“', '❝':
		return 0x98
	case '”', '❞':
		return 0x99
	case 'ɑ', 'α':
		return 0x9c
	case 'ɛ', 'ε':
		return 0x9d
	case 'δ':
		return 0x9e
	case '∞':
		return 0x9f
	case '@':
		return 0xa0
	case '£':
		return 0xa1
	case '$':
		return 0xa2
	case '¥':
		return 0xa3
	case 'è':
		return 0xa4
	case 'é':
		return 0xa5
	case 'ù':
		return 0xa6
	case 'ì':
		return 0xa7
	case 'ò':
		return 0xa8
	case 'Ç':
		return 0xa9
	case 'ᵖ':
		return 0xaa
	case 'Ø':
		return 0xab
	case 'ø':
		return 0xac
	case 'ʳ':
		return 0xad
	case 0x00c5, 0x212b: // LATIN CAPITAL LETTER A WITH RING ABOVE, ANGSTROM SIGN
		return 0xae
	case 'å':
		return 0xaf
	case 'Δ', '∆', '⌂':
		return 0xb0
	case '¢', 'ȼ', '₵':
		return 0xb1
	case 'Φ':
		return 0xb2
	case 'τ':
		return 0xb3
	case 'λ':
		return 0xb4
	case 0x03a9, 0x2126: // GREEK CAPITAL LETTER OMEGA, OHM SIGN
		return 0xb5
	case 'π':
		return 0xb6
	case 'Ψ':
		return 0xb7
	case 'Ʃ', 'Σ', '∑':
		return 0xb8
	case 'Θ', 'ϴ', 'θ':
		return 0xb9
	case 'Ξ':
		return 0xba
	case '⏺', '⚫', '⬤', '🔴':
		return 0xbb
	case 'Æ':
		return 0xbc
	case 'æ', 'ӕ':
		return 0xbd
	case 'ß', 'β':
		return 0xbe
	case 'É':
		return 0xbf
	case 'Γ':
		return 0xc0
	case 'Λ':
		return 0xc1
	case 'Π', '∏':
		return 0xc2
	case 'Υ', 'ϓ':
		return 0xc3
	case '_', 'ˍ':
		return 0xc4
	case 'È':
		return 0xc5
	case 'Ê':
		return 0xc6
	case 'ê':
		return 0xc7
	case 'ç':
		return 0xc8
	case 'ğ', 'ǧ':
		return 0xc9
	case 'Ş':
		return 0xca
	case 'ş', 'ș':
		return 0xcb
	case 'İ':
		return 0xcc
	case 'ı':
		return 0xcd
	case '~', '˜', '⁓', '∼', '〜', 0xff5e: // incl. FULLWIDTH TILDE
		return 0xce
	case '◇', '◊', '♢':
		return 0xcf
	case 'ƒ':
		return 0xd5
	// U+2588..U+258F are the left-anchored block elements, from FULL BLOCK
	// down to LEFT ONE EIGHTH BLOCK; the eight widths share five glyphs.
	case 0x2588:
		return 0xd6
	case 0x2589, 0x258a:
		return 0xd7
	case 0x258b, 0x258c:
		return 0xd8
	case 0x258d:
		return 0xd9
	case 0x258e, 0x258f:
		return 0xda
	case '₧':
		return 0xdb
	case '◦':
		return 0xdc
	case '•', '⋅':
		return 0xdd
	case '↑', '⬆', '⭡':
		return 0xde
	case '→', '⮕', '⭢':
		return 0xdf
	case '↓', '⬇', '⭣':
		return 0xe0
	case '←', '⬅', '⭠':
		return 0xe1
	case 'Á':
		return 0xe2
	case 'Í':
		return 0xe3
	case 'Ó':
		return 0xe4
	case 'Ú':
		return 0xe5
	case 'Ý':
		return 0xe6
	case 'á':
		return 0xe7
	case 'í':
		return 0xe8
	case 'ó':
		return 0xe9
	case 'ú':
		return 0xea
	case 'ý':
		return 0xeb
	case 'Ô':
		return 0xec
	case 'ô':
		return 0xed
	case 'Č':
		return 0xf0
	case 'Ě':
		return 0xf1
	case 'Ř':
		return 0xf2
	case 'Š':
		return 0xf3
	case 'Ž':
		return 0xf4
	case 'č':
		return 0xf5
	case 'ě':
		return 0xf6
	case 'ř':
		return 0xf7
	case 'š':
		return 0xf8
	case 'ž':
		return 0xf9
	case '[':
		return 0xfa
	case '\\':
		return 0xfb
	case ']':
		return 0xfc
	case '{':
		return 0xfd
	case '|':
		return 0xfe
	case '}':
		return 0xff
	}
	return 0x60 // ¿
}