diff options
Diffstat (limited to 'cfa635/charset.go')
-rw-r--r-- | cfa635/charset.go | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/cfa635/charset.go b/cfa635/charset.go new file mode 100644 index 0000000..5521b3b --- /dev/null +++ b/cfa635/charset.go @@ -0,0 +1,397 @@ +// Copyright 2022 Benjamin Barenblat +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +package cfa635 + +import ( + "unicode" + "unicode/utf8" + + "golang.org/x/text/runes" + "golang.org/x/text/transform" +) + +// NewEncoder returns a Transformer that converts UTF-8 to the CFA635 display +// character set. The Transformer uses ¿ as the replacement character. +// +// The returned Transformer is lossy, converting various Unicode code points to +// the same byte. For example, U+DF LATIN SMALL LETTER SHARP S (ß) and U+03B2 +// GREEK SMALL LETTER BETA (β) are both converted to 0xbe. +// +// The returned Transformer will never map anything to bytes in the range 0x00, +// …, 0x0f. +func NewEncoder() transform.Transformer { + identityMapped := unicode.RangeTable{ + R16: []unicode.Range16{ + {0x20, 0x23, 1}, + {0x25, 0x3f, 1}, + {0x41, 0x5a, 1}, + {0x61, 0x7a, 1}}, + R32: nil, + LatinOffset: 83, + } + return runes.If(runes.In(&identityMapped), nil, encode{}) +} + +type encode struct{} + +func (_ encode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for nDst < len(dst) && nSrc < len(src) { + r, rLen := utf8.DecodeRune(src[nSrc:]) + if r == utf8.RuneError { + err = transform.ErrShortSrc + break + } + dst[nDst] = encode1(r) + nDst++ + nSrc += rLen + } + return +} + +func (_ encode) Reset() {} + +func encode1(c rune) byte { + switch c { + case '⏵', '▶', '▸', '►', '⯈': + return 0x10 + case '⏴', '◀', '⯇': + return 0x11 + case '⏫': + return 0x12 + case '⏬': + return 0x13 + case '«', '≪', '《': + return 0x14 + case '»', '≫', '》': + return 0x15 + case '↖', '⬉', '⭦': + return 0x16 + case '↗', '⬈', '⭧': + return 0x17 + case '↙', '⬋', '⭩': + return 0x18 + case '↘', '⬊', '⭨': + return 0x19 + case '⏶', '▲', '▴': + return 0x1a + case '⏷', '▼', '▾': + return 0x1b + case '↲', '↵', '⏎', '⮐': + return 0x1c + case '^', '˄', 'ˆ', '⌃': + return 0x1d + case 'ᵛ': + return 0x1e + case 0xa0, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x2060, 0x3000: + return 0x20 + case 0x01c3: + return 0x21 + case 'ʺ', '˝', '״', '″', '〃': + return 0x22 + case '℔', '⌗', '♯', '⧣': + return 0x23 + case '¤': + return 0x24 + case '٪', '⁒': + return 0x25 + case 'ʹ', 'ʼ', 'ˈ', '׳', '‘', '’', '′', 'ꞌ': + return 0x27 + case '٭', '∗', '⚹': + return 0x2a + case '˖': + return 0x2b + case '‚': + return 0x2c + case '˗', '‐', '‑', '‒', '–', '−', '𐆑': + return 0x2d + case '․': + return 0x2e + case '⁄', '∕', '⟋': + return 0x2f + case '։', '׃', '፡', '∶', '꞉': + return 0x3a + case ';': + return 0x3b + case '˂', '‹', '〈', '⟨', '〈': + return 0x3c + case '᐀', '⹀', '゠', '꞊', '𐆐', '🟰': + return 0x3d + case '˃', '›', '〉', '⟩', '〉': + return 0x3e + case '¡': + return 0x40 + case 'Ä': + return 0x5b + case 'Ö': + return 0x5c + case 'Ñ': + return 0x5d + case 'Ü': + return 0x5e + case '§': + return 0x5d + case '¿': + return 0x60 + case 'ä': + return 0x7b + case 'ö': + return 0x7c + case 'ñ': + return 0x7d + case 'ü': + return 0x7e + case 'à': + return 0x7f + case '°', '˚', 'ᴼ', 'ᵒ', '⁰': + return 0x80 + case '¹': + return 0x81 + case '²': + return 0x82 + case '³': + return 0x83 + case '⁴': + return 0x84 + case '⁵': + return 0x85 + case '⁶': + return 0x86 + case '⁷': + return 0x87 + case '⁸': + return 0x88 + case '⁹': + return 0x89 + case '½': + return 0x8a + case '¼': + return 0x8b + case '±': + return 0x8c + case '≥': + return 0x8d + case '≤': + return 0x8e + case 'µ', 'μ': + return 0x8f + case '♪', '𝅘𝅥𝅮': + return 0x90 + case '♬': + return 0x91 + case '🔔', '🕭': + return 0x92 + case '♥', '❤', '💙', '💚', '💛', '💜', '🖤', '🤎', '🧡': + return 0x93 + case '◆', '♦': + return 0x94 + case '𐎂': + return 0x95 + case '「': + return 0x96 + case '」': + return 0x97 + case '“', '❝': + return 0x98 + case '”', '❞': + return 0x99 + case 'ɑ', 'α': + return 0x9c + case 'ɛ', 'ε': + return 0x9d + case 'δ': + return 0x9e + case '∞': + return 0x9f + case '@': + return 0xa0 + case '£': + return 0xa1 + case '$': + return 0xa2 + case '¥': + return 0xa3 + case 'è': + return 0xa4 + case 'é': + return 0xa5 + case 'ù': + return 0xa6 + case 'ì': + return 0xa7 + case 'ò': + return 0xa8 + case 'Ç': + return 0xa9 + case 'ᵖ': + return 0xaa + case 'Ø': + return 0xab + case 'ø': + return 0xac + case 'ʳ': + return 0xad + case 'Å', 'Å': + return 0xae + case 'å': + return 0xaf + case 'Δ', '∆', '⌂': + return 0xb0 + case '¢', 'ȼ', '₵': + return 0xb1 + case 'Φ': + return 0xb2 + case 'τ': + return 0xb3 + case 'λ': + return 0xb4 + case 'Ω', 'Ω': + return 0xb5 + case 'π': + return 0xb6 + case 'Ψ': + return 0xb7 + case 'Ʃ', 'Σ', '∑': + return 0xb8 + case 'Θ', 'ϴ', 'θ': + return 0xb9 + case 'Ξ': + return 0xba + case '⏺', '⚫', '⬤', '🔴': + return 0xbb + case 'Æ': + return 0xbc + case 'æ', 'ӕ': + return 0xbd + case 'ß', 'β': + return 0xbe + case 'É': + return 0xbf + case 'Γ': + return 0xc0 + case 'Λ': + return 0xc1 + case 'Π', '∏': + return 0xc2 + case 'Υ', 'ϓ': + return 0xc3 + case '_', 'ˍ': + return 0xc4 + case 'È': + return 0xc5 + case 'Ê': + return 0xc6 + case 'ê': + return 0xc7 + case 'ç': + return 0xc8 + case 'ğ', 'ǧ': + return 0xc9 + case 'Ş': + return 0xca + case 'ş', 'ș': + return 0xcb + case 'İ': + return 0xcc + case 'ı': + return 0xcd + case '~', '˜', '⁓', '∼', '〜', '~': + return 0xce + case '◇', '◊', '♢': + return 0xcf + case 'ƒ': + return 0xd5 + case 0x2588: + return 0xd6 + case 0x2589, 0x258a: + return 0xd7 + case 0x258b, 0x258c: + return 0xd8 + case 0x258d: + return 0xd9 + case 0x258e, 0x258f: + return 0xda + case '₧': + return 0xdb + case '◦': + return 0xdc + case '•', '⋅': + return 0xdd + case '↑', '⬆', '⭡': + return 0xde + case '→', '⮕', '⭢': + return 0xdf + case '↓', '⬇', '⭣': + return 0xe0 + case '←', '⬅', '⭠': + return 0xe1 + case 'Á': + return 0xe2 + case 'Í': + return 0xe3 + case 'Ó': + return 0xe4 + case 'Ú': + return 0xe5 + case 'Ý': + return 0xe6 + case 'á': + return 0xe7 + case 'í': + return 0xe8 + case 'ó': + return 0xe9 + case 'ú': + return 0xea + case 'ý': + return 0xeb + case 'Ô': + return 0xec + case 'ô': + return 0xed + case 'Č': + return 0xf0 + case 'Ě': + return 0xf1 + case 'Ř': + return 0xf2 + case 'Š': + return 0xf3 + case 'Ž': + return 0xf4 + case 'č': + return 0xf5 + case 'ě': + return 0xf6 + case 'ř': + return 0xf7 + case 'š': + return 0xf8 + case 'ž': + return 0xf9 + case '[': + return 0xfa + case '\\': + return 0xfb + case ']': + return 0xfc + case '{': + return 0xfd + case '|': + return 0xfe + case '}': + return 0xff + } + return 0x60 // ¿ +} |