// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package runes import ( "strings" "testing" "unicode/utf8" "golang.org/x/text/internal/testtext" "golang.org/x/text/transform" ) type transformTest struct { desc string szDst int atEOF bool repl string in string out string // result string of first call to Transform outFull string // transform of entire input string err error errSpan error nSpan int t transform.SpanningTransformer } const large = 10240 func (tt *transformTest) check(t *testing.T, i int) { if tt.t == nil { return } dst := make([]byte, tt.szDst) src := []byte(tt.in) nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF) if err != tt.err { t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err) } if got := string(dst[:nDst]); got != tt.out { t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out) } // Calls tt.t.Transform for the remainder of the input. We use this to test // the nSrc return value. out := make([]byte, large) n := copy(out, dst[:nDst]) nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true) if got, want := string(out[:n+nDst]), tt.outFull; got != want { t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want) } tt.t.Reset() p := 0 for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ { } if tt.nSpan != 0 { p = tt.nSpan } if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan { t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan) } } func idem(r rune) rune { return r } func TestMap(t *testing.T) { runes := []rune{'a', 'ç', '中', '\U00012345', 'a'} // Default mapper used for this test. rotate := Map(func(r rune) rune { for i, m := range runes { if m == r { return runes[i+1] } } return r }) for i, tt := range []transformTest{{ desc: "empty", szDst: large, atEOF: true, in: "", out: "", outFull: "", t: rotate, }, { desc: "no change", szDst: 1, atEOF: true, in: "b", out: "b", outFull: "b", t: rotate, }, { desc: "short dst", szDst: 2, atEOF: true, in: "aaaa", out: "ç", outFull: "çççç", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "short dst ascii, no change", szDst: 2, atEOF: true, in: "bbb", out: "bb", outFull: "bbb", err: transform.ErrShortDst, t: rotate, }, { desc: "short dst writing error", szDst: 2, atEOF: false, in: "a\x80", out: "ç", outFull: "ç\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "short dst writing incomplete rune", szDst: 2, atEOF: true, in: "a\xc0", out: "ç", outFull: "ç\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "short dst, longer", szDst: 5, atEOF: true, in: "Hellø", out: "Hell", outFull: "Hellø", err: transform.ErrShortDst, t: rotate, }, { desc: "short dst, single", szDst: 1, atEOF: false, in: "ø", out: "", outFull: "ø", err: transform.ErrShortDst, t: Map(idem), }, { desc: "short dst, longer, writing error", szDst: 8, atEOF: false, in: "\x80Hello\x80", out: "\ufffdHello", outFull: "\ufffdHello\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "short src", szDst: 2, atEOF: false, in: "a\xc2", out: "ç", outFull: "ç\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "invalid input, atEOF", szDst: large, atEOF: true, in: "\x80", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "invalid input, !atEOF", szDst: large, atEOF: false, in: "\x80", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "incomplete rune !atEOF", szDst: large, atEOF: false, in: "\xc2", out: "", outFull: "\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrShortSrc, t: rotate, }, { desc: "invalid input, incomplete rune atEOF", szDst: large, atEOF: true, in: "\xc2", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "misc correct", szDst: large, atEOF: true, in: "a\U00012345 ç!", out: "ça 中!", outFull: "ça 中!", errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "misc correct and invalid", szDst: large, atEOF: true, in: "Hello\x80 w\x80orl\xc0d!\xc0", out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "misc correct and invalid, short src", szDst: large, atEOF: false, in: "Hello\x80 w\x80orl\xc0d!\xc2", out: "Hello\ufffd w\ufffdorl\ufffdd!", outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrEndOfSpan, t: rotate, }, { desc: "misc correct and invalid, short src, replacing RuneError", szDst: large, atEOF: false, in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2", out: "Hel?lo? w?orl?d!", outFull: "Hel?lo? w?orl?d!?", errSpan: transform.ErrEndOfSpan, err: transform.ErrShortSrc, t: Map(func(r rune) rune { if r == utf8.RuneError { return '?' } return r }), }} { tt.check(t, i) } } func TestRemove(t *testing.T) { remove := Remove(Predicate(func(r rune) bool { return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r) })) for i, tt := range []transformTest{ 0: { szDst: large, atEOF: true, in: "", out: "", outFull: "", t: remove, }, 1: { szDst: 0, atEOF: true, in: "aaaa", out: "", outFull: "", errSpan: transform.ErrEndOfSpan, t: remove, }, 2: { szDst: 1, atEOF: true, in: "aaaa", out: "", outFull: "", errSpan: transform.ErrEndOfSpan, t: remove, }, 3: { szDst: 1, atEOF: true, in: "baaaa", out: "b", outFull: "b", errSpan: transform.ErrEndOfSpan, t: remove, }, 4: { szDst: 2, atEOF: true, in: "açaaa", out: "ç", outFull: "ç", errSpan: transform.ErrEndOfSpan, t: remove, }, 5: { szDst: 2, atEOF: true, in: "aaaç", out: "ç", outFull: "ç", errSpan: transform.ErrEndOfSpan, t: remove, }, 6: { szDst: 2, atEOF: false, in: "a\x80", out: "", outFull: "\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: remove, }, 7: { szDst: 1, atEOF: true, in: "a\xc0", out: "", outFull: "\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: remove, }, 8: { szDst: 1, atEOF: false, in: "a\xc2", out: "", outFull: "\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrEndOfSpan, t: remove, }, 9: { szDst: large, atEOF: true, in: "\x80", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: remove, }, 10: { szDst: large, atEOF: false, in: "\x80", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: remove, }, 11: { szDst: large, atEOF: true, in: "\xc2", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: remove, }, 12: { szDst: large, atEOF: false, in: "\xc2", out: "", outFull: "\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrShortSrc, t: remove, }, 13: { szDst: large, atEOF: true, in: "Hello \U00012345world!", out: "Hll wrld!", outFull: "Hll wrld!", errSpan: transform.ErrEndOfSpan, t: remove, }, 14: { szDst: large, atEOF: true, in: "Hello\x80 w\x80orl\xc0d!\xc0", out: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd", outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd", errSpan: transform.ErrEndOfSpan, t: remove, }, 15: { szDst: large, atEOF: false, in: "Hello\x80 w\x80orl\xc0d!\xc2", out: "Hll\ufffd w\ufffdrl\ufffdd!", outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrEndOfSpan, t: remove, }, 16: { szDst: large, atEOF: false, in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2", out: "Hello world!", outFull: "Hello world!", err: transform.ErrShortSrc, errSpan: transform.ErrEndOfSpan, t: Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })), }, 17: { szDst: 4, atEOF: true, in: "Hellø", out: "Hll", outFull: "Hllø", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: remove, }, 18: { szDst: 4, atEOF: false, in: "Hellø", out: "Hll", outFull: "Hllø", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: remove, }, 19: { szDst: 8, atEOF: false, in: "\x80Hello\uFF24\x80", out: "\ufffdHll", outFull: "\ufffdHll\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: remove, }, 20: { szDst: 8, atEOF: false, in: "Hllll", out: "Hllll", outFull: "Hllll", t: remove, }} { tt.check(t, i) } } func TestReplaceIllFormed(t *testing.T) { replace := ReplaceIllFormed() for i, tt := range []transformTest{ 0: { szDst: large, atEOF: true, in: "", out: "", outFull: "", t: replace, }, 1: { szDst: 1, atEOF: true, in: "aa", out: "a", outFull: "aa", err: transform.ErrShortDst, t: replace, }, 2: { szDst: 1, atEOF: true, in: "a\x80", out: "a", outFull: "a\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: replace, }, 3: { szDst: 1, atEOF: true, in: "a\xc2", out: "a", outFull: "a\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: replace, }, 4: { szDst: large, atEOF: true, in: "\x80", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: replace, }, 5: { szDst: large, atEOF: false, in: "\x80", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: replace, }, 6: { szDst: large, atEOF: true, in: "\xc2", out: "\ufffd", outFull: "\ufffd", errSpan: transform.ErrEndOfSpan, t: replace, }, 7: { szDst: large, atEOF: false, in: "\xc2", out: "", outFull: "\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrShortSrc, t: replace, }, 8: { szDst: large, atEOF: true, in: "Hello world!", out: "Hello world!", outFull: "Hello world!", t: replace, }, 9: { szDst: large, atEOF: true, in: "Hello\x80 w\x80orl\xc2d!\xc2", out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", errSpan: transform.ErrEndOfSpan, t: replace, }, 10: { szDst: large, atEOF: false, in: "Hello\x80 w\x80orl\xc2d!\xc2", out: "Hello\ufffd w\ufffdorl\ufffdd!", outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", err: transform.ErrShortSrc, errSpan: transform.ErrEndOfSpan, t: replace, }, 16: { szDst: 10, atEOF: false, in: "\x80Hello\x80", out: "\ufffdHello", outFull: "\ufffdHello\ufffd", err: transform.ErrShortDst, errSpan: transform.ErrEndOfSpan, t: replace, }, 17: { szDst: 10, atEOF: false, in: "\ufffdHello\ufffd", out: "\ufffdHello", outFull: "\ufffdHello\ufffd", err: transform.ErrShortDst, t: replace, }, } { tt.check(t, i) } } func TestMapAlloc(t *testing.T) { if n := testtext.AllocsPerRun(3, func() { Map(idem).Transform(nil, nil, false) }); n > 0 { t.Errorf("got %f; want 0", n) } } func rmNop(r rune) bool { return false } func TestRemoveAlloc(t *testing.T) { if n := testtext.AllocsPerRun(3, func() { Remove(Predicate(rmNop)).Transform(nil, nil, false) }); n > 0 { t.Errorf("got %f; want 0", n) } } func TestReplaceIllFormedAlloc(t *testing.T) { if n := testtext.AllocsPerRun(3, func() { ReplaceIllFormed().Transform(nil, nil, false) }); n > 0 { t.Errorf("got %f; want 0", n) } } func doBench(b *testing.B, t Transformer) { for _, bc := range []struct{ name, data string }{ {"ascii", testtext.ASCII}, {"3byte", testtext.ThreeByteUTF8}, } { dst := make([]byte, 2*len(bc.data)) src := []byte(bc.data) testtext.Bench(b, bc.name+"/transform", func(b *testing.B) { b.SetBytes(int64(len(src))) for i := 0; i < b.N; i++ { t.Transform(dst, src, true) } }) src = t.Bytes(src) t.Reset() testtext.Bench(b, bc.name+"/span", func(b *testing.B) { b.SetBytes(int64(len(src))) for i := 0; i < b.N; i++ { t.Span(src, true) } }) } } func BenchmarkRemove(b *testing.B) { doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' }))) } func BenchmarkMapAll(b *testing.B) { doBench(b, Map(func(r rune) rune { return 'a' })) } func BenchmarkMapNone(b *testing.B) { doBench(b, Map(func(r rune) rune { return r })) } func BenchmarkReplaceIllFormed(b *testing.B) { doBench(b, ReplaceIllFormed()) } var ( input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100) )