1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
package vietoaq
import (
"regexp"
"strings"
"golang.org/x/text/unicode/norm"
)
var (
	// toneMap holds the combining diacritics accepted as tone marks, in order:
	// macron, acute, diaeresis, hook above, circumflex, grave, tilde.
	// Each mark occupies two bytes in UTF-8.
	toneMap = "\u0304\u0301\u0308\u0309\u0302\u0300\u0303"

	// vietoaqMap gives, for the tone at the same position in toneMap, the
	// Vietoaq coda letter that encodes it: column 0 is used when the
	// syllable has no "q" coda, column 1 when it does.
	vietoaqMap = [7][2]rune{
		{'r', 'l'},
		{'p', 'b'},
		{'x', 'z'},
		{'n', 'm'},
		{'t', 'd'},
		{'k', 'g'},
		{'f', 'v'},
	}

	// RegularSyllable matches one syllable in the regular orthography, with
	// the tone written as a combining mark directly after the first vowel.
	// Capture groups: 1 onset, 2 first vowel, 3 tone mark, 4 remaining
	// vowels, 5 coda.
	RegularSyllable = regexp.MustCompile(
		`([bcdfghjklmnprstz']?|[cs]h)` + // onset
			`([aeiuoyı])` + // first vowel of nucleus
			`([` + toneMap + `]?)` + // tone
			`([aeiouyı]{0,2})` + // remaining nucleus vowels
			`(q?)`) // regular coda

	// VietoaqSyllable matches one syllable in the Vietoaq orthography, where
	// the final consonant carries the tone.
	// Capture groups: 1 onset, 2 nucleus, 3 coda.
	VietoaqSyllable = regexp.MustCompile(
		`([bcdfghjklmnprstxz]|[cs]h)` + // onset
			`([aeiuoy]{1,3})` + // nucleus
			`([qrlpbxznmtdkgfv]?)`) // Vietoaq coda
)
func toTransform(syll []string, padding bool) string {
onset, vow, tone, vows, coda :=
syll[1], syll[2], syll[3], syll[4], syll[5]
if onset == "" || onset == "'" {
onset = "x"
}
if tone != "" {
var qful int
if coda == "q" {
qful = 1
} else {
qful = 0
}
coda = string(vietoaqMap[strings.Index(toneMap, tone) / 2][qful])
}
return onset + strings.ReplaceAll(vow + vows, "ı", "i") + coda
}
// fromTransform rewrites one VietoaqSyllable match in the regular spelling.
//
// syll holds the submatches of the syllable: onset, nucleus and coda at
// indices 1 through 3. padding is true when the syllable starts the input or
// directly follows non-syllable text; it decides whether a toneless "x" onset
// is dropped or written as an apostrophe.
func fromTransform(syll []string, padding bool) string {
	onset, nucleus, coda := syll[1], syll[2], syll[3]
	// The nucleus is matched as ASCII vowels, so byte slicing is safe here.
	head, rest := nucleus[:1], nucleus[1:]
	tone := ""

	switch {
	case coda != "" && coda != "q":
		// A tone-bearing coda: find its row (tone) and column (q or no q).
		letter := rune(coda[0])
		row, col := 0, 0
		for r, pair := range vietoaqMap {
			for c, candidate := range pair {
				if candidate == letter {
					row, col = r, c
					break
				}
			}
		}
		coda = ""
		if col == 1 {
			coda = "q"
		}
		tone = string([]rune(toneMap)[row])
	case head == "i":
		// Without a tone mark the first vowel is written dotless.
		head = "ı"
	}

	if onset == "x" {
		// The placeholder onset disappears when a tone mark is present or
		// when padding is set; otherwise it is written as an apostrophe.
		onset = "'"
		if padding || tone != "" {
			onset = ""
		}
	}

	return onset + norm.NFC.String(head+tone) +
		strings.ReplaceAll(rest, "i", "ı") + coda
}
// To rewrites every syllable of regular that RegularSyllable recognizes into
// its Vietoaq spelling. Text between syllables is passed through; the whole
// input is lowercased and the result is returned in NFC form.
func To(regular string) string {
	converted := syllableTransform(regular, RegularSyllable, toTransform)
	return converted
}
// From rewrites every syllable of vietoaq that VietoaqSyllable recognizes into
// its regular spelling. Text between syllables is passed through; the whole
// input is lowercased and the result is returned in NFC form.
func From(vietoaq string) string {
	converted := syllableTransform(vietoaq, VietoaqSyllable, fromTransform)
	return converted
}
func syllableTransform(input string, r *regexp.Regexp,
transform func([]string, bool)string) string {
interleaved := Syllables(strings.ToLower(norm.NFD.String(input)), r)
var sb strings.Builder
for i, s := range interleaved {
if i % 2 == 1 {
sb.WriteString(transform(s, interleaved[i - 1][0] != "" || i == 1))
} else {
sb.WriteString(s[0])
}
}
return norm.NFC.String(sb.String())
}
// Syllables splits s into alternating runs of unmatched text ("junk") and
// syllables matched by r.
//
// The result always has odd length. Even indices hold a one-element slice with
// the text preceding the next syllable (possibly ""); odd indices hold the
// submatches of one syllable, with the whole match at element 0 followed by
// r's capture groups (a group that did not participate is ""); the final
// element holds whatever text follows the last syllable.
//
// When a match ends in a non-empty final capture group and the last byte of
// that group could itself start a new match, that byte is left for the next
// syllable instead. The hand-over is a single byte, so it assumes the final
// group ends in a one-byte character.
//
// Scanning stops at the first empty match: it carries no syllable and would
// otherwise never advance, so the remaining text is returned as the tail.
func Syllables(s string, r *regexp.Regexp) [][]string {
	var acc [][]string
	for {
		bounds := r.FindStringSubmatchIndex(s)
		if bounds == nil || bounds[0] == bounds[1] {
			break
		}

		// Only look ahead when the final group is non-empty, closes the
		// match, and the match would stay non-empty after giving a byte up.
		// This also avoids a second regexp search for every other syllable.
		last := len(bounds) - 1
		finalGroupEndsMatch := bounds[last] > bounds[last-1] && bounds[last] == bounds[1]
		if finalGroupEndsMatch && bounds[1]-bounds[0] > 1 {
			next := r.FindStringSubmatchIndex(s[bounds[1]-1:])
			if next != nil && next[0] == 0 {
				bounds[1]--
				// Without capture groups the "final group" is the match
				// itself, which has just been shortened.
				if last != 1 {
					bounds[last]--
				}
			}
		}

		acc = append(acc, []string{s[:bounds[0]]})

		groups := len(bounds) / 2
		contentful := make([]string, groups)
		for j := 0; j < groups; j++ {
			lo, hi := bounds[2*j], bounds[2*j+1]
			// Groups that did not participate are reported as -1 and
			// stay empty.
			if lo >= 0 && hi >= 0 {
				contentful[j] = s[lo:hi]
			}
		}
		acc = append(acc, contentful)

		s = s[bounds[1]:]
	}
	acc = append(acc, []string{s})
	return acc
}
|