diff options
Diffstat (limited to 'vietoaq/vietoaq.go')
| -rw-r--r-- | vietoaq/vietoaq.go | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/vietoaq/vietoaq.go b/vietoaq/vietoaq.go new file mode 100644 index 0000000..b4375e7 --- /dev/null +++ b/vietoaq/vietoaq.go @@ -0,0 +1,128 @@ +package vietoaq + +import ( + "regexp" + "strings" + + "golang.org/x/text/unicode/norm" +) + +var ( + toneMap = "\u0304\u0301\u0308\u0309\u0302\u0300\u0303" + vietoaqMap = [7][2]rune{ + {'r', 'l'}, {'p', 'b'}, {'x', 'z'}, {'n', 'm'}, {'t', 'd'}, {'k', 'g'}, {'f', 'v'}} + RegularSyllable = regexp.MustCompile( + `([bcdfghjklmnprstz']?|[cs]h)` + // onset + `([aeiuoyı])` + // first vowel of nucleus + `([` + toneMap + `]?)` + // tone + `([aeiouyı]{0,2})` + // remaining nucleus vowels + `(q?)` ) // regular coda + VietoaqSyllable = regexp.MustCompile( + `([bcdfghjklmnprstxz]|[cs]h)` + // onset + `([aeiuoy]{1,3})` + // nucleus + `([qrlpbxznmtdkgfv]?)` ) // Vietoaq coda +) + +func toTransform(syll []string, padding bool) string { + onset, vow, tone, vows, coda := + syll[1], syll[2], syll[3], syll[4], syll[5] + if onset == "" || onset == "'" { + onset = "x" + } + if tone != "" { + var qful int + if coda == "q" { + qful = 1 + } else { + qful = 0 + } + coda = string(vietoaqMap[strings.Index(toneMap, tone) / 2][qful]) + } + return onset + strings.ReplaceAll(vow + vows, "ı", "i") + coda +} + +func fromTransform(syll []string, padding bool) string { + onset, vow, tone, vows, coda := + syll[1], syll[2][0:1], "", syll[2][1:], syll[3] + if coda != "" && coda != "q" { + codaRune := rune(coda[0]) + var ii, jj int + for i, arr := range vietoaqMap { + for j, char := range arr { + if char == codaRune { + ii, jj = i, j + break + } + } + } + if jj == 1 { + coda = "q" + } else { + coda = "" + } + tone = string([]rune(toneMap)[ii]) + if onset == "x" { + onset = "" + } + } else if vow == "i" { + vow = "ı" + } + if onset == "x" { + if padding || tone != "" { + onset = "" + } else { + onset = "'" + } + } + return onset + norm.NFC.String(vow + tone) + + strings.ReplaceAll(vows, "i", "ı") + coda +} + +func To(regular string) string { + return syllableTransform(regular, RegularSyllable, toTransform) +} + +func From(vietoaq string) string { + return syllableTransform(vietoaq, VietoaqSyllable, fromTransform) +} + +func syllableTransform(input string, r *regexp.Regexp, + transform func([]string, bool)string) string { + interleaved := Syllables(strings.ToLower(norm.NFD.String(input)), r) + var sb strings.Builder + for i, s := range interleaved { + if i % 2 == 1 { + sb.WriteString(transform(s, interleaved[i - 1][0] != "" || i == 1)) + } else { + sb.WriteString(s[0]) + } + } + return norm.NFC.String(sb.String()) +} + +// returns an array of junk and Toaq, interleaved +func Syllables(s string, r *regexp.Regexp) [][]string { + acc := [][]string{} + for { + bounds := r.FindStringSubmatchIndex(s) + if bounds == nil { + break + } + preemptive := r.FindStringSubmatchIndex(s[bounds[1] - 1:]) + if preemptive != nil && preemptive[0] == 0 && + bounds[len(bounds) - 1] - bounds[len(bounds) - 2] > 0 { + bounds[1]-- + bounds[len(bounds) - 1]-- + } + acc = append(acc, []string{s[:bounds[0]]}) + ln := len(bounds) / 2 + contentful := make([]string, ln) + for j := 0; j < ln; j++ { + contentful[j] = s[bounds[2 * j]:bounds[2 * j + 1]] + } + acc = append(acc, contentful) + s = s[bounds[1]:] + } + acc = append(acc, []string{s}) + return acc +} |
