summaryrefslogtreecommitdiff
path: root/vietoaq
diff options
context:
space:
mode:
Diffstat (limited to 'vietoaq')
-rw-r--r--vietoaq/vietoaq.go128
-rw-r--r--vietoaq/vietoaq_test.go33
2 files changed, 161 insertions, 0 deletions
diff --git a/vietoaq/vietoaq.go b/vietoaq/vietoaq.go
new file mode 100644
index 0000000..b4375e7
--- /dev/null
+++ b/vietoaq/vietoaq.go
@@ -0,0 +1,128 @@
+package vietoaq
+
+import (
+ "regexp"
+ "strings"
+
+ "golang.org/x/text/unicode/norm"
+)
+
+var (
+ toneMap = "\u0304\u0301\u0308\u0309\u0302\u0300\u0303"
+ vietoaqMap = [7][2]rune{
+ {'r', 'l'}, {'p', 'b'}, {'x', 'z'}, {'n', 'm'}, {'t', 'd'}, {'k', 'g'}, {'f', 'v'}}
+ RegularSyllable = regexp.MustCompile(
+ `([bcdfghjklmnprstz']?|[cs]h)` + // onset
+ `([aeiuoyı])` + // first vowel of nucleus
+ `([` + toneMap + `]?)` + // tone
+ `([aeiouyı]{0,2})` + // remaining nucleus vowels
+ `(q?)` ) // regular coda
+ VietoaqSyllable = regexp.MustCompile(
+ `([bcdfghjklmnprstxz]|[cs]h)` + // onset
+ `([aeiuoy]{1,3})` + // nucleus
+ `([qrlpbxznmtdkgfv]?)` ) // Vietoaq coda
+)
+
+func toTransform(syll []string, padding bool) string {
+ onset, vow, tone, vows, coda :=
+ syll[1], syll[2], syll[3], syll[4], syll[5]
+ if onset == "" || onset == "'" {
+ onset = "x"
+ }
+ if tone != "" {
+ var qful int
+ if coda == "q" {
+ qful = 1
+ } else {
+ qful = 0
+ }
+ coda = string(vietoaqMap[strings.Index(toneMap, tone) / 2][qful])
+ }
+ return onset + strings.ReplaceAll(vow + vows, "ı", "i") + coda
+}
+
+func fromTransform(syll []string, padding bool) string {
+ onset, vow, tone, vows, coda :=
+ syll[1], syll[2][0:1], "", syll[2][1:], syll[3]
+ if coda != "" && coda != "q" {
+ codaRune := rune(coda[0])
+ var ii, jj int
+ for i, arr := range vietoaqMap {
+ for j, char := range arr {
+ if char == codaRune {
+ ii, jj = i, j
+ break
+ }
+ }
+ }
+ if jj == 1 {
+ coda = "q"
+ } else {
+ coda = ""
+ }
+ tone = string([]rune(toneMap)[ii])
+ if onset == "x" {
+ onset = ""
+ }
+ } else if vow == "i" {
+ vow = "ı"
+ }
+ if onset == "x" {
+ if padding || tone != "" {
+ onset = ""
+ } else {
+ onset = "'"
+ }
+ }
+ return onset + norm.NFC.String(vow + tone) +
+ strings.ReplaceAll(vows, "i", "ı") + coda
+}
+
+func To(regular string) string {
+ return syllableTransform(regular, RegularSyllable, toTransform)
+}
+
+func From(vietoaq string) string {
+ return syllableTransform(vietoaq, VietoaqSyllable, fromTransform)
+}
+
+func syllableTransform(input string, r *regexp.Regexp,
+ transform func([]string, bool)string) string {
+ interleaved := Syllables(strings.ToLower(norm.NFD.String(input)), r)
+ var sb strings.Builder
+ for i, s := range interleaved {
+ if i % 2 == 1 {
+ sb.WriteString(transform(s, interleaved[i - 1][0] != "" || i == 1))
+ } else {
+ sb.WriteString(s[0])
+ }
+ }
+ return norm.NFC.String(sb.String())
+}
+
+// returns an array of junk and Toaq, interleaved
+func Syllables(s string, r *regexp.Regexp) [][]string {
+ acc := [][]string{}
+ for {
+ bounds := r.FindStringSubmatchIndex(s)
+ if bounds == nil {
+ break
+ }
+ preemptive := r.FindStringSubmatchIndex(s[bounds[1] - 1:])
+ if preemptive != nil && preemptive[0] == 0 &&
+ bounds[len(bounds) - 1] - bounds[len(bounds) - 2] > 0 {
+ bounds[1]--
+ bounds[len(bounds) - 1]--
+ }
+ acc = append(acc, []string{s[:bounds[0]]})
+ ln := len(bounds) / 2
+ contentful := make([]string, ln)
+ for j := 0; j < ln; j++ {
+ contentful[j] = s[bounds[2 * j]:bounds[2 * j + 1]]
+ }
+ acc = append(acc, contentful)
+ s = s[bounds[1]:]
+ }
+ acc = append(acc, []string{s})
+ return acc
+}
diff --git a/vietoaq/vietoaq_test.go b/vietoaq/vietoaq_test.go
new file mode 100644
index 0000000..15a2455
--- /dev/null
+++ b/vietoaq/vietoaq_test.go
@@ -0,0 +1,33 @@
+package vietoaq
+
+import (
+ "testing"
+)
+
+var (
+ examples = map[string]string{
+ ``: ``,
+ `bbb`: `bbb`,
+ `jảq hủı óq`: `jam huin xob`,
+ `ýhō`: `xyphor`,
+ `gı'aq`: `gixaq`,
+ `gï'aq`: `gixxaq`,
+ `jảq'a`: `jamxa`,
+ `gï aq`: `gix xaq`,
+ `aq'aq aq`: `xaqxaq xaq`,
+ }
+)
+
+func TestVietoaq(t *testing.T) {
+ for regular, vietoaq := range examples {
+ vietoaq_ := To(regular)
+ if vietoaq_ != vietoaq {
+ t.Errorf(" to: %s -> %s != %s", regular, vietoaq_, vietoaq)
+ }
+ regular_ := From(vietoaq)
+ if regular_ != regular {
+ t.Errorf("from: %s -> %s != %s", vietoaq, regular_, regular)
+ }
+ }
+}
+