Source code for ctnx.legacy

# -*- coding: utf-8 -*-

"""This module contains legacy functions, classes and unused functions."""

import unicodedata
from .misc import separate_tone
from .constants import CHAR_ORDER_DICT

# Combining marks for the five written tones: grave, acute, hook above,
# tilde and dot below.
_TONE_MARKS = ('\u0300', '\u0301', '\u0309', '\u0303', '\u0323')

# Tone-free vowel letters that can carry one of the marks above.
_TONE_BASES = 'aăâeêioôơuưyAĂÂEÊIOÔƠUƯY'


def _build_untone_table():
    """Return a dict mapping each toned vowel to its tone-free base letter."""
    table = {}
    for base in _TONE_BASES:
        for mark in _TONE_MARKS:
            # NFC turns "base + combining mark" into the precomposed letter.
            table[unicodedata.normalize('NFC', base + mark)] = base
    # A stand-alone combining tone mark (decomposed input) is simply dropped.
    for mark in _TONE_MARKS:
        table[mark] = ''
    return table


# Built once at import time instead of on every call.
_UNTONE_TABLE = _build_untone_table()


def remove_tones(t):
    """Strip tone marks from ``t`` while keeping the vowel quality marks.

    Precomposed toned vowels are replaced by their tone-free letter (the
    breve, circumflex and horn are kept), and the five combining tone marks
    are removed. Every other character is passed through unchanged.

    Args:
        t: The text to process (any iterable of one-character strings).

    Returns:
        A new string with the tones removed.
    """
    return ''.join(_UNTONE_TABLE.get(c, c) for c in t)
# Candidate lists are ordered so that a longer spelling is tried before any
# of its own prefixes (e.g. 'ngh' before 'ng' before 'n').
_HEAD_CONSONANTS = (
    'b', 'ch', 'c', 'd', 'đ', 'gh', 'gi', 'g', 'h', 'kh', 'k', 'l', 'm',
    'ngh', 'ng', 'nh', 'n', 'ph', 'p', 'qu', 'r', 's', 'th', 'tr', 't',
    'v', 'x',
)
_VOWELS = ('a', 'ă', 'â', 'e', 'ê', 'i', 'o', 'ô', 'ơ', 'u', 'ư', 'y')
_DIPHTHONGS = (
    'ai', 'ao', 'au', 'ay', 'âu', 'ây', 'eo', 'êu', 'ia', 'iê', 'iu', 'oa',
    'oe', 'oi', 'oo', 'ôi', 'ơi', 'ua', 'uâ', 'uê', 'ui', 'uô', 'uơ', 'uy',
    'ưa', 'ưi', 'ươ', 'ưu', 'yê',
)
_TRIPHTHONGS = (
    'iêu', 'oai', 'oay', 'uay', 'uây', 'uya', 'ươi', 'ươu', 'yêu',
)
_SPECIAL_TRIPHTHONGS = ('uyê',)
_PHTHONGS = _SPECIAL_TRIPHTHONGS + _TRIPHTHONGS + _DIPHTHONGS + _VOWELS
_TAIL_CONSONANTS = ('ch', 'c', 'm', 'ng', 'nh', 'n', 'p', 't')


def _take_prefix(text, candidates):
    """Split off the first candidate that ``text`` starts with.

    Matching is case-insensitive, but the returned pieces keep the original
    casing. Returns ``(prefix, rest)``; ``prefix`` is ``''`` when nothing
    matches.
    """
    lowered = text.lower()
    for candidate in candidates:
        if lowered.startswith(candidate):
            size = len(candidate)
            return text[:size], text[size:]
    return '', text


def separate_syllable(t):
    """Split one syllable into head consonant, vowel cluster, tail and tone.

    The input is NFC-normalised first. The head consonant is removed before
    the tone is extracted with ``separate_tone``; the vowel cluster and the
    tail consonant are then matched on the tone-free remainder.

    Args:
        t: A single syllable. Text containing a space is rejected.

    Returns:
        ``[head, vowels, tail, tone]`` where each of the first three items
        is ``''`` when that part is absent and ``tone`` is whatever
        ``separate_tone`` reports, or ``None`` if ``t`` contains a space.
    """
    t = unicodedata.normalize('NFC', t)
    if ' ' in t:
        return None

    head, t = _take_prefix(t, _HEAD_CONSONANTS)
    t, tone = separate_tone(t)
    vowels, t = _take_prefix(t, _PHTHONGS)
    tail, t = _take_prefix(t, _TAIL_CONSONANTS)
    return [head, vowels, tail, tone]
# Tone symbol -> wording used for that tone in Unicode character names.
# '(Placeholder)' is kept with an empty wording so that passing it still
# ends in a failed name lookup, as it always has.
_TONE_NAME_SUFFIXES = {
    '(Placeholder)': '',
    '\\': 'GRAVE',
    '/': 'ACUTE',
    '?': 'HOOK ABOVE',
    '~': 'TILDE',
    '.': 'DOT BELOW',
}


def merge_tone(lett, atone):
    """Return the letter ``lett`` carrying the tone denoted by ``atone``.

    The toned letter is found by extending the Unicode name of ``lett``
    (``... WITH ACUTE``, or ``... AND ACUTE`` when the name already contains
    ``WITH``) and looking that name up again.

    Args:
        lett: A single character.
        atone: One of ``'\\'``, ``'/'``, ``'?'``, ``'~'``, ``'.'``. Any
            other value (for example ``''``) leaves the letter untouched.

    Returns:
        The single character that results from the name lookup.

    Raises:
        ValueError: If ``lett`` has no Unicode name.
        KeyError: If no character exists for the combined name.
    """
    name = unicodedata.name(lett)
    suffix = _TONE_NAME_SUFFIXES.get(atone)
    if suffix is not None:
        joiner = ' AND ' if 'WITH' in name else ' WITH '
        name = name + joiner + suffix
    return unicodedata.lookup(name)
def merge_syllable(hcon , phthong, tcon, tone, old=False):
    """Assemble a syllable from its parts, placing the tone on one vowel.

    Args:
        hcon: Head consonant (may be ``''``).
        phthong: Tone-free vowel cluster.
        tcon: Tail consonant (may be ``''``).
        tone: Tone symbol, passed on to ``merge_tone``.
        old: Use the old-style tone placement when true, the new-style
            placement otherwise.

    Returns:
        The head consonant, the toned vowel cluster and the tail consonant
        joined into one string. The head consonant is respelled (c/k,
        ng/ngh) to agree with the first vowel.
    """
    letters = list(phthong)

    def new_style_index():
        # Index of the vowel that takes the tone under the new rule.
        size = len(phthong)
        if size == 1:
            return 0
        if size == 2:
            if phthong == 'ua':
                return 1 if hcon == 'q' else 0
            second_wins = (phthong.startswith(('u', 'o'))
                           or letters[1] in {'â', 'ê', 'ô', 'ơ', 'ư'})
            return 1 if second_wins else 0
        if size == 3:
            return 2 if phthong in ('uyê',) else 1
        return None

    def old_style_index():
        # 'ê' and 'ơ' always attract the tone; otherwise go by length.
        for favoured in ('ê', 'ơ'):
            if favoured in phthong:
                return letters.index(favoured)
        size = len(phthong)
        if size == 1:
            return 0
        if size == 2:
            return 0 if tcon == '' else 1
        if size == 3:
            return 1
        return None

    # Respell the head consonant to match the first vowel of the cluster.
    respellings = (
        ('k', 'c', {'e', 'ê', 'i', 'y'}),
        ('ngh', 'ng', {'e', 'ê', 'i'}),
    )
    for before_front, elsewhere, front_vowels in respellings:
        if hcon == elsewhere and phthong[0] in front_vowels:
            hcon = before_front
        elif hcon == before_front and phthong[0] not in front_vowels:
            hcon = elsewhere

    # NOTE: a rule coercing the tone of syllables ending in c/p/t existed
    # here as commented-out code; it is not applied.

    position = old_style_index() if old else new_style_index()
    if position is not None:
        letters[position] = merge_tone(letters[position], tone)
    return ''.join((hcon, ''.join(letters), tcon))
def noilais(w):
    """Return the spoonerism ("nói lái") variants of the phrase ``w``.

    Syllables are paired from the outside in (first with last, second with
    second-to-last, ...) and one kind of part is exchanged within each pair:
    the head consonant, the vowel cluster together with the tail consonant,
    or the tone. Every variant is derived from the original syllables; the
    working list is copied so one variant never leaks into the next.

    Args:
        w: A phrase whose syllables are separated by single spaces.

    Returns:
        A list of six strings: for each of the three kinds of exchange, the
        variant of ``w`` followed by the variant of ``w`` with its syllable
        order reversed.
    """
    # Indexes into separate_syllable()'s [head, vowels, tail, tone] result
    # that are exchanged for each kind of swap.
    swap_plan = ((0,), (1, 2), (3,))

    def swapped(syllables, kind):
        result = list(syllables)  # never modify the caller's list
        for i in range(len(result) // 2):
            j = len(result) - 1 - i
            left = separate_syllable(result[i])
            right = separate_syllable(result[j])
            for part in swap_plan[kind]:
                left[part], right[part] = right[part], left[part]
            result[i] = merge_syllable(*left)
            result[j] = merge_syllable(*right)
        return ' '.join(result)

    forward = w.split(' ')
    backward = forward[::-1]
    variants = []
    for kind in range(len(swap_plan)):
        variants.append(swapped(forward, kind))
        variants.append(swapped(backward, kind))
    return variants
class visorted_key:
    """Sort key wrapper that orders strings with ``CHAR_ORDER_DICT``.

    Only ``<`` is implemented. The wrapped text is kept in ``t`` and its
    tone-free form (from ``remove_tones``) in ``untoned``.
    """

    def __init__(self, t=''):
        self.t = t
        self.untoned = remove_tones(t)

    def __repr__(self):
        return "<visort_key #{} '{}' ('{}')>".format(
            id(self), self.t, self.untoned)

    def __lt__(self, other):
        """Return whether this key sorts before ``other``."""
        mine, theirs = self.t, other.t
        my_len, their_len = len(mine), len(theirs)
        order = CHAR_ORDER_DICT

        for pos, (a, b) in enumerate(zip(mine, theirs)):
            if a == b:
                continue
            # First differing position decides the comparison.
            tone_only = self.untoned[pos] == other.untoned[pos]
            if tone_only and my_len != their_len:
                # Same letter apart from the tone: the shorter text wins.
                return my_len < their_len
            if a in order and b in order:
                return order[a] < order[b]
            # Fall back to code point order for characters not in the table.
            return a < b

        # One text is a prefix of the other (or they are equal).
        return my_len < their_len