Skip to content

Commit f2c86ac

Browse files
authored
Add B-K/umt5-thai-g2p-v2-0.5k (#1140)
* Add B-K/umt5-thai-g2p-v2-0.5k * Add umt5_thaig2p test
1 parent a1991ac commit f2c86ac

File tree

3 files changed

+41
-0
lines changed

3 files changed

+41
-0
lines changed

pythainlp/transliterate/core.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ def transliterate(
122122
* *tltk_ipa* - tltk, output is International Phonetic Alphabet (IPA)
123123
* *thaig2p_v2* - Thai Grapheme-to-Phoneme,
124124
output is IPA. https://huggingface.co/pythainlp/thaig2p-v2.0
125+
* *umt5_thaig2p* - Thai Grapheme-to-Phoneme,
126+
output is IPA, powered by UMT5.\
127+
https://huggingface.co/B-K/umt5-thai-g2p-v2-0.5k
125128
126129
:Example:
127130
::
@@ -174,6 +177,8 @@ def transliterate(
174177
from pythainlp.transliterate.iso_11940 import transliterate
175178
elif engine == "thaig2p_v2":
176179
from pythainlp.transliterate.thaig2p_v2 import transliterate
180+
elif engine == "umt5_thaig2p":
181+
from pythainlp.transliterate.umt5_thaig2p import transliterate
177182
else: # use default engine: "thaig2p"
178183
from pythainlp.transliterate.thaig2p import transliterate
179184

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# -*- coding: utf-8 -*-
2+
# SPDX-FileCopyrightText: 2016-2025 PyThaiNLP Project
3+
# SPDX-FileType: SOURCE
4+
# SPDX-License-Identifier: Apache-2.0
5+
"""
6+
umt5-thai-g2p-v2-0.5k
7+
8+
huggingface: https://huggingface.co/B-K/umt5-thai-g2p-v2-0.5k
9+
"""
10+
11+
# Use a pipeline as a high-level helper
12+
from transformers import pipeline
13+
14+
15+
class Umt5ThaiG2P:
    """
    Latin transliteration of Thai words, using the International Phonetic
    Alphabet (IPA).

    Thin wrapper around a Hugging Face ``transformers`` pipeline loaded with
    the ``B-K/umt5-thai-g2p-v2-0.5k`` model.
    """

    # Pipeline task and model identifier used to build the pipeline.
    _TASK = "text2text-generation"
    _MODEL_NAME = "B-K/umt5-thai-g2p-v2-0.5k"

    def __init__(self, device: str = "cpu"):
        """
        Create the text-to-text generation pipeline.

        :param str device: device argument forwarded to
            ``transformers.pipeline`` (defaults to ``"cpu"``)
        """
        self.pipe = pipeline(
            self._TASK,
            model=self._MODEL_NAME,
            device=device,
        )

    def g2p(self, text: str) -> str:
        """
        Run the pipeline on *text* and return its generated text.

        :param str text: input text handed to the pipeline
        :return: the ``"generated_text"`` field of the first pipeline result
        :rtype: str
        """
        results = self.pipe(text)
        first_result = results[0]
        return first_result["generated_text"]
25+
26+
27+
# Lazily created engine shared by all calls, and the device it was built for.
_THAI_G2P = None
_THAI_G2P_DEVICE = None


def transliterate(text: str, device: str = "cpu") -> str:
    """
    Convert Thai text with the shared :class:`Umt5ThaiG2P` engine.

    The engine is created on first use and reused by later calls. It is
    rebuilt when a call requests a different device than the one the cached
    engine was created with, so ``device`` is honoured on every call rather
    than only on the first one.

    :param str text: text to convert
    :param str device: device passed to :class:`Umt5ThaiG2P`
        (defaults to ``"cpu"``)
    :return: text generated by the engine for *text*
    :rtype: str
    """
    global _THAI_G2P, _THAI_G2P_DEVICE
    # Rebuild on a device change; otherwise a later call asking for another
    # device would silently keep using the engine from the first call.
    if _THAI_G2P is None or _THAI_G2P_DEVICE != device:
        _THAI_G2P = Umt5ThaiG2P(device=device)
        _THAI_G2P_DEVICE = device
    return _THAI_G2P.g2p(text)

tests/extra/testx_transliterate.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ def test_transliterate(self):
140140
self.assertIsNotNone(transliterate("แมว", engine="thaig2p"))
141141
self.assertIsNotNone(transliterate("คน", engine="thaig2p_v2"))
142142
self.assertIsNotNone(transliterate("แมว", engine="thaig2p_v2"))
143+
self.assertIsNotNone(transliterate("คน", engine="umt5_thaig2p"))
144+
self.assertIsNotNone(transliterate("แมว", engine="umt5_thaig2p"))
143145
self.assertIsNotNone(transliterate("คน", engine="tltk_g2p"))
144146
self.assertIsNotNone(transliterate("แมว", engine="tltk_g2p"))
145147
self.assertIsNotNone(transliterate("คน", engine="tltk_ipa"))

0 commit comments

Comments
 (0)