Scrape Microsoft TTS supported languages (#91988)

* Update Microsoft TTS supported languages

`script.microsoft_tts` scrapes Microsoft Azure documentation for
the list of supported languages and saves them to
`homeassistant.generated.microsoft_tts` for use in the component.

This adds support for more TTS languages, like fa-ir (Persian).

* Improve xpath query for Microsoft TTS languages

* Remove asserts for Microsoft TTS languages

* Add more tests for Microsoft TTS languages
This commit is contained in:
Dara Adib 2023-05-25 04:46:34 -04:00 committed by GitHub
parent 6e8472b90f
commit 663f66a2b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 265 additions and 76 deletions

View File

@ -7,6 +7,7 @@ import voluptuous as vol
from homeassistant.components.tts import CONF_LANG, PLATFORM_SCHEMA, Provider
from homeassistant.const import CONF_API_KEY, CONF_REGION, CONF_TYPE, PERCENTAGE
from homeassistant.generated.microsoft_tts import SUPPORTED_LANGUAGES
import homeassistant.helpers.config_validation as cv
CONF_GENDER = "gender"
@ -17,80 +18,6 @@ CONF_PITCH = "pitch"
CONF_CONTOUR = "contour"
_LOGGER = logging.getLogger(__name__)
SUPPORTED_LANGUAGES = [
"ar-eg",
"ar-sa",
"bg-bg",
"ca-es",
"cs-cz",
"cy-gb",
"da-dk",
"de-at",
"de-ch",
"de-de",
"el-gr",
"en-au",
"en-ca",
"en-gb",
"en-hk",
"en-ie",
"en-in",
"en-nz",
"en-ph",
"en-sg",
"en-us",
"en-za",
"es-ar",
"es-co",
"es-es",
"es-mx",
"es-us",
"et-ee",
"fi-fi",
"fr-be",
"fr-ca",
"fr-ch",
"fr-fr",
"ga-ie",
"gu-in",
"he-il",
"hi-in",
"hr-hr",
"hu-hu",
"id-id",
"is-is",
"it-it",
"ja-jp",
"ko-kr",
"lt-lt",
"lv-lv",
"mr-in",
"ms-my",
"mt-mt",
"nb-no",
"nl-be",
"nl-nl",
"pl-pl",
"pt-br",
"pt-pt",
"ro-ro",
"ru-ru",
"sk-sk",
"sl-si",
"sv-se",
"sw-ke",
"ta-in",
"te-in",
"th-th",
"tr-tr",
"uk-ua",
"ur-pk",
"vi-vn",
"zh-cn",
"zh-hk",
"zh-tw",
]
GENDERS = ["Female", "Male"]
DEFAULT_LANG = "en-us"

View File

@ -0,0 +1,154 @@
"""Automatically generated file.
To update, run python3 -m script.microsoft_tts
"""
# Lower-case locale codes for Microsoft Text-to-Speech, as scraped from the
# Azure language-support documentation by script.microsoft_tts.
# This module is rewritten in full each time that script runs, so manual
# edits made here do not survive regeneration.
SUPPORTED_LANGUAGES = {
"af-za",
"am-et",
"ar-ae",
"ar-bh",
"ar-dz",
"ar-eg",
"ar-iq",
"ar-jo",
"ar-kw",
"ar-lb",
"ar-ly",
"ar-ma",
"ar-om",
"ar-qa",
"ar-sa",
"ar-sy",
"ar-tn",
"ar-ye",
"az-az",
"bg-bg",
"bn-bd",
"bn-in",
"bs-ba",
"ca-es",
"cs-cz",
"cy-gb",
"da-dk",
"de-at",
"de-ch",
"de-de",
"el-gr",
"en-au",
"en-ca",
"en-gb",
"en-hk",
"en-ie",
"en-in",
"en-ke",
"en-ng",
"en-nz",
"en-ph",
"en-sg",
"en-tz",
"en-us",
"en-za",
"es-ar",
"es-bo",
"es-cl",
"es-co",
"es-cr",
"es-cu",
"es-do",
"es-ec",
"es-es",
"es-gq",
"es-gt",
"es-hn",
"es-mx",
"es-ni",
"es-pa",
"es-pe",
"es-pr",
"es-py",
"es-sv",
"es-us",
"es-uy",
"es-ve",
"et-ee",
"eu-es",
"fa-ir",
"fi-fi",
"fil-ph",
"fr-be",
"fr-ca",
"fr-ch",
"fr-fr",
"ga-ie",
"gl-es",
"gu-in",
"he-il",
"hi-in",
"hr-hr",
"hu-hu",
"hy-am",
"id-id",
"is-is",
"it-it",
"ja-jp",
"jv-id",
"ka-ge",
"kk-kz",
"km-kh",
"kn-in",
"ko-kr",
"lo-la",
"lt-lt",
"lv-lv",
"mk-mk",
"ml-in",
"mn-mn",
"mr-in",
"ms-my",
"mt-mt",
"my-mm",
"nb-no",
"ne-np",
"nl-be",
"nl-nl",
"pl-pl",
"ps-af",
"pt-br",
"pt-pt",
"ro-ro",
"ru-ru",
"si-lk",
"sk-sk",
"sl-si",
"so-so",
"sq-al",
"sr-rs",
"su-id",
"sv-se",
"sw-ke",
"sw-tz",
"ta-in",
"ta-lk",
"ta-my",
"ta-sg",
"te-in",
"th-th",
"tr-tr",
"uk-ua",
"ur-in",
"ur-pk",
"uz-uz",
"vi-vn",
"wuu-cn",
"yue-cn",
"zh-cn",
"zh-cn-henan",
"zh-cn-liaoning",
"zh-cn-shaanxi",
"zh-cn-shandong",
"zh-cn-sichuan",
"zh-hk",
"zh-tw",
"zu-za",
}

25
script/microsoft_tts.py Normal file
View File

@ -0,0 +1,25 @@
"""Helper script to update supported languages for Microsoft Text-to-Speech (TTS)."""
from pathlib import Path
from lxml import html
import requests
from .hassfest.serializer import format_python_namespace
# Documentation page whose text-to-speech tab lists the supported locales.
URL = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support"

# Selects the text of every <code> element in the first column of the first
# table inside the section tagged data-tab='tts'.
XPATH_QUERY = "//section[@data-tab='tts']/table[1]/tbody/tr/td[1]/code/text()"

# requests applies no timeout unless one is given, so an unresponsive server
# would otherwise block this script forever.
REQUEST_TIMEOUT = 30  # seconds

# Output path is relative to the current working directory.
OUTPUT_PATH = Path("homeassistant/generated/microsoft_tts.py")

req = requests.get(URL, timeout=REQUEST_TIMEOUT)
req.raise_for_status()

tree = html.fromstring(req.content)
supported_languages_raw = tree.xpath(XPATH_QUERY)

# Codes are stored lower-cased; the set also removes duplicates.
supported_languages = {s.lower() for s in supported_languages_raw}

# If the page layout changes and the query matches nothing, stop here rather
# than overwrite the generated module with an empty set.
if not supported_languages:
    raise SystemExit(
        f"No languages matched {XPATH_QUERY!r} at {URL}; "
        f"refusing to overwrite {OUTPUT_PATH}"
    )

OUTPUT_PATH.write_text(
    format_python_namespace(
        {
            "SUPPORTED_LANGUAGES": supported_languages,
        },
        generator="script.microsoft_tts",
    )
)

View File

@ -173,9 +173,92 @@ async def test_service_say_en_gb_service(hass: HomeAssistant, mock_tts, calls) -
}
async def test_service_say_fa_ir_config(hass: HomeAssistant, mock_tts, calls) -> None:
    """Check that an fa-ir language set in the platform config is used.

    The platform is configured with language "fa-ir" and voice type
    "DilaraNeural"; the service call itself passes no language. The keyword
    arguments of the second call recorded on ``mock_tts`` must carry both.
    """
    platform_config = {
        "platform": "microsoft",
        "api_key": "",
        "language": "fa-ir",
        "type": "DilaraNeural",
    }
    service_data = {
        "entity_id": "media_player.something",
        tts.ATTR_MESSAGE: "There is a person at the front door.",
    }
    expected_kwargs = {
        "language": "fa-ir",
        "gender": "Female",
        "voiceType": "DilaraNeural",
        "output": "audio-24khz-96kbitrate-mono-mp3",
        "rate": "0%",
        "volume": "0%",
        "pitch": "default",
        "contour": "",
        "text": "There is a person at the front door.",
    }

    await async_setup_component(hass, tts.DOMAIN, {tts.DOMAIN: platform_config})
    await hass.services.async_call(
        tts.DOMAIN, "microsoft_say", service_data, blocking=True
    )

    assert len(calls) == 1
    media_content_id = calls[0].data[ATTR_MEDIA_CONTENT_ID]
    await get_media_source_url(hass, media_content_id)

    assert len(mock_tts.mock_calls) == 2
    # Index 2 of a recorded mock call is its keyword-argument dict.
    assert mock_tts.mock_calls[1][2] == expected_kwargs
async def test_service_say_fa_ir_service(hass: HomeAssistant, mock_tts, calls) -> None:
    """Check that an fa-ir language passed in the service call is used.

    The platform config names no language; "fa-ir" and the "DilaraNeural"
    voice type are supplied through the service call's language and options
    fields. The keyword arguments of the second call recorded on ``mock_tts``
    must carry both.
    """
    await async_setup_component(
        hass,
        tts.DOMAIN,
        {
            tts.DOMAIN: {
                "platform": "microsoft",
                "api_key": "",
                "service_name": "microsoft_say",
            }
        },
    )

    service_data = {
        "entity_id": "media_player.something",
        tts.ATTR_MESSAGE: "There is a person at the front door.",
        tts.ATTR_LANGUAGE: "fa-ir",
        tts.ATTR_OPTIONS: {"type": "DilaraNeural"},
    }
    await hass.services.async_call(
        tts.DOMAIN, "microsoft_say", service_data, blocking=True
    )

    assert len(calls) == 1
    media_content_id = calls[0].data[ATTR_MEDIA_CONTENT_ID]
    await get_media_source_url(hass, media_content_id)

    expected_kwargs = {
        "language": "fa-ir",
        "gender": "Female",
        "voiceType": "DilaraNeural",
        "output": "audio-24khz-96kbitrate-mono-mp3",
        "rate": "0%",
        "volume": "0%",
        "pitch": "default",
        "contour": "",
        "text": "There is a person at the front door.",
    }
    assert len(mock_tts.mock_calls) == 2
    # Index 2 of a recorded mock call is its keyword-argument dict.
    assert mock_tts.mock_calls[1][2] == expected_kwargs
def test_supported_languages() -> None:
"""Test list of supported languages."""
for lang in ["en-us", "en-gb"]:
for lang in ["en-us", "fa-ir", "en-gb"]:
assert lang in SUPPORTED_LANGUAGES
assert "en-US" not in SUPPORTED_LANGUAGES
for lang in [
@ -187,7 +270,7 @@ def test_supported_languages() -> None:
"en-us-jennyneural",
]:
assert lang not in {s.lower() for s in SUPPORTED_LANGUAGES}
assert len(SUPPORTED_LANGUAGES) > 70
assert len(SUPPORTED_LANGUAGES) > 100
async def test_invalid_language(hass: HomeAssistant, mock_tts, calls) -> None: