| from __future__ import annotations |
|
|
| from dataclasses import dataclass |
| from typing import Any, Dict, List |
|
|
| from .constants import DEFAULT_ESPEAK_VOICE, EMOTION_TO_SYMBOL, INTENSITY_SYMBOLS |
|
|
|
|
@dataclass(frozen=True)
class PreparedInput:
    """Immutable bundle of everything derived from one synthesis request.

    Instances are frozen, so attributes cannot be reassigned after
    construction. The list-valued fields are ordinary lists and are not
    copied or protected by the dataclass itself.
    """

    # Text associated with this input.
    text: str
    # Symbol sequence for the utterance.
    phonemes: List[str]
    # Integer token ids for the utterance.
    token_ids: List[int]
    # Emotion name.
    emotion: str
    # Intensity value.
    intensity: float
    # Symbol standing for the emotion.
    emotion_symbol: str
    # Symbol standing for the intensity.
    intensity_symbol: str
|
|
|
|
def clamp_unit(value: float) -> float:
    """Clamp *value* into the closed interval [0.0, 1.0].

    Args:
        value: Number to clamp.

    Returns:
        ``0.0`` when *value* is NaN or below zero, ``1.0`` when it is above
        one, otherwise *value* converted to ``float``.
    """
    # NaN is the only float that compares unequal to itself.
    is_nan = value != value
    if is_nan or value < 0.0:
        return 0.0

    return 1.0 if value > 1.0 else float(value)
|
|
|
|
def load_token_map(config: dict[str, Any]) -> Dict[str, int]:
    """Build a flat symbol -> token id lookup from a model config.

    Reads ``config["phoneme_id_map"]``. Each value may be either a bare
    ``int`` or a one-element list whose item is converted with ``int()``.

    Args:
        config: Parsed configuration mapping.

    Returns:
        A new dict mapping every symbol to a single integer token id.

    Raises:
        KeyError: If ``phoneme_id_map`` is absent or is not a dict.
        ValueError: If an entry is neither an int nor a single-item list, or
            if the single list item cannot be converted to an int.
    """
    phoneme_id_map = config.get("phoneme_id_map")
    if not isinstance(phoneme_id_map, dict):
        raise KeyError("config.json is missing phoneme_id_map")

    token_map: Dict[str, int] = {}

    for symbol, raw_value in phoneme_id_map.items():
        if isinstance(raw_value, int):
            token_map[symbol] = raw_value
            continue

        if isinstance(raw_value, list) and len(raw_value) == 1:
            try:
                token_map[symbol] = int(raw_value[0])
            except (TypeError, ValueError, OverflowError) as exc:
                # Without this, a bad item surfaces as a bare int() error
                # that does not say which symbol in the config is at fault.
                raise ValueError(
                    f"Unsupported token mapping for symbol {symbol!r}: "
                    f"list item {raw_value[0]!r} is not convertible to int"
                ) from exc
            continue

        raise ValueError(
            f"Unsupported token mapping for symbol {symbol!r}: expected int or single-item list"
        )

    return token_map
|
|
|
|
def intensity_to_symbol(intensity: float) -> str:
    """Pick the entry of ``INTENSITY_SYMBOLS`` that corresponds to *intensity*.

    The intensity is first clamped with ``clamp_unit``, scaled by the number
    of available symbols and truncated to an index, which is then limited to
    the valid index range.

    Args:
        intensity: Requested intensity; values outside [0, 1] are clamped.

    Returns:
        The selected element of ``INTENSITY_SYMBOLS``.
    """
    symbol_count = len(INTENSITY_SYMBOLS)
    bucket = int(clamp_unit(intensity) * symbol_count)

    # An intensity of exactly 1.0 scales to ``symbol_count``; pull it back
    # onto the last valid index.
    last_index = symbol_count - 1
    if bucket > last_index:
        bucket = last_index
    if bucket < 0:
        bucket = 0

    return INTENSITY_SYMBOLS[bucket]
|
|
|
|
def normalize_emotion(emotion: str | None) -> str:
    """Return the canonical (stripped, lower-cased) form of an emotion name.

    Args:
        emotion: Emotion name. ``None`` or an empty string is treated as
            ``"neutral"``.

    Returns:
        The normalized name, which is guaranteed to be a key of
        ``EMOTION_TO_SYMBOL``.

    Raises:
        ValueError: If the normalized name is not a key of
            ``EMOTION_TO_SYMBOL``.
    """
    candidate = emotion if emotion else "neutral"
    candidate = candidate.strip().lower()

    if candidate in EMOTION_TO_SYMBOL:
        return candidate

    # Report the caller's original argument, not the normalized form.
    raise ValueError(
        f"Unsupported emotion {emotion!r}. Expected one of: {', '.join(EMOTION_TO_SYMBOL)}"
    )
|
|
|
|
def phonemize_full_utterance(text: str, espeak_voice: str = DEFAULT_ESPEAK_VOICE) -> List[str]:
    """Phonemize *text* and flatten the result into one symbol list.

    ``piper_phonemize.phonemize_espeak`` is imported lazily and called with
    *text* and *espeak_voice*. Its result is iterated as a sequence of groups
    (presumably one per sentence; confirm against piper-phonemize). Empty
    groups are dropped and a single ``" "`` is inserted between the groups
    that remain.

    Args:
        text: Text to phonemize.
        espeak_voice: Voice identifier forwarded to ``phonemize_espeak``.

    Returns:
        A new flat list of phoneme symbols.

    Raises:
        ImportError: If ``piper_phonemize`` cannot be imported.
    """
    try:
        from piper_phonemize import phonemize_espeak
    except ImportError as exc:
        install_hint = (
            "wfloat-tts requires piper-phonemize for phonemization. "
            "Install it with: pip install \"piper-phonemize==1.3.0\" "
            "-f https://k2-fsa.github.io/icefall/piper_phonemize"
        )
        raise ImportError(install_hint) from exc

    flattened: List[str] = []

    # filter(None, ...) skips falsy (empty) groups.
    for group in filter(None, phonemize_espeak(text, espeak_voice)):
        if flattened:
            # Separate this group from what has already been collected.
            flattened.append(" ")
        flattened.extend(group)

    return flattened
|
|
|
|
def prepare_input(
    text: str,
    config: dict[str, Any],
    emotion: str = "neutral",
    intensity: float = 0.5,
    espeak_voice: str = DEFAULT_ESPEAK_VOICE,
) -> PreparedInput:
    """Turn raw text plus emotion settings into a ``PreparedInput``.

    The text is phonemized, the emotion and intensity symbols are appended to
    the phoneme sequence, and every symbol is translated to a token id using
    the map loaded from *config*.

    Args:
        text: Text to synthesize.
        config: Configuration mapping passed to ``load_token_map``.
        emotion: Emotion name; validated by ``normalize_emotion``.
        intensity: Emotion intensity; clamped by ``clamp_unit``.
        espeak_voice: Voice forwarded to ``phonemize_full_utterance``.

    Returns:
        A ``PreparedInput`` holding the original text, the full symbol
        sequence, its token ids, and the normalized emotion and intensity
        with their symbols.

    Raises:
        KeyError: If any symbol in the sequence has no entry in the token
            map.
        Exception: Errors raised by the helper functions called here
            propagate unchanged.
    """
    emotion_name = normalize_emotion(emotion)
    unit_intensity = clamp_unit(intensity)

    symbols = phonemize_full_utterance(text, espeak_voice=espeak_voice)
    emotion_symbol = EMOTION_TO_SYMBOL[emotion_name]
    intensity_symbol = intensity_to_symbol(unit_intensity)

    # The conditioning symbols go at the end: emotion first, then intensity.
    symbols.append(emotion_symbol)
    symbols.append(intensity_symbol)

    token_map = load_token_map(config)

    # Collect every unknown symbol so a single error reports all of them.
    unknown = {symbol for symbol in symbols if symbol not in token_map}
    if unknown:
        joined = ", ".join(sorted(unknown))
        raise KeyError(f"Missing symbol(s) in config.json phoneme_id_map: {joined}")

    token_ids = list(map(token_map.__getitem__, symbols))

    return PreparedInput(
        text=text,
        phonemes=symbols,
        token_ids=token_ids,
        emotion=emotion_name,
        intensity=unit_intensity,
        emotion_symbol=emotion_symbol,
        intensity_symbol=intensity_symbol,
    )
|
|