| """Text parsing utilities for spatial directions."""
|
|
|
import re
from typing import Any, Dict, Optional, Tuple

import numpy as np
|
|
|
|
|
|
|
# Azimuth in degrees for each direction keyword. "front" is 0, "left" is
# +90 and "right" is -90; hyphenated and fused spellings share one angle.
DIRECTION_BINS = {
    "front": 0,
    "front-left": 45,
    "frontleft": 45,
    "left": 90,
    "back-left": 135,
    "backleft": 135,
    "back": 180,
    "back-right": -135,
    "backright": -135,
    "right": -90,
    "front-right": -45,
    "frontright": -45,
}

# Elevation in degrees for each height keyword (negative is downwards).
ELEVATION_BINS = {
    "down": -30,
    "below": -30,
    "lower": -30,
    "level": 0,
    "middle": 0,
    "center": 0,
    "up": 30,
    "above": 30,
    "upper": 30,
}

# Distance in metres for each range keyword.
DISTANCE_BINS = {
    "near": 1.0,
    "close": 1.0,
    "mid": 2.5,
    "medium": 2.5,
    "far": 5.0,
    "distant": 5.0,
}

# Recognised room-size labels (keyword -> canonical label).
ROOM_SIZE_BINS = {
    "small": "small",
    "medium": "medium",
    "large": "large",
}

# Recognised reverb labels (keyword -> canonical label).
REVERB_BINS = {
    "dry": "dry",
    "medium": "medium",
    "wet": "wet",
}

# A word is a run of ASCII letters; hyphenated compounds such as
# "front-left" are kept together as a single word.
_WORD_RE = re.compile(r"[a-z]+(?:-[a-z]+)*")


def _shared_keywords(*tables):
    """Return the keywords that appear in more than one of *tables*."""
    seen = set()
    shared = set()
    for table in tables:
        for word in table:
            if word in seen:
                shared.add(word)
            else:
                seen.add(word)
    return frozenset(shared)


# Keywords whose category cannot be decided from the word alone
# (with the tables above this is just "medium").
_AMBIGUOUS_KEYWORDS = _shared_keywords(
    DIRECTION_BINS, ELEVATION_BINS, DISTANCE_BINS, ROOM_SIZE_BINS, REVERB_BINS
)


def _split_words(text):
    """Return the lower-cased words of *text* in order of appearance.

    A hyphenated compound is listed first as a whole and then as its
    parts, so "front-left" can match as a unit while "upper-left" still
    contributes "upper" and "left".
    """
    words = []
    for word in _WORD_RE.findall(text.lower()):
        words.append(word)
        if "-" in word:
            words.extend(word.split("-"))
    return words


def _lookup(words, table):
    """Return the *table* value for the best keyword in *words*, or None.

    The first keyword that belongs to this table only wins.  A keyword
    shared with another table is used only when no such keyword exists,
    so "medium room, far" resolves the distance to "far".
    """
    fallback = None
    for word in words:
        if word not in table:
            continue
        if word not in _AMBIGUOUS_KEYWORDS:
            return table[word]
        if fallback is None:
            fallback = table[word]
    return fallback


def parse_spatial_text(text: str) -> Dict[str, Any]:
    """
    Parse spatial text description into parameters.

    Keywords are matched as whole words, case-insensitively, in the order
    they appear in the text.  Words that merely contain a keyword
    ("linear", "setup", "laundry") are ignored.  A category with no
    matching keyword keeps its default value.

    Args:
        text: Text like "front-left, up, near, small room, dry"

    Returns:
        Dictionary with keys:
            - azimuth_deg: float (default 0.0)
            - elevation_deg: float (default 0.0)
            - distance_m: float (default 2.5)
            - room_size: str (default "medium")
            - reverb_level: str (default "medium")
    """
    params = {
        "azimuth_deg": 0.0,
        "elevation_deg": 0.0,
        "distance_m": 2.5,
        "room_size": "medium",
        "reverb_level": "medium",
    }

    words = _split_words(text)
    fields = (
        ("azimuth_deg", DIRECTION_BINS, float),
        ("elevation_deg", ELEVATION_BINS, float),
        ("distance_m", DISTANCE_BINS, float),
        ("room_size", ROOM_SIZE_BINS, str),
        ("reverb_level", REVERB_BINS, str),
    )
    for key, table, convert in fields:
        value = _lookup(words, table)
        # Compare against None explicitly: 0 (front / level) is a valid hit.
        if value is not None:
            params[key] = convert(value)

    return params
|
|
|
|
|
def generate_random_spatial_text(
    rng: Optional[np.random.Generator] = None,
) -> Tuple[str, Dict[str, Any]]:
    """
    Generate random spatial text and corresponding parameters.

    One keyword is drawn per category, in the order direction, elevation,
    distance, room size, reverb.  The text has the form
    "<direction>, <elevation>, <distance>, <room_size> room, <reverb>".

    Args:
        rng: Optional random source exposing ``choice`` (for example a
            ``numpy.random.Generator``).  When omitted, the global
            ``numpy.random`` state is used, so ``np.random.seed`` still
            controls the output.

    Returns:
        (text, params_dict) where params_dict has the keys azimuth_deg,
        elevation_deg, distance_m, room_size and reverb_level.
    """
    source = np.random if rng is None else rng

    # ``choice`` hands back numpy string scalars; convert to plain ``str``
    # so the returned labels are ordinary Python strings.
    direction = str(source.choice(list(DIRECTION_BINS.keys())))
    elevation = str(source.choice(["down", "level", "up"]))
    distance = str(source.choice(["near", "mid", "far"]))
    room_size = str(source.choice(["small", "medium", "large"]))
    reverb = str(source.choice(["dry", "medium", "wet"]))

    text = f"{direction}, {elevation}, {distance}, {room_size} room, {reverb}"

    params = {
        "azimuth_deg": float(DIRECTION_BINS[direction]),
        "elevation_deg": float(ELEVATION_BINS[elevation]),
        "distance_m": DISTANCE_BINS[distance],
        "room_size": room_size,
        "reverb_level": reverb,
    }

    return text, params
|
|
|
|
|
def _nearest_index(value, centers, period=None):
    """Return the index of the entry in *centers* closest to *value*.

    When *period* is given, distance is measured around a circle of that
    circumference, so with ``period=360`` the values -170 and 180 are 10
    apart rather than 350.  Ties go to the earliest entry.
    """
    offsets = np.abs(np.asarray(centers, dtype=float) - value)
    if period is not None:
        offsets = offsets % period
        offsets = np.minimum(offsets, period - offsets)
    # Cast so callers receive a plain int rather than a numpy integer.
    return int(np.argmin(offsets))


def params_to_bins(params: Dict[str, Any]) -> Dict[str, int]:
    """
    Convert continuous parameters to bin indices.

    Each numeric value is assigned to the nearest bin centre:
        - direction_bin: index into [0, 45, 90, 135, 180, -135, -90, -45],
          compared as angles, so -170 and 350 fall into the 180 and 0
          bins respectively
        - elevation_bin: index into [-30, 0, 30]
        - distance_bin: index into [1.0, 2.5, 5.0]
    The labels map to their position in ["small", "medium", "large"]
    (room_bin) and ["dry", "medium", "wet"] (reverb_bin).

    Args:
        params: Dict with azimuth_deg, elevation_deg and distance_m, plus
            optional room_size and reverb_level (both default to "medium").

    Returns:
        Dict with bin indices as plain ints.

    Raises:
        KeyError: If azimuth_deg, elevation_deg or distance_m is missing.
        ValueError: If room_size or reverb_level is not a known label.
    """
    direction_angles = [0, 45, 90, 135, 180, -135, -90, -45]
    elevation_angles = [-30, 0, 30]
    distance_values = [1.0, 2.5, 5.0]
    room_sizes = ["small", "medium", "large"]
    reverb_levels = ["dry", "medium", "wet"]

    return {
        # Azimuth wraps around at +/-180 degrees, hence the period.
        "direction_bin": _nearest_index(
            params["azimuth_deg"], direction_angles, period=360.0
        ),
        "elevation_bin": _nearest_index(params["elevation_deg"], elevation_angles),
        "distance_bin": _nearest_index(params["distance_m"], distance_values),
        "room_bin": room_sizes.index(params.get("room_size", "medium")),
        "reverb_bin": reverb_levels.index(params.get("reverb_level", "medium")),
    }
|
|
|
|
|
def bins_to_one_hot(bins: Dict[str, int]) -> np.ndarray:
    """
    Build the concatenated one-hot encoding for a set of bin indices.

    Args:
        bins: Dict holding direction_bin, elevation_bin, distance_bin,
            room_bin and reverb_bin.

    Returns:
        1-D array of length 20: 8 direction slots followed by 3 slots each
        for elevation, distance, room and reverb, with a single 1.0 per
        segment.
    """
    # (key in ``bins``, number of slots) in output order.
    layout = (
        ("direction_bin", 8),
        ("elevation_bin", 3),
        ("distance_bin", 3),
        ("room_bin", 3),
        ("reverb_bin", 3),
    )

    segments = []
    for key, width in layout:
        segment = np.zeros(width)
        segment[bins[key]] = 1.0
        segments.append(segment)

    return np.concatenate(segments)
|
|
|