Compare commits

2 Commits

Author SHA1 Message Date
1bd047932f Fist implementation of the satellite 2025-09-09 23:16:10 +02:00
af1e81243a Added audio commands + "server test" command 2025-09-09 22:35:33 +02:00
13 changed files with 940 additions and 132 deletions

23
requirements.txt Normal file
View File

@@ -0,0 +1,23 @@
cffi==1.17.1
click==8.2.1
comtypes==1.4.12
iniconfig==2.1.0
markdown-it-py==4.0.0
mdurl==0.1.2
numpy==2.3.2
packaging==25.0
pluggy==1.6.0
psutil==7.0.0
PyAudio==0.2.14
pycaw==20240210
pycparser==2.22
Pygments==2.19.2
pytest==8.4.1
PyYAML==6.0.2
rich==14.1.0
scipy==1.16.1
shellingham==1.5.4
sounddevice==0.5.2
typer==0.17.3
typing_extensions==4.15.0
wyoming==1.7.2

View File

@@ -9,15 +9,15 @@ audio_app = typer.Typer(help="Audio device operations")
# Add commands to subcommands # Add commands to subcommands
server_app.command("check")(server.check) server_app.command("check")(server.check)
server_app.command("test")(server.test)
server_app.command("satellite")(server.satellite)
audio_app.command("list")(audio.list_devices) audio_app.command("list")(audio.list_devices)
audio_app.command("test")(audio.test_device) audio_app.command("test")(audio.test_device)
audio_app.command("config")(audio.config_info)
audio_app.command("install")(audio.install) audio_app.command("install")(audio.install)
# Register subcommands # Register subcommands
app.add_typer(server_app, name="server") app.add_typer(server_app, name="server")
app.add_typer(audio_app, name="audio") app.add_typer(audio_app, name="audio")
if __name__ == "__main__": if __name__ == "__main__":
app() app()

View File

@@ -3,7 +3,8 @@ import numpy as np
import os import os
from typing import Optional, List from typing import Optional, List
from ..devices import get_audio_devices_windows, install_audio_cmdlets from ..devices import get_audio_devices_windows, install_audio_cmdlets, get_audio_devices_windows_from_pnp_devices, \
get_audio_devices_linux
from ..core.utils import * from ..core.utils import *
from ..config import AppConfig from ..config import AppConfig
@@ -34,20 +35,49 @@ def _detect_wsl() -> bool:
except Exception: except Exception:
return False return False
def _get_device_type(instance_id: str) -> str: def _get_device_type(instance_id: str) -> str:
"""Categorize device type based on InstanceId.""" """Categorize device type based on InstanceId."""
instance_lower = instance_id.lower() instance_lower = instance_id.lower()
if '0.0.1.00000000' in instance_lower: if '0.0.1.00000000' in instance_lower:
return "Input" return "Recording"
elif '0.0.0.00000000' in instance_lower: elif '0.0.0.00000000' in instance_lower:
return "Output" return "Playback"
else: else:
return "Unknown" return "Unknown"
def _get_short_device_id(instance_id: str) -> str:
"""Get device name based on InstanceId."""
if len(instance_id) == 68:
return instance_id[31:-1]
elif len(instance_id) == 55:
return instance_id[18:-1]
else:
return instance_id
def _get_colored_bool(status: bool) -> str:
"""Get colored status string based on status."""
if status:
return f"[green]{status}[/green]"
else:
return str(status)
def _get_colored_status(status: str) -> str:
"""Get colored status string based on status."""
if status == "Unknown":
return f"[red]{status}[/red]"
else:
return str(status)
def list_devices( def list_devices(
device: Optional[int] = typer.Argument(default=None, help="Show details of a device, by its Id"), device: Optional[int] = typer.Argument(default=None, help="Show details of a device, by its Id"),
pulse: bool = typer.Option(False, "--pulse", help="Show PulseAudio sources"), pulse: bool = typer.Option(False, "--pulse", help="Show PulseAudio sources"),
native: bool = typer.Option(False, "--native", help="Force native Linux view (WSL only)"), native: bool = typer.Option(False, "--native", help="Force native Linux view (WSL only)"),
all_devices: bool = typer.Option(False, "--all", help="Display all devices, even those not connected"),
sort_info: Optional[str] = typer.Option(None, "--sort", help="Sort by column"), sort_info: Optional[str] = typer.Option(None, "--sort", help="Sort by column"),
desc_info: bool = typer.Option(False, "--desc", help="Set the sorting order to descending"), desc_info: bool = typer.Option(False, "--desc", help="Set the sorting order to descending"),
filter_info: Optional[str] = typer.Option(None, "--filter", help="Filter by a column value. Use column=value"), filter_info: Optional[str] = typer.Option(None, "--filter", help="Filter by a column value. Use column=value"),
@@ -60,40 +90,61 @@ def list_devices(
# Determine title based on environment and options # Determine title based on environment and options
is_wsl = _detect_wsl() is_wsl = _detect_wsl()
use_windows = is_wsl and not native if is_wsl and not native:
if use_windows:
typer.echo(typer.style("WSL detected, showing Windows audio devices (use --native for WSL view)", typer.echo(typer.style("WSL detected, showing Windows audio devices (use --native for WSL view)",
fg=typer.colors.BLUE)) fg=typer.colors.BLUE))
result = get_audio_devices_windows() result = get_audio_devices_windows_from_pnp_devices() if all_devices else get_audio_devices_windows()
if not check_result(result): if not check_result(result):
return return
windows_devices = get_results(result.stdout, {"Index": '', "ID": '', 'Name': '', 'Type': '', 'Default': ''}) windows_devices = get_results(result.stdout)
if all_devices:
select_conf = [
SelectConf(attr="Index", default=""),
SelectConf(attr="InstanceId", to="ID", formatter=lambda item: _get_short_device_id(item.InstanceId)),
SelectConf(attr="FriendlyName", to="Name"),
SelectConf(attr="Type", formatter=lambda item: _get_device_type(item.InstanceId)),
SelectConf(attr="Status", formatter=lambda item: _get_colored_status(item.Status)),
]
else:
select_conf = [
SelectConf(attr="Index"),
SelectConf(attr="ID", formatter=lambda item: _get_short_device_id(item.ID)),
SelectConf(attr="Name"),
SelectConf(attr="Type"),
SelectConf(attr="Default", formatter=lambda item: _get_colored_bool(item.Default)),
]
windows_devices = select(windows_devices, select_conf)
# apply sorting and filtering # apply sorting and filtering
windows_devices = filter_and_sort(windows_devices, filter_info, sort_info, desc_info) windows_devices = filter_and_sort(windows_devices, filter_info, sort_info, desc_info)
display_as_table(windows_devices, [ display_as_table(windows_devices)
{"name": "Index"},
{"name": "ID"},
{"name": "Name"},
{"name": "Type"},
{"name": "Default"},
])
else: else:
linux_devices = get_linux_audio_devices() if native and not is_wsl:
display_linux_devices(linux_devices, config, pulse, sort_config) typer.echo(typer.style("Native is applicable only when WSL is detected.", fg=typer.colors.RED))
return
if all_devices and native:
typer.echo(typer.style("All devices is not applicable with native mode.", fg=typer.colors.RED))
return
result = get_audio_devices_linux()
if not check_result(result):
return
linux_devices = result.result
# apply sorting and filtering
linux_devices = filter_and_sort(linux_devices, filter_info, sort_info, desc_info)
display_as_table(linux_devices)
# Show legend # Show legend
typer.echo(f"\nLegend:") typer.echo(f"\nLegend:")
if use_windows: typer.echo(typer.style(" * = Configured device", fg=typer.colors.YELLOW))
typer.echo(typer.style(" ✓ = Device working", fg=typer.colors.GREEN)) typer.echo(f" IN/OUT = Input/Output channel count")
typer.echo(typer.style(" ✗ = Device problem", fg=typer.colors.RED))
else:
typer.echo(typer.style(" * = Configured device", fg=typer.colors.YELLOW))
typer.echo(f" IN/OUT = Input/Output channel count")
def install(): def install():
@@ -104,32 +155,6 @@ def install():
typer.echo(result.stdout) typer.echo(result.stdout)
def get_linux_audio_devices() -> list:
"""Get Linux audio devices using sounddevice."""
try:
devices = sd.query_devices()
linux_devices = []
for i, device in enumerate(devices):
hostapi_name = sd.query_hostapis(device['hostapi'])['name']
linux_devices.append(LinuxAudioDevice(
device_id=i,
name=device['name'],
max_input_channels=device['max_input_channels'],
max_output_channels=device['max_output_channels'],
default_samplerate=device['default_samplerate'],
hostapi_name=hostapi_name
))
return linux_devices
except Exception as e:
typer.echo(typer.style(f"Error querying Linux audio devices: {e}", fg=typer.colors.RED, bold=True))
raise typer.Exit(1)
def display_linux_devices(devices: list, def display_linux_devices(devices: list,
config: AppConfig, config: AppConfig,
show_pulse: bool = False, show_pulse: bool = False,
@@ -216,6 +241,7 @@ def test_device(
device: Optional[int] = typer.Option(None, "--device", "-d", help="Device ID to test (default: configured device)"), device: Optional[int] = typer.Option(None, "--device", "-d", help="Device ID to test (default: configured device)"),
duration: float = typer.Option(3.0, "--duration", help="Recording duration in seconds"), duration: float = typer.Option(3.0, "--duration", help="Recording duration in seconds"),
save: bool = typer.Option(False, "--save", help="Save recording to WAV file"), save: bool = typer.Option(False, "--save", help="Save recording to WAV file"),
play: bool = typer.Option(False, "--play", help="Play recorded audio through default speakers"),
config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to config file") config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to config file")
): ):
"""Test audio recording from a specific device.""" """Test audio recording from a specific device."""
@@ -230,7 +256,11 @@ def test_device(
typer.echo("=" * 20) typer.echo("=" * 20)
# Get device info # Get device info
devices_list = get_linux_audio_devices() result = get_audio_devices_linux()
if not check_result(result):
return
devices_list = result.result
if test_device_id is not None: if test_device_id is not None:
if test_device_id >= len(devices_list): if test_device_id >= len(devices_list):
@@ -284,72 +314,20 @@ def test_device(
else: else:
typer.echo(typer.style(" Status: Good signal level", fg=typer.colors.GREEN)) typer.echo(typer.style(" Status: Good signal level", fg=typer.colors.GREEN))
# Play recorded audio if requested
if play:
typer.echo(f"\n{typer.style('Playing recorded audio...', fg=typer.colors.CYAN, bold=True)}")
sd.play(audio_data, samplerate=config.audio.sample_rate)
sd.wait()
typer.echo(typer.style("Playback completed!", fg=typer.colors.CYAN))
# Save if requested # Save if requested
if save: if save:
filename = f"test_device_{test_device_id or 'default'}_{int(duration)}s.wav" filename = f"test_device_{test_device_id or 'default'}_{int(duration)}s.wav"
# Note: WyomingAudioRecorder is not defined in the current code from scipy.io import wavfile
# This would need to be implemented or the save functionality modified wavfile.write(filename, config.audio.sample_rate, audio_int16)
typer.echo(f" Save functionality needs to be implemented") typer.echo(typer.style(f"Audio saved to: {filename}", fg=typer.colors.MAGENTA, bold=True))
except Exception as e: except Exception as e:
typer.echo(typer.style(f"Recording failed: {e}", fg=typer.colors.RED, bold=True)) typer.echo(typer.style(f"Recording failed: {e}", fg=typer.colors.RED, bold=True))
raise typer.Exit(1) raise typer.Exit(1)
def config_info(
config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to config file")
):
"""Show current audio configuration."""
# Load configuration
config = AppConfig.load(config_file)
typer.echo(typer.style("Audio Configuration", fg=typer.colors.BLUE, bold=True))
typer.echo("=" * 21)
# Show current settings
typer.echo(f"\nCurrent settings:")
typer.echo(f" Sample rate: {typer.style(f'{config.audio.sample_rate} Hz', fg=typer.colors.CYAN)}")
typer.echo(f" Channels: {typer.style(str(config.audio.channels), fg=typer.colors.CYAN)}")
if config.audio.device is not None:
typer.echo(f" Device: {typer.style(str(config.audio.device), fg=typer.colors.CYAN)}")
# Show device details
try:
devices_list = get_linux_audio_devices()
if config.audio.device < len(devices_list):
device = devices_list[config.audio.device]
typer.echo(f"\nConfigured device details:")
typer.echo(f" Name: {device['name']}")
typer.echo(f" Input channels: {device['max_input_channels']}")
typer.echo(f" Default rate: {int(device['default_samplerate'])} Hz")
if device['max_input_channels'] == 0:
typer.echo(typer.style(" Warning: This device has no input channels!", fg=typer.colors.RED))
else:
typer.echo(typer.style(f" Warning: Configured device {config.audio.device} not found!",
fg=typer.colors.RED, bold=True))
except Exception as e:
typer.echo(typer.style(f" Error getting device info: {e}", fg=typer.colors.RED))
else:
typer.echo(f" Device: {typer.style('default', fg=typer.colors.CYAN)}")
# Show default device info
try:
default_device = sd.default.device
if hasattr(default_device, '__len__') and len(default_device) >= 2:
default_input = int(default_device[0])
else:
default_input = int(default_device)
devices_list = get_linux_audio_devices()
device = devices_list[default_input]
typer.echo(f"\nDefault input device:")
typer.echo(f" ID: {default_input}")
typer.echo(f" Name: {device['name']}")
typer.echo(f" Input channels: {device['max_input_channels']}")
except Exception as e:
typer.echo(typer.style(f" Error getting default device: {e}", fg=typer.colors.RED))
# Show configuration source info

View File

@@ -4,7 +4,61 @@ from contextlib import closing
import typer import typer
from typing import Optional from typing import Optional
from wyoming.audio import AudioStart, AudioChunk, AudioStop
from wyoming.client import AsyncTcpClient
from wyoming.asr import Transcribe, Transcript
from ..wyoming_client.satellite import SatelliteController
from ..wyoming_client.vad import AmplitudeVAD
from ..config import AppConfig from ..config import AppConfig
import numpy as np
import sounddevice as sd
import asyncio
async def _async_transcribe(host: str, port: int, timeout: float, pcm_bytes: bytes, lang: str) -> Optional[str]:
"""Stream raw PCM data to Wyoming ASR and return transcript text."""
# Instantiate the async TCP client
client = AsyncTcpClient(host, port)
# Audio parameters
rate = 16000
width = 2 # 16-bit
channels = 1
# The client instance is an async context manager.
async with client:
# 1. Send transcription request
await client.write_event(Transcribe(language=lang).event())
# 2. Start the audio stream
await client.write_event(AudioStart(rate, width, channels).event())
# 3. Send audio chunks
chunk_size = 2048 # A reasonable chunk size
for i in range(0, len(pcm_bytes), chunk_size):
chunk_bytes = pcm_bytes[i:i + chunk_size]
await client.write_event(AudioChunk(audio=chunk_bytes, rate=rate, width=width, channels=channels).event())
# 4. Stop the audio stream
await client.write_event(AudioStop().event())
# 5. Read events until a transcript arrives
transcript_text = None
try:
while True:
event = await asyncio.wait_for(client.read_event(), timeout=timeout)
if event is None:
break
if Transcript.is_type(event.type):
tr = Transcript.from_event(event)
transcript_text = tr.text
break
except asyncio.TimeoutError:
typer.echo(typer.style("Connection timed out waiting for transcript.", fg=typer.colors.YELLOW))
return transcript_text
def check_wyoming_server(host: str, port: int, timeout: float = 3.0) -> tuple[bool, float | None, str | None]: def check_wyoming_server(host: str, port: int, timeout: float = 3.0) -> tuple[bool, float | None, str | None]:
@@ -57,4 +111,117 @@ def check(
typer.echo(typer.style("Wyoming server unreachable!", fg=typer.colors.RED, bold=True)) typer.echo(typer.style("Wyoming server unreachable!", fg=typer.colors.RED, bold=True))
typer.echo(f"Server: {final_host}:{final_port}") typer.echo(f"Server: {final_host}:{final_port}")
typer.echo(typer.style(f"Error: {error}", fg=typer.colors.RED)) typer.echo(typer.style(f"Error: {error}", fg=typer.colors.RED))
raise typer.Exit(1) raise typer.Exit(1)
def test(
duration: float = typer.Option(3.0, "--duration", help="Recording duration in seconds"),
lang: str = typer.Option("fr", "--lang", help="Language code: 'fr' or 'en'"),
host: Optional[str] = typer.Option(None, "--host", "-h", help="Wyoming server host"),
port: Optional[int] = typer.Option(None, "--port", "-p", help="Wyoming server port"),
timeout: Optional[float] = typer.Option(None, "--timeout", "-t", help="Connection timeout in seconds"),
config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to config file")
):
"""Record from default microphone, send to Wyoming ASR server, and print transcription."""
# Load configuration
config = AppConfig.load(config_file)
final_host = host or config.server.host
final_port = port or config.server.port
final_timeout = timeout or config.server.timeout
# Validate language (two-letter code)
lang = (lang or "fr").strip().lower()
if lang not in ("fr", "en"):
typer.echo(typer.style("Invalid --lang. Use 'fr' or 'en'.", fg=typer.colors.RED))
raise typer.Exit(2)
# Check server reachability first
reachable, latency, err = check_wyoming_server(final_host, final_port, final_timeout)
if not reachable:
typer.echo(typer.style(f"Cannot reach Wyoming server at {final_host}:{final_port}: {err}", fg=typer.colors.RED))
raise typer.Exit(1)
# Record audio (16 kHz mono float32)
sample_rate = 16000
channels = 1
typer.echo(typer.style("Recording...", fg=typer.colors.GREEN, bold=True))
try:
frames = int(duration * sample_rate)
audio = sd.rec(frames, samplerate=sample_rate, channels=channels, dtype="float32")
sd.wait()
except Exception as e:
typer.echo(typer.style(f"Audio recording failed: {e}", fg=typer.colors.RED))
raise typer.Exit(1)
# Convert to PCM16 bytes directly, no need for WAV wrapper
audio_int16 = np.clip(audio.flatten() * 32767.0, -32768, 32767).astype(np.int16)
pcm_bytes = audio_int16.tobytes()
# Send to Wyoming ASR (async)
try:
typer.echo(typer.style(f"Connecting to {final_host}:{final_port} (lang={lang})...", fg=typer.colors.CYAN))
# Run the async helper
transcript_text = asyncio.run(
_async_transcribe(final_host, final_port, final_timeout, pcm_bytes, lang)
)
if transcript_text:
typer.echo(typer.style("\nTranscription:", fg=typer.colors.GREEN, bold=True))
typer.echo(transcript_text)
else:
typer.echo(typer.style("No transcription received.", fg=typer.colors.YELLOW))
except Exception as e:
typer.echo(typer.style(f"ASR request failed: {e}", fg=typer.colors.RED))
raise typer.Exit(1)
def satellite(
chunk_duration: float = typer.Option(0.03, "--chunk-duration", help="Audio chunk duration in seconds"),
vad_threshold: float = typer.Option(0.01, "--vad-threshold", help="Voice activity detection threshold"),
speech_timeout: float = typer.Option(1.5, "--speech-timeout", help="Silence duration to end speech (seconds)"),
lang: str = typer.Option("fr", "--lang", help="Language code: 'fr' or 'en'"),
host: Optional[str] = typer.Option(None, "--host", "-h", help="Wyoming server host"),
port: Optional[int] = typer.Option(None, "--port", "-p", help="Wyoming server port"),
timeout: Optional[float] = typer.Option(None, "--timeout", "-t", help="Connection timeout in seconds"),
config_file: Optional[str] = typer.Option(None, "--config", "-c", help="Path to config file")
):
"""Run satellite mode with VAD-based audio streaming."""
# Load configuration
config = AppConfig.load(config_file)
final_host = host or config.server.host
final_port = port or config.server.port
final_timeout = timeout or config.server.timeout
# Validate language
lang = (lang or "fr").strip().lower()
if lang not in ("fr", "en"):
typer.echo(typer.style("Invalid --lang. Use 'fr' or 'en'.", fg=typer.colors.RED))
raise typer.Exit(2)
# Check server reachability first
reachable, latency, err = check_wyoming_server(final_host, final_port, final_timeout)
if not reachable:
typer.echo(typer.style(f"Cannot reach Wyoming server at {final_host}:{final_port}: {err}", fg=typer.colors.RED))
raise typer.Exit(1)
# Initialize VAD detector
vad_detector = AmplitudeVAD(
threshold=vad_threshold,
min_speech_duration=0.1,
min_silence_duration=speech_timeout,
sample_rate=16000
)
# Initialize and run satellite
controller = SatelliteController(
host=final_host,
port=final_port,
lang=lang,
vad_detector=vad_detector,
chunk_duration=chunk_duration,
timeout=final_timeout
)
controller.run()

View File

@@ -8,8 +8,10 @@ class Expando:
You can then access the property using dot '.' (ex. obj.prop1.prop2) You can then access the property using dot '.' (ex. obj.prop1.prop2)
""" """
def __init__(self, props): def __init__(self, props=None, **kwargs):
self._props = props self._props = props.copy() if props else {}
if kwargs:
self._props.update(kwargs)
def __getattr__(self, item): def __getattr__(self, item):
if item not in self._props: if item not in self._props:
@@ -21,10 +23,11 @@ class Expando:
def __setitem__(self, key, value): def __setitem__(self, key, value):
self._props[key] = value self._props[key] = value
def get(self, path): def get(self, path, default=None):
""" """
returns the value, from a string with represents the path returns the value, from a string with represents the path
:param path: :param path:
:param default: value to return if path is not found
:return: :return:
""" """
current = self._props current = self._props
@@ -38,7 +41,7 @@ class Expando:
else: else:
if current is None or attr not in current: if current is None or attr not in current:
return None return default
current = current[attr] current = current[attr]
return current return current

View File

@@ -2,7 +2,7 @@ import json
import subprocess import subprocess
from dataclasses import dataclass from dataclasses import dataclass
from subprocess import CompletedProcess from subprocess import CompletedProcess
from typing import Any from typing import Any, Callable
import typer import typer
from rich.console import Console from rich.console import Console
@@ -38,11 +38,26 @@ class FilterConf:
return FilterConf(parts[0].strip(), parts[1].strip()) return FilterConf(parts[0].strip(), parts[1].strip())
@dataclass
class SelectConf:
attr: str # property nam to select
to: str = None # rename the property
default: str = None # value to use if property is missing
formatter: Callable[[Any], Any] = None # function to apply to the property using the whole item
@dataclass @dataclass
class ProcessResult: class ProcessResult:
result: Any result: Any
error: str = None error: str = None
@property
def stderr(self):
return self.error
@property
def returncode(self):
return 0 if self.result is not None else 1
def sort_by(items: list[Expando], sort_conf: SortConf): def sort_by(items: list[Expando], sort_conf: SortConf):
"""Sort a list of items by a given property.""" """Sort a list of items by a given property."""
@@ -52,7 +67,6 @@ def sort_by(items: list[Expando], sort_conf: SortConf):
value = item.geti(property_name, "") value = item.geti(property_name, "")
# Convert None to empty string for consistent sorting # Convert None to empty string for consistent sorting
return "" if value is None else str(value) return "" if value is None else str(value)
if sort_conf is None: if sort_conf is None:
return items return items
@@ -86,6 +100,23 @@ def filter_by(items, filter_conf: FilterConf):
] ]
def select(items: list[Expando], settings: list[SelectConf]) -> list[Expando]:
res = []
for item in items:
new_item = {}
for setting in settings:
attr = setting.attr.strip()
key = setting.to or attr
value = setting.formatter(item) if setting.formatter else item.get(attr, setting.default)
new_item[key] = value
res.append(Expando(new_item))
return res
def run_ps_command(command: str) -> CompletedProcess[str]: def run_ps_command(command: str) -> CompletedProcess[str]:
"""Run a PowerShell command and return the output""" """Run a PowerShell command and return the output"""
completed = subprocess.run( completed = subprocess.run(
@@ -160,7 +191,7 @@ def run_ps_command_live(command: str) -> CompletedProcess[str]:
return completed return completed
def get_results(stdout: str, mappings) -> list[dict]: def get_results(stdout: str) -> list[Expando]:
stripped = stdout.strip() stripped = stdout.strip()
if not stripped: if not stripped:
typer.echo(typer.style("PowerShell returned empty output", fg=typer.colors.YELLOW)) typer.echo(typer.style("PowerShell returned empty output", fg=typer.colors.YELLOW))
@@ -169,15 +200,20 @@ def get_results(stdout: str, mappings) -> list[dict]:
data = json.loads(stripped) data = json.loads(stripped)
as_list = data if isinstance(data, list) else [data] as_list = data if isinstance(data, list) else [data]
res = [] return [Expando(item) for item in as_list]
for item in as_list:
mapped = {key: item.get(key, default_value) for key, default_value in mappings.items()}
res.append(Expando(mapped)) def display_as_table(result, columns_settings: list = None):
def _create_default_columns_settings():
if len(result) == 0:
return []
blue_print = result[0]
return [{"name": attr} for attr in blue_print.as_dict().keys()]
if columns_settings is None:
columns_settings = _create_default_columns_settings()
return res
def display_as_table(result, columns_settings: list):
formatters = {} formatters = {}
table = Table(show_header=True) table = Table(show_header=True)
for col in [c for c in columns_settings if "name" in c]: for col in [c for c in columns_settings if "name" in c]:

View File

@@ -50,7 +50,7 @@ def get_audio_devices_windows():
return run_ps_command(ps_script) return run_ps_command(ps_script)
def get_audio_devices_windows_from_pnp(): def get_audio_devices_windows_from_pnp_devices():
ps_script = "Get-PnpDevice -Class AudioEndpoint | Select-Object FriendlyName, Status, InstanceId | ConvertTo-Json" ps_script = "Get-PnpDevice -Class AudioEndpoint | Select-Object FriendlyName, Status, InstanceId | ConvertTo-Json"
return run_ps_command(ps_script) return run_ps_command(ps_script)

View File

View File

@@ -0,0 +1,67 @@
import numpy as np
from collections import deque
from typing import List, Optional
class AudioBuffer:
"""Circular audio buffer for pre-VAD audio storage."""
def __init__(self, max_duration: float = 1.0, sample_rate: int = 16000):
"""
Initialize audio buffer.
Args:
max_duration: Maximum buffer duration in seconds
sample_rate: Audio sample rate
"""
self.sample_rate = sample_rate
max_samples = int(max_duration * sample_rate)
self.buffer: deque = deque(maxlen=max_samples)
self.is_recording = False
self.recorded_chunks: List[np.ndarray] = []
def add_chunk(self, audio_chunk: np.ndarray) -> None:
"""Add audio chunk to buffer."""
# Always add to circular buffer
for sample in audio_chunk:
self.buffer.append(sample)
# If recording, also add to recorded chunks
if self.is_recording:
self.recorded_chunks.append(audio_chunk.copy())
def start_recording(self) -> np.ndarray:
"""Start recording and return pre-buffer content."""
self.is_recording = True
# Return current buffer content as pre-buffer
pre_buffer = np.array(list(self.buffer), dtype=np.float32)
self.recorded_chunks = [pre_buffer] if len(pre_buffer) > 0 else []
return pre_buffer
def stop_recording(self) -> np.ndarray:
"""Stop recording and return all recorded audio."""
self.is_recording = False
if not self.recorded_chunks:
return np.array([], dtype=np.float32)
# Concatenate all recorded chunks
full_recording = np.concatenate(self.recorded_chunks)
self.recorded_chunks = []
return full_recording
def get_current_recording(self) -> Optional[np.ndarray]:
"""Get current recording without stopping."""
if not self.is_recording or not self.recorded_chunks:
return None
return np.concatenate(self.recorded_chunks)
def clear(self) -> None:
"""Clear all buffers."""
self.buffer.clear()
self.recorded_chunks.clear()
self.is_recording = False

View File

@@ -0,0 +1,198 @@
from typing import Optional
from wyoming.asr import Transcript
from wyoming.audio import AudioStart, AudioChunk, AudioStop
from wyoming.asr import Transcribe
from wyoming.client import AsyncTcpClient
from ..wyoming_client.audio_buffer import AudioBuffer
from ..wyoming_client.vad import VADDetector
from queue import Queue, Empty
import typer
import asyncio
import threading
import numpy as np
import sounddevice as sd
import time
class SatelliteController:
"""Main satellite controller with VAD-based audio streaming."""
def __init__(self, host: str, port: int, lang: str, vad_detector: VADDetector,
chunk_duration: float = 0.03, timeout: float = 5.0):
self.host = host
self.port = port
self.lang = lang
self.vad_detector = vad_detector
self.timeout = timeout
# Audio settings
self.sample_rate = 16000
self.channels = 1
self.chunk_size = int(chunk_duration * self.sample_rate)
# Components
self.audio_buffer = AudioBuffer(max_duration=1.0, sample_rate=self.sample_rate)
# State
self.is_running = False
self.is_speaking = False
self.audio_queue = Queue()
self.transcription_queue = Queue()
def _audio_callback(self, indata, frames, time, status):
"""Callback for sounddevice audio stream."""
if status:
typer.echo(f"Audio callback status: {status}")
audio_chunk = indata[:, 0].copy() # Extract mono channel
self.audio_queue.put(audio_chunk)
async def _async_transcribe(self, pcm_bytes: bytes) -> Optional[str]:
"""Stream raw PCM data to Wyoming ASR and return transcript text."""
# Instantiate the async TCP client
client = AsyncTcpClient(self.host, self.port)
# Audio parameters
rate = 16000
width = 2 # 16-bit
channels = 1
# The client instance is an async context manager.
async with client:
# 1. Send transcription request
await client.write_event(Transcribe(language=self.lang).event())
# 2. Start the audio stream
await client.write_event(AudioStart(rate, width, channels).event())
# 3. Send audio chunks
chunk_size = 2048 # A reasonable chunk size
for i in range(0, len(pcm_bytes), chunk_size):
chunk_bytes = pcm_bytes[i:i + chunk_size]
await client.write_event(AudioChunk(audio=chunk_bytes, rate=rate, width=width, channels=channels).event())
# 4. Stop the audio stream
await client.write_event(AudioStop().event())
# 5. Read events until a transcript arrives
transcript_text = None
try:
while True:
event = await asyncio.wait_for(client.read_event(), timeout=self.timeout)
if event is None:
break
if Transcript.is_type(event.type):
tr = Transcript.from_event(event)
transcript_text = tr.text
break
except asyncio.TimeoutError:
typer.echo(typer.style("Connection timed out waiting for transcript.", fg=typer.colors.YELLOW))
return transcript_text
def _process_audio(self):
"""Process audio chunks with VAD in separate thread."""
while self.is_running:
try:
audio_chunk = self.audio_queue.get(timeout=0.1)
# Add to buffer
self.audio_buffer.add_chunk(audio_chunk)
# Check VAD
speech_detected = self.vad_detector.is_speech(audio_chunk)
if speech_detected and not self.is_speaking:
# Start of speech
typer.echo(typer.style("🎤 Speech detected", fg=typer.colors.GREEN))
self.is_speaking = True
pre_buffer = self.audio_buffer.start_recording()
# Start async transcription in background
threading.Thread(
target=self._start_transcription,
daemon=True
).start()
elif not speech_detected and self.is_speaking:
# End of speech
typer.echo(typer.style("🔇 Speech ended", fg=typer.colors.YELLOW))
self.is_speaking = False
full_recording = self.audio_buffer.stop_recording()
if len(full_recording) > 0:
# Queue for transcription
self.transcription_queue.put(full_recording)
except Empty:
continue
except Exception as e:
typer.echo(typer.style(f"Audio processing error: {e}", fg=typer.colors.RED))
def _start_transcription(self):
"""Handle transcription in background thread."""
try:
# Wait for audio to be queued
recording = self.transcription_queue.get(timeout=2.0)
# Convert to PCM16
audio_int16 = np.clip(recording * 32767.0, -32768, 32767).astype(np.int16)
pcm_bytes = audio_int16.tobytes()
# Send to Wyoming ASR
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
transcript_text = loop.run_until_complete(
self._async_transcribe(pcm_bytes)
)
if transcript_text:
typer.echo(typer.style(f"📝 {transcript_text}", fg=typer.colors.CYAN, bold=True))
else:
typer.echo(typer.style("❌ No transcription received", fg=typer.colors.YELLOW))
finally:
loop.close()
except Empty:
typer.echo(typer.style("⏰ Transcription timeout", fg=typer.colors.YELLOW))
except Exception as e:
typer.echo(typer.style(f"❌ Transcription error: {e}", fg=typer.colors.RED))
def run(self):
"""Run the satellite."""
typer.echo(typer.style("🛰️ Starting satellite mode...", fg=typer.colors.BLUE, bold=True))
typer.echo(f"Listening on default microphone ({self.sample_rate} Hz, {self.channels} ch)")
typer.echo(f"Wyoming server: {self.host}:{self.port} (lang: {self.lang})")
typer.echo("Press Ctrl+C to stop")
typer.echo("=" * 50)
self.is_running = True
# Start audio processing thread
audio_thread = threading.Thread(target=self._process_audio, daemon=True)
audio_thread.start()
try:
# Start audio stream
with sd.InputStream(
callback=self._audio_callback,
samplerate=self.sample_rate,
channels=self.channels,
blocksize=self.chunk_size,
dtype='float32'
):
# Keep running until interrupted
while self.is_running:
time.sleep(0.1)
except KeyboardInterrupt:
typer.echo(typer.style("\n🛑 Stopping satellite...", fg=typer.colors.YELLOW))
except Exception as e:
typer.echo(typer.style(f"❌ Satellite error: {e}", fg=typer.colors.RED))
finally:
self.is_running = False

76
src/wyoming_client/vad.py Normal file
View File

@@ -0,0 +1,76 @@
# VAD = Voice Activity Detection
from abc import ABC, abstractmethod
import numpy as np
from typing import Optional
class VADDetector(ABC):
"""Abstract base class for Voice Activity Detection."""
@abstractmethod
def is_speech(self, audio_chunk: np.ndarray) -> bool:
"""Return True if speech is detected in the audio chunk."""
pass
@abstractmethod
def reset(self) -> None:
"""Reset internal state."""
pass
class AmplitudeVAD(VADDetector):
"""Simple VAD based on RMS amplitude threshold."""
def __init__(self,
threshold: float = 0.01,
min_speech_duration: float = 0.1,
min_silence_duration: float = 0.5,
sample_rate: int = 16000):
"""
Initialize amplitude-based VAD.
Args:
threshold: RMS threshold for speech detection
min_speech_duration: Minimum duration to consider as speech (seconds)
min_silence_duration: Minimum silence to end speech (seconds)
sample_rate: Audio sample rate
"""
self.threshold = threshold
self.min_speech_frames = int(min_speech_duration * sample_rate)
self.min_silence_frames = int(min_silence_duration * sample_rate)
self.sample_rate = sample_rate
self.speech_frames = 0
self.silence_frames = 0
self.is_speaking = False
def is_speech(self, audio_chunk: np.ndarray) -> bool:
"""Detect speech in audio chunk based on RMS amplitude."""
# Calculate RMS
rms = np.sqrt(np.mean(audio_chunk.astype(np.float32) ** 2))
if rms > self.threshold:
# Potential speech detected
self.speech_frames += len(audio_chunk)
self.silence_frames = 0
# Start speaking if we have enough speech frames
if not self.is_speaking and self.speech_frames >= self.min_speech_frames:
self.is_speaking = True
else:
# Silence detected
self.silence_frames += len(audio_chunk)
self.speech_frames = max(0, self.speech_frames - len(audio_chunk) // 2) # Decay
# Stop speaking if we have enough silence
if self.is_speaking and self.silence_frames >= self.min_silence_frames:
self.is_speaking = False
return self.is_speaking
def reset(self) -> None:
"""Reset VAD state."""
self.speech_frames = 0
self.silence_frames = 0
self.is_speaking = False

View File

@@ -29,6 +29,10 @@ def test_i_can_get():
assert dynamic.get("a") == 10 assert dynamic.get("a") == 10
assert dynamic.get("b.c") == "value" assert dynamic.get("b.c") == "value"
assert dynamic.get("unknown") is None assert dynamic.get("unknown") is None
assert dynamic.get("unknown", "default") == "default"
assert dynamic.get("b.x") is None
assert dynamic.get("b.x", "default") == "default"
assert dynamic.get("b.x", None) is None
def test_i_can_get_insensitive(): def test_i_can_get_insensitive():

256
tests/test_select.py Normal file
View File

@@ -0,0 +1,256 @@
import pytest
from core.utils import SelectConf, select
from core.Expando import Expando
def test_i_can_select_basic_attributes():
"""Test basic selection of existing attributes."""
items = [
Expando({"name": "alice", "age": 25, "city": "paris"}),
Expando({"name": "bob", "age": 30, "city": "london"})
]
settings = [
SelectConf(attr="name"),
SelectConf(attr="age")
]
result = select(items, settings)
assert len(result) == 2
assert result[0].name == "alice"
assert result[0].age == 25
assert not hasattr(result[0], "city") # city not selected
assert result[1].name == "bob"
assert result[1].age == 30
def test_i_can_select_with_renamed_attributes():
"""Test renaming attributes using SelectConf.to."""
items = [
Expando({"name": "alice", "age": 25}),
Expando({"name": "bob", "age": 30})
]
settings = [
SelectConf(attr="name", to="full_name"),
SelectConf(attr="age", to="years")
]
result = select(items, settings)
assert len(result) == 2
assert result[0].full_name == "alice"
assert result[0].years == 25
assert not hasattr(result[0], "name") # original name not present
assert not hasattr(result[0], "age") # original name not present
assert result[1].full_name == "bob"
assert result[1].years == 30
def test_i_can_select_with_formatter():
"""Test applying formatters to selected attributes."""
items = [
Expando({"name": "alice", "age": 25}),
Expando({"name": "bob", "age": 30})
]
settings = [
SelectConf(attr="name", formatter=lambda item: item.name.upper()),
SelectConf(attr="age", formatter=lambda item: item.age * 2)
]
result = select(items, settings)
assert len(result) == 2
assert result[0].name == "ALICE"
assert result[0].age == 50
assert result[1].name == "BOB"
assert result[1].age == 60
def test_i_can_select_with_formatter_and_rename():
"""Test combining formatter and renaming."""
items = [
Expando({"name": "alice", "age": 25}),
Expando({"name": "bob", "age": 30})
]
settings = [
SelectConf(attr="name", to="upper_name", formatter=lambda item: item.name.upper()),
SelectConf(attr="age", to="double_age", formatter=lambda item: item.age * 2)
]
result = select(items, settings)
assert len(result) == 2
assert result[0].upper_name == "ALICE"
assert result[0].double_age == 50
assert not hasattr(result[0], "name")
assert not hasattr(result[0], "age")
assert result[1].upper_name == "BOB"
assert result[1].double_age == 60
def test_i_can_handle_empty_items_list():
"""Test that empty items list returns empty list."""
items = []
settings = [SelectConf(attr="name")]
result = select(items, settings)
assert result == []
def test_i_can_handle_empty_settings_list():
"""Test that empty settings list returns items with empty properties."""
items = [
Expando({"name": "alice", "age": 25}),
Expando({"name": "bob", "age": 30})
]
settings = []
result = select(items, settings)
assert len(result) == 2
# Each result should be an Expando with empty properties
assert result[0]._props == {}
assert result[1]._props == {}
def test_i_can_handle_missing_attributes():
"""Test that missing attributes return None (normal behavior)."""
items = [
Expando({"name": "alice"}), # no age attribute
Expando({"name": "bob", "age": 30})
]
settings = [
SelectConf(attr="name"),
SelectConf(attr="age") # missing in first item
]
result = select(items, settings)
assert len(result) == 2
assert result[0].name == "alice"
assert result[0].age is None # missing attribute returns None
assert result[1].name == "bob"
assert result[1].age == 30
def test_i_can_handle_whitespace_in_attr_names():
"""Test that whitespace in attribute names is stripped."""
items = [
Expando({"name": "alice", "age": 25})
]
settings = [
SelectConf(attr=" name "), # whitespace around attr
SelectConf(attr="\tage\n") # tabs and newlines
]
result = select(items, settings)
assert len(result) == 1
assert result[0].name == "alice"
assert result[0].age == 25
def test_i_can_select_multiple_attributes():
"""Test selecting multiple attributes from multiple items."""
items = [
Expando({"name": "alice", "age": 25, "city": "paris", "country": "france"}),
Expando({"name": "bob", "age": 30, "city": "london", "country": "uk"}),
Expando({"name": "charlie", "age": 35, "city": "madrid", "country": "spain"})
]
settings = [
SelectConf(attr="name"),
SelectConf(attr="city", to="location"),
SelectConf(attr="country", formatter=lambda item: item.country.upper())
]
result = select(items, settings)
assert len(result) == 3
assert result[0].name == "alice"
assert result[0].location == "paris"
assert result[0].country == "FRANCE"
assert result[1].name == "bob"
assert result[1].location == "london"
assert result[1].country == "UK"
assert result[2].name == "charlie"
assert result[2].location == "madrid"
assert result[2].country == "SPAIN"
def test_i_can_handle_formatter_with_whole_item():
"""Test that formatter receives the whole item, not just the attribute value."""
items = [
Expando({"first_name": "alice", "last_name": "smith"}),
Expando({"first_name": "bob", "last_name": "jones"})
]
settings = [
SelectConf(
attr="first_name",
to="full_name",
formatter=lambda item: f"{item.get('first_name')} {item.get('last_name')}"
)
]
result = select(items, settings)
assert len(result) == 2
assert result[0].full_name == "alice smith"
assert result[1].full_name == "bob jones"
def test_i_cannot_select_when_formatter_raises_exception():
"""Test that formatter exceptions are propagated."""
items = [
Expando({"name": "alice", "value": None})
]
settings = [
SelectConf(
attr="value",
formatter=lambda item: item.get("value").upper() # will fail on None
)
]
with pytest.raises(AttributeError):
select(items, settings)
def test_i_can_handle_none_values_in_formatter():
"""Test formatter handling None values gracefully when designed to do so."""
items = [
Expando({"name": "alice", "value": None}),
Expando({"name": "bob", "value": "test"})
]
settings = [
SelectConf(
attr="value",
formatter=lambda item: item.get("value").upper() if item.get("value") else "NO_VALUE"
)
]
result = select(items, settings)
assert len(result) == 2
assert result[0].value == "NO_VALUE"
assert result[1].value == "TEST"
def test_i_can_select_nested_attributes():
"""Test selecting nested attributes using Expando.get() path notation."""
items = [
Expando({"user": {"profile": {"name": "alice"}}, "age": 25}),
Expando({"user": {"profile": {"name": "bob"}}, "age": 30})
]
settings = [
SelectConf(attr="user.profile.name", to="name"),
SelectConf(attr="age")
]
result = select(items, settings)
assert len(result) == 2
assert result[0].name == "alice"
assert result[0].age == 25
assert result[1].name == "bob"
assert result[1].age == 30