Added audio generation
All checks were successful
Deploy Docs / build-and-deploy (push) Successful in 12s

This commit is contained in:
2026-02-11 20:58:35 +01:00
parent 149a142c22
commit 11ad746f04
15 changed files with 267 additions and 0 deletions

1
firmware/tools/audio/.gitattributes vendored Normal file
View File

@@ -0,0 +1 @@
# Route everything below samples/ through Git LFS. In an attributes file a
# trailing-slash directory pattern ("samples/") does not match the files
# inside the directory, so the recursive "samples/**" form is required.
samples/** filter=lfs diff=lfs merge=lfs -text

2
firmware/tools/audio/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# Directories the audio build script creates and fills by default
# (rendered PCM cache and preview WAVs); both can be regenerated.
cache
previews

View File

@@ -0,0 +1,198 @@
import os
import yaml
import hashlib
import shutil
import subprocess
from pathlib import Path
from kokoro import KPipeline
import soundfile as sf
class AudioBuilder:
    """Render TTS phrases and local samples into raw PCM assets via ffmpeg.

    Every asset is rendered once into a cache directory, keyed by a hash of
    the parameters that influence the audio, and then copied into the output
    directory under its asset id. A WAV preview is written next to it so the
    result can be auditioned without the target hardware.
    """

    # Sample rate used when writing the TTS pipeline output to the temp WAV.
    TTS_SAMPLE_RATE = 24000

    # Spoken words of the countdown, in playback order.
    COUNTDOWN_WORDS = (
        "TEN!", "NINE!", "EIGHT!", "SEVEN!", "SIX!",
        "FIVE!", "FOUR!", "THREE!", "TWO!!", "ONE!!!",
    )

    def __init__(self):
        """Set defaults, apply ``global.yaml``, create directories, load TTS."""
        # 1. Built-in defaults.
        self.config = {
            'paths': {
                'cache': Path("./cache"),
                'output': Path("./lfs_source"),
                'preview': Path("./previews"),
            },
            'settings': {'sample_rate': 16000},
        }
        # 2. Values from global.yaml override the defaults.
        self.load_global_config()
        # Make sure every configured directory exists.
        for directory in self.config['paths'].values():
            Path(directory).mkdir(parents=True, exist_ok=True)
        self.pipeline = KPipeline(lang_code='a')

    def load_global_config(self):
        """Merge ``global.yaml`` (if present) into ``self.config``.

        ``paths`` entries are converted to ``Path`` objects; ``settings``
        entries are merged into the default settings. A ``samples`` path is
        always present afterwards (default ``./samples``).
        """
        global_file = Path("global.yaml")
        if global_file.exists():
            with open(global_file, "r", encoding="utf-8") as f:
                # safe_load returns None for an empty file.
                g_cfg = yaml.safe_load(f) or {}
            if not isinstance(g_cfg, dict):
                print(" [WARN] global.yaml is not a mapping, ignoring it")
                g_cfg = {}
            for key, value in (g_cfg.get('paths') or {}).items():
                self.config['paths'][key] = Path(value)
            self.config['settings'].update(g_cfg.get('settings') or {})
        # Fall back to the default sample folder when global.yaml has none.
        self.config['paths'].setdefault('samples', Path("./samples"))

    def get_hash(self, text, voice, filters, sample_rate):
        """Return an MD5 hex digest over all parameters that shape the audio.

        The fields are serialised as a tuple ``repr`` so that field
        boundaries are part of the key: ``("ab", "c")`` and ``("a", "bc")``
        no longer collapse into the same cache entry, and neither do the
        filter lists ``["a", "b"]`` and ``["ab"]``.
        """
        key = repr((
            str(text),
            str(voice),
            tuple(str(item) for item in filters),
            str(sample_rate),
        ))
        return hashlib.md5(key.encode("utf-8")).hexdigest()

    def run_ffmpeg(self, cmd):
        """Run an ffmpeg command line, raising ``CalledProcessError`` on failure.

        ffmpeg's stderr is captured and its tail is printed when the command
        fails, so a broken filter chain or missing input can be diagnosed.
        """
        try:
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            detail = (err.stderr or b"").decode(errors="replace").strip()
            if detail:
                tail = "\n".join(detail.splitlines()[-15:])
                print(f" [ERR] ffmpeg failed:\n{tail}")
            raise

    def _synthesize(self, text, voice, wav_path):
        """Run the TTS pipeline and write all generated chunks to ``wav_path``."""
        chunk_count = 0
        with sf.SoundFile(
            str(wav_path),
            mode='w',
            samplerate=self.TTS_SAMPLE_RATE,
            channels=1,
        ) as wav:
            # The pipeline may yield several chunks for one text; append
            # each of them instead of keeping only the first.
            for _, _, audio in self.pipeline(text, voice=voice, speed=1.0):
                wav.write(audio)
                chunk_count += 1
        if chunk_count == 0:
            raise RuntimeError(f"TTS pipeline produced no audio for {text!r}")

    def process_asset(self, asset, default_voice, global_filters):
        """Render one asset into the cache and return the cache file path.

        Args:
            asset: Mapping with ``id`` plus either ``text`` (TTS, optional
                ``voice``) or ``type: sample`` with ``source`` (a file name
                inside the samples directory). An optional ``filters`` list
                replaces ``global_filters``.
            default_voice: Voice used when the asset names none.
            global_filters: ffmpeg audio filters used when the asset has no
                ``filters`` key.

        Returns:
            Path of the headerless signed 16-bit little-endian mono PCM file
            in the cache directory.

        Raises:
            subprocess.CalledProcessError: if ffmpeg fails.
            RuntimeError: if the TTS pipeline yields no audio.
        """
        # 1. Collect the parameters.
        asset_id = asset['id']
        sr_str = str(self.config['settings']['sample_rate'])
        filters = asset.get('filters', global_filters)
        if filters is None:
            filters = []

        # Switch: local sample file or synthesized speech?
        is_sample = asset.get('type') == 'sample'
        if is_sample:
            source_file = self.config['paths']['samples'] / asset['source']
            # Key is file name + filters + sample rate (not the file content).
            h = self.get_hash(asset['source'], "LOCAL_SAMPLE", filters, sr_str)
        else:
            voice = asset.get('voice', default_voice)
            text = asset['text']
            h = self.get_hash(text, voice, filters, sr_str)

        cache_file = self.config['paths']['cache'] / h
        preview_file = self.config['paths']['preview'] / f"{asset_id}.wav"
        if cache_file.exists():
            return cache_file

        print(f" [GEN] {asset_id}...")
        temp_wav = None
        # Render into a side file and rename at the end, so an aborted run
        # never leaves a truncated file that later counts as a cache hit.
        partial = cache_file.with_name(f"{cache_file.name}.part")
        try:
            # 2. Prepare the ffmpeg input.
            if is_sample:
                current_input = str(source_file)
            else:
                temp_wav = self.config['paths']['cache'] / f"temp_{h}.wav"
                self._synthesize(text, voice, temp_wav)
                current_input = str(temp_wav)

            # 3. Filter chain; skip -af entirely when there are no filters.
            filter_args = ['-af', ",".join(filters)] if filters else []

            # 4. Raw export at the configured sample rate.
            self.run_ffmpeg([
                'ffmpeg', '-y', '-i', current_input,
                *filter_args,
                '-ar', sr_str, '-ac', '1',
                '-f', 's16le', '-acodec', 'pcm_s16le',
                str(partial),
            ])
            # 5. Preview WAV (mono, no forced sample rate).
            self.run_ffmpeg([
                'ffmpeg', '-y', '-i', current_input,
                *filter_args,
                '-ac', '1',
                str(preview_file),
            ])
            partial.replace(cache_file)
        finally:
            # Remove the TTS temp file and any unfinished output, even when
            # ffmpeg failed.
            for leftover in (temp_wav, partial):
                if leftover is not None and leftover.exists():
                    leftover.unlink()
        return cache_file

    def generate_countdown(self, config):
        """Build one contiguous countdown clip ("ten" down to "one").

        Each word is rendered through ``process_asset`` and the resulting raw
        PCM files are concatenated byte-wise, which works because the s16le
        format carries no header. The clip's total duration depends on the
        configured filters.

        Args:
            config: Mapping with ``id``, ``voice`` and an optional
                ``filters`` list applied to every single word.

        Returns:
            Path of the concatenated raw PCM file in the cache directory.
        """
        c_id = config['id']
        voice = config['voice']
        filters = config.get('filters') or []
        sr_str = str(self.config['settings']['sample_rate'])

        # Dedicated hash for the countdown object as a whole.
        h = self.get_hash(f"COUNTDOWN_LOGIC_V2_{c_id}", voice, filters, sr_str)
        final_cache_file = self.config['paths']['cache'] / f"final_{h}"
        if final_cache_file.exists():
            return final_cache_file

        print(f" [GEN] Spezial-Asset: {c_id} (10 bis 1)")
        # Push every word through the standard pipeline as a temporary
        # asset; the internal ids keep the preview file names apart.
        parts = [
            self.process_asset(
                {'id': f"cnt_tmp_{i}", 'text': word}, voice, filters
            )
            for i, word in enumerate(self.COUNTDOWN_WORDS)
        ]

        partial = final_cache_file.with_name(f"{final_cache_file.name}.part")
        try:
            with open(partial, 'wb') as outfile:
                for part_file in parts:
                    with open(part_file, 'rb') as infile:
                        shutil.copyfileobj(infile, outfile)
            partial.replace(final_cache_file)
        finally:
            if partial.exists():
                partial.unlink()
        print(f" [DONE] Countdown-Kette generiert: {final_cache_file.name}")
        return final_cache_file

    def build_target(self, target_name):
        """Render and copy every asset whose ``targets`` include ``target_name``.

        All ``*.yaml`` files in the working directory except ``global.yaml``
        and ``countdown.yaml`` are read. A file may be a plain list of assets
        or a mapping with ``assets`` plus file-wide ``voice``/``filters``.
        """
        # NOTE(review): countdown.yaml is skipped here and generate_countdown()
        # is not called from this method - confirm whether that is intended.
        print(f"🚀 Baue Assets für Target: {target_name.upper()}")
        out_dir = self.config['paths']['output']
        out_dir.mkdir(parents=True, exist_ok=True)

        # Sorted so the build order does not depend on directory listing order.
        for cfg_file in sorted(Path(".").glob("*.yaml")):
            if cfg_file.name in ("global.yaml", "countdown.yaml"):
                continue
            with open(cfg_file, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f)

            if isinstance(config, list):
                # Plain list of assets (e.g. "- id: ...").
                assets, voice, global_filters = config, None, []
            elif isinstance(config, dict):
                # Mapping with file-wide defaults (voice: ... assets: ...).
                assets = config.get('assets') or []
                voice = config.get('voice')
                global_filters = config.get('filters') or []
            else:
                # Empty file or a bare scalar: nothing to build.
                print(f" [SKIP] {cfg_file.name}: no assets found")
                continue

            for asset in assets:
                if target_name not in (asset.get('targets') or []):
                    continue
                source_cache = self.process_asset(asset, voice, global_filters)
                if source_cache:
                    shutil.copy(source_cache, out_dir / asset['id'])
if __name__ == "__main__":
    import sys

    # The first command-line argument selects the build target;
    # without one the "vest" target is built.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else "vest"
    AudioBuilder().build_target(target)

View File

@@ -0,0 +1,14 @@
# Countdown definition with the id / voice / filters keys that the build
# script's generate_countdown() reads.
# NOTE(review): the file name is not visible here - presumably countdown.yaml,
# which the regular asset loop of the build script skips.
id: "countdown"
voice: "am_michael"
targets: ["vest", "base"]
# The filters below are applied to every single number.
filters:
- "lowshelf=f=100:g=20:enable='lt(t,0.1)'"
- "asetrate=24000*0.85"
- "atempo=1.17"
- "acontrast=80"
- "aecho=0.8:0.88:60:0.4"
- "atrim=end=1"
- "apad=whole_dur=1"
- "loudnorm=I=-12:TP=-1.0"

View File

@@ -0,0 +1,7 @@
# Global build settings (presumably global.yaml, the file name the build
# script loads). Entries under "paths" override the script's built-in
# directories; entries under "settings" are merged into its default settings.
paths:
cache: "./cache"
output: "../littlefs_generator/source_folder/a"
preview: "./previews"
samples: "./samples"
settings:
sample_rate: 16000

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,5 @@
kokoro>=0.1.0 # The TTS pipeline
soundfile # Writes the WAV files (sf.write)
numpy # Foundation for the audio data arrays
torch # Backend for Kokoro (AI model)
PyYAML # For the voice_*.yaml configuration files

Binary file not shown.

View File

@@ -0,0 +1,11 @@
- id: "game_start"
type: "sample" # New: distinguishes a local sample file from TTS
source: "horn.ogg" # The file inside the samples/ folder
targets: ["vest", "base"]
filters:
- "atrim=start=0.15"
- "highpass=f=100"
- "lowpass=f=6000"
- "acompressor=threshold=-8dB:ratio=20:attack=1:release=30" # Maximum "compressed air" punch
- "amix=inputs=1:weights=1.5" # Saturation
- "loudnorm=I=-12:TP=-1.0"

View File

@@ -0,0 +1,17 @@
# "voice" and "filters" are file-wide defaults for the entries in "assets";
# an asset may carry its own "voice" or "filters" key to replace them.
# "targets" lists the build targets that receive the asset.
voice: "am_michael"
filters:
- "asetrate=24000*0.85"
- "atempo=1.17"
- "acompressor=threshold=-20dB:ratio=4:attack=5:release=50"
- "highpass=f=150"
- "lowpass=f=4000"
- "loudnorm=I=-14:TP=-1.5"
assets:
- id: "g1"
text: "Welcome to the Arena!"
targets: ["vest", "weapon"]
- id: "dead"
text: "YOU ARE DEAD!"
targets: ["vest"]

View File

@@ -0,0 +1,12 @@
# "voice" and "filters" are file-wide defaults for the entries in "assets";
# an asset may carry its own "voice" or "filters" key to replace them.
# "targets" lists the build targets that receive the asset.
voice: "af_bella"
filters:
- "highpass=f=200"
- "lowpass=f=4500"
- "compand=0.3|0.3:1|1:-90/-60|-60/-40|-40/-30|-20/-20:6:0:-90:0.2"
- "loudnorm=I=-14:TP=-1.5"
assets:
- id: "s1"
text: "Network parameters deployed. Connecting to the game leader..."
targets: ["vest", "weapon"]