Added audio generation
All checks were successful
Deploy Docs / build-and-deploy (push) Successful in 12s
All checks were successful
Deploy Docs / build-and-deploy (push) Successful in 12s
This commit is contained in:
1
firmware/tools/audio/.gitattributes
vendored
Normal file
1
firmware/tools/audio/.gitattributes
vendored
Normal file
@@ -0,0 +1 @@
|
||||
samples/ filter=lfs diff=lfs merge=lfs -text
|
||||
2
firmware/tools/audio/.gitignore
vendored
Normal file
2
firmware/tools/audio/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
cache
|
||||
previews
|
||||
198
firmware/tools/audio/build_audio.py
Normal file
198
firmware/tools/audio/build_audio.py
Normal file
@@ -0,0 +1,198 @@
|
||||
import os
|
||||
import yaml
|
||||
import hashlib
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from kokoro import KPipeline
|
||||
import soundfile as sf
|
||||
|
||||
class AudioBuilder:
    """Build audio assets described in YAML files into raw PCM files.

    Each asset is either synthesized with the TTS pipeline or taken from a
    local sample file, run through an ffmpeg filter chain, and exported as
    headerless signed 16-bit little-endian mono PCM. Results are cached under
    a hash of every parameter that influences the audio, and a WAV preview is
    written next to the cache for listening on a PC.
    """

    # Sample rate used when writing the TTS output to the temporary WAV file.
    TTS_SAMPLE_RATE = 24000

    def __init__(self):
        """Set up default configuration, apply global.yaml, create folders."""
        # 1. Defaults
        self.config = {
            'paths': {
                'cache': Path("./cache"),
                'output': Path("./lfs_source"),
                'preview': Path("./previews"),
            },
            'settings': {'sample_rate': 16000},
        }

        # 2. Load the global YAML (overrides the defaults)
        self.load_global_config()

        # Make sure every configured directory exists
        for p in self.config['paths'].values():
            Path(p).mkdir(parents=True, exist_ok=True)

        self.pipeline = KPipeline(lang_code='a')

    def load_global_config(self):
        """Merge ``global.yaml`` from the working directory into ``self.config``.

        Entries under ``paths`` are converted to ``Path`` objects; entries
        under ``settings`` update the default settings. A missing or empty
        file leaves the defaults untouched.

        Raises:
            ValueError: if ``global.yaml`` does not contain a mapping.
        """
        global_file = Path("global.yaml")
        if global_file.exists():
            with open(global_file, "r", encoding="utf-8") as f:
                # safe_load returns None for an empty document
                g_cfg = yaml.safe_load(f) or {}

            if not isinstance(g_cfg, dict):
                raise ValueError("global.yaml must contain a mapping at top level")

            # Override paths and convert them to Path objects right away
            for key, value in (g_cfg.get('paths') or {}).items():
                self.config['paths'][key] = Path(value)

            # Override settings (such as the sample rate)
            self.config['settings'].update(g_cfg.get('settings') or {})

        # Ensure the default sample path exists if global.yaml did not set one
        self.config['paths'].setdefault('samples', Path("./samples"))

    def get_hash(self, text, voice, filters, sample_rate):
        """Return an MD5 hex digest over all parameters that affect the audio.

        The fields are serialized as a tuple ``repr`` rather than being
        concatenated directly, so that different parameter combinations such
        as ``("a", "bc")`` and ``("ab", "c")`` cannot produce the same key.

        Args:
            text: text to speak, or an identifying string for other assets.
            voice: voice name (or a marker string for non-TTS assets).
            filters: iterable of ffmpeg filter strings; ``None`` counts as empty.
            sample_rate: output sample rate; ints and strings hash the same.

        Returns:
            A 32-character hexadecimal string.
        """
        key = (
            str(text),
            str(voice),
            [str(item) for item in (filters or [])],
            str(sample_rate),
        )
        return hashlib.md5(repr(key).encode("utf-8")).hexdigest()

    def run_ffmpeg(self, cmd):
        """Run an ffmpeg command line given as an argument list.

        Standard output is discarded. Standard error is captured and printed
        when the command fails so the cause is visible.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        try:
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            details = err.stderr.decode(errors="replace") if err.stderr else ""
            print(f" [ERR] command failed ({err.returncode}): {' '.join(map(str, cmd))}")
            if details:
                print(details)
            raise

    def _synthesize_speech(self, text, voice, wav_path):
        """Write the TTS output for ``text`` to ``wav_path`` as one WAV file.

        All chunks yielded by the pipeline are appended in order, so longer
        texts that the pipeline splits into several chunks are not truncated.

        Raises:
            ValueError: if the pipeline yields no audio at all.
        """
        wrote_audio = False
        with sf.SoundFile(
            str(wav_path),
            mode='w',
            samplerate=self.TTS_SAMPLE_RATE,
            channels=1,
        ) as wav:
            for _, _, audio in self.pipeline(text, voice=voice, speed=1.0):
                if audio is None:
                    continue
                wav.write(audio)
                wrote_audio = True

        if not wrote_audio:
            raise ValueError(f"TTS pipeline produced no audio for text {text!r}")

    def process_asset(self, asset, default_voice, global_filters):
        """Generate (or fetch from cache) the raw PCM file for one asset.

        Args:
            asset: mapping with at least ``id``. Sample assets carry
                ``type: "sample"`` and ``source``; speech assets carry
                ``text`` and optionally ``voice``. ``filters`` overrides
                ``global_filters`` when present.
            default_voice: voice used when the asset does not name one.
            global_filters: filter list used when the asset has none.

        Returns:
            Path of the cached raw PCM file.

        Raises:
            ValueError: if a speech asset has no voice.
            FileNotFoundError: if a sample asset's source file is missing.
            subprocess.CalledProcessError: if ffmpeg fails.
        """
        # 1. Prepare parameters
        asset_id = asset['id']
        sr_str = str(self.config['settings']['sample_rate'])
        filters = asset.get('filters', global_filters) or []

        # Switch: is it a local sample or synthesized speech?
        is_sample = asset.get('type') == 'sample'

        if is_sample:
            source_file = self.config['paths']['samples'] / asset['source']
            # Include the file content in the key so that replacing the sample
            # under the same name invalidates the cached result.
            content_digest = hashlib.md5(source_file.read_bytes()).hexdigest()
            h = self.get_hash(
                f"{asset['source']}:{content_digest}",
                "LOCAL_SAMPLE",
                filters,
                sr_str,
            )
        else:
            voice = asset.get('voice', default_voice)
            text = asset['text']
            if not voice:
                raise ValueError(f"asset {asset_id!r} has no voice configured")
            h = self.get_hash(text, voice, filters, sr_str)

        cache_file = self.config['paths']['cache'] / h
        preview_file = self.config['paths']['preview'] / f"{asset_id}.wav"

        if cache_file.exists():
            return cache_file

        print(f" [GEN] {asset_id}...")

        temp_wav = None
        # Render into a side file first; it only becomes the cache entry once
        # every step succeeded, so a failed run never leaves a truncated file
        # that would later be mistaken for a valid cache hit.
        partial_file = cache_file.with_name(f"{h}.part")
        try:
            # 2. Prepare the ffmpeg input
            if is_sample:
                current_input = str(source_file)
            else:
                temp_wav = self.config['paths']['cache'] / f"temp_{h}.wav"
                self._synthesize_speech(text, voice, temp_wav)
                current_input = str(temp_wav)

            # 3. Filter arguments; omitted entirely when there are no filters
            filter_args = ['-af', ",".join(filters)] if filters else []

            # 4. Raw export (headerless s16le, mono, configured sample rate)
            self.run_ffmpeg([
                'ffmpeg', '-y', '-i', current_input,
                *filter_args,
                '-ar', sr_str, '-ac', '1',
                '-f', 's16le', '-acodec', 'pcm_s16le',
                str(partial_file),
            ])

            # 5. Preview WAV
            self.run_ffmpeg([
                'ffmpeg', '-y', '-i', current_input,
                *filter_args,
                '-ac', '1',
                str(preview_file),
            ])

            partial_file.replace(cache_file)
        finally:
            # Clean up the TTS temp file and any unfinished render
            for leftover in (temp_wav, partial_file):
                if leftover is not None and leftover.exists():
                    leftover.unlink()

        return cache_file

    def generate_countdown(self, config):
        """Build one continuous countdown file from ten spoken numbers.

        Every number is rendered through ``process_asset`` (and therefore
        cached individually); the resulting headerless PCM files are then
        concatenated byte-wise.

        Args:
            config: mapping with ``id``, ``voice`` and optional ``filters``.

        Returns:
            Path of the concatenated raw PCM file in the cache directory.
        """
        c_id = config['id']
        voice = config['voice']
        filters = config.get('filters') or []
        sr_str = str(self.config['settings']['sample_rate'])

        # Separate hash for the countdown object as a whole
        h = self.get_hash(f"COUNTDOWN_LOGIC_V2_{c_id}", voice, filters, sr_str)
        final_cache_file = self.config['paths']['cache'] / f"final_{h}"

        if final_cache_file.exists():
            return final_cache_file

        print(f" [GEN] Spezial-Asset: {c_id} (10 bis 1)")

        # The texts for the numbers
        numbers = (
            "TEN!", "NINE!", "EIGHT!", "SEVEN!", "SIX!",
            "FIVE!", "FOUR!", "THREE!", "TWO!!", "ONE!!!",
        )

        # Run each number through the standard pipeline as its own asset.
        # The internal id keeps its preview file apart from regular assets.
        parts = [
            self.process_asset(
                {
                    'id': f"cnt_tmp_{i}",
                    'text': txt,
                    'voice': voice,
                    'filters': filters,
                },
                voice,
                filters,
            )
            for i, txt in enumerate(numbers)
        ]

        # Binary concatenation (s16le has no header). Written to a side file
        # first so an interrupted run does not leave a short cache entry.
        partial_file = final_cache_file.with_name(f"{final_cache_file.name}.part")
        try:
            with open(partial_file, 'wb') as outfile:
                for p_file in parts:
                    with open(p_file, 'rb') as infile:
                        shutil.copyfileobj(infile, outfile)
            partial_file.replace(final_cache_file)
        finally:
            if partial_file.exists():
                partial_file.unlink()

        print(f" [DONE] Countdown-Kette generiert: {final_cache_file.name}")

        return final_cache_file

    def build_target(self, target_name):
        """Build every asset whose ``targets`` list contains ``target_name``.

        All ``*.yaml`` files in the working directory except ``global.yaml``
        and ``countdown.yaml`` are read. A file may be a plain list of assets
        or a mapping with ``voice``, ``filters`` and ``assets``. Each matching
        asset is copied to the output directory under its ``id``.
        """
        print(f"🚀 Baue Assets für Target: {target_name.upper()}")
        out_dir = self.config['paths']['output']
        out_dir.mkdir(parents=True, exist_ok=True)

        # Sorted so that the processing order is the same on every run.
        for cfg_file in sorted(Path(".").glob("*.yaml")):
            # NOTE(review): countdown.yaml is excluded here and
            # generate_countdown() is not called from this method - confirm
            # whether the countdown is meant to be built elsewhere.
            if cfg_file.name in ("global.yaml", "countdown.yaml"):
                continue

            with open(cfg_file, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f)

            if not config:
                # Empty file: nothing to build
                continue

            if isinstance(config, list):
                # The YAML is a bare list of assets
                assets = config
                voice = None
                global_filters = []
            else:
                # The YAML is a mapping (voice: ... assets: ...)
                assets = config.get('assets') or []
                voice = config.get('voice')
                global_filters = config.get('filters') or []

            for asset in assets:
                if target_name not in (asset.get('targets') or []):
                    continue
                source_cache = self.process_asset(asset, voice, global_filters)
                if source_cache:
                    shutil.copy(source_cache, out_dir / asset['id'])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # The first command-line argument names the target to build;
    # without one the script builds for "vest".
    if len(sys.argv) > 1:
        target = sys.argv[1]
    else:
        target = "vest"

    builder = AudioBuilder()
    builder.build_target(target)
|
||||
14
firmware/tools/audio/countdown.yaml
Normal file
14
firmware/tools/audio/countdown.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
id: "countdown"
|
||||
voice: "am_michael"
|
||||
targets: ["vest", "base"]
|
||||
|
||||
# Die Filter hier werden auf jede einzelne Zahl angewendet
|
||||
filters:
|
||||
- "lowshelf=f=100:g=20:enable='lt(t,0.1)'"
|
||||
- "asetrate=24000*0.85"
|
||||
- "atempo=1.17"
|
||||
- "acontrast=80"
|
||||
- "aecho=0.8:0.88:60:0.4"
|
||||
- "atrim=end=1"
|
||||
- "apad=whole_dur=1"
|
||||
- "loudnorm=I=-12:TP=-1.0"
|
||||
7
firmware/tools/audio/global.yaml
Normal file
7
firmware/tools/audio/global.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
paths:
|
||||
cache: "./cache"
|
||||
output: "../littlefs_generator/source_folder/a"
|
||||
preview: "./previews"
|
||||
samples: "./samples"
|
||||
settings:
|
||||
sample_rate: 16000
|
||||
BIN
firmware/tools/audio/lfs_source/countdown
Normal file
BIN
firmware/tools/audio/lfs_source/countdown
Normal file
Binary file not shown.
BIN
firmware/tools/audio/lfs_source/dead
Normal file
BIN
firmware/tools/audio/lfs_source/dead
Normal file
Binary file not shown.
BIN
firmware/tools/audio/lfs_source/g1
Normal file
BIN
firmware/tools/audio/lfs_source/g1
Normal file
Binary file not shown.
BIN
firmware/tools/audio/lfs_source/game_start
Normal file
BIN
firmware/tools/audio/lfs_source/game_start
Normal file
Binary file not shown.
BIN
firmware/tools/audio/lfs_source/s1
Normal file
BIN
firmware/tools/audio/lfs_source/s1
Normal file
Binary file not shown.
5
firmware/tools/audio/requirements.txt
Normal file
5
firmware/tools/audio/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
kokoro>=0.1.0 # Die TTS-Pipeline
|
||||
soundfile # Zum Schreiben der WAV-Dateien (sf.write)
|
||||
numpy # Basis für Audio-Daten-Arrays
|
||||
torch # Backend für Kokoro (KI-Modell)
|
||||
PyYAML # Für deine voice_*.yaml Konfigurationsdateien
|
||||
BIN
firmware/tools/audio/samples/horn.ogg
Normal file
BIN
firmware/tools/audio/samples/horn.ogg
Normal file
Binary file not shown.
11
firmware/tools/audio/sfx.yaml
Normal file
11
firmware/tools/audio/sfx.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
- id: "game_start"
|
||||
type: "sample" # Neu: Unterscheidung zwischen TTS und Datei
|
||||
source: "horn.ogg" # Die Datei in deinem samples/ Ordner
|
||||
targets: ["vest", "base"]
|
||||
filters:
|
||||
- "atrim=start=0.15"
|
||||
- "highpass=f=100"
|
||||
- "lowpass=f=6000"
|
||||
- "acompressor=threshold=-8dB:ratio=20:attack=1:release=30" # Maximale "Druckluft"
|
||||
- "amix=inputs=1:weights=1.5" # Sättigung
|
||||
- "loudnorm=I=-12:TP=-1.0"
|
||||
17
firmware/tools/audio/voice_game.yaml
Normal file
17
firmware/tools/audio/voice_game.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
voice: "am_michael"
|
||||
|
||||
filters:
|
||||
- "asetrate=24000*0.85"
|
||||
- "atempo=1.17"
|
||||
- "acompressor=threshold=-20dB:ratio=4:attack=5:release=50"
|
||||
- "highpass=f=150"
|
||||
- "lowpass=f=4000"
|
||||
- "loudnorm=I=-14:TP=-1.5"
|
||||
|
||||
assets:
|
||||
- id: "g1"
|
||||
text: "Welcome to the Arena!"
|
||||
targets: ["vest", "weapon"]
|
||||
- id: "dead"
|
||||
text: "YOU ARE DEAD!"
|
||||
targets: ["vest"]
|
||||
12
firmware/tools/audio/voice_system.yaml
Normal file
12
firmware/tools/audio/voice_system.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
voice: "af_bella"
|
||||
|
||||
filters:
|
||||
- "highpass=f=200"
|
||||
- "lowpass=f=4500"
|
||||
- "compand=0.3|0.3:1|1:-90/-60|-60/-40|-40/-30|-20/-20:6:0:-90:0.2"
|
||||
- "loudnorm=I=-14:TP=-1.5"
|
||||
|
||||
assets:
|
||||
- id: "s1"
|
||||
text: "Network parameters deployed. Connecting to the game leader..."
|
||||
targets: ["vest", "weapon"]
|
||||
Reference in New Issue
Block a user