"""Build firmware audio assets (TTS speech + local samples) for the nRF target.

Reads per-category YAML files (voice_*.yaml, sfx.yaml) plus global.yaml,
synthesises speech with the Kokoro TTS pipeline or pulls local sample files,
runs everything through per-asset ffmpeg filter chains, and emits:

* headerless s16le mono PCM blobs (for the LittleFS image) in the cache/output
  directories, content-addressed by an MD5 parameter hash, and
* audible WAV previews for humans.

Heavy third-party modules (kokoro, soundfile, yaml) are imported lazily inside
the methods that need them, so importing this module — e.g. to unit-test the
hashing helpers — does not require the full TTS stack.
"""
import os
import hashlib
import shutil
import subprocess
from pathlib import Path


class AudioBuilder:
    """Orchestrates TTS generation, ffmpeg filtering, caching and preview export."""

    def __init__(self):
        # 1. Built-in defaults; global.yaml (if present) overrides them.
        self.config = {
            'paths': {
                'cache': Path("./cache"),
                'output': Path("./lfs_source"),
                'preview': Path("./previews")
            },
            'settings': {'sample_rate': 16000}
        }

        # 2. Load global YAML (overrides the defaults above).
        self.load_global_config()

        # Make sure every configured directory exists.
        for p in self.config['paths'].values():
            Path(p).mkdir(parents=True, exist_ok=True)

        # Imported lazily: pulling in kokoro (and torch underneath) is
        # expensive and only needed when we actually synthesise speech.
        from kokoro import KPipeline
        self.pipeline = KPipeline(lang_code='a')

    def load_global_config(self):
        """Merge global.yaml over the built-in defaults (paths and settings)."""
        import yaml  # lazy: keep module importable without PyYAML installed

        if Path("global.yaml").exists():
            with open("global.yaml", "r") as f:
                # FIX: safe_load returns None for an empty file — guard with {}.
                g_cfg = yaml.safe_load(f) or {}

            # Override paths, converting to Path objects immediately.
            if 'paths' in g_cfg:
                for key, value in g_cfg['paths'].items():
                    self.config['paths'][key] = Path(value)

            # Override settings (e.g. sample rate).
            if 'settings' in g_cfg:
                self.config['settings'].update(g_cfg['settings'])

        # FIX: must run even when global.yaml is absent, otherwise sample
        # assets crash with KeyError('samples') on a fresh checkout.
        if 'samples' not in self.config['paths']:
            self.config['paths']['samples'] = Path("./samples")

    def get_hash(self, text, voice, filters, sample_rate):
        """Return an MD5 hex digest over every parameter that affects the audio.

        MD5 is used purely as a cache key (content addressing), not for
        security, so its weaknesses are irrelevant here.
        """
        data = f"{text}{voice}{''.join(filters)}{sample_rate}"
        return hashlib.md5(data.encode()).hexdigest()

    def run_ffmpeg(self, cmd):
        """Run ffmpeg quietly; on failure raise CalledProcessError with stderr.

        FIX: the original discarded stderr entirely, making failed filter
        graphs impossible to diagnose. The exception type callers may catch
        (subprocess.CalledProcessError) is unchanged.
        """
        result = subprocess.run(cmd, stdout=subprocess.DEVNULL,
                                stderr=subprocess.PIPE)
        if result.returncode != 0:
            raise subprocess.CalledProcessError(result.returncode, cmd,
                                                stderr=result.stderr)

    def process_asset(self, asset, default_voice, global_filters):
        """Render one asset dict into the cache and preview directories.

        asset          -- dict with 'id' plus either 'text' (TTS) or
                          type='sample' + 'source' (local file); optional
                          'voice' and 'filters' overrides.
        default_voice  -- fallback TTS voice when the asset has none.
        global_filters -- fallback ffmpeg filter list.

        Returns the Path of the cached raw s16le file.
        """
        # 1. Prepare parameters.
        asset_id = asset['id']
        sr_str = str(self.config['settings']['sample_rate'])
        filters = asset.get('filters', global_filters) or []

        # Branch: local sample file vs. AI speech.
        is_sample = asset.get('type') == 'sample'

        if is_sample:
            # Resolve against the samples path from global.yaml.
            source_file = self.config['paths']['samples'] / asset['source']
            # Hash over file name + filters + sample rate.
            h = self.get_hash(asset['source'], "LOCAL_SAMPLE", filters, sr_str)
        else:
            # Classic AI speech.
            voice = asset.get('voice', default_voice)
            text = asset['text']
            h = self.get_hash(text, voice, filters, sr_str)

        # Derive cache and preview paths.
        cache_file = self.config['paths']['cache'] / h
        preview_file = self.config['paths']['preview'] / f"{asset_id}.wav"

        if not cache_file.exists():
            print(f" [GEN] {asset_id}...")

            # 2. Prepare the ffmpeg input.
            temp_wav = None
            if is_sample:
                current_input = str(source_file)
            else:
                import soundfile as sf  # lazy: only needed for TTS output
                temp_wav = self.config['paths']['cache'] / f"temp_{h}.wav"
                generator = self.pipeline(text, voice=voice, speed=1.0)
                for _, _, audio in generator:
                    # Kokoro emits 24 kHz audio; ffmpeg resamples below.
                    # NOTE(review): only the first generated chunk is used —
                    # long texts would be truncated; confirm intent.
                    sf.write(temp_wav, audio, 24000)
                    break
                current_input = str(temp_wav)

            # 3. Filter arguments.
            # FIX: ffmpeg rejects an empty '-af' argument — omit it instead.
            af_args = ['-af', ",".join(filters)] if filters else []

            try:
                # 4. RAW export for the nRF (headerless 16-bit mono PCM).
                self.run_ffmpeg(
                    ['ffmpeg', '-y', '-i', current_input] + af_args +
                    ['-ar', sr_str, '-ac', '1',
                     '-f', 's16le', '-acodec', 'pcm_s16le',
                     str(cache_file)])

                # 5. Audible preview WAV (same filters, native rate).
                self.run_ffmpeg(
                    ['ffmpeg', '-y', '-i', current_input] + af_args +
                    ['-ac', '1', str(preview_file)])
            finally:
                # FIX: clean up the TTS temp file even when ffmpeg fails.
                if temp_wav is not None and temp_wav.exists():
                    temp_wav.unlink()

        return cache_file

    def generate_countdown(self, config):
        """Build a single contiguous 10-second countdown ("10" down to "1").

        Each number is rendered through the normal asset pipeline (the
        per-number filters trim/pad to exactly 1 s), then the headerless
        s16le parts are concatenated byte-wise into one cache file.
        Returns the Path of that combined file.
        """
        c_id = config['id']
        voice = config['voice']
        filters = config.get('filters', [])
        sr_str = str(self.config['settings']['sample_rate'])

        # Dedicated hash for the whole countdown object.
        h = self.get_hash(f"COUNTDOWN_LOGIC_V2_{c_id}", voice, filters, sr_str)
        final_cache_file = self.config['paths']['cache'] / f"final_{h}"

        if not final_cache_file.exists():
            print(f" [GEN] Spezial-Asset: {c_id} (10 bis 1)")

            # The spoken texts for the numbers.
            numbers = [
                "TEN!", "NINE!", "EIGHT!", "SEVEN!", "SIX!",
                "FIVE!", "FOUR!", "THREE!", "TWO!!", "ONE!!!"
            ]

            parts = []
            for i, txt in enumerate(numbers):
                # FIX: process_asset expects an asset *dict*, not loose
                # positional args (the old call passed the id string where a
                # dict was required and one argument too many).
                # Internal ids avoid collisions in the preview folder.
                part_asset = {'id': f"cnt_tmp_{i}", 'text': txt}
                parts.append(self.process_asset(part_asset, voice, filters))

            # Byte-wise concatenation (s16le has no header to strip).
            with open(final_cache_file, 'wb') as outfile:
                for p_file in parts:
                    with open(p_file, 'rb') as infile:
                        outfile.write(infile.read())

            print(f" [DONE] Countdown-Kette generiert: {final_cache_file.name}")

        return final_cache_file

    def build_target(self, target_name):
        """Build every asset tagged with *target_name* into the output dir."""
        import yaml  # lazy: keep module importable without PyYAML installed

        print(f"🚀 Baue Assets für Target: {target_name.upper()}")
        out_dir = self.config['paths']['output']
        out_dir.mkdir(parents=True, exist_ok=True)

        # FIX: countdown.yaml was excluded from the glob below but never
        # processed anywhere, so the countdown asset could not be rebuilt.
        countdown_cfg = Path("countdown.yaml")
        if countdown_cfg.exists():
            with open(countdown_cfg, "r") as f:
                cd_cfg = yaml.safe_load(f) or {}
            if target_name in cd_cfg.get('targets', []):
                src = self.generate_countdown(cd_cfg)
                shutil.copy(src, out_dir / cd_cfg['id'])

        for cfg_file in Path(".").glob("*.yaml"):
            # global.yaml is configuration, countdown.yaml is handled above.
            if cfg_file.name in ["global.yaml", "countdown.yaml"]:
                continue

            with open(cfg_file, "r") as f:
                config = yaml.safe_load(f)
            if config is None:  # FIX: tolerate empty YAML files
                continue

            # The YAML may be a bare list of assets (e.g. sfx.yaml) ...
            if isinstance(config, list):
                assets = config
                voice = None
                global_filters = []
            else:
                # ... or a dict with shared voice/filters (voice_*.yaml).
                assets = config.get('assets', [])
                voice = config.get('voice')
                global_filters = config.get('filters', [])

            for asset in assets:
                if target_name in asset.get('targets', []):
                    source_cache = self.process_asset(asset, voice, global_filters)
                    if source_cache:
                        dest_file = out_dir / asset['id']
                        shutil.copy(source_cache, dest_file)


if __name__ == "__main__":
    import sys
    target = sys.argv[1] if len(sys.argv) > 1 else "vest"
    AudioBuilder().build_target(target)