""" Test Milestone 20: Groq Whisper transcription client. Note: Full transcription tests require real audio bytes. We test pre-flight filters and API reachability here. Silent/near-silent audio will return "no_speech" — that is correct behaviour. To test with real speech: record a short voice note and save as thirdeye/scripts/test_voice.ogg before running this test. """ import asyncio import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) def _make_minimal_ogg() -> bytes: """ Generate a minimal valid OGG container header (silent). Whisper will return no_speech for this — that IS the correct result. We use it to confirm the API is reachable and credentials work. """ ogg_magic = b"OggS" header = b"\x00\x02" + b"\x00" * 8 + b"\x00\x00\x00\x01" + b"\x00\x00\x00\x00" + b"\x00\x00\x00\x00" + b"\x01\x1e" vorbis_id = b"\x01vorbis" + b"\x00" * 23 return ogg_magic + header + vorbis_id async def test_config_loaded(): """Test that GROQ_API_KEY is present (needed for Whisper).""" from backend.config import GROQ_API_KEY, ENABLE_VOICE_TRANSCRIPTION print("Testing voice transcription config...") assert GROQ_API_KEY and len(GROQ_API_KEY) > 5, ( "GROQ_API_KEY is missing. Groq Whisper uses the same key as your LLM providers." ) print(f" ✅ GROQ_API_KEY present ({len(GROQ_API_KEY)} chars)") print(f" ✅ ENABLE_VOICE_TRANSCRIPTION: {ENABLE_VOICE_TRANSCRIPTION}") async def test_pre_flight_filters(): """Test that duration and size filters work before hitting the API.""" from backend.agents.voice_transcriber import transcribe_audio print("\nTesting pre-flight filters (no API calls made)...") result = await transcribe_audio(b"", filename="audio.ogg") assert not result["ok"] and result["reason"] == "empty" print(" ✅ Empty bytes -> reason='empty'") result = await transcribe_audio(b"fake", filename="audio.ogg", duration_seconds=1) assert not result["ok"] and result["reason"] == "too_short" print(" ✅ 1s audio -> reason='too_short' (min is 2s)") result = await transcribe_audio(b"fake", filename="audio.ogg", duration_seconds=9999) assert not result["ok"] and result["reason"] == "too_long" print(" ✅ 9999s audio -> reason='too_long' (max is 300s)") big_bytes = b"x" * (26 * 1024 * 1024) result = await transcribe_audio(big_bytes, filename="audio.ogg", duration_seconds=30) assert not result["ok"] and result["reason"] == "file_too_large" print(" ✅ 26MB audio -> reason='file_too_large' (Groq limit is 25MB)") async def test_api_reachable(): """ Test that Groq Whisper API is reachable and authenticates correctly. A 401 means your GROQ_API_KEY is wrong. """ from backend.agents.voice_transcriber import transcribe_audio print("\nTesting Groq Whisper API reachability...") minimal_ogg = _make_minimal_ogg() result = await transcribe_audio(minimal_ogg, filename="test.ogg", duration_seconds=5) if result["ok"]: print(f" ✅ API reachable — transcript: '{result['transcript'][:60]}'") elif result["reason"] == "no_speech": print(f" ✅ API reachable — silent audio correctly returned no_speech") elif result["reason"] == "api_error" and "401" in result.get("error", ""): raise AssertionError( f"Authentication failed — check GROQ_API_KEY in .env\nError: {result['error']}" ) else: print(f" ⚠️ API returned: reason={result['reason']}, error={result.get('error')} (non-fatal)") async def test_real_audio_file(): """ Test with a real OGG voice file if one exists at scripts/test_voice.ogg. OPTIONAL — skip if file not present. """ from backend.agents.voice_transcriber import transcribe_audio test_file = os.path.join(os.path.dirname(__file__), "test_voice.ogg") if not os.path.exists(test_file): print("\n ⏭️ Skipping real audio test — place a voice note OGG at scripts/test_voice.ogg to enable") return print(f"\nTesting with real audio file: {test_file}") with open(test_file, "rb") as f: audio_bytes = f.read() result = await transcribe_audio(audio_bytes, filename="test_voice.ogg", duration_seconds=30) assert result["ok"], f"Real audio transcription failed: {result}" assert len(result["transcript"]) > 5 print(f" ✅ Transcript ({result['word_count']} words): {result['transcript'][:120]}...") print(f" Language detected: {result['language']}") async def test_format_duration(): """Test the duration formatting helper.""" from backend.agents.voice_transcriber import format_duration print("\nTesting format_duration()...") assert format_duration(45) == "45s" assert format_duration(90) == "1m 30s" assert format_duration(0) == "0s" assert format_duration(None) == "?" print(" ✅ 45 -> '45s', 90 -> '1m 30s', None -> '?'") async def main(): print("Running Milestone 20 tests...\n") await test_config_loaded() await test_pre_flight_filters() await test_api_reachable() await test_real_audio_file() await test_format_duration() print("\n🎉 MILESTONE 20 PASSED — Groq Whisper client working") asyncio.run(main())