mirror of
https://github.com/arkorty/B.Tech-Project-III.git
synced 2026-04-19 12:41:48 +00:00
init
This commit is contained in:
132
thirdeye/scripts/test_m20.py
Normal file
132
thirdeye/scripts/test_m20.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
Test Milestone 20: Groq Whisper transcription client.
|
||||
|
||||
Note: Full transcription tests require real audio bytes.
|
||||
We test pre-flight filters and API reachability here.
|
||||
Silent/near-silent audio will return "no_speech" — that is correct behaviour.
|
||||
To test with real speech: record a short voice note and save as
|
||||
thirdeye/scripts/test_voice.ogg before running this test.
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
|
||||
def _make_minimal_ogg() -> bytes:
    """
    Build a tiny, silent OGG-shaped payload for API smoke testing.

    The bytes are laid out like a single Ogg page: the "OggS" capture
    pattern, the fixed header fields, a one-entry segment table, and a
    30-byte packet that starts with the Vorbis identification marker and is
    otherwise zero-filled. The checksum field is left as zeros and there is
    no audio data, so this is a reachability probe rather than a decodable
    stream. The intended outcome is that Whisper reports no_speech, which
    confirms that the API is reachable and the credentials work.

    Returns:
        58 bytes: 4-byte magic + 24 bytes of header fields + 30-byte packet.
    """
    ogg_magic = b"OggS"
    # Field names below follow the Ogg page header layout (RFC 3533).
    header = b"".join(
        (
            b"\x00\x02",          # stream structure version, header-type flag
            b"\x00" * 8,          # granule position
            b"\x00\x00\x00\x01",  # bitstream serial number
            b"\x00\x00\x00\x00",  # page sequence number
            b"\x00\x00\x00\x00",  # checksum (not computed)
            b"\x01\x1e",          # one segment, 0x1e == 30 bytes long
        )
    )
    # 7-byte marker + 23 zero bytes == the 30 bytes announced above.
    vorbis_id = b"\x01vorbis" + b"\x00" * 23
    return ogg_magic + header + vorbis_id
|
||||
|
||||
|
||||
async def test_config_loaded():
    """
    Check that the Groq credentials needed for Whisper are configured.

    Reads GROQ_API_KEY and ENABLE_VOICE_TRANSCRIPTION from backend.config,
    fails when the key is empty or has five characters or fewer, and then
    prints the key length and the value of the feature flag.

    Raises:
        AssertionError: If GROQ_API_KEY is missing or too short.
    """
    from backend.config import GROQ_API_KEY, ENABLE_VOICE_TRANSCRIPTION

    print("Testing voice transcription config...")

    # Short-circuits on a falsy key so len() is never called on None.
    key_is_usable = bool(GROQ_API_KEY) and len(GROQ_API_KEY) > 5
    assert key_is_usable, (
        "GROQ_API_KEY is missing. Groq Whisper uses the same key as your LLM providers."
    )

    key_length = len(GROQ_API_KEY)
    print(f" ✅ GROQ_API_KEY present ({key_length} chars)")
    print(f" ✅ ENABLE_VOICE_TRANSCRIPTION: {ENABLE_VOICE_TRANSCRIPTION}")
|
||||
|
||||
|
||||
async def test_pre_flight_filters():
    """
    Check that transcribe_audio rejects unusable input with the right reason.

    Four inputs are tried in order: empty bytes, a 1-second clip, a
    9999-second clip, and a 26 MB payload. Each call must come back with a
    falsy "ok" and the matching "reason" code. These are expected to be
    rejected by the pre-flight checks, before any request is sent.

    Raises:
        AssertionError: If any input is accepted or reports a different reason.
    """
    from backend.agents.voice_transcriber import transcribe_audio

    print("\nTesting pre-flight filters (no API calls made)...")

    oversized_payload = b"x" * (26 * 1024 * 1024)

    # (audio bytes, extra keyword arguments, expected reason, success line)
    scenarios = (
        (b"", {}, "empty", " ✅ Empty bytes -> reason='empty'"),
        (
            b"fake",
            {"duration_seconds": 1},
            "too_short",
            " ✅ 1s audio -> reason='too_short' (min is 2s)",
        ),
        (
            b"fake",
            {"duration_seconds": 9999},
            "too_long",
            " ✅ 9999s audio -> reason='too_long' (max is 300s)",
        ),
        (
            oversized_payload,
            {"duration_seconds": 30},
            "file_too_large",
            " ✅ 26MB audio -> reason='file_too_large' (Groq limit is 25MB)",
        ),
    )

    for payload, extra_kwargs, expected_reason, success_line in scenarios:
        outcome = await transcribe_audio(payload, filename="audio.ogg", **extra_kwargs)
        assert not outcome["ok"] and outcome["reason"] == expected_reason
        print(success_line)
|
||||
|
||||
|
||||
async def test_api_reachable():
    """
    Test that Groq Whisper API is reachable and authenticates correctly.

    Sends the synthetic payload from _make_minimal_ogg() and interprets the
    result dict:

    - "ok" truthy: prints the first 60 characters of the transcript.
    - reason "no_speech": treated as success (the payload is silent).
    - reason "api_error" with "401" in the error text: authentication
      failure, reported as a test failure.
    - anything else: printed as a non-fatal warning.

    Raises:
        AssertionError: If the API rejects the key (401). Check GROQ_API_KEY.
    """
    from backend.agents.voice_transcriber import transcribe_audio

    print("\nTesting Groq Whisper API reachability...")
    minimal_ogg = _make_minimal_ogg()
    result = await transcribe_audio(minimal_ogg, filename="test.ogg", duration_seconds=5)

    if result["ok"]:
        print(f" ✅ API reachable — transcript: '{result['transcript'][:60]}'")
        return

    reason = result["reason"]
    if reason == "no_speech":
        print(" ✅ API reachable — silent audio correctly returned no_speech")
        return

    # dict.get's default only applies when the key is absent; an explicit
    # None (or a non-string error object) would make the "in" test raise
    # TypeError, so normalise to text before searching it.
    raw_error = result.get("error")
    error_text = "" if raw_error is None else str(raw_error)

    if reason == "api_error" and "401" in error_text:
        raise AssertionError(
            f"Authentication failed — check GROQ_API_KEY in .env\nError: {error_text}"
        )

    # Any other failure (network, quota, rejected payload) is reported but
    # deliberately does not fail the run.
    print(f" ⚠️ API returned: reason={reason}, error={raw_error} (non-fatal)")
|
||||
|
||||
|
||||
async def test_real_audio_file():
    """
    Transcribe a real voice note when one is available (optional test).

    Looks for test_voice.ogg next to this script. When the file is absent
    the test prints a skip notice and returns. Otherwise the file's bytes
    are sent to transcribe_audio with a nominal duration of 30 seconds, and
    the result must be "ok" with a transcript longer than five characters.

    Raises:
        AssertionError: If transcription fails or the transcript is too short.
    """
    from backend.agents.voice_transcriber import transcribe_audio

    script_dir = os.path.dirname(__file__)
    sample_path = os.path.join(script_dir, "test_voice.ogg")

    if os.path.exists(sample_path):
        print(f"\nTesting with real audio file: {sample_path}")
        with open(sample_path, "rb") as handle:
            payload = handle.read()

        outcome = await transcribe_audio(
            payload,
            filename="test_voice.ogg",
            duration_seconds=30,
        )
        assert outcome["ok"], f"Real audio transcription failed: {outcome}"
        assert len(outcome["transcript"]) > 5

        preview = outcome["transcript"][:120]
        print(f" ✅ Transcript ({outcome['word_count']} words): {preview}...")
        print(f" Language detected: {outcome['language']}")
    else:
        print("\n ⏭️ Skipping real audio test — place a voice note OGG at scripts/test_voice.ogg to enable")
|
||||
|
||||
|
||||
async def test_format_duration():
    """
    Check the output of the format_duration helper for a few fixed inputs.

    Covers a sub-minute value, a value over one minute, zero, and None.

    Raises:
        AssertionError: If any input is rendered differently than expected.
    """
    from backend.agents.voice_transcriber import format_duration

    print("\nTesting format_duration()...")

    # (input, expected rendering), checked in this order.
    expectations = (
        (45, "45s"),
        (90, "1m 30s"),
        (0, "0s"),
        (None, "?"),
    )
    for seconds, rendered in expectations:
        assert format_duration(seconds) == rendered

    print(" ✅ 45 -> '45s', 90 -> '1m 30s', None -> '?'")
|
||||
|
||||
|
||||
async def main():
    """
    Run every Milestone 20 check in sequence.

    The checks are awaited one at a time in a fixed order. The first one
    that raises aborts the run, so the final success banner is printed only
    when all of them completed.
    """
    print("Running Milestone 20 tests...\n")

    checks = (
        test_config_loaded,
        test_pre_flight_filters,
        test_api_reachable,
        test_real_audio_file,
        test_format_duration,
    )
    for check in checks:
        await check()

    print("\n🎉 MILESTONE 20 PASSED — Groq Whisper client working")
|
||||
|
||||
|
||||
# Run the checks only when executed as a script. The file name matches
# pytest's test_*.py discovery pattern, so without this guard merely
# importing or collecting the module would start the whole run.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user